From 1c3ea27064257d8cf7b9f36b0388b8cf2e94ab08 Mon Sep 17 00:00:00 2001 From: Leonard Kugis Date: Fri, 26 Jan 2018 01:12:49 +0100 Subject: Skeleton for OpenCL rendering, cleaned up alot, WIP and crashing --- src/render_opencl.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 src/render_opencl.c (limited to 'src/render_opencl.c') diff --git a/src/render_opencl.c b/src/render_opencl.c new file mode 100644 index 0000000..913c975 --- /dev/null +++ b/src/render_opencl.c @@ -0,0 +1,26 @@ +/* + * render_opencl.c + * + * Created on: 26.01.2018 + * Author: Superleo1810 + */ + +#include "render_opencl.h" + +// TODO: implement opencl + +void init_opencl(OpenCLConfig *config) +{ + config_opencl = config; + printf("OpenCL rendering not implemented yet\n"); +} + +void render_opencl(void) +{ + +} + +void idle_opencl(void) +{ + +} -- cgit v1.2.1 From c8a38bd8ad66eb0b35f08a4733fdee37a888b83c Mon Sep 17 00:00:00 2001 From: Leonard Kugis Date: Sat, 27 Jan 2018 02:03:31 +0100 Subject: OpenCL rendering added, not working properly --- src/render_opencl.c | 277 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 274 insertions(+), 3 deletions(-) (limited to 'src/render_opencl.c') diff --git a/src/render_opencl.c b/src/render_opencl.c index 913c975..9d650c2 100644 --- a/src/render_opencl.c +++ b/src/render_opencl.c @@ -7,20 +7,291 @@ #include "render_opencl.h" -// TODO: implement opencl - void init_opencl(OpenCLConfig *config) { + printf("cl init\n"); + x_min_s = -2.0; + x_max_s = 1.0; + y_min_s = -1.0; + y_max_s = 1.0; + x_min = x_min_s; + x_max = x_max_s; + y_min = y_min_s; + y_max = y_max_s; + config_opencl = config; - printf("OpenCL rendering not implemented yet\n"); + output = (cl_uint *) malloc((config_opencl->width) * (config_opencl->height) * sizeof(cl_uchar4)); + context = NULL; + + cl_platform_id platform_id; + cl_uint ret_num_devices; + cl_uint ret_num_platforms; + size_t device_list_size; + cl_program program; + size_t cl_src_sz; + + clGetPlatformIDs(1, &platform_id, &ret_num_platforms); + clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); + context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); + ret = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_list_size); + devices = (cl_device_id *)malloc(device_list_size); + num_devices = (cl_uint)(device_list_size/sizeof(cl_device_id)); + clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL); + + printf("lists done\n"); + + FILE *fp; + char *cl_src, *path, *flags = (char *)malloc(200 * sizeof(char)); + flags[0] = '\0'; + switch(config_opencl->fpu) + { + case OPENCL_FPU_32: + switch(config_opencl->set_func) + { + case SFUNC_JULIA: + path = "cl/julia32.cl"; + break; + case SFUNC_MANDELBROT: + default: + path = "cl/mandelbrot32.cl"; + break; + } + break; + case OPENCL_FPU_64: + switch(config_opencl->set_func) + { + case SFUNC_JULIA: + path = "cl/julia64.cl"; + break; + case SFUNC_MANDELBROT: + default: + path = "cl/mandelbrot64.cl"; + break; + } + int khrFP64 = 0; + int amdFP64 = 0; + for (cl_uint i = 0; i < num_devices; i++) + { + char deviceExtensions[8192]; + ret = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, + sizeof(deviceExtensions), deviceExtensions, 0); + if (strstr(deviceExtensions, "cl_khr_fp64")) + { + khrFP64++; + } + else + { + if (strstr(deviceExtensions, "cl_amd_fp64")) + { + amdFP64++; + } + } + } + if (khrFP64 == num_devices) + { + flags = strcat(flags, "-D KHR_DP_EXTENSION "); + } + else if (amdFP64 == num_devices) + { + flags = strcat(flags, ""); + } + break; + case OPENCL_FPU_128: + printf("128 bit precision not implemented yet\n"); + break; + } + + cl_src = (char *)malloc(MAX_SOURCE_SIZE * sizeof(char)); + fp = fopen(path, "r"); + cl_src_sz = fread(cl_src, 1, MAX_SOURCE_SIZE, fp); + fclose(fp); + + printf("reading done\n"); + + for (cl_uint i = 0; i < num_devices; i++) + { + cl_command_queue_properties prop = 0; + // if (sampleArgs->timing) + // { + // prop |= CL_QUEUE_PROFILING_ENABLE; + // } + commandQueue[i] = clCreateCommandQueue(context, devices[i], prop, &ret); + outputBuffer[i] = clCreateBuffer(context, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + (sizeof(cl_uint) * (config_opencl->width) + * (config_opencl->height)) / num_devices, NULL, &ret); + } + printf("comand puffer\n"); + if (config_opencl->fma) + { + flags = strcat(flags, "-D MUL_ADD=fma "); + } + else + { + flags = strcat(flags, "-D MUL_ADD=mad "); + } + printf("flags concat\n"); + program = clCreateProgramWithSource(context, 1, (const char **)&cl_src, (const size_t *)&cl_src_sz, &ret); + ret = clBuildProgram(program, num_devices, devices, flags, NULL, NULL); + for (cl_uint i = 0; i < num_devices; i++) + { + kernel_vector[i] = clCreateKernel(program, "calculate", &ret); + } + + printf("cl init done\n"); + } void render_opencl(void) { + cl_event events[MAX_DEVICES]; + cl_int eventStatus = CL_QUEUED; + size_t globalThreads[1]; + size_t localThreads[1]; + size_t kernelWorkGroupSize; + cl_kernel kernel; + + globalThreads[0] = ((config_opencl->width) * (config_opencl->height)) + / num_devices; + localThreads[0] = 256; + + globalThreads[0] >>= 2; + + for (cl_uint i = 0; i < num_devices; i++) + { + kernel = kernel_vector[i]; + ret = clGetKernelWorkGroupInfo(kernel, devices[i], + CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, + 0); + if ((cl_uint) (localThreads[0]) > kernelWorkGroupSize) + { + localThreads[0] = kernelWorkGroupSize; + } + + xpos = 0.0; + ypos = 0.0; + xsize = 0.1; + ysize = 0.1; + + xstep = (xsize / (double) config_opencl->width); + ystep = (ysize / (double) config_opencl->height); + leftx = (xpos - xsize / 2.0); + topy = + (ypos + ysize / 2.0 + - ((double) i * ysize) / (double) num_devices); + + if (i == 0) + { + topy0 = topy; + } + + printf("xsize: %f, ysize: %f, xstep: %f, ystep: %f, leftx: %f, topy: %f\n", xsize, ysize, xstep, ystep, leftx, topy); + + ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &outputBuffer[i]); + + cl_float leftxF = (float) leftx; + cl_float topyF = (float) topy; + cl_float xstepF = (float) xstep; + cl_float ystepF = (float) ystep; + + switch (config_opencl->fpu) + { + case OPENCL_FPU_32: // lel + ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &leftxF); + ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &topyF); + ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &xstepF); + ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &ystepF); + break; + case OPENCL_FPU_64: + ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &leftx); + ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &topy); + ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &xstep); + ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &ystep); + break; + case OPENCL_FPU_128: + break; + } + + ret = clSetKernelArg(kernel, 5, sizeof(cl_uint), + (void *) &config_opencl->iterations); + ret = clSetKernelArg(kernel, 6, sizeof(cl_int), + (void *) &config_opencl->width); + ret = clEnqueueNDRangeKernel(commandQueue[i], kernel, 1, NULL, + globalThreads, localThreads, 0, NULL, &events[i]); + + + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clFlush(commandQueue[i]); + } + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clWaitForEvents(1, &events[num_devices - i - 1]); + ret = clReleaseEvent(events[num_devices - i - 1]); + } + + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clEnqueueReadBuffer(commandQueue[i], outputBuffer[i], + CL_FALSE, 0, + ((config_opencl->width) * (config_opencl->height) + * sizeof(u32)) / num_devices, + config_opencl->arr + + (((config_opencl->width) * (config_opencl->height) * i) + / num_devices), 0, + NULL, &events[i]); + } + + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clFlush(commandQueue[i]); + } + + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clWaitForEvents(1, &events[num_devices - i - 1]); + ret = clReleaseEvent(events[num_devices - i - 1]); + } + } + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glBindTexture(GL_TEXTURE_2D, config_opencl->tex); + glEnable(GL_TEXTURE_2D); + + glBegin(GL_QUADS); + glTexCoord2i(0, 0); glVertex2i(0, 0); + glTexCoord2i(0, 1); glVertex2i(0, config_opencl->height); + glTexCoord2i(1, 1); glVertex2i(config_opencl->width, config_opencl->height); + glTexCoord2i(1, 0); glVertex2i(config_opencl->width, 0); + glEnd(); + glDisable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, 0); + glutSwapBuffers(); } void idle_opencl(void) { + static int t_old; + int t = 0, delta = 0; + do + { + t = glutGet(GLUT_ELAPSED_TIME); + delta = t - t_old; + } while (delta < 16); // TODO: Hardcoded FPS + t_old = t; + //glGenTextures(1, &tex); + glBindTexture(GL_TEXTURE_2D, config_opencl->tex); + //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, config_opencl->width, + config_opencl->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, config_opencl->arr); + glBindTexture(GL_TEXTURE_2D, 0); + cl_ft += (config_opencl->speed * (delta / 1000.0)); + x_min = x_min_s + config_opencl->zoom_func(cl_ft, (d64) 2.0 + config_opencl->to_x); + y_min = y_min_s + config_opencl->zoom_func(cl_ft, (d64) 1.0 + config_opencl->to_y); + x_max = x_max_s - config_opencl->zoom_func(cl_ft, (d64) 1.0 - config_opencl->to_x); + y_max = y_max_s - config_opencl->zoom_func(cl_ft, (d64) 1.0 - config_opencl->to_y); + glutPostRedisplay(); } -- cgit v1.2.1