From da4b72ea58e97d14306cfd322a8ef7a40337645d Mon Sep 17 00:00:00 2001 From: Leonard Kugis Date: Sun, 28 Jan 2018 17:34:41 +0100 Subject: OpenCL rendering working with 32-bit float --- src/render_opencl.c | 124 +++++++++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 70 deletions(-) (limited to 'src/render_opencl.c') diff --git a/src/render_opencl.c b/src/render_opencl.c index 9d650c2..84fc314 100644 --- a/src/render_opencl.c +++ b/src/render_opencl.c @@ -9,7 +9,6 @@ void init_opencl(OpenCLConfig *config) { - printf("cl init\n"); x_min_s = -2.0; x_max_s = 1.0; y_min_s = -1.0; @@ -23,6 +22,8 @@ void init_opencl(OpenCLConfig *config) output = (cl_uint *) malloc((config_opencl->width) * (config_opencl->height) * sizeof(cl_uchar4)); context = NULL; + width_cl = config_opencl->width; // Leave it in, cl needs different endianness + cl_platform_id platform_id; cl_uint ret_num_devices; cl_uint ret_num_platforms; @@ -38,8 +39,6 @@ void init_opencl(OpenCLConfig *config) num_devices = (cl_uint)(device_list_size/sizeof(cl_device_id)); clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL); - printf("lists done\n"); - FILE *fp; char *cl_src, *path, *flags = (char *)malloc(200 * sizeof(char)); flags[0] = '\0'; @@ -106,8 +105,6 @@ void init_opencl(OpenCLConfig *config) cl_src_sz = fread(cl_src, 1, MAX_SOURCE_SIZE, fp); fclose(fp); - printf("reading done\n"); - for (cl_uint i = 0; i < num_devices; i++) { cl_command_queue_properties prop = 0; @@ -121,7 +118,6 @@ void init_opencl(OpenCLConfig *config) (sizeof(cl_uint) * (config_opencl->width) * (config_opencl->height)) / num_devices, NULL, &ret); } - printf("comand puffer\n"); if (config_opencl->fma) { flags = strcat(flags, "-D MUL_ADD=fma "); @@ -130,7 +126,6 @@ void init_opencl(OpenCLConfig *config) { flags = strcat(flags, "-D MUL_ADD=mad "); } - printf("flags concat\n"); program = clCreateProgramWithSource(context, 1, (const char **)&cl_src, (const size_t *)&cl_src_sz, &ret); ret = clBuildProgram(program, num_devices, devices, flags, NULL, NULL); for (cl_uint i = 0; i < num_devices; i++) @@ -138,8 +133,6 @@ void init_opencl(OpenCLConfig *config) kernel_vector[i] = clCreateKernel(program, "calculate", &ret); } - printf("cl init done\n"); - } void render_opencl(void) @@ -151,6 +144,16 @@ void render_opencl(void) size_t kernelWorkGroupSize; cl_kernel kernel; + cl_double y_max_t; + cl_float y_max_t_f; + + cl_double x_delta = ((x_max - x_min) / (double) config_opencl->width); + cl_double y_delta = -((y_max - y_min) / (double) config_opencl->height); + + cl_float x_delta_f = (float) x_delta; + cl_float y_delta_f = (float) y_delta; + cl_float x_min_f = (float) x_min; + globalThreads[0] = ((config_opencl->width) * (config_opencl->height)) / num_devices; localThreads[0] = 256; @@ -168,45 +171,28 @@ void render_opencl(void) localThreads[0] = kernelWorkGroupSize; } - xpos = 0.0; - ypos = 0.0; - xsize = 0.1; - ysize = 0.1; - - xstep = (xsize / (double) config_opencl->width); - ystep = (ysize / (double) config_opencl->height); - leftx = (xpos - xsize / 2.0); - topy = - (ypos + ysize / 2.0 - - ((double) i * ysize) / (double) num_devices); - - if (i == 0) - { - topy0 = topy; - } + y_max_t = (((y_min + y_max) / 2.0) + (y_max - y_min) / 2.0 - ((double) i * (y_max - y_min)) / (double) num_devices); - printf("xsize: %f, ysize: %f, xstep: %f, ystep: %f, leftx: %f, topy: %f\n", xsize, ysize, xstep, ystep, leftx, topy); ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &outputBuffer[i]); - cl_float leftxF = (float) leftx; - cl_float topyF = (float) topy; - cl_float xstepF = (float) xstep; - cl_float ystepF = (float) ystep; + y_max_t_f = (float) y_max_t; + + //printf("x_delta: %f, y_delta: %f, x_delta_f: %f, y_delta_f: %f, x_min_f: %f, y_max_t: %f, y_max_t_f: %f\n", x_delta, y_delta, x_delta_f, y_delta_f, x_min_f, y_max_t, y_max_t_f); switch (config_opencl->fpu) { case OPENCL_FPU_32: // lel - ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &leftxF); - ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &topyF); - ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &xstepF); - ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &ystepF); + ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &x_min_f); + ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &y_max_t_f); + ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &x_delta_f); + ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &y_delta_f); break; case OPENCL_FPU_64: - ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &leftx); - ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &topy); - ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &xstep); - ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &ystep); + ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &x_min); + ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &y_max_t); + ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &x_delta); + ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &y_delta); break; case OPENCL_FPU_128: break; @@ -215,43 +201,41 @@ void render_opencl(void) ret = clSetKernelArg(kernel, 5, sizeof(cl_uint), (void *) &config_opencl->iterations); ret = clSetKernelArg(kernel, 6, sizeof(cl_int), - (void *) &config_opencl->width); + (void *) &width_cl); ret = clEnqueueNDRangeKernel(commandQueue[i], kernel, 1, NULL, globalThreads, localThreads, 0, NULL, &events[i]); + } + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clFlush(commandQueue[i]); + } + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clWaitForEvents(1, &events[num_devices - i - 1]); + ret = clReleaseEvent(events[num_devices - i - 1]); + } + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clEnqueueReadBuffer(commandQueue[i], outputBuffer[i], + CL_FALSE, 0, + (config_opencl->width * config_opencl->height * sizeof(cl_int)) + / num_devices, + config_opencl->arr + + (config_opencl->width * config_opencl->height + / num_devices) * i, 0, + NULL, &events[i]); + } - for (cl_uint i = 0; i < num_devices; i++) - { - ret = clFlush(commandQueue[i]); - } - for (cl_uint i = 0; i < num_devices; i++) - { - ret = clWaitForEvents(1, &events[num_devices - i - 1]); - ret = clReleaseEvent(events[num_devices - i - 1]); - } - - for (cl_uint i = 0; i < num_devices; i++) - { - ret = clEnqueueReadBuffer(commandQueue[i], outputBuffer[i], - CL_FALSE, 0, - ((config_opencl->width) * (config_opencl->height) - * sizeof(u32)) / num_devices, - config_opencl->arr - + (((config_opencl->width) * (config_opencl->height) * i) - / num_devices), 0, - NULL, &events[i]); - } - - for (cl_uint i = 0; i < num_devices; i++) - { - ret = clFlush(commandQueue[i]); - } + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clFlush(commandQueue[i]); + } - for (cl_uint i = 0; i < num_devices; i++) - { - ret = clWaitForEvents(1, &events[num_devices - i - 1]); - ret = clReleaseEvent(events[num_devices - i - 1]); - } + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clWaitForEvents(1, &events[num_devices - i - 1]); + ret = clReleaseEvent(events[num_devices - i - 1]); } glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); -- cgit v1.2.1