about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Release/cl/mandelbrot32.cl | 10
-rw-r--r-- src/render_opencl.c | 124
-rw-r--r-- src/render_opencl.h | 10
3 files changed, 64 insertions, 80 deletions
diff --git a/Release/cl/mandelbrot32.cl b/Release/cl/mandelbrot32.cl
index af0d0e9..585d0ae 100644
--- a/Release/cl/mandelbrot32.cl
+++ b/Release/cl/mandelbrot32.cl
@@ -1,5 +1,13 @@
-__kernel void calculate(__global uchar4 * mandelbrotImage, const float posx, const float posy, const float stepSizeX, const float stepSizeY, const uint maxIterations, const uint width)
+__kernel void calculate(
+ __global uchar4 * mandelbrotImage,
+ const float posx,
+ const float posy,
+ const float stepSizeX,
+ const float stepSizeY,
+ const uint maxIterations,
+ const int width)
{
+//printf("%f, %f, %f, %f, %u, %d", posx, posy, stepSizeX, stepSizeY, maxIterations, width);
int tid = get_global_id(0);
int i = tid % (width / 4);
diff --git a/src/render_opencl.c b/src/render_opencl.c
index 9d650c2..84fc314 100644
--- a/src/render_opencl.c
+++ b/src/render_opencl.c
@@ -9,7 +9,6 @@
void init_opencl(OpenCLConfig *config)
{
- printf("cl init\n");
x_min_s = -2.0;
x_max_s = 1.0;
y_min_s = -1.0;
@@ -23,6 +22,8 @@ void init_opencl(OpenCLConfig *config)
output = (cl_uint *) malloc((config_opencl->width) * (config_opencl->height) * sizeof(cl_uchar4));
context = NULL;
+ width_cl = config_opencl->width; // Leave it in, cl needs different endianness
+
cl_platform_id platform_id;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
@@ -38,8 +39,6 @@ void init_opencl(OpenCLConfig *config)
num_devices = (cl_uint)(device_list_size/sizeof(cl_device_id));
clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL);
- printf("lists done\n");
-
FILE *fp;
char *cl_src, *path, *flags = (char *)malloc(200 * sizeof(char));
flags[0] = '\0';
@@ -106,8 +105,6 @@ void init_opencl(OpenCLConfig *config)
cl_src_sz = fread(cl_src, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
- printf("reading done\n");
-
for (cl_uint i = 0; i < num_devices; i++)
{
cl_command_queue_properties prop = 0;
@@ -121,7 +118,6 @@ void init_opencl(OpenCLConfig *config)
(sizeof(cl_uint) * (config_opencl->width)
* (config_opencl->height)) / num_devices, NULL, &ret);
}
- printf("comand puffer\n");
if (config_opencl->fma)
{
flags = strcat(flags, "-D MUL_ADD=fma ");
@@ -130,7 +126,6 @@ void init_opencl(OpenCLConfig *config)
{
flags = strcat(flags, "-D MUL_ADD=mad ");
}
- printf("flags concat\n");
program = clCreateProgramWithSource(context, 1, (const char **)&cl_src, (const size_t *)&cl_src_sz, &ret);
ret = clBuildProgram(program, num_devices, devices, flags, NULL, NULL);
for (cl_uint i = 0; i < num_devices; i++)
@@ -138,8 +133,6 @@ void init_opencl(OpenCLConfig *config)
kernel_vector[i] = clCreateKernel(program, "calculate", &ret);
}
- printf("cl init done\n");
-
}
void render_opencl(void)
@@ -151,6 +144,16 @@ void render_opencl(void)
size_t kernelWorkGroupSize;
cl_kernel kernel;
+ cl_double y_max_t;
+ cl_float y_max_t_f;
+
+ cl_double x_delta = ((x_max - x_min) / (double) config_opencl->width);
+ cl_double y_delta = -((y_max - y_min) / (double) config_opencl->height);
+
+ cl_float x_delta_f = (float) x_delta;
+ cl_float y_delta_f = (float) y_delta;
+ cl_float x_min_f = (float) x_min;
+
globalThreads[0] = ((config_opencl->width) * (config_opencl->height))
/ num_devices;
localThreads[0] = 256;
@@ -168,45 +171,28 @@ void render_opencl(void)
localThreads[0] = kernelWorkGroupSize;
}
- xpos = 0.0;
- ypos = 0.0;
- xsize = 0.1;
- ysize = 0.1;
-
- xstep = (xsize / (double) config_opencl->width);
- ystep = (ysize / (double) config_opencl->height);
- leftx = (xpos - xsize / 2.0);
- topy =
- (ypos + ysize / 2.0
- - ((double) i * ysize) / (double) num_devices);
-
- if (i == 0)
- {
- topy0 = topy;
- }
+ y_max_t = (((y_min + y_max) / 2.0) + (y_max - y_min) / 2.0 - ((double) i * (y_max - y_min)) / (double) num_devices);
- printf("xsize: %f, ysize: %f, xstep: %f, ystep: %f, leftx: %f, topy: %f\n", xsize, ysize, xstep, ystep, leftx, topy);
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &outputBuffer[i]);
- cl_float leftxF = (float) leftx;
- cl_float topyF = (float) topy;
- cl_float xstepF = (float) xstep;
- cl_float ystepF = (float) ystep;
+ y_max_t_f = (float) y_max_t;
+
+ //printf("x_delta: %f, y_delta: %f, x_delta_f: %f, y_delta_f: %f, x_min_f: %f, y_max_t: %f, y_max_t_f: %f\n", x_delta, y_delta, x_delta_f, y_delta_f, x_min_f, y_max_t, y_max_t_f);
switch (config_opencl->fpu)
{
case OPENCL_FPU_32: // lel
- ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &leftxF);
- ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &topyF);
- ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &xstepF);
- ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &ystepF);
+ ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &x_min_f);
+ ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &y_max_t_f);
+ ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &x_delta_f);
+ ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &y_delta_f);
break;
case OPENCL_FPU_64:
- ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &leftx);
- ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &topy);
- ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &xstep);
- ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &ystep);
+ ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &x_min);
+ ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &y_max_t);
+ ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &x_delta);
+ ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &y_delta);
break;
case OPENCL_FPU_128:
break;
@@ -215,43 +201,41 @@ void render_opencl(void)
ret = clSetKernelArg(kernel, 5, sizeof(cl_uint),
(void *) &config_opencl->iterations);
ret = clSetKernelArg(kernel, 6, sizeof(cl_int),
- (void *) &config_opencl->width);
+ (void *) &width_cl);
ret = clEnqueueNDRangeKernel(commandQueue[i], kernel, 1, NULL,
globalThreads, localThreads, 0, NULL, &events[i]);
+ }
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clFlush(commandQueue[i]);
+ }
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clWaitForEvents(1, &events[num_devices - i - 1]);
+ ret = clReleaseEvent(events[num_devices - i - 1]);
+ }
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clEnqueueReadBuffer(commandQueue[i], outputBuffer[i],
+ CL_FALSE, 0,
+ (config_opencl->width * config_opencl->height * sizeof(cl_int))
+ / num_devices,
+ config_opencl->arr
+ + (config_opencl->width * config_opencl->height
+ / num_devices) * i, 0,
+ NULL, &events[i]);
+ }
- for (cl_uint i = 0; i < num_devices; i++)
- {
- ret = clFlush(commandQueue[i]);
- }
- for (cl_uint i = 0; i < num_devices; i++)
- {
- ret = clWaitForEvents(1, &events[num_devices - i - 1]);
- ret = clReleaseEvent(events[num_devices - i - 1]);
- }
-
- for (cl_uint i = 0; i < num_devices; i++)
- {
- ret = clEnqueueReadBuffer(commandQueue[i], outputBuffer[i],
- CL_FALSE, 0,
- ((config_opencl->width) * (config_opencl->height)
- * sizeof(u32)) / num_devices,
- config_opencl->arr
- + (((config_opencl->width) * (config_opencl->height) * i)
- / num_devices), 0,
- NULL, &events[i]);
- }
-
- for (cl_uint i = 0; i < num_devices; i++)
- {
- ret = clFlush(commandQueue[i]);
- }
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clFlush(commandQueue[i]);
+ }
- for (cl_uint i = 0; i < num_devices; i++)
- {
- ret = clWaitForEvents(1, &events[num_devices - i - 1]);
- ret = clReleaseEvent(events[num_devices - i - 1]);
- }
+ for (cl_uint i = 0; i < num_devices; i++)
+ {
+ ret = clWaitForEvents(1, &events[num_devices - i - 1]);
+ ret = clReleaseEvent(events[num_devices - i - 1]);
}
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
diff --git a/src/render_opencl.h b/src/render_opencl.h
index b995c4d..720678b 100644
--- a/src/render_opencl.h
+++ b/src/render_opencl.h
@@ -56,15 +56,7 @@ cl_device_id *devices;
cl_command_queue commandQueue[MAX_DEVICES];
cl_mem outputBuffer[MAX_DEVICES];
-double xpos;
-double ypos;
-double xsize;
-double ysize;
-double xstep;
-double ystep;
-double leftx;
-double topy;
-double topy0;
+cl_int width_cl;
d64 zoom_func(d64 ft, d64 s);