From 1c3ea27064257d8cf7b9f36b0388b8cf2e94ab08 Mon Sep 17 00:00:00 2001
From: Leonard Kugis <leonardkugis@gmail.com>
Date: Fri, 26 Jan 2018 01:12:49 +0100
Subject: Skeleton for OpenCL rendering, cleaned up alot, WIP and crashing

---
 src/render_opencl.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 src/render_opencl.c

(limited to 'src/render_opencl.c')

diff --git a/src/render_opencl.c b/src/render_opencl.c
new file mode 100644
index 0000000..913c975
--- /dev/null
+++ b/src/render_opencl.c
@@ -0,0 +1,26 @@
+/*
+ * render_opencl.c
+ *
+ *  Created on: 26.01.2018
+ *      Author: Superleo1810
+ */
+
+#include "render_opencl.h"
+
+// TODO: implement opencl
+
+void init_opencl(OpenCLConfig *config)
+{
+	config_opencl = config;
+	printf("OpenCL rendering not implemented yet\n");
+}
+
+void render_opencl(void)
+{
+
+}
+
+void idle_opencl(void)
+{
+
+}
-- 
cgit v1.2.1


From c8a38bd8ad66eb0b35f08a4733fdee37a888b83c Mon Sep 17 00:00:00 2001
From: Leonard Kugis <leonardkugis@gmail.com>
Date: Sat, 27 Jan 2018 02:03:31 +0100
Subject: OpenCL rendering added, not working properly

---
 src/render_opencl.c | 277 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 274 insertions(+), 3 deletions(-)

(limited to 'src/render_opencl.c')

diff --git a/src/render_opencl.c b/src/render_opencl.c
index 913c975..9d650c2 100644
--- a/src/render_opencl.c
+++ b/src/render_opencl.c
@@ -7,20 +7,291 @@
 
 #include "render_opencl.h"
 
-// TODO: implement opencl
-
 void init_opencl(OpenCLConfig *config)
 {
+	printf("cl init\n");
+	x_min_s = -2.0;
+	x_max_s = 1.0;
+	y_min_s = -1.0;
+	y_max_s = 1.0;
+	x_min = x_min_s;
+	x_max = x_max_s;
+	y_min = y_min_s;
+	y_max = y_max_s;
+
 	config_opencl = config;
-	printf("OpenCL rendering not implemented yet\n");
+	output = (cl_uint *) malloc((config_opencl->width) * (config_opencl->height) * sizeof(cl_uchar4));
+	context = NULL;
+
+	cl_platform_id platform_id;
+	cl_uint ret_num_devices;
+	cl_uint ret_num_platforms;
+	size_t device_list_size;
+	cl_program program;
+	size_t cl_src_sz;
+
+	clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
+	clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
+	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
+	ret = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_list_size);
+	devices = (cl_device_id *)malloc(device_list_size);
+	num_devices = (cl_uint)(device_list_size/sizeof(cl_device_id));
+	clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL);
+
+	printf("lists done\n");
+
+	FILE *fp;
+	char *cl_src, *path, *flags = (char *)malloc(200 * sizeof(char));
+	flags[0] = '\0';
+	switch(config_opencl->fpu)
+	{
+	case OPENCL_FPU_32:
+		switch(config_opencl->set_func)
+		{
+		case SFUNC_JULIA:
+			path = "cl/julia32.cl";
+			break;
+		case SFUNC_MANDELBROT:
+		default:
+			path = "cl/mandelbrot32.cl";
+			break;
+		}
+		break;
+	case OPENCL_FPU_64:
+		switch(config_opencl->set_func)
+		{
+		case SFUNC_JULIA:
+			path = "cl/julia64.cl";
+			break;
+		case SFUNC_MANDELBROT:
+		default:
+			path = "cl/mandelbrot64.cl";
+			break;
+		}
+		int khrFP64 = 0;
+		int amdFP64 = 0;
+		for (cl_uint i = 0; i < num_devices; i++)
+		{
+			char deviceExtensions[8192];
+			ret = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS,
+					sizeof(deviceExtensions), deviceExtensions, 0);
+			if (strstr(deviceExtensions, "cl_khr_fp64"))
+			{
+				khrFP64++;
+			}
+			else
+			{
+				if (strstr(deviceExtensions, "cl_amd_fp64"))
+				{
+					amdFP64++;
+				}
+			}
+		}
+		if (khrFP64 == num_devices)
+		{
+			flags = strcat(flags, "-D KHR_DP_EXTENSION ");
+		}
+		else if (amdFP64 == num_devices)
+		{
+			flags = strcat(flags, "");
+		}
+		break;
+	case OPENCL_FPU_128:
+		printf("128 bit precision not implemented yet\n");
+		break;
+	}
+
+	cl_src = (char *)malloc(MAX_SOURCE_SIZE * sizeof(char));
+	fp = fopen(path, "r");
+	cl_src_sz = fread(cl_src, 1, MAX_SOURCE_SIZE, fp);
+	fclose(fp);
+
+	printf("reading done\n");
+
+	for (cl_uint i = 0; i < num_devices; i++)
+	{
+		cl_command_queue_properties prop = 0;
+		//			if (sampleArgs->timing)
+		//			{
+		//				prop |= CL_QUEUE_PROFILING_ENABLE;
+		//			}
+		commandQueue[i] = clCreateCommandQueue(context, devices[i], prop, &ret);
+		outputBuffer[i] = clCreateBuffer(context,
+				CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+				(sizeof(cl_uint) * (config_opencl->width)
+						* (config_opencl->height)) / num_devices, NULL, &ret);
+	}
+	printf("comand puffer\n");
+	if (config_opencl->fma)
+	{
+		flags = strcat(flags, "-D MUL_ADD=fma ");
+	}
+	else
+	{
+		flags = strcat(flags, "-D MUL_ADD=mad ");
+	}
+	printf("flags concat\n");
+	program = clCreateProgramWithSource(context, 1, (const char **)&cl_src, (const size_t *)&cl_src_sz, &ret);
+	ret = clBuildProgram(program, num_devices, devices, flags, NULL, NULL);
+	for (cl_uint i = 0; i < num_devices; i++)
+	{
+		kernel_vector[i] = clCreateKernel(program, "calculate", &ret);
+	}
+
+	printf("cl init done\n");
+
 }
 
 void render_opencl(void)
 {
+	cl_event events[MAX_DEVICES];
+	cl_int eventStatus = CL_QUEUED;
+	size_t globalThreads[1];
+	size_t localThreads[1];
+	size_t kernelWorkGroupSize;
+	cl_kernel kernel;
+
+	globalThreads[0] = ((config_opencl->width) * (config_opencl->height))
+			/ num_devices;
+	localThreads[0] = 256;
+
+	globalThreads[0] >>= 2;
+
+	for (cl_uint i = 0; i < num_devices; i++)
+	{
+		kernel = kernel_vector[i];
+		ret = clGetKernelWorkGroupInfo(kernel, devices[i],
+				CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize,
+				0);
+		if ((cl_uint) (localThreads[0]) > kernelWorkGroupSize)
+		{
+			localThreads[0] = kernelWorkGroupSize;
+		}
+
+		xpos = 0.0;
+		ypos = 0.0;
+		xsize = 0.1;
+		ysize = 0.1;
+
+		xstep = (xsize / (double) config_opencl->width);
+		ystep = (ysize / (double) config_opencl->height);
+		leftx = (xpos - xsize / 2.0);
+		topy =
+				(ypos + ysize / 2.0
+						- ((double) i * ysize) / (double) num_devices);
+
+		if (i == 0)
+		{
+			topy0 = topy;
+		}
+
+		printf("xsize: %f, ysize: %f, xstep: %f, ystep: %f, leftx: %f, topy: %f\n", xsize, ysize, xstep, ystep, leftx, topy);
+
+		ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &outputBuffer[i]);
+
+		cl_float leftxF = (float) leftx;
+		cl_float topyF = (float) topy;
+		cl_float xstepF = (float) xstep;
+		cl_float ystepF = (float) ystep;
+
+		switch (config_opencl->fpu)
+		{
+		case OPENCL_FPU_32: // lel
+			ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &leftxF);
+			ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &topyF);
+			ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &xstepF);
+			ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &ystepF);
+			break;
+		case OPENCL_FPU_64:
+			ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &leftx);
+			ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &topy);
+			ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &xstep);
+			ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &ystep);
+			break;
+		case OPENCL_FPU_128:
+			break;
+		}
+
+		ret = clSetKernelArg(kernel, 5, sizeof(cl_uint),
+				(void *) &config_opencl->iterations);
+		ret = clSetKernelArg(kernel, 6, sizeof(cl_int),
+				(void *) &config_opencl->width);
+		ret = clEnqueueNDRangeKernel(commandQueue[i], kernel, 1, NULL,
+				globalThreads, localThreads, 0, NULL, &events[i]);
+
+
+		for (cl_uint i = 0; i < num_devices; i++)
+		{
+			ret = clFlush(commandQueue[i]);
+		}
+		for (cl_uint i = 0; i < num_devices; i++)
+		{
+			ret = clWaitForEvents(1, &events[num_devices - i - 1]);
+			ret = clReleaseEvent(events[num_devices - i - 1]);
+		}
+
+		for (cl_uint i = 0; i < num_devices; i++)
+		{
+			ret = clEnqueueReadBuffer(commandQueue[i], outputBuffer[i],
+			CL_FALSE, 0,
+					((config_opencl->width) * (config_opencl->height)
+							* sizeof(u32)) / num_devices,
+					config_opencl->arr
+							+ (((config_opencl->width) * (config_opencl->height) * i)
+									/ num_devices), 0,
+					NULL, &events[i]);
+		}
+
+		for (cl_uint i = 0; i < num_devices; i++)
+		{
+			ret = clFlush(commandQueue[i]);
+		}
+
+		for (cl_uint i = 0; i < num_devices; i++)
+		{
+			ret = clWaitForEvents(1, &events[num_devices - i - 1]);
+			ret = clReleaseEvent(events[num_devices - i - 1]);
+		}
+	}
+
+	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+	glBindTexture(GL_TEXTURE_2D, config_opencl->tex);
+	glEnable(GL_TEXTURE_2D);
+
+	glBegin(GL_QUADS);
+		glTexCoord2i(0, 0); glVertex2i(0, 0);
+		glTexCoord2i(0, 1); glVertex2i(0, config_opencl->height);
+		glTexCoord2i(1, 1); glVertex2i(config_opencl->width, config_opencl->height);
+		glTexCoord2i(1, 0); glVertex2i(config_opencl->width, 0);
+	glEnd();
 
+	glDisable(GL_TEXTURE_2D);
+	glBindTexture(GL_TEXTURE_2D, 0);
+	glutSwapBuffers();
 }
 
 void idle_opencl(void)
 {
+	static int t_old;
+	int t = 0, delta = 0;
+	do
+	{
+		t = glutGet(GLUT_ELAPSED_TIME);
+		delta = t - t_old;
+	} while (delta < 16); // TODO: Hardcoded FPS
+	t_old = t;
 
+	//glGenTextures(1, &tex);
+	glBindTexture(GL_TEXTURE_2D, config_opencl->tex);
+	//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+	//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, config_opencl->width,
+			config_opencl->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, config_opencl->arr);
+	glBindTexture(GL_TEXTURE_2D, 0);
+	cl_ft += (config_opencl->speed * (delta / 1000.0));
+	x_min = x_min_s + config_opencl->zoom_func(cl_ft, (d64) 2.0 + config_opencl->to_x);
+	y_min = y_min_s + config_opencl->zoom_func(cl_ft, (d64) 1.0 + config_opencl->to_y);
+	x_max = x_max_s - config_opencl->zoom_func(cl_ft, (d64) 1.0 - config_opencl->to_x);
+	y_max = y_max_s - config_opencl->zoom_func(cl_ft, (d64) 1.0 - config_opencl->to_y);
+	glutPostRedisplay();
 }
-- 
cgit v1.2.1