diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/cl/mandelbrot32.cl | 196 | ||||
-rw-r--r-- | src/cl/mandelbrot64.cl | 230 | ||||
-rw-r--r-- | src/defs.h | 8 | ||||
-rw-r--r-- | src/mandelbrot-zoom.c | 112 | ||||
-rw-r--r-- | src/mandelbrot-zoom.h | 5 | ||||
-rw-r--r-- | src/render.c | 111 | ||||
-rw-r--r-- | src/render.h | 44 | ||||
-rw-r--r-- | src/render_cpu.c | 99 | ||||
-rw-r--r-- | src/render_cpu.h | 61 | ||||
-rw-r--r-- | src/render_opencl.c | 297 | ||||
-rw-r--r-- | src/render_opencl.h | 75 | ||||
-rw-r--r-- | src/sets.c | 14 | ||||
-rw-r--r-- | src/sets.h | 16 |
13 files changed, 1086 insertions, 182 deletions
/*
 * Origin: diff --git a/src/cl/mandelbrot32.cl b/src/cl/mandelbrot32.cl
 *         (new file mode 100644, index 0000000..cda933f)
 *
 * Single-precision Mandelbrot kernel. Every work-item renders four
 * horizontally adjacent pixels and writes them as RGBA uchar4 values.
 */

/*
 * MUL_ADD(a, b, c) has to evaluate a * b + c. It is not defined anywhere in
 * this file, so it is presumably injected by the host through a -D build
 * option (TODO confirm against the host build flags). Fall back to mad() so
 * the kernel still builds when that option is missing.
 */
#ifndef MUL_ADD
#define MUL_ADD(a, b, c) mad((a), (b), (c))
#endif

/*
 * Advance z -> z^2 + c twice for all four lanes.
 * x/y hold the current point, t is scratch, x0/y0 hold c.
 */
#define MANDELBROT32_TWO_STEPS(x, y, t, x0, y0)                     \
    do {                                                            \
        (t) = MUL_ADD(-(y), (y), MUL_ADD((x), (x), (x0)));          \
        (y) = MUL_ADD(2.0f * (x), (y), (y0));                       \
        (x) = MUL_ADD(-(y), (y), MUL_ADD((t), (t), (x0)));          \
        (y) = MUL_ADD(2.0f * (t), (y), (y0));                       \
    } while (0)

/*
 * Turn one pixel's iteration data into an RGBA colour.
 *
 *   smoothed      fractional iteration count fed into the cosine palette
 *   count         whole iterations completed for this pixel
 *   maxIterations iteration cap; pixels that reached it are drawn black
 *
 * Returns the colour with alpha fixed at 0xff.
 */
uchar4 mandelbrot32_shade(const float smoothed, const int count, const uint maxIterations)
{
    uchar4 color;
    color.s3 = 0xff;

    /*
     * The main loop counts in steps of 16, so count can overshoot a cap that
     * is not a multiple of 16; ">=" keeps those pixels black. Returning early
     * also avoids converting the NaN that smoothed holds for bounded points.
     */
    if ((uint)count >= maxIterations)
    {
        color.s0 = 0;
        color.s1 = 0;
        color.s2 = 0;
        return color;
    }

    const float c = smoothed * 2.0f * 3.1416f / 256.0f;
    color.s0 = ((1.0f + native_cos(c)) * 0.5f) * 255;
    color.s1 = ((1.0f + native_cos(2.0f * c + 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
    color.s2 = ((1.0f + native_cos(c - 2.0f * 3.1416f / 3.0f)) * 0.5f) * 255;
    return color;
}

/*
 * Render four adjacent pixels of the Mandelbrot set.
 *
 *   mandelbrotImage output buffer; this work-item writes elements
 *                   4 * tid .. 4 * tid + 3
 *   posx, posy      coordinate of column 0 / row 0
 *   stepSizeX/Y     coordinate increment per column / per row
 *   maxIterations   iteration cap per pixel
 *   width           image width in pixels; width / 4 work-items form a row
 *                   (assumes width is a non-zero multiple of 4 - TODO confirm
 *                   with the host code)
 */
__kernel void calculate(__global uchar4 * mandelbrotImage, const float posx, const float posy, const float stepSizeX, const float stepSizeY, const uint maxIterations, const uint width)
{
    int tid = get_global_id(0);

    int i = tid % (width / 4);
    int j = tid / (width / 4);

    int4 veci = {4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3};

    float4 x0;
    x0.s0 = (float)(posx + stepSizeX * (float)veci.s0);
    x0.s1 = (float)(posx + stepSizeX * (float)veci.s1);
    x0.s2 = (float)(posx + stepSizeX * (float)veci.s2);
    x0.s3 = (float)(posx + stepSizeX * (float)veci.s3);

    /* All four pixels share one row, so every lane gets the same y0. */
    const float rowY = (float)(posy + stepSizeY * (float)j);
    float4 y0 = (float4)(rowY, rowY, rowY, rowY);

    float4 x = x0;
    float4 y = y0;

    uint iter = 0;
    float4 tmp;
    int4 stay;
    int4 ccount = 0;

    stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f;
    stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f;
    stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f;
    stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f;
    float4 savx = x;
    float4 savy = y;

    /* Main loop: 16 iterations per pass while at least one lane is bounded. */
    for (iter = 0; (stay.s0 | stay.s1 | stay.s2 | stay.s3) && (iter < maxIterations); iter += 16)
    {
        x = savx;
        y = savy;

        /* 8 x two iterations, kept explicitly unrolled. */
        MANDELBROT32_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT32_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT32_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT32_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT32_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT32_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT32_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT32_TWO_STEPS(x, y, tmp, x0, y0);

        stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f;
        stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f;
        stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f;
        stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f;

        /* Lanes that escaped during this pass keep their last saved state. */
        savx.s0 = (stay.s0 ? x.s0 : savx.s0);
        savx.s1 = (stay.s1 ? x.s1 : savx.s1);
        savx.s2 = (stay.s2 ? x.s2 : savx.s2);
        savx.s3 = (stay.s3 ? x.s3 : savx.s3);
        savy.s0 = (stay.s0 ? y.s0 : savy.s0);
        savy.s1 = (stay.s1 ? y.s1 : savy.s1);
        savy.s2 = (stay.s2 ? y.s2 : savy.s2);
        savy.s3 = (stay.s3 ? y.s3 : savy.s3);
        ccount += stay * 16;
    }

    /*
     * Handle remainder: replay the pass in which a lane escaped one iteration
     * at a time (at most 16 steps) to find its exact iteration count.
     */
    if (!(stay.s0 & stay.s1 & stay.s2 & stay.s3))
    {
        iter = 16;
        do
        {
            x = savx;
            y = savy;
            stay.s0 = ((x.s0 * x.s0 + y.s0 * y.s0) <= 4.0f) &&
                      (ccount.s0 < maxIterations);
            stay.s1 = ((x.s1 * x.s1 + y.s1 * y.s1) <= 4.0f) &&
                      (ccount.s1 < maxIterations);
            stay.s2 = ((x.s2 * x.s2 + y.s2 * y.s2) <= 4.0f) &&
                      (ccount.s2 < maxIterations);
            stay.s3 = ((x.s3 * x.s3 + y.s3 * y.s3) <= 4.0f) &&
                      (ccount.s3 < maxIterations);
            tmp = x;
            x = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
            y = MUL_ADD(2.0f * tmp, y, y0);
            ccount += stay;
            iter--;
            savx.s0 = (stay.s0 ? x.s0 : savx.s0);
            savx.s1 = (stay.s1 ? x.s1 : savx.s1);
            savx.s2 = (stay.s2 ? x.s2 : savx.s2);
            savx.s3 = (stay.s3 ? x.s3 : savx.s3);
            savy.s0 = (stay.s0 ? y.s0 : savy.s0);
            savy.s1 = (stay.s1 ? y.s1 : savy.s1);
            savy.s2 = (stay.s2 ? y.s2 : savy.s2);
            savy.s3 = (stay.s3 ? y.s3 : savy.s3);
        } while ((stay.s0 | stay.s1 | stay.s2 | stay.s3) && iter);
    }
    x = savx;
    y = savy;

    /* Fractional iteration count: count + 1 - log2(log2(|z|^2)). */
    float4 fc;
    fc.s0 = (float)ccount.s0 + 1 -
            native_log2(native_log2(x.s0 * x.s0 + y.s0 * y.s0));
    fc.s1 = (float)ccount.s1 + 1 -
            native_log2(native_log2(x.s1 * x.s1 + y.s1 * y.s1));
    fc.s2 = (float)ccount.s2 + 1 -
            native_log2(native_log2(x.s2 * x.s2 + y.s2 * y.s2));
    fc.s3 = (float)ccount.s3 + 1 -
            native_log2(native_log2(x.s3 * x.s3 + y.s3 * y.s3));

    mandelbrotImage[4 * tid]     = mandelbrot32_shade(fc.s0, ccount.s0, maxIterations);
    mandelbrotImage[4 * tid + 1] = mandelbrot32_shade(fc.s1, ccount.s1, maxIterations);
    mandelbrotImage[4 * tid + 2] = mandelbrot32_shade(fc.s2, ccount.s2, maxIterations);
    mandelbrotImage[4 * tid + 3] = mandelbrot32_shade(fc.s3, ccount.s3, maxIterations);
}
/*
 * Origin: diff --git a/src/cl/mandelbrot64.cl b/src/cl/mandelbrot64.cl
 *         (new file mode 100644, index 0000000..be38b4a)
 * (The preceding file in this diff, src/cl/mandelbrot32.cl, has no newline
 * at end of file.)
 *
 * Double-precision Mandelbrot kernel. Every work-item renders four
 * horizontally adjacent pixels and writes them as RGBA uchar4 values.
 */
#ifdef KHR_DP_EXTENSION
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#else
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#endif

/*
 * MUL_ADD(a, b, c) has to evaluate a * b + c. It is not defined anywhere in
 * this file, so it is presumably injected by the host through a -D build
 * option (TODO confirm against the host build flags). Fall back to mad() so
 * the kernel still builds when that option is missing.
 */
#ifndef MUL_ADD
#define MUL_ADD(a, b, c) mad((a), (b), (c))
#endif

/*
 * "bench" is read by the colouring code but is neither a kernel argument nor
 * defined in this file. NOTE(review): presumably it was meant to come from a
 * -D build option - confirm. Default to 0 (normal colouring) so the kernel
 * compiles when the host does not provide it.
 */
#ifndef bench
#define bench 0
#endif

/*
 * Advance z -> z^2 + c twice for all four lanes.
 * x/y hold the current point, t is scratch, x0/y0 hold c.
 */
#define MANDELBROT64_TWO_STEPS(x, y, t, x0, y0)                                           \
    do {                                                                                  \
        (t) = MUL_ADD(-(y), (y), MUL_ADD((x), (x), (x0))); /* t = x * x + x0 - y * y */   \
        (y) = MUL_ADD(2.0 * (x), (y), (y0));               /* y = 2.0 * x * y + y0 */     \
        (x) = MUL_ADD(-(y), (y), MUL_ADD((t), (t), (x0))); /* x = t * t + x0 - y * y */   \
        (y) = MUL_ADD(2.0 * (t), (y), (y0));               /* y = 2.0 * t * y + y0 */     \
    } while (0)

/*
 * Turn one pixel's iteration data into an RGBA colour.
 *
 *   smoothed      fractional iteration count fed into the cosine palette
 *   count         whole iterations completed for this pixel
 *   maxIterations iteration cap; pixels that reached it are drawn black
 *
 * When bench is non-zero the four bytes of count are returned instead of a
 * colour (low byte in s0, high byte in s3).
 */
uchar4 mandelbrot64_shade(const double smoothed, const int count, const uint maxIterations)
{
    uchar4 color;

    if (bench)
    {
        color.s0 = count & 0xff;
        color.s1 = (count & 0xff00) >> 8;
        color.s2 = (count & 0xff0000) >> 16;
        color.s3 = (count & 0xff000000) >> 24;
        return color;
    }

    color.s3 = 0xff;

    /*
     * The main loop counts in steps of 16, so count can overshoot a cap that
     * is not a multiple of 16; ">=" keeps those pixels black. Returning early
     * also avoids converting the NaN that smoothed holds for bounded points.
     */
    if ((uint)count >= maxIterations)
    {
        color.s0 = 0;
        color.s1 = 0;
        color.s2 = 0;
        return color;
    }

    const double c = smoothed * 2.0 * 3.1416 / 256.0;
    color.s0 = ((1.0 + native_cos(c)) * 0.5) * 255;
    color.s1 = ((1.0 + native_cos(2.0 * c + 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
    color.s2 = ((1.0 + native_cos(c - 2.0 * 3.1416 / 3.0)) * 0.5) * 255;
    return color;
}

/*
 * Render four adjacent pixels of the Mandelbrot set in double precision.
 *
 *   mandelbrotImage output buffer; this work-item writes elements
 *                   4 * tid .. 4 * tid + 3
 *   posx, posy      coordinate of column 0 / row 0
 *   stepSizeX/Y     coordinate increment per column / per row
 *   maxIterations   iteration cap per pixel
 *   width           image width in pixels; width / 4 work-items form a row
 *                   (assumes width is a non-zero multiple of 4 - TODO confirm
 *                   with the host code)
 */
__kernel void calculate(__global uchar4 * mandelbrotImage, const double posx, const double posy, const double stepSizeX, const double stepSizeY, const uint maxIterations, const uint width)
{
    int tid = get_global_id(0);

    int i = tid % (width / 4);
    int j = tid / (width / 4);

    int4 veci = {4 * i, 4 * i + 1, 4 * i + 2, 4 * i + 3};

    double4 x0;
    x0.s0 = (double)(posx + stepSizeX * (double)veci.s0);
    x0.s1 = (double)(posx + stepSizeX * (double)veci.s1);
    x0.s2 = (double)(posx + stepSizeX * (double)veci.s2);
    x0.s3 = (double)(posx + stepSizeX * (double)veci.s3);

    /* All four pixels share one row, so every lane gets the same y0. */
    const double rowY = (double)(posy + stepSizeY * (double)j);
    double4 y0 = (double4)(rowY, rowY, rowY, rowY);

    double4 x = x0;
    double4 y = y0;

    uint iter = 0;
    double4 tmp;
    int4 stay;
    int4 ccount = 0;

    stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0;
    stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0;
    stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0;
    stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0;
    double4 savx = x;
    double4 savy = y;

    /* Main loop: 16 iterations per pass while at least one lane is bounded. */
    for (iter = 0; (stay.s0 | stay.s1 | stay.s2 | stay.s3) && (iter < maxIterations); iter += 16)
    {
        x = savx;
        y = savy;

        /* 8 x two iterations, kept explicitly unrolled. */
        MANDELBROT64_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT64_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT64_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT64_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT64_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT64_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT64_TWO_STEPS(x, y, tmp, x0, y0);
        MANDELBROT64_TWO_STEPS(x, y, tmp, x0, y0);

        stay.s0 = (x.s0 * x.s0 + y.s0 * y.s0) <= 4.0;
        stay.s1 = (x.s1 * x.s1 + y.s1 * y.s1) <= 4.0;
        stay.s2 = (x.s2 * x.s2 + y.s2 * y.s2) <= 4.0;
        stay.s3 = (x.s3 * x.s3 + y.s3 * y.s3) <= 4.0;

        /* Lanes that escaped during this pass keep their last saved state. */
        savx.s0 = (stay.s0 ? x.s0 : savx.s0);
        savx.s1 = (stay.s1 ? x.s1 : savx.s1);
        savx.s2 = (stay.s2 ? x.s2 : savx.s2);
        savx.s3 = (stay.s3 ? x.s3 : savx.s3);
        savy.s0 = (stay.s0 ? y.s0 : savy.s0);
        savy.s1 = (stay.s1 ? y.s1 : savy.s1);
        savy.s2 = (stay.s2 ? y.s2 : savy.s2);
        savy.s3 = (stay.s3 ? y.s3 : savy.s3);
        ccount += stay * 16;
    }

    /*
     * Handle remainder: replay the pass in which a lane escaped one iteration
     * at a time (at most 16 steps) to find its exact iteration count.
     */
    if (!(stay.s0 & stay.s1 & stay.s2 & stay.s3))
    {
        iter = 16;
        do
        {
            x = savx;
            y = savy;
            stay.s0 = ((x.s0 * x.s0 + y.s0 * y.s0) <= 4.0) &&
                      (ccount.s0 < maxIterations);
            stay.s1 = ((x.s1 * x.s1 + y.s1 * y.s1) <= 4.0) &&
                      (ccount.s1 < maxIterations);
            stay.s2 = ((x.s2 * x.s2 + y.s2 * y.s2) <= 4.0) &&
                      (ccount.s2 < maxIterations);
            stay.s3 = ((x.s3 * x.s3 + y.s3 * y.s3) <= 4.0) &&
                      (ccount.s3 < maxIterations);
            tmp = x;
            x = MUL_ADD(-y, y, MUL_ADD(x, x, x0));
            y = MUL_ADD(2.0 * tmp, y, y0); /* y = 2.0 * tmp * y + y0 */
            ccount += stay;
            iter--;
            savx.s0 = (stay.s0 ? x.s0 : savx.s0);
            savx.s1 = (stay.s1 ? x.s1 : savx.s1);
            savx.s2 = (stay.s2 ? x.s2 : savx.s2);
            savx.s3 = (stay.s3 ? x.s3 : savx.s3);
            savy.s0 = (stay.s0 ? y.s0 : savy.s0);
            savy.s1 = (stay.s1 ? y.s1 : savy.s1);
            savy.s2 = (stay.s2 ? y.s2 : savy.s2);
            savy.s3 = (stay.s3 ? y.s3 : savy.s3);
        } while ((stay.s0 | stay.s1 | stay.s2 | stay.s3) && iter);
    }
    x = savx;
    y = savy;

    /* Fractional iteration count: count + 1 - log2(log2(|z|^2)). */
    double4 fc;
    fc.s0 = (double)ccount.s0 + 1 -
            native_log2(native_log2(x.s0 * x.s0 + y.s0 * y.s0));
    fc.s1 = (double)ccount.s1 + 1 -
            native_log2(native_log2(x.s1 * x.s1 + y.s1 * y.s1));
    fc.s2 = (double)ccount.s2 + 1 -
            native_log2(native_log2(x.s2 * x.s2 + y.s2 * y.s2));
    fc.s3 = (double)ccount.s3 + 1 -
            native_log2(native_log2(x.s3 * x.s3 + y.s3 * y.s3));

    mandelbrotImage[4 * tid]     = mandelbrot64_shade(fc.s0, ccount.s0, maxIterations);
    mandelbrotImage[4 * tid + 1] = mandelbrot64_shade(fc.s1, ccount.s1, maxIterations);
    mandelbrotImage[4 * tid + 2] = mandelbrot64_shade(fc.s2, ccount.s2, maxIterations);
    mandelbrotImage[4 * tid + 3] = mandelbrot64_shade(fc.s3, ccount.s3, maxIterations);
}
\ No newline at end of file @@ -8,6 +8,8 @@ #ifndef DEFS_H_ #define DEFS_H_ +#include <stdio.h> + #ifndef NULL #define NULL 0 #endif @@ -19,6 +21,10 @@ #define FALSE 0 #endif +// Rendering modes, u8 +#define MODE_CPU 0 +#define MODE_OPENCL 1 + typedef unsigned char u8; typedef signed char s8; @@ -31,6 +37,8 @@ typedef signed long int s32; typedef unsigned long long u64; typedef signed long long s64; +typedef long double d64; + //typedef u8 bool; #endif /* DEFS_H_ */ diff --git a/src/mandelbrot-zoom.c b/src/mandelbrot-zoom.c index 9410734..72b54ba 100644 --- a/src/mandelbrot-zoom.c +++ b/src/mandelbrot-zoom.c @@ -17,6 +17,7 @@ int main(int argc, char **argv) ui_settings.settings = GTK_WINDOW(gtk_builder_get_object(builder, "settings")); //gtk_builder_connect_signals(builder, NULL); + ui_settings.setCombo = GTK_COMBO_BOX_TEXT(gtk_builder_get_object(builder, "setCombo")); ui_settings.iterationsSp = GTK_SPIN_BUTTON(gtk_builder_get_object(builder, "iterationsSp")); ui_settings.threadsSp = GTK_SPIN_BUTTON(gtk_builder_get_object(builder, "threadsSp")); ui_settings.colorFromBtn = GTK_BUTTON(gtk_builder_get_object(builder, "colorFromBtn")); @@ -24,6 +25,10 @@ int main(int argc, char **argv) ui_settings.zoomToXEntry = GTK_ENTRY(gtk_builder_get_object(builder, "zoomToXEntry")); ui_settings.zoomToYEntry = GTK_ENTRY(gtk_builder_get_object(builder, "zoomToYEntry")); ui_settings.speedEntry = GTK_ENTRY(gtk_builder_get_object(builder, "speedEntry")); + ui_settings.modeCPURd = GTK_RADIO_BUTTON(gtk_builder_get_object(builder, "modeCPURd")); + ui_settings.modeGPURd = GTK_RADIO_BUTTON(gtk_builder_get_object(builder, "modeGPURd")); + ui_settings.fpuCombo = GTK_COMBO_BOX_TEXT(gtk_builder_get_object(builder, "fpuCombo")); + ui_settings.fmaCb = GTK_COMBO_BOX_TEXT(gtk_builder_get_object(builder, "fmaCb")); ui_settings.exportCb = GTK_CHECK_BUTTON(gtk_builder_get_object(builder, "exportCb")); ui_settings.gifRd = GTK_RADIO_BUTTON(gtk_builder_get_object(builder, "gifRd")); ui_settings.widthSp = 
GTK_SPIN_BUTTON(gtk_builder_get_object(builder, "widthSp")); @@ -60,17 +65,15 @@ int main(int argc, char **argv) gtk_spin_button_set_range(ui_settings.fpsVideoSp, 1, 60); gtk_spin_button_set_range(ui_settings.bitrateSp, 1, 65535); - g_signal_connect(ui_settings.iterationsSp, "value-changed", G_CALLBACK(on_iterationsSp_valueChanged), NULL); - g_signal_connect(ui_settings.threadsSp, "value-changed", G_CALLBACK(on_threadsSp_valueChanged), NULL); + gtk_combo_box_text_append(ui_settings.setCombo, NULL, "Mandelbrot"); + gtk_combo_box_text_append(ui_settings.setCombo, NULL, "Julia"); + + gtk_combo_box_text_append(ui_settings.fpuCombo, NULL, "32 bit"); + gtk_combo_box_text_append(ui_settings.fpuCombo, NULL, "64 bit"); + gtk_combo_box_text_append(ui_settings.fpuCombo, NULL, "128 bit"); + g_signal_connect(ui_settings.colorFromBtn, "clicked", G_CALLBACK(on_colorFromBtn_clicked), NULL); g_signal_connect(ui_settings.colorToBtn, "clicked", G_CALLBACK(on_colorToBtn_clicked), NULL); - g_signal_connect(ui_settings.exportCb, "toggled", G_CALLBACK(on_exportCb_toggled), NULL); - g_signal_connect(ui_settings.widthSp, "value-changed", G_CALLBACK(on_widthSp_valueChanged), NULL); - g_signal_connect(ui_settings.heightSp, "value-changed", G_CALLBACK(on_heightSp_valueChanged), NULL); - g_signal_connect(ui_settings.fpsRenderSp, "value-changed", G_CALLBACK(on_fpsRenderSp_valueChanged), NULL); - g_signal_connect(ui_settings.fpsVideoSp, "value-changed", G_CALLBACK(on_fpsVideoSp_valueChanged), NULL); - g_signal_connect(ui_settings.bitrateSp, "value-changed", G_CALLBACK(on_bitrateSp_valueChanged), NULL); - g_signal_connect(ui_settings.exportTf, "changed", G_CALLBACK(on_exportTf_changed), NULL); g_signal_connect(ui_settings.startBtn, "clicked", G_CALLBACK(on_startBtn_clicked), NULL); g_signal_connect(ui_settings.exitBtn, "clicked", G_CALLBACK(on_exitBtn_clicked), NULL); @@ -80,16 +83,6 @@ int main(int argc, char **argv) return 0; } -void on_iterationsSp_valueChanged() -{ - config.iterations = 
gtk_spin_button_get_value(ui_settings.iterationsSp); -} - -void on_threadsSp_valueChanged() -{ - config.threads = gtk_spin_button_get_value(ui_settings.threadsSp); -} - void on_colorFromBtn_clicked() { currentColor = COLOR_FROM; @@ -102,53 +95,74 @@ void on_colorToBtn_clicked() gtk_dialog_run(ui_settings.colorDialog); } -void on_exportCb_toggled() +void on_startBtn_clicked() { - config.video = gtk_toggle_button_get_active(ui_settings.exportCb); -} + config.config_cpu.iterations = gtk_spin_button_get_value(ui_settings.iterationsSp); + config.config_opencl.iterations = gtk_spin_button_get_value(ui_settings.iterationsSp); + + config.config_cpu.threads = gtk_spin_button_get_value(ui_settings.threadsSp); + + config.config_opencl.fma = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(ui_settings.fmaCb)); + + config.video = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(ui_settings.exportCb)); -void on_exportTf_changed() -{ config.path = CHAR_PTR(gtk_entry_get_text(ui_settings.exportTf)); -} -void on_widthSp_valueChanged() -{ config.width = gtk_spin_button_get_value(ui_settings.widthSp); -} + config.config_cpu.width = gtk_spin_button_get_value(ui_settings.widthSp); + config.config_opencl.width = gtk_spin_button_get_value(ui_settings.widthSp); -void on_heightSp_valueChanged() -{ config.height = gtk_spin_button_get_value(ui_settings.heightSp); -} + config.config_cpu.height = gtk_spin_button_get_value(ui_settings.heightSp); + config.config_opencl.height = gtk_spin_button_get_value(ui_settings.heightSp); -void on_fpsRenderSp_valueChanged() -{ - config.renderFPS = gtk_spin_button_get_value(ui_settings.fpsRenderSp); -} + config.config_cpu.renderFPS = gtk_spin_button_get_value(ui_settings.fpsRenderSp); + config.config_opencl.renderFPS = gtk_spin_button_get_value(ui_settings.fpsRenderSp); -void on_fpsVideoSp_valueChanged() -{ config.videoFPS = gtk_spin_button_get_value(ui_settings.fpsVideoSp); -} -void on_bitrateSp_valueChanged() -{ config.bitrate = 
gtk_spin_button_get_value(ui_settings.bitrateSp); -} -void on_startBtn_clicked() -{ double x, y, speed; sscanf(gtk_entry_get_text(ui_settings.zoomToXEntry), "%lf", &x); sscanf(gtk_entry_get_text(ui_settings.zoomToYEntry), "%lf", &y); sscanf(gtk_entry_get_text(ui_settings.speedEntry), "%lf", &speed); - config.to_x = x; - config.to_y = y; - config.speed = speed; - printf("config {\n\t.iterations = %u\n\t.tox = %f\n\t.toy = %f\n\t.video = %u\n\t.filetype = %u\n\t.width = %u\n\t.height = %u\n\t.renderFPS = %u\n\t.videoFPS = %u\n\t.bitrate = %u\n\t.path = %s\n}\n", config.iterations, x, y, config.video, 0, config.width, config.height, config.renderFPS, config.videoFPS, config.bitrate, config.path); - render_init(&config, mandelbrot_r); - render_show(); + config.config_cpu.to_x = x; + config.config_cpu.to_y = y; + config.config_cpu.speed = speed; + config.config_opencl.to_x = x; + config.config_opencl.to_y = y; + config.config_opencl.speed = speed; + + config.mode = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(ui_settings.modeGPURd)); + //config.mode = MODE_CPU; + + if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.setCombo), "Mandelbrot") == 0) + { + config.config_cpu.set_func = mandelbrot_r; + config.config_opencl.set_func = SFUNC_MANDELBROT; + } + else if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.setCombo), "Julia") == 0) + { + config.config_cpu.set_func = julia; + config.config_opencl.set_func = SFUNC_JULIA; + } + + if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.fpuCombo), "32 bit") == 0) + { + config.config_opencl.fpu = OPENCL_FPU_32; + } + else if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.fpuCombo), "64 bit") == 0) + { + config.config_opencl.fpu = OPENCL_FPU_64; + } + else if (strcmp(gtk_combo_box_text_get_active_text(ui_settings.fpuCombo), "128 bit") == 0) + { + config.config_opencl.fpu = OPENCL_FPU_128; + } + + init_render(&config); + show_render(); } void on_exitBtn_clicked() diff --git a/src/mandelbrot-zoom.h 
b/src/mandelbrot-zoom.h index a57deba..8309885 100644 --- a/src/mandelbrot-zoom.h +++ b/src/mandelbrot-zoom.h @@ -16,6 +16,7 @@ typedef struct Ui_settings { GtkWindow *settings; + GtkComboBoxText *setCombo; GtkSpinButton *iterationsSp; GtkSpinButton *threadsSp; GtkButton *colorFromBtn; @@ -23,6 +24,10 @@ typedef struct Ui_settings { GtkEntry *zoomToXEntry; GtkEntry *zoomToYEntry; GtkEntry *speedEntry; + GtkRadioButton *modeCPURd; + GtkRadioButton *modeGPURd; + GtkComboBoxText *fpuCombo; + GtkCheckButton *fmaCb; GtkCheckButton *exportCb; GtkRadioButton *gifRd; GtkSpinButton *widthSp; diff --git a/src/render.c b/src/render.c index 55d514b..191dcd9 100644 --- a/src/render.c +++ b/src/render.c @@ -9,121 +9,60 @@ #define HAVE_STRUCT_TIMESPEC #include <pthread.h> -void render_init(Config *config, u32 (*sfunc) (long double, long double, u32)) +void init_render(Config *config) { - delta = glutGet(GLUT_ELAPSED_TIME); - x_min_s = -2.0; - x_max_s = 1.0; - y_min_s= -1.0; - y_max_s = 1.0; - x_min = x_min_s; - x_max = x_max_s; - y_min = y_min_s; - y_max = y_max_s; _config = config; - _sfunc = sfunc; s_arr = (u32 *) malloc((_config->width) * (_config->height) * sizeof(u32)); - calculate(x_min, y_min, x_max, y_max, _sfunc, s_arr); + _config->config_cpu.arr = s_arr; + _config->config_opencl.arr = s_arr; //glutInit(0, NULL); glutInitWindowPosition(0, 0); glutInitWindowSize(_config->width, _config->height); glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE | GLUT_DEPTH); glutCreateWindow("Renderer"); - glutDisplayFunc(gl_render); - glutIdleFunc(gl_idle); glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D, tex); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, _config->width, _config->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, s_arr); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, _config->width, _config->height, 0, + GL_RGBA, GL_UNSIGNED_BYTE, s_arr); 
glBindTexture(GL_TEXTURE_2D, 0); //glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, _config->width, _config->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, s_arr); glMatrixMode(GL_PROJECTION); glOrtho(0, _config->width, 0, _config->height, -1, 1); glMatrixMode(GL_MODELVIEW); - glutMainLoop(); -} - -void render_show() -{ -} + _config->config_cpu.tex = tex; + _config->config_opencl.tex = tex; + _config->config_cpu.zoom_func = zoom_func; + _config->config_opencl.zoom_func = zoom_func; -void gl_render(void) -{ - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - glBindTexture(GL_TEXTURE_2D, tex); - glEnable(GL_TEXTURE_2D); - glBegin(GL_QUADS); - glTexCoord2i(0, 0); glVertex2i(0, 0); - glTexCoord2i(0, 1); glVertex2i(0, _config->height); - glTexCoord2i(1, 1); glVertex2i(_config->width, _config->height); - glTexCoord2i(1, 0); glVertex2i(_config->width, 0); - glEnd(); - glDisable(GL_TEXTURE_2D); - glBindTexture(GL_TEXTURE_2D, 0); + switch(_config->mode) + { + case MODE_CPU: + init_cpu(&_config->config_cpu); + glutDisplayFunc(render_cpu); + glutIdleFunc(idle_cpu); + break; + case MODE_OPENCL: + init_opencl(&_config->config_opencl); + glutDisplayFunc(render_opencl); + glutIdleFunc(idle_opencl); + break; + } -// glBegin(GL_TRIANGLES); -// glVertex3f(-0.5,-0.5,0.0); -// glVertex3f(0.5,0.0,0.0); -// glVertex3f(0.0,0.5,0.0); -// glEnd(); + glutMainLoop(); - glutSwapBuffers(); } -void calculate(long double x_min, long double y_min, long double x_max, long double y_max, u32 (*sfunc) (long double, long double, u32), u32 *arr) +void show_render() { - pthread_t thread; - ThreadArgs *args = (ThreadArgs *) malloc(_config->threads * sizeof(ThreadArgs)); - for(u8 i = 0; i < _config->threads; i++) - { - args[i] = (ThreadArgs) { .tc = _config->threads, .tid = i, .x_min = x_min, .y_min = y_min, .x_max = x_max, .y_max = y_max, .sfunc = sfunc, .arr = arr }; - pthread_create(&thread, NULL, calculate_t, (void *)&args[i]); - } - pthread_join(thread, NULL); -} -void calculate_t(void *args) -{ - ThreadArgs 
*_args = (ThreadArgs *)args; - long double x_math, y_math; - u32 iterations; - for (u32 y = (_config->height/_args->tc)*(_args->tid); y < _config->height; y++) - { - for (u32 x = 0; x < _config->width; x++) - { - x_math = _args->x_min + ((long double) x * (_args->x_max - _args->x_min)) / _config->width; - y_math = _args->y_min + ((long double) (_config->height - y) * (_args->y_max - _args->y_min)) / _config->height; - iterations = _args->sfunc(x_math, y_math, _config->iterations); - _args->arr[COORDS(x, y, _config->width)] = (((1<<24)-1)*iterations)/_config->iterations; - } - } } -long double zoom_func(long double ft, long double s) +d64 zoom_func(d64 ft, d64 s) { return (s - expl(-ft)); } - -void gl_idle(void) -{ - calculate(x_min, y_min, x_max, y_max, _sfunc, s_arr); - //glGenTextures(1, &tex); - glBindTexture(GL_TEXTURE_2D, tex); - //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, _config->width, _config->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, s_arr); - glBindTexture(GL_TEXTURE_2D, 0); - int t = glutGet(GLUT_ELAPSED_TIME); - dt = (t - delta) / 1000.0; - delta = t; - ft+=(_config->speed*dt); - x_min = x_min_s + zoom_func(ft, (long double)2.0 + _config->to_x); - y_min = y_min_s + zoom_func(ft, (long double)1.0 + _config->to_y); - x_max = x_max_s - zoom_func(ft, (long double)1.0 - _config->to_x); - y_max = y_max_s - zoom_func(ft, (long double)1.0 - _config->to_y); - glutPostRedisplay(); -} diff --git a/src/render.h b/src/render.h index dde79be..a50027a 100644 --- a/src/render.h +++ b/src/render.h @@ -8,60 +8,32 @@ #ifndef RENDER_H_ #define RENDER_H_ -#define COORDS(x, y, width) ((y)*(width)+(x)) - #include "defs.h" -#include <stdlib.h> -#include <GL/glut.h> -#include <pthread.h> +#include "render_opencl.h" +#include "render_cpu.h" #include <math.h> typedef struct config { - u32 iterations; - u8 threads; - u32 colorFrom; - u32 colorTo; - 
long double to_x; - long double to_y; - long double speed; + CpuConfig config_cpu; + OpenCLConfig config_opencl; + u8 mode; u8 video; u8 filetype; u16 width; u16 height; - u8 renderFPS; u8 videoFPS; u32 bitrate; const char *path; // TODO: key mapping als option in die struct } Config; -typedef struct t_args { - u8 tc; - u8 tid; - long double x_min; - long double x_max; - long double y_min; - long double y_max; - u32 (*sfunc) (long double, long double, u32); - u32 *arr; -} ThreadArgs; - Config *_config; -u32 (*_sfunc) (long double, long double, u32); u32 *s_arr; GLuint tex; -u32 rendercnt; -long double x_min, x_max, y_min, y_max; -long double x_min_s, x_max_s, y_min_s, y_max_s; -int delta; -long double dt, ft; -void render_init(Config *config, u32 (*sfunc) (long double, long double, u32)); -void render_show(); -void gl_render(void); -void gl_idle(void); +d64 zoom_func(d64 ft, d64 s); -void calculate(long double x_min, long double y_min, long double x_max, long double y_max, u32 (*sfunc) (long double, long double, u32), u32 *arr); -void calculate_t(void *args); +void init_render(Config *config); +void show_render(); #endif /* RENDER_H_ */ diff --git a/src/render_cpu.c b/src/render_cpu.c new file mode 100644 index 0000000..5065d4d --- /dev/null +++ b/src/render_cpu.c @@ -0,0 +1,99 @@ +/* + * render.c + * + * Created on: 15.01.2018 + * Author: Superleo1810 + */ + +#include "render_cpu.h" +#define HAVE_STRUCT_TIMESPEC + +void init_cpu(CpuConfig *config) +{ + config_cpu = config; + x_min_s = -2.0; + x_max_s = 1.0; + y_min_s = -1.0; + y_max_s = 1.0; + x_min = x_min_s; + x_max = x_max_s; + y_min = y_min_s; + y_max = y_max_s; + + calculate(x_min, y_min, x_max, y_max, config_cpu->set_func, config_cpu->arr); +} + +void render_cpu(void) +{ + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glBindTexture(GL_TEXTURE_2D, config_cpu->tex); + glEnable(GL_TEXTURE_2D); + + glBegin(GL_QUADS); + glTexCoord2i(0, 0); glVertex2i(0, 0); + glTexCoord2i(0, 1); glVertex2i(0, 
config_cpu->height); + glTexCoord2i(1, 1); glVertex2i(config_cpu->width, config_cpu->height); + glTexCoord2i(1, 0); glVertex2i(config_cpu->width, 0); + glEnd(); + + glDisable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, 0); + glutSwapBuffers(); +} + +void calculate(d64 x_min, d64 y_min, d64 x_max, d64 y_max, u32 (*sfunc) (d64, d64, u32), u32 *arr) +{ + pthread_t thread; + ThreadArgs *args = (ThreadArgs *) malloc(config_cpu->threads * sizeof(ThreadArgs)); + for(u8 i = 0; i < config_cpu->threads; i++) + { + args[i] = (ThreadArgs) { .tc = config_cpu->threads, .tid = i, .x_min = x_min, .y_min = y_min, .x_max = x_max, .y_max = y_max, .sfunc = sfunc, .arr = arr }; + pthread_create(&thread, NULL, calculate_t, (void *)&args[i]); + } + pthread_join(thread, NULL); + free(args); +} + +void calculate_t(void *args) +{ + ThreadArgs *_args = (ThreadArgs *)args; + d64 x_math, y_math; + u32 iterations; + for (u32 y = (config_cpu->height/_args->tc)*(_args->tid); y < config_cpu->height; y++) + { + for (u32 x = 0; x < config_cpu->width; x++) + { + x_math = _args->x_min + ((d64) x * (_args->x_max - _args->x_min)) / config_cpu->width; + y_math = _args->y_min + ((d64) (config_cpu->height - y) * (_args->y_max - _args->y_min)) / config_cpu->height; + iterations = _args->sfunc(x_math, y_math, config_cpu->iterations); + _args->arr[COORDS(x, y, config_cpu->width)] = (((1<<24)-1)*iterations)/config_cpu->iterations; + } + } +} + +void idle_cpu(void) +{ + static int t_old; + int t = 0, delta = 0; + do + { + t = glutGet(GLUT_ELAPSED_TIME); + delta = t - t_old; + } + while(delta < 16); // TODO: Hardcoded FPS + t_old = t; + + calculate(x_min, y_min, x_max, y_max, config_cpu->set_func, config_cpu->arr); + //glGenTextures(1, &tex); + glBindTexture(GL_TEXTURE_2D, config_cpu->tex); + //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, config_cpu->width, config_cpu->height, 
0, GL_RGBA, GL_UNSIGNED_BYTE, config_cpu->arr); + glBindTexture(GL_TEXTURE_2D, 0); + ft+=(config_cpu->speed*(delta/1000.0)); + x_min = x_min_s + config_cpu->zoom_func(ft, (d64)2.0 + config_cpu->to_x); + y_min = y_min_s + config_cpu->zoom_func(ft, (d64)1.0 + config_cpu->to_y); + x_max = x_max_s - config_cpu->zoom_func(ft, (d64)1.0 - config_cpu->to_x); + y_max = y_max_s - config_cpu->zoom_func(ft, (d64)1.0 - config_cpu->to_y); + glutPostRedisplay(); +} diff --git a/src/render_cpu.h b/src/render_cpu.h new file mode 100644 index 0000000..7abbdc4 --- /dev/null +++ b/src/render_cpu.h @@ -0,0 +1,61 @@ +/* + * render_cpu.h + * + * Created on: 25.01.2018 + * Author: Superleo1810 + */ + +#ifndef RENDER_CPU_H_ +#define RENDER_CPU_H_ + +#include "defs.h" +#include <stdlib.h> +#include <GL/glut.h> +#include <pthread.h> +#include <time.h> +#include <math.h> + +#define COORDS(x, y, width) ((y)*(width)+(x)) + +typedef struct config_cpu { + u8 threads; + GLuint tex; + u32 *arr; + d64 (*zoom_func)(d64, d64); + u32 (*set_func)(d64, d64, u32); + u32 iterations; + u32 colorFrom; + u32 colorTo; + d64 to_x; + d64 to_y; + d64 speed; + u16 width; + u16 height; + u8 renderFPS; +} CpuConfig; + +typedef struct t_args { + u8 tc; + u8 tid; + d64 x_min; + d64 x_max; + d64 y_min; + d64 y_max; + u32 (*sfunc) (d64, d64, u32); + u32 *arr; +} ThreadArgs; + +CpuConfig *config_cpu; +u32 rendercnt; +float ft; + +d64 x_min, x_max, y_min, y_max; +d64 x_min_s, x_max_s, y_min_s, y_max_s; + +void init_cpu(CpuConfig *config); +void render_cpu(void); +void idle_cpu(void); +void calculate(d64 x_min, d64 y_min, d64 x_max, d64 y_max, u32 (*sfunc) (d64, d64, u32), u32 *arr); +void calculate_t(void *args); + +#endif /* RENDER_CPU_H_ */ diff --git a/src/render_opencl.c b/src/render_opencl.c new file mode 100644 index 0000000..9d650c2 --- /dev/null +++ b/src/render_opencl.c @@ -0,0 +1,297 @@ +/* + * render_opencl.c + * + * Created on: 26.01.2018 + * Author: Superleo1810 + */ + +#include "render_opencl.h" + +void 
init_opencl(OpenCLConfig *config) +{ + printf("cl init\n"); + x_min_s = -2.0; + x_max_s = 1.0; + y_min_s = -1.0; + y_max_s = 1.0; + x_min = x_min_s; + x_max = x_max_s; + y_min = y_min_s; + y_max = y_max_s; + + config_opencl = config; + output = (cl_uint *) malloc((config_opencl->width) * (config_opencl->height) * sizeof(cl_uchar4)); + context = NULL; + + cl_platform_id platform_id; + cl_uint ret_num_devices; + cl_uint ret_num_platforms; + size_t device_list_size; + cl_program program; + size_t cl_src_sz; + + clGetPlatformIDs(1, &platform_id, &ret_num_platforms); + clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); + context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); + ret = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_list_size); + devices = (cl_device_id *)malloc(device_list_size); + num_devices = (cl_uint)(device_list_size/sizeof(cl_device_id)); + clGetContextInfo(context, CL_CONTEXT_DEVICES, device_list_size, devices, NULL); + + printf("lists done\n"); + + FILE *fp; + char *cl_src, *path, *flags = (char *)malloc(200 * sizeof(char)); + flags[0] = '\0'; + switch(config_opencl->fpu) + { + case OPENCL_FPU_32: + switch(config_opencl->set_func) + { + case SFUNC_JULIA: + path = "cl/julia32.cl"; + break; + case SFUNC_MANDELBROT: + default: + path = "cl/mandelbrot32.cl"; + break; + } + break; + case OPENCL_FPU_64: + switch(config_opencl->set_func) + { + case SFUNC_JULIA: + path = "cl/julia64.cl"; + break; + case SFUNC_MANDELBROT: + default: + path = "cl/mandelbrot64.cl"; + break; + } + int khrFP64 = 0; + int amdFP64 = 0; + for (cl_uint i = 0; i < num_devices; i++) + { + char deviceExtensions[8192]; + ret = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, + sizeof(deviceExtensions), deviceExtensions, 0); + if (strstr(deviceExtensions, "cl_khr_fp64")) + { + khrFP64++; + } + else + { + if (strstr(deviceExtensions, "cl_amd_fp64")) + { + amdFP64++; + } + } + } + if (khrFP64 == num_devices) + { + flags = 
strcat(flags, "-D KHR_DP_EXTENSION "); + } + else if (amdFP64 == num_devices) + { + flags = strcat(flags, ""); + } + break; + case OPENCL_FPU_128: + printf("128 bit precision not implemented yet\n"); + break; + } + + cl_src = (char *)malloc(MAX_SOURCE_SIZE * sizeof(char)); + fp = fopen(path, "r"); + cl_src_sz = fread(cl_src, 1, MAX_SOURCE_SIZE, fp); + fclose(fp); + + printf("reading done\n"); + + for (cl_uint i = 0; i < num_devices; i++) + { + cl_command_queue_properties prop = 0; + // if (sampleArgs->timing) + // { + // prop |= CL_QUEUE_PROFILING_ENABLE; + // } + commandQueue[i] = clCreateCommandQueue(context, devices[i], prop, &ret); + outputBuffer[i] = clCreateBuffer(context, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + (sizeof(cl_uint) * (config_opencl->width) + * (config_opencl->height)) / num_devices, NULL, &ret); + } + printf("comand puffer\n"); + if (config_opencl->fma) + { + flags = strcat(flags, "-D MUL_ADD=fma "); + } + else + { + flags = strcat(flags, "-D MUL_ADD=mad "); + } + printf("flags concat\n"); + program = clCreateProgramWithSource(context, 1, (const char **)&cl_src, (const size_t *)&cl_src_sz, &ret); + ret = clBuildProgram(program, num_devices, devices, flags, NULL, NULL); + for (cl_uint i = 0; i < num_devices; i++) + { + kernel_vector[i] = clCreateKernel(program, "calculate", &ret); + } + + printf("cl init done\n"); + +} + +void render_opencl(void) +{ + cl_event events[MAX_DEVICES]; + cl_int eventStatus = CL_QUEUED; + size_t globalThreads[1]; + size_t localThreads[1]; + size_t kernelWorkGroupSize; + cl_kernel kernel; + + globalThreads[0] = ((config_opencl->width) * (config_opencl->height)) + / num_devices; + localThreads[0] = 256; + + globalThreads[0] >>= 2; + + for (cl_uint i = 0; i < num_devices; i++) + { + kernel = kernel_vector[i]; + ret = clGetKernelWorkGroupInfo(kernel, devices[i], + CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, + 0); + if ((cl_uint) (localThreads[0]) > kernelWorkGroupSize) + { + localThreads[0] = 
kernelWorkGroupSize; + } + + xpos = 0.0; + ypos = 0.0; + xsize = 0.1; + ysize = 0.1; + + xstep = (xsize / (double) config_opencl->width); + ystep = (ysize / (double) config_opencl->height); + leftx = (xpos - xsize / 2.0); + topy = + (ypos + ysize / 2.0 + - ((double) i * ysize) / (double) num_devices); + + if (i == 0) + { + topy0 = topy; + } + + printf("xsize: %f, ysize: %f, xstep: %f, ystep: %f, leftx: %f, topy: %f\n", xsize, ysize, xstep, ystep, leftx, topy); + + ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &outputBuffer[i]); + + cl_float leftxF = (float) leftx; + cl_float topyF = (float) topy; + cl_float xstepF = (float) xstep; + cl_float ystepF = (float) ystep; + + switch (config_opencl->fpu) + { + case OPENCL_FPU_32: // lel + ret = clSetKernelArg(kernel, 1, sizeof(cl_float), (void *) &leftxF); + ret = clSetKernelArg(kernel, 2, sizeof(cl_float), (void *) &topyF); + ret = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *) &xstepF); + ret = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *) &ystepF); + break; + case OPENCL_FPU_64: + ret = clSetKernelArg(kernel, 1, sizeof(cl_double), (void *) &leftx); + ret = clSetKernelArg(kernel, 2, sizeof(cl_double), (void *) &topy); + ret = clSetKernelArg(kernel, 3, sizeof(cl_double), (void *) &xstep); + ret = clSetKernelArg(kernel, 4, sizeof(cl_double), (void *) &ystep); + break; + case OPENCL_FPU_128: + break; + } + + ret = clSetKernelArg(kernel, 5, sizeof(cl_uint), + (void *) &config_opencl->iterations); + ret = clSetKernelArg(kernel, 6, sizeof(cl_int), + (void *) &config_opencl->width); + ret = clEnqueueNDRangeKernel(commandQueue[i], kernel, 1, NULL, + globalThreads, localThreads, 0, NULL, &events[i]); + + + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clFlush(commandQueue[i]); + } + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clWaitForEvents(1, &events[num_devices - i - 1]); + ret = clReleaseEvent(events[num_devices - i - 1]); + } + + for (cl_uint i = 0; i < num_devices; i++) + { + ret = 
clEnqueueReadBuffer(commandQueue[i], outputBuffer[i], + CL_FALSE, 0, + ((config_opencl->width) * (config_opencl->height) + * sizeof(u32)) / num_devices, + config_opencl->arr + + (((config_opencl->width) * (config_opencl->height) * i) + / num_devices), 0, + NULL, &events[i]); + } + + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clFlush(commandQueue[i]); + } + + for (cl_uint i = 0; i < num_devices; i++) + { + ret = clWaitForEvents(1, &events[num_devices - i - 1]); + ret = clReleaseEvent(events[num_devices - i - 1]); + } + } + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glBindTexture(GL_TEXTURE_2D, config_opencl->tex); + glEnable(GL_TEXTURE_2D); + + glBegin(GL_QUADS); + glTexCoord2i(0, 0); glVertex2i(0, 0); + glTexCoord2i(0, 1); glVertex2i(0, config_opencl->height); + glTexCoord2i(1, 1); glVertex2i(config_opencl->width, config_opencl->height); + glTexCoord2i(1, 0); glVertex2i(config_opencl->width, 0); + glEnd(); + + glDisable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, 0); + glutSwapBuffers(); +} + +void idle_opencl(void) +{ + static int t_old; + int t = 0, delta = 0; + do + { + t = glutGet(GLUT_ELAPSED_TIME); + delta = t - t_old; + } while (delta < 16); // TODO: Hardcoded FPS + t_old = t; + + //glGenTextures(1, &tex); + glBindTexture(GL_TEXTURE_2D, config_opencl->tex); + //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, config_opencl->width, + config_opencl->height, 0, GL_RGBA, GL_UNSIGNED_BYTE, config_opencl->arr); + glBindTexture(GL_TEXTURE_2D, 0); + cl_ft += (config_opencl->speed * (delta / 1000.0)); + x_min = x_min_s + config_opencl->zoom_func(cl_ft, (d64) 2.0 + config_opencl->to_x); + y_min = y_min_s + config_opencl->zoom_func(cl_ft, (d64) 1.0 + config_opencl->to_y); + x_max = x_max_s - config_opencl->zoom_func(cl_ft, (d64) 1.0 - config_opencl->to_x); + y_max = y_max_s - config_opencl->zoom_func(cl_ft, (d64) 
1.0 - config_opencl->to_y); + glutPostRedisplay(); +} diff --git a/src/render_opencl.h b/src/render_opencl.h new file mode 100644 index 0000000..b995c4d --- /dev/null +++ b/src/render_opencl.h @@ -0,0 +1,75 @@ +/* + * render_opencl.h + * + * Created on: 25.01.2018 + * Author: Superleo1810 + */ + +#ifndef RENDER_OPENCL_H_ +#define RENDER_OPENCL_H_ + +#include "defs.h" +#include <stdlib.h> +#include <GL/glut.h> +#include <CL/cl.h> +#include "sets.h" +#include <math.h> + +#define OPENCL_FPU_32 0 +#define OPENCL_FPU_64 1 +#define OPENCL_FPU_128 2 + +#define MAX_SOURCE_SIZE 0xFFFF // 64 KiB +#define MAX_DEVICES 4 + +typedef struct config_opencl { + u8 fpu; + u8 fma; + GLuint tex; + d64 (*zoom_func)(d64, d64); + u8 set_func; // id, not pointer! + u32 *arr; + u32 iterations; + u32 colorFrom; + u32 colorTo; + d64 to_x; + d64 to_y; + d64 speed; + u16 width; + u16 height; + u8 renderFPS; +} OpenCLConfig; + +OpenCLConfig *config_opencl; + +d64 x_min, x_max, y_min, y_max; +d64 x_min_s, x_max_s, y_min_s, y_max_s; + +float cl_ft; +cl_uint *output; +cl_device_id device_id; +cl_context context; +cl_int ret; +cl_kernel kernel_vector[MAX_DEVICES]; +cl_uint num_devices; +cl_device_id *devices; +cl_command_queue commandQueue[MAX_DEVICES]; +cl_mem outputBuffer[MAX_DEVICES]; + +double xpos; +double ypos; +double xsize; +double ysize; +double xstep; +double ystep; +double leftx; +double topy; +double topy0; + +d64 zoom_func(d64 ft, d64 s); + +void init_opencl(OpenCLConfig *config); +void render_opencl(void); +void idle_opencl(void); + +#endif /* RENDER_OPENCL_H_ */ @@ -7,9 +7,9 @@ #include "sets.h" -u32 mandelbrot_s(long double x, long double y, u32 iterations) +u32 mandelbrot_s(d64 x, d64 y, u32 iterations) { - long double cx = x, cy = y, x2; + d64 cx = x, cy = y, x2; u32 m = 0; while(m <= iterations && (x*x)+(y*y) <= 4) { @@ -21,16 +21,16 @@ u32 mandelbrot_s(long double x, long double y, u32 iterations) return m; } -u32 mandelbrot_r(long double x, long double y, u32 iterations) +u32 
mandelbrot_r(d64 x, d64 y, u32 iterations) { return _mandelbrot_r(x, y, 0.0, 0.0, 0, iterations, 4.0); } -u32 _mandelbrot_r(long double x, long double y, long double zx, long double zy, u32 n, u32 iterations, long double threshold) +u32 _mandelbrot_r(d64 x, d64 y, d64 zx, d64 zy, u32 n, u32 iterations, d64 threshold) { if ((n < iterations) && ((zx * zx + zy * zy) < threshold)) { - long double zx_new = (zx * zx - zy * zy + x); - long double zy_new = (2 * zx * zy + y); + d64 zx_new = (zx * zx - zy * zy + x); + d64 zy_new = (2 * zx * zy + y); if ((zx_new == zx) && (zy_new == zy)) { return iterations; } @@ -39,7 +39,7 @@ u32 _mandelbrot_r(long double x, long double y, long double zx, long double zy, return n; } -u32 julia(long double x, long double y, u32 iterations) +u32 julia(d64 x, d64 y, u32 iterations) { // TODO: Julia-Menge return 0; @@ -10,9 +10,17 @@ #include "defs.h" -u32 mandelbrot_s(long double x, long double y, u32 iterations); -u32 mandelbrot_r(long double x, long double y, u32 iterations); -u32 _mandelbrot_r(long double x, long double y, long double zx, long double zy, u32 n, u32 iterations, long double threshold); -u32 julia(long double x, long double y, u32 iterations); +#define SFUNC_MANDELBROT_R 0 +#define SFUNC_MANDELBROT_S (~0) +#define SFUNC_JULIA_R 1 +#define SFUNC_JULIA_S (~1) + +#define SFUNC_MANDELBROT SFUNC_MANDELBROT_R +#define SFUNC_JULIA SFUNC_JULIA_R + +u32 mandelbrot_s(d64 x, d64 y, u32 iterations); +u32 mandelbrot_r(d64 x, d64 y, u32 iterations); +u32 _mandelbrot_r(d64 x, d64 y, d64 zx, d64 zy, u32 n, u32 iterations, d64 threshold); +u32 julia(d64 x, d64 y, u32 iterations); #endif /* SETS_H_ */ |