Commit 77598e58 authored by Victor Oliveira's avatar Victor Oliveira Committed by Øyvind "pippin" Kolås

Common color conversions performed in the GPU and fixing some bugs in point-filter

parent 78e0baf2
......@@ -20,6 +20,7 @@ libcl_public_HEADERS = \
gegl-cl-init.h \
gegl-cl-texture.h \
gegl-cl-types.h \
gegl-cl-color.h \
cl_d3d10.h \
cl_ext.h \
cl_gl_ext.h \
......@@ -33,7 +34,9 @@ libcl_sources = \
gegl-cl-init.c \
gegl-cl-init.h \
gegl-cl-texture.c \
gegl-cl-texture.h
gegl-cl-texture.h \
gegl-cl-color.c \
gegl-cl-color.h
noinst_LTLIBRARIES = libcl.la
......
......@@ -41,7 +41,7 @@ extern "C" {
/*
* For each extension, follow this template
* /* cl_VEN_extname extension */
* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
* ... define new types, if any
* ... define new tokens, if any
......
static const char* kernel_color_source =
"/* This is almost a copy-paste from babl/base conversion functions in RGBA space */ \n"
" \n"
"/* Alpha threshold used in the reference implementation for \n"
" * un-pre-multiplication of color data: \n"
" * \n"
" * 0.01 / (2^16 - 1) \n"
" */ \n"
"#define BABL_ALPHA_THRESHOLD 0.000000152590219 \n"
" \n"
"float linear_to_gamma_2_2 (float value) \n"
"{ \n"
" if (value > 0.0030402477f) \n"
" return 1.055f * native_powr (value, (1.0f/2.4f)) - 0.055f; \n"
" return 12.92f * value; \n"
"} \n"
" \n"
"float gamma_2_2_to_linear (float value) \n"
"{ \n"
" if (value > 0.03928f) \n"
" return native_powr ((value + 0.055f) / 1.055f, 2.4f); \n"
" return value / 12.92f; \n"
"} \n"
" \n"
"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | \n"
" CLK_ADDRESS_NONE | \n"
" CLK_FILTER_NEAREST; \n"
" \n"
"/* RGBA float -> RaGaBaA float */ \n"
"__kernel void non_premultiplied_to_premultiplied (__read_only image2d_t in, \n"
" __write_only image2d_t out) \n"
"{ \n"
" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
" float4 in_v = read_imagef(in, sampler, gid); \n"
" float4 out_v; \n"
" out_v = in_v * in_v.w; \n"
" out_v.w = in_v.w; \n"
" write_imagef(out, gid, out_v); \n"
"} \n"
" \n"
"/* RaGaBaA float -> RGBA float */ \n"
"__kernel void premultiplied_to_non_premultiplied (__read_only image2d_t in, \n"
" __write_only image2d_t out) \n"
"{ \n"
" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
" float4 in_v = read_imagef(in, sampler, gid); \n"
" float4 out_v; \n"
" out_v = (in_v.w > BABL_ALPHA_THRESHOLD)? in_v / in_v.w : (float4)(0.0f); \n"
" out_v.w = in_v.w; \n"
" write_imagef(out, gid, out_v); \n"
"} \n"
" \n"
"/* RGBA float -> R'G'B'A float */ \n"
"__kernel void rgba2rgba_gamma_2_2 (__read_only image2d_t in, \n"
" __write_only image2d_t out) \n"
"{ \n"
" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
" float4 in_v = read_imagef(in, sampler, gid); \n"
" float4 out_v; \n"
" out_v = (float4)(linear_to_gamma_2_2(in_v.x), \n"
" linear_to_gamma_2_2(in_v.y), \n"
" linear_to_gamma_2_2(in_v.z), \n"
" in_v.w); \n"
" write_imagef(out, gid, out_v); \n"
"} \n"
" \n"
"/* R'G'B'A float -> RGBA float */ \n"
"__kernel void rgba_gamma_2_22rgba (__read_only image2d_t in, \n"
" __write_only image2d_t out) \n"
"{ \n"
" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
" float4 in_v = read_imagef(in, sampler, gid); \n"
" float4 out_v; \n"
" out_v = (float4)(gamma_2_2_to_linear(in_v.x), \n"
" gamma_2_2_to_linear(in_v.y), \n"
" gamma_2_2_to_linear(in_v.z), \n"
" in_v.w); \n"
" write_imagef(out, gid, out_v); \n"
"} \n"
" \n"
"/* RGBA float -> R'aG'aB'aA float */ \n"
"__kernel void rgba2rgba_gamma_2_2_premultiplied (__read_only image2d_t in, \n"
" __write_only image2d_t out) \n"
"{ \n"
" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
" float4 in_v = read_imagef(in, sampler, gid); \n"
" float4 out_v; \n"
" out_v = (float4)(linear_to_gamma_2_2(in_v.x) * in_v.w, \n"
" linear_to_gamma_2_2(in_v.y) * in_v.w, \n"
" linear_to_gamma_2_2(in_v.z) * in_v.w, \n"
" in_v.w); \n"
" write_imagef(out, gid, out_v); \n"
"} \n"
" \n"
"/* R'aG'aB'aA float -> RGBA float */ \n"
"__kernel void rgba_gamma_2_2_premultiplied2rgba (__read_only image2d_t in, \n"
" __write_only image2d_t out) \n"
"{ \n"
" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
" float4 in_v = read_imagef(in, sampler, gid); \n"
" float4 out_v; \n"
" out_v = (in_v.w > BABL_ALPHA_THRESHOLD)? (float4)(linear_to_gamma_2_2(in_v.x) / in_v.w,\n"
" linear_to_gamma_2_2(in_v.y) / in_v.w,\n"
" linear_to_gamma_2_2(in_v.z) / in_v.w,\n"
" in_v.w) : \n"
" (float4)(0.0f); \n"
" write_imagef(out, gid, out_v); \n"
"} \n";
#include "gegl.h"
#include "gegl-cl-color.h"
#include "gegl-cl-init.h"
#include "gegl-cl-color-kernel.h"
static gegl_cl_run_data *kernels_color = NULL;
static const Babl *format[6];
void
gegl_cl_color_compile_kernels(void)
{
const char *kernel_name[] = {"non_premultiplied_to_premultiplied", /* 0 */
"premultiplied_to_non_premultiplied", /* 1 */
"rgba2rgba_gamma_2_2", /* 2 */
"rgba_gamma_2_22rgba", /* 3 */
"rgba2rgba_gamma_2_2_premultiplied", /* 4 */
"rgba_gamma_2_2_premultiplied2rgba", /* 5 */
NULL};
format[0] = babl_format ("RaGaBaA float"),
format[1] = babl_format ("RGBA float"),
format[2] = babl_format ("R'G'B'A float"),
format[3] = babl_format ("RGBA float"),
format[4] = babl_format ("R'aG'aB'aA float"),
format[5] = babl_format ("RGBA float"),
kernels_color = gegl_cl_compile_and_build (kernel_color_source, kernel_name);
}
gboolean
gegl_cl_color_supported (const Babl *in_format, const Babl *out_format)
{
int i;
gboolean supported_format_in = FALSE;
gboolean supported_format_out = FALSE;
for (i = 0; i < 6; i++)
{
if (format[i] == in_format) supported_format_in = TRUE;
if (format[i] == out_format) supported_format_out = TRUE;
}
return (supported_format_in && supported_format_out);
}
#define CONV_1(x) {conv[0] = x; conv[1] = -1;}
#define CONV_2(x,y) {conv[0] = x; conv[1] = y;}
//#define CL_ERROR {g_assert(0);}
#define CL_ERROR {g_printf("[OpenCL] Error in %s:%d@%s - %s\n", __FILE__, __LINE__, __func__, gegl_cl_errstring(errcode)); return FALSE;}
gboolean
gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
const Babl *in_format, const Babl *out_format)
{
int i;
int errcode;
int conv[2] = {-1, -1};
if (!gegl_cl_color_supported (in_format, out_format))
CL_ERROR
if (in_format == out_format)
{
const size_t origin[3] = {0, 0, 0};
const size_t region[3] = {size[0], size[1], 1};
/* just copy in_tex to out_tex */
errcode = gegl_clEnqueueCopyImage (gegl_cl_get_command_queue(),
in_tex, out_tex, origin, origin, region,
0, NULL, NULL);
if (errcode != CL_SUCCESS) CL_ERROR
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) CL_ERROR
}
else
{
if (in_format == babl_format ("RGBA float"))
{
if (out_format == babl_format ("RaGaBaA float")) CONV_1(0)
else if (out_format == babl_format ("R'G'B'A float")) CONV_1(2)
else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_1(4)
}
else if (in_format == babl_format ("RaGaBaA float"))
{
if (out_format == babl_format ("RGBA float")) CONV_1(1)
else if (out_format == babl_format ("R'G'B'A float")) CONV_2(1, 2)
else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_2(1, 4)
}
else if (in_format == babl_format ("R'G'B'A float"))
{
if (out_format == babl_format ("RGBA float")) CONV_1(3)
else if (out_format == babl_format ("RaGaBaA float")) CONV_2(3, 0)
else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_2(3, 4)
}
else if (in_format == babl_format ("R'aG'aB'aA float"))
{
if (out_format == babl_format ("RGBA float")) CONV_1(5)
else if (out_format == babl_format ("RaGaBaA float")) CONV_2(5, 0)
else if (out_format == babl_format ("R'G'B'A float")) CONV_2(5, 2)
}
for (i=0; i<2; i++)
{
if (conv[i] >= 0)
{
errcode = gegl_clSetKernelArg(kernels_color->kernel[conv[i]], 0, sizeof(cl_mem), (void*)&in_tex);
if (errcode != CL_SUCCESS) CL_ERROR
errcode = gegl_clSetKernelArg(kernels_color->kernel[conv[i]], 1, sizeof(cl_mem), (void*)&out_tex);
if (errcode != CL_SUCCESS) CL_ERROR
errcode = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
kernels_color->kernel[conv[i]], 2,
NULL, size, NULL,
0, NULL, NULL);
if (errcode != CL_SUCCESS) CL_ERROR
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) CL_ERROR
}
}
}
return TRUE;
}
#undef CL_ERROR
#ifndef __GEGL_CL_COLOR_H__
#define __GEGL_CL_COLOR_H__
#include <gegl.h>
#include "gegl-cl-types.h"
void gegl_cl_color_compile_kernels(void);
gboolean gegl_cl_color_supported (const Babl *in_format, const Babl *out_format);
gboolean gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
const Babl *in_format, const Babl *out_format);
#endif
......@@ -6,6 +6,8 @@
#include <string.h>
#include <stdio.h>
#include "gegl-cl-color.h"
const char *gegl_cl_errstring(cl_int err) {
static const char* strings[] =
{
......@@ -257,6 +259,9 @@ gegl_cl_init (GError **error)
/* XXX: this dict is being leaked */
cl_program_hash = g_hash_table_new (g_str_hash, g_str_equal);
if (cl_state.is_accelerated)
gegl_cl_color_compile_kernels();
g_printf("[OpenCL] OK\n");
return TRUE;
......
......@@ -2,7 +2,6 @@
#define __GEGL_CL_INIT_H__
#include "gegl-cl-types.h"
#include <gmodule.h>
#define CL_SAFE_CALL(func) \
func; \
......
......@@ -4,5 +4,6 @@
#include "gegl-cl-types.h"
#include "gegl-cl-init.h"
#include "gegl-cl-texture.h"
#include "gegl-cl-color.h"
#endif
......@@ -315,6 +315,7 @@ gegl_operation_context_get_source (GeglOperationContext *context,
if (!real_input)
return NULL;
input = g_object_ref (real_input);
return input;
}
......@@ -391,6 +392,7 @@ gegl_operation_context_get_target (GeglOperationContext *context,
}
gegl_operation_context_take_object (context, padname, G_OBJECT (output));
return output;
}
......
......@@ -72,115 +72,6 @@ gegl_operation_point_filter_init (GeglOperationPointFilter *self)
{
}
static gboolean
gegl_operation_point_filter_cl_process_tiled (GeglOperation *operation,
GeglBuffer *input,
GeglBuffer *output,
const GeglRectangle *result)
{
GeglOperationPointFilterClass *point_filter_class = GEGL_OPERATION_POINT_FILTER_GET_CLASS (operation);
const gint bpp = babl_format_get_bytes_per_pixel (babl_format ("RGBA float"));
int y, x;
int errcode;
cl_mem in_tex = NULL, out_tex = NULL;
cl_image_format format;
gfloat* in_data = (gfloat*) gegl_malloc(result->width * result->height * bpp);
gfloat* out_data = (gfloat*) gegl_malloc(result->width * result->height * bpp);
if (in_data == NULL || out_data == NULL) goto error;
/* un-tile */
gegl_buffer_get (input, 1.0, result, babl_format ("RGBA float"), in_data, GEGL_AUTO_ROWSTRIDE);
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_FLOAT;
in_tex = gegl_clCreateImage2D (gegl_cl_get_context(),
CL_MEM_READ_ONLY,
&format,
cl_state.max_image_width,
cl_state.max_image_height,
0, NULL, &errcode);
if (errcode != CL_SUCCESS) goto error;
out_tex = gegl_clCreateImage2D (gegl_cl_get_context(),
CL_MEM_WRITE_ONLY,
&format,
cl_state.max_image_width,
cl_state.max_image_height,
0, NULL, &errcode);
if (errcode != CL_SUCCESS) goto error;
for (y=0; y < result->height; y += cl_state.max_image_height)
for (x=0; x < result->width; x += cl_state.max_image_width)
{
const size_t offset = y * (4 * result->width) + (4 * x);
const size_t origin[3] = {0, 0, 0};
const size_t region[3] = {MIN(cl_state.max_image_width, result->width -x),
MIN(cl_state.max_image_height, result->height-y),
1};
const size_t global_worksize[2] = {region[0], region[1]};
GeglRectangle roi = {x, y, region[0], region[1]};
/* CPU -> GPU */
errcode = gegl_clEnqueueWriteImage(gegl_cl_get_command_queue(), in_tex, CL_FALSE,
origin, region, result->width * 4 * sizeof(gfloat), 0, &in_data[offset],
0, NULL, NULL);
if (errcode != CL_SUCCESS) goto error;
/* Wait */
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) goto error;
/* Process */
errcode = point_filter_class->cl_process(operation, in_tex, out_tex, global_worksize, &roi);
if (errcode != CL_SUCCESS) goto error;
/* Wait */
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) goto error;
/* GPU -> CPU */
errcode = gegl_clEnqueueReadImage(gegl_cl_get_command_queue(), out_tex, CL_FALSE,
origin, region, result->width * 4 * sizeof(gfloat), 0, &out_data[offset],
0, NULL, NULL);
if (errcode != CL_SUCCESS) goto error;
/* Wait */
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) goto error;
}
errcode = gegl_clFinish(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) goto error;
/* tile-ize */
gegl_buffer_set (output, result, babl_format ("RGBA float"), out_data, GEGL_AUTO_ROWSTRIDE);
gegl_clReleaseMemObject (in_tex);
gegl_clReleaseMemObject (out_tex);
gegl_free(in_data);
gegl_free(out_data);
return TRUE;
error:
g_warning("[OpenCL] Error: %s", gegl_cl_errstring(errcode));
if (in_tex) gegl_clReleaseMemObject (in_tex);
if (out_tex) gegl_clReleaseMemObject (out_tex);
if (in_data) free (in_data);
if (out_data) free (out_data);
return FALSE;
}
struct buf_tex
{
GeglBuffer *buf;
......@@ -188,15 +79,19 @@ struct buf_tex
cl_mem *tex;
};
//#define CL_ERROR {g_assert(0);}
#define CL_ERROR {g_printf("[OpenCL] Error in %s:%d@%s - %s\n", __FILE__, __LINE__, __func__, gegl_cl_errstring(errcode)); goto error;}
static gboolean
gegl_operation_point_filter_cl_process_full (GeglOperation *operation,
GeglBuffer *input,
GeglBuffer *output,
const GeglRectangle *result)
{
GeglOperationPointFilterClass *point_filter_class = GEGL_OPERATION_POINT_FILTER_GET_CLASS (operation);
const Babl *in_format = gegl_operation_get_format (operation, "input");
const Babl *out_format = gegl_operation_get_format (operation, "output");
const gint bpp = babl_format_get_bytes_per_pixel (babl_format ("RGBA float"));
GeglOperationPointFilterClass *point_filter_class = GEGL_OPERATION_POINT_FILTER_GET_CLASS (operation);
int y, x, i;
int errcode;
......@@ -207,13 +102,14 @@ gegl_operation_point_filter_cl_process_full (GeglOperation *operation,
int ntex = 0;
struct buf_tex input_tex;
struct buf_tex output_tex;
size_t *pitch = NULL;
cl_image_format format;
format.image_channel_order = CL_RGBA;
format.image_channel_data_type = CL_FLOAT;
for (y=0; y < result->height; y += cl_state.max_image_height)
for (x=0; x < result->width; x += cl_state.max_image_width)
for (y=result->y; y < result->height; y += cl_state.max_image_height)
for (x=result->x; x < result->width; x += cl_state.max_image_width)
ntex++;
input_tex.region = (GeglRectangle *) gegl_malloc(ntex * sizeof(GeglRectangle));
......@@ -221,35 +117,50 @@ gegl_operation_point_filter_cl_process_full (GeglOperation *operation,
input_tex.tex = (cl_mem *) gegl_malloc(ntex * sizeof(cl_mem));
output_tex.tex = (cl_mem *) gegl_malloc(ntex * sizeof(cl_mem));
g_printf("[OpenCL] BABL formats: (%s,%s:%d) (%s,%s:%d)\n \t Tile Size:(%d, %d)\n", babl_get_name(gegl_buffer_get_format(input)), babl_get_name(in_format),
gegl_cl_color_supported (gegl_buffer_get_format(input), in_format),
babl_get_name(out_format), babl_get_name(gegl_buffer_get_format(output)),
gegl_cl_color_supported (out_format, gegl_buffer_get_format(output)),
input->tile_storage->tile_width,
input->tile_storage->tile_height);
input_tex.tex = (cl_mem *) gegl_malloc(ntex * sizeof(cl_mem));
output_tex.tex = (cl_mem *) gegl_malloc(ntex * sizeof(cl_mem));
if (input_tex.region == NULL || output_tex.region == NULL || input_tex.tex == NULL || output_tex.tex == NULL)
goto error;
CL_ERROR;
size_t *pitch = (size_t *) gegl_malloc(ntex * sizeof(size_t *));
pitch = (size_t *) gegl_malloc(ntex * sizeof(size_t *));
in_data = (gfloat**) gegl_malloc(ntex * sizeof(gfloat *));
out_data = (gfloat**) gegl_malloc(ntex * sizeof(gfloat *));
if (pitch == NULL || in_data == NULL || out_data == NULL) goto error;
if (pitch == NULL || in_data == NULL || out_data == NULL) CL_ERROR;
i = 0;
for (y=0; y < result->height; y += cl_state.max_image_height)
for (x=0; x < result->width; x += cl_state.max_image_width)
for (y=result->y; y < result->height; y += cl_state.max_image_height)
for (x=result->x; x < result->width; x += cl_state.max_image_width)
{
const size_t region[3] = {MIN(cl_state.max_image_width, result->width -x),
MIN(cl_state.max_image_height, result->height-y)};
MIN(cl_state.max_image_height, result->height-y),
1};
GeglRectangle r = {x, y, region[0], region[1]};
input_tex.region[i] = output_tex.region[i] = r;
input_tex.tex[i] = gegl_clCreateImage2D (gegl_cl_get_context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, &format, region[0], region[1],
input_tex.tex[i] = gegl_clCreateImage2D (gegl_cl_get_context(),
CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, &format,
region[0], region[1],
0, NULL, &errcode);
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
output_tex.tex[i] = gegl_clCreateImage2D (gegl_cl_get_context(), CL_MEM_WRITE_ONLY, &format, region[0], region[1],
output_tex.tex[i] = gegl_clCreateImage2D (gegl_cl_get_context(),
CL_MEM_READ_WRITE, &format,
region[0], region[1],
0, NULL, &errcode);
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
out_data[i] = (gfloat *) gegl_malloc(region[0] * region[1] * bpp);
if (out_data[i] == NULL) goto error;
out_data[i] = (gfloat *) gegl_malloc(region[0] * region[1] * babl_format_get_bytes_per_pixel(out_format));
if (out_data[i] == NULL) CL_ERROR;
i++;
}
......@@ -264,10 +175,13 @@ gegl_operation_point_filter_cl_process_full (GeglOperation *operation,
CL_MAP_WRITE,
origin, region, &pitch[i], NULL,
0, NULL, NULL, &errcode);
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
/* un-tile */
gegl_buffer_get (input, 1.0, &input_tex.region[i], babl_format ("RGBA float"), in_data[i], GEGL_AUTO_ROWSTRIDE);
if (gegl_cl_color_supported (gegl_buffer_get_format(input), in_format)) /* color conversion will be performed in the GPU later */
gegl_buffer_get (input, 1.0, &input_tex.region[i], gegl_buffer_get_format(input), in_data[i], GEGL_AUTO_ROWSTRIDE);
else /* color conversion using BABL */
gegl_buffer_get (input, 1.0, &input_tex.region[i], in_format, in_data[i], GEGL_AUTO_ROWSTRIDE);
}
/* CPU -> GPU */
......@@ -275,26 +189,55 @@ gegl_operation_point_filter_cl_process_full (GeglOperation *operation,
{
errcode = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), input_tex.tex[i], in_data[i],
0, NULL, NULL);
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
}
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
/* color conversion in the GPU (input) */
if (gegl_cl_color_supported (gegl_buffer_get_format(input), in_format))
for (i=0; i < ntex; i++)
{
cl_mem swap;
const size_t size[2] = {input_tex.region[i].width, input_tex.region[i].height};
errcode = gegl_cl_color_conv (input_tex.tex[i], output_tex.tex[i], size, gegl_buffer_get_format(input), in_format);
if (errcode == FALSE) CL_ERROR;
swap = input_tex.tex[i];
input_tex.tex[i] = output_tex.tex[i];
output_tex.tex[i] = swap;
}
/* Process */
for (i=0; i < ntex; i++)
{
const size_t origin[3] = {0, 0, 0};
const size_t region[3] = {input_tex.region[i].width, input_tex.region[i].height, 1};
const size_t global_worksize[2] = {region[0], region[1]};
errcode = point_filter_class->cl_process(operation, input_tex.tex[i], output_tex.tex[i], global_worksize, &input_tex.region[i]);
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
}
/* Wait Processing */
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
/* color conversion in the GPU (output) */
if (gegl_cl_color_supported (out_format, gegl_buffer_get_format(output)))
for (i=0; i < ntex; i++)
{
cl_mem swap;
const size_t size[2] = {output_tex.region[i].width, output_tex.region[i].height};
errcode = gegl_cl_color_conv (output_tex.tex[i], input_tex.tex[i], size, out_format, gegl_buffer_get_format(output));
if (errcode == FALSE) CL_ERROR;
swap = input_tex.tex[i];
input_tex.tex[i] = output_tex.tex[i];
output_tex.tex[i] = swap;
}
/* GPU -> CPU */
for (i=0; i < ntex; i++)
......@@ -305,21 +248,24 @@ gegl_operation_point_filter_cl_process_full (GeglOperation *operation,
errcode = gegl_clEnqueueReadImage(gegl_cl_get_command_queue(), output_tex.tex[i], CL_FALSE,
origin, region, pitch[i], 0, out_data[i],
0, NULL, NULL);
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
}
/* Wait */
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
/* Run! */
errcode = gegl_clFinish(gegl_cl_get_command_queue());
if (errcode != CL_SUCCESS) goto error;
if (errcode != CL_SUCCESS) CL_ERROR;
for (i=0; i < ntex; i++)
{
/* tile-ize */
gegl_buffer_set (output, &output_tex.region[i], babl_format ("RGBA float"), out_data[i], GEGL_AUTO_ROWSTRIDE);
if (gegl_cl_color_supported (out_format, gegl_buffer_get_format(output))) /* color conversion has already been be performed in the GPU */
gegl_buffer_set (output, &output_tex.region[i], gegl_buffer_get_format(output), out_data[i], GEGL_AUTO_ROWSTRIDE);
else /* color conversion using BABL */
gegl_buffer_set (output, &output_tex.region[i], out_format, out_data[i], GEGL_AUTO_ROWSTRIDE);
}
for (i=0; i < ntex; i++)
......@@ -338,7 +284,6 @@ gegl_operation_point_filter_cl_process_full (GeglOperation *operation,
return TRUE;
error:
g_warning("[OpenCL] Error: %s", gegl_cl_errstring(errcode));
for (i=0; i < ntex; i++)
{
......@@ -356,6 +301,7 @@ error:
return FALSE;
}
#undef CL_ERROR
static gboolean
gegl_operation_point_filter_process (GeglOperation *operation,
......@@ -375,10 +321,6 @@ gegl_operation_point_filter_process (GeglOperation *operation,
{
if (gegl_operation_point_filter_cl_process_full (operation, input, output, result))
return TRUE;
/* the function above failed */
if (gegl_operation_point_filter_cl_process_tiled (operation, input, output, result))
return TRUE;
}
{
......
......@@ -152,12 +152,12 @@ cl_process (GeglOperation *op,
if (!cl_data) return 1;
CL_SAFE_CALL(errcode |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex));
CL_SAFE_CALL(errcode |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex));
CL_SAFE_CALL(errcode |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&brightness));
CL_SAFE_CALL(errcode |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&contrast));
CL_SAFE_CALL(errcode = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex));
CL_SAFE_CALL(errcode = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex));
CL_SAFE_CALL(errcode = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&brightness));
CL_SAFE_CALL(errcode = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&contrast));
CL_SAFE_CALL(errcode |= gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
CL_SAFE_CALL(errcode = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
NULL, global_worksize, NULL,
0, NULL, NULL) );