Commit 9412a238 authored by Øyvind Kolås's avatar Øyvind Kolås

commited reworked bits of patch from Geert Jordaens that adds gcc vector

* gegl/buffer/gegl-sampler-cubic.c: (gegl_sampler_cubic_get): commited
reworked bits of patch from Geert Jordaens that adds gcc vector
extension based SSE code paths to cubic. Bug #540540

svn path=/trunk/; revision=2518
parent 86fd1396
2008-07-01 Øyvind Kolås <pippin@gimp.org>
* gegl/buffer/gegl-sampler-cubic.c: (gegl_sampler_cubic_get): commited
reworked bits of patch from Geert Jordaens that adds gcc vector
extension based SSE code paths to cubic. Bug #540540
2008-07-01 Øyvind Kolås <pippin@gimp.org>
Made the linear buffer access API of GeglBuffer public.
......
......@@ -129,7 +129,14 @@ gegl_sampler_cubic_get (GeglSampler *self,
(64-3)*4, 4, 4, 4};
gfloat *sampler_bptr;
gfloat factor;
gfloat newval[4];
#ifdef HAS_G4FLOAT
g4float newval4 = g4float_zero;
gfloat *newval = &newval4;
#else
gfloat newval[4] = {0.0, 0.0, 0.0, 0.0};
#endif
gint u,v;
gint dx,dy;
gint i;
......@@ -139,18 +146,37 @@ gegl_sampler_cubic_get (GeglSampler *self,
dy = (gint) y;
newval[0] = newval[1] = newval[2] = newval[3] = 0.;
sampler_bptr = gegl_sampler_get_ptr (self, dx, dy);
for (v=dy+context_rect.y, i=0; v < dy+context_rect.y+context_rect.height ; v++)
for (u=dx+context_rect.x ; u < dx+context_rect.x+context_rect.width ; u++, i++)
{
/*sampler_bptr = gegl_sampler_get_from_buffer (self, u, v);*/
sampler_bptr += offsets[i];
factor = cubicKernel (y - v, cubic->b, cubic->c) *
cubicKernel (x - u, cubic->b, cubic->c);
newval[0] += factor * sampler_bptr[0];
newval[1] += factor * sampler_bptr[1];
newval[2] += factor * sampler_bptr[2];
newval[3] += factor * sampler_bptr[3];
}
#ifdef HAS_G4FLOAT
if (G_LIKELY (gegl_cpu_accel_get_support () & (GEGL_CPU_ACCEL_X86_SSE|
GEGL_CPU_ACCEL_X8&_MMX)))
{
for (v=dy+context_rect.y, i=0; v < dy+context_rect.y+context_rect.height ; v++)
for (u=dx+context_rect.x ; u < dx+context_rect.x+context_rect.width ; u++, i++)
{
sampler_bptr += offsets[i];
factor = cubicKernel (y - v, cubic->b, cubic->c) *
cubicKernel (x - u, cubic->b, cubic->c);
newval4 += g4float_mul(&sampler_bptr[0], factor);
}
}
else
#endif
{
for (v=dy+context_rect.y, i=0; v < dy+context_rect.y+context_rect.height ; v++)
for (u=dx+context_rect.x ; u < dx+context_rect.x+context_rect.width ; u++, i++)
{
/*sampler_bptr = gegl_sampler_get_from_buffer (self, u, v);*/
sampler_bptr += offsets[i];
factor = cubicKernel (y - v, cubic->b, cubic->c) *
cubicKernel (x - u, cubic->b, cubic->c);
newval[0] += factor * sampler_bptr[0];
newval[1] += factor * sampler_bptr[1];
newval[2] += factor * sampler_bptr[2];
newval[3] += factor * sampler_bptr[3];
}
}
babl_process (babl_fish (self->interpolate_format, self->format),
newval, output, 1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment