Commit 64ade977 authored by Ell

app: move libappgegl's SSE2 bits to a separate library

Split libappgegl into libappgegl-generic and libappgegl-sse2, and
move the SSE2 code (part of the newly added smudge code) to the
latter, so that the rest of the code can be compiled without SSE2
compiler flags.  This allows building GIMP with SSE acceleration
enabled, while running the resulting binary on a target with no
SSE acceleration.
parent 71bbd88e
......@@ -9,12 +9,14 @@ AM_CPPFLAGS = \
$(CAIRO_CFLAGS) \
$(GEGL_CFLAGS) \
$(GDK_PIXBUF_CFLAGS) \
$(SSE2_EXTRA_CFLAGS) \
-I$(includedir)
noinst_LIBRARIES = libappgegl.a
noinst_LIBRARIES = \
libappgegl-generic.a \
libappgegl-sse2.a \
libappgegl.a
libappgegl_a_sources = \
libappgegl_generic_a_sources = \
gimp-gegl-enums.h \
gimp-gegl-types.h \
gimp-babl.c \
......@@ -42,9 +44,28 @@ libappgegl_a_sources = \
gimptilehandlervalidate.c \
gimptilehandlervalidate.h
libappgegl_a_built_sources = gimp-gegl-enums.c
libappgegl_generic_a_built_sources = gimp-gegl-enums.c
libappgegl_sse2_a_sources = \
gimp-gegl-loops-sse2.c \
gimp-gegl-loops-sse2.h
libappgegl_generic_a_SOURCES = $(libappgegl_generic_a_built_sources) $(libappgegl_generic_a_sources)
libappgegl_sse2_a_SOURCES = $(libappgegl_sse2_a_sources)
libappgegl_sse2_a_CFLAGS = $(SSE2_EXTRA_CFLAGS)
libappgegl_a_SOURCES =
libappgegl.a: libappgegl-generic.a \
libappgegl-sse2.a
$(AR) $(ARFLAGS) libappgegl.a \
$(libappgegl_generic_a_OBJECTS) \
$(libappgegl_sse2_a_OBJECTS)
$(RANLIB) libappgegl.a
libappgegl_a_SOURCES = $(libappgegl_a_built_sources) $(libappgegl_a_sources)
#
# rules to generate built sources
......
/* GIMP - The GNU Image Manipulation Program
* Copyright (C) 1995 Spencer Kimball and Peter Mattis
*
* gimp-gegl-loops-sse2.c
* Copyright (C) 2012 Michael Natterer <mitch@gimp.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include <string.h>
#include <cairo.h>
#include <gdk-pixbuf/gdk-pixbuf.h>
#include <gegl.h>
#include "gimp-gegl-types.h"
#include "gimp-gegl-loops-sse2.h"
#if COMPILE_SSE2_INTRINISICS
#include <emmintrin.h>
/* helper function of gimp_gegl_smudge_with_paint_process_sse2()
 *
 * Blends two RGBA float pixels and stores the result in dest.  Each source
 * is weighted by its rate multiplied by its own alpha (component 3); the
 * weighted sum is divided by the sum of the two weights.  If the sum of the
 * weights is zero, dest is set to all zeros.
 *
 * src1, src2:       source pixels, 4 floats each (read with unaligned loads)
 * src1_rate,
 * src2_rate:        blending rates of the two sources
 * dest:             destination pixel, 4 floats; written with an aligned
 *                   store, so it must be 16-byte aligned
 * no_erasing_src2:  when TRUE, the resulting alpha is never lower than the
 *                   alpha of src2
 *
 * src and dest can be the same address
 */
static inline void
gimp_gegl_smudge_with_paint_blend_sse2 (const gfloat *src1,
                                        gfloat        src1_rate,
                                        const gfloat *src2,
                                        gfloat        src2_rate,
                                        gfloat       *dest,
                                        gboolean      no_erasing_src2)
{
  /* 2017/4/13 shark0r : According to my test, SSE decreases about 25%
   * execution time
   */

  /* Everything that is read from the sources is read here, before the first
   * store to dest, because dest may alias src1 or src2.
   */
  const __m128 v_src1              = _mm_loadu_ps (src1);
  const __m128 v_src2              = _mm_loadu_ps (src2);
  const gfloat original_src2_alpha = src2[3];
  const gfloat src1_alpha          = src1_rate * src1[3];
  const gfloat src2_alpha          = src2_rate * original_src2_alpha;
  gfloat       result_alpha        = src1_alpha + src2_alpha;

  if (result_alpha == 0)
    {
      _mm_store_ps (dest, _mm_setzero_ps ());

      return;
    }

  /* Use the documented intrinsics rather than arithmetic operators on
   * __m128 values: the operators are a compiler-specific vector extension,
   * while the intrinsics are available wherever <emmintrin.h> is.
   *
   * All four lanes are blended; the alpha lane is overwritten below.
   */
  _mm_store_ps (dest,
                _mm_div_ps (_mm_add_ps (_mm_mul_ps (v_src1,
                                                    _mm_set1_ps (src1_alpha)),
                                        _mm_mul_ps (v_src2,
                                                    _mm_set1_ps (src2_alpha))),
                            _mm_set1_ps (result_alpha)));

  if (no_erasing_src2)
    {
      result_alpha = MAX (result_alpha, original_src2_alpha);
    }

  dest[3] = result_alpha;
}
/* helper function of gimp_gegl_smudge_with_paint()
 *
 * Walks over count consecutive pixels of 4 floats each in accum, canvas and
 * paint.  For every pixel, accum is first blended with canvas (weights rate
 * and 1 - rate) and the result is written back to accum.  Then, if brush_a
 * is 0, the accum pixel is copied to paint; otherwise the brush source is
 * blended with accum (weights flow and 1 - flow) into paint.  The brush
 * source is brush_color when it is non-NULL, and the paint pixel itself
 * otherwise.
 *
 * note that it's the caller's responsibility to verify that the buffers are
 * properly aligned
 */
void
gimp_gegl_smudge_with_paint_process_sse2 (gfloat       *accum,
                                          const gfloat *canvas,
                                          gfloat       *paint,
                                          gint          count,
                                          const gfloat *brush_color,
                                          gfloat        brush_a,
                                          gboolean      no_erasing,
                                          gfloat        flow,
                                          gfloat        rate)
{
  for (; count != 0; count--, accum += 4, canvas += 4, paint += 4)
    {
      /* step 1: accum_buffer + canvas_buffer -> accum_buffer */
      gimp_gegl_smudge_with_paint_blend_sse2 (accum, rate,
                                              canvas, 1 - rate,
                                              accum, no_erasing);

      /* step 2: accum_buffer + brush color/pixmap -> paint_buffer */
      if (brush_a != 0)
        {
          const gfloat *brush_src;

          if (brush_color)
            brush_src = brush_color;
          else
            brush_src = paint;

          gimp_gegl_smudge_with_paint_blend_sse2 (brush_src, flow,
                                                  accum, 1 - flow,
                                                  paint, no_erasing);
        }
      else
        {
          /* pure smudge: the paint pixel is just the accumulated pixel */
          memcpy (paint, accum, sizeof (gfloat) * 4);
        }
    }
}
#endif /* COMPILE_SSE2_INTRINISICS */
/* GIMP - The GNU Image Manipulation Program
* Copyright (C) 1995 Spencer Kimball and Peter Mattis
*
* gimp-gegl-loops-sse2.h
* Copyright (C) 2012 Michael Natterer <mitch@gimp.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __GIMP_GEGL_LOOPS_SSE2_H__
#define __GIMP_GEGL_LOOPS_SSE2_H__
#if COMPILE_SSE2_INTRINISICS
/* SSE2 helper of gimp_gegl_smudge_with_paint().
 *
 * Processes count consecutive pixels of 4 floats each: blends accum with
 * canvas into accum (using rate), then writes the paint pixel, either as a
 * copy of accum (when brush_a is 0) or as a blend of the brush source with
 * accum (using flow).  brush_color may be NULL, in which case the paint
 * pixel itself is used as the brush source.
 *
 * It is the caller's responsibility to verify that the buffers are properly
 * aligned.
 */
void gimp_gegl_smudge_with_paint_process_sse2 (gfloat *accum,
const gfloat *canvas,
gfloat *paint,
gint count,
const gfloat *brush_color,
gfloat brush_a,
gboolean no_erasing,
gfloat flow,
gfloat rate);
#endif /* COMPILE_SSE2_INTRINISICS */
#endif /* __GIMP_GEGL_LOOPS_SSE2_H__ */
......@@ -22,10 +22,6 @@
#include <string.h>
#if COMPILE_SSE2_INTRINISICS
#include <emmintrin.h>
#endif
#include <cairo.h>
#include <gdk-pixbuf/gdk-pixbuf.h>
#include <gegl.h>
......@@ -38,6 +34,7 @@
#include "gimp-babl.h"
#include "gimp-gegl-loops.h"
#include "gimp-gegl-loops-sse2.h"
#include "core/gimpprogress.h"
......@@ -354,75 +351,80 @@ gimp_gegl_dodgeburn (GeglBuffer *src_buffer,
}
}
/* helper function of gimp_gegl_smudge_with_paint()
/* helper function of gimp_gegl_smudge_with_paint_process()
src and dest can be the same address
*/
static void
static inline void
gimp_gegl_smudge_with_paint_blend (const gfloat *src1,
gfloat src1_rate,
const gfloat *src2,
gfloat src2_rate,
gfloat *dest,
gboolean no_erasing_src2,
gboolean sse)
gboolean no_erasing_src2)
{
gfloat orginal_src2_alpha;
gfloat src1_alpha;
gfloat src2_alpha;
gfloat result_alpha;
gint b;
/* 2017/4/13 shark0r : According to my test, SSE decreases about 25%
* execution time
*/
orginal_src2_alpha = src2[3];
src1_alpha = src1_rate * src1[3];
src2_alpha = src2_rate * orginal_src2_alpha;
result_alpha = src1_alpha + src2_alpha;
#if defined COMPILE_SSE2_INTRINISICS
if (sse)
if (result_alpha == 0)
{
__m128 v_src1 = _mm_loadu_ps (src1);
__m128 v_src2 = _mm_loadu_ps (src2);
__m128 *v_dest = (__v4sf *) dest;
orginal_src2_alpha = v_src2[3];
src1_alpha = src1_rate * v_src1[3];
src2_alpha = src2_rate * orginal_src2_alpha;
result_alpha = src1_alpha + src2_alpha;
memset (dest, 0, sizeof (gfloat) * 4);
return;
}
if (result_alpha == 0)
{
*v_dest = _mm_set1_ps (0);
return;
}
for (b = 0; b < 3; b++)
dest[b] = (src1[b] * src1_alpha + src2[b] * src2_alpha) / result_alpha;
*v_dest = (v_src1 * _mm_set1_ps (src1_alpha) +
v_src2 * _mm_set1_ps (src2_alpha)) /
_mm_set1_ps (result_alpha);
}
else
#endif
if (no_erasing_src2)
{
gint b;
result_alpha = MAX (result_alpha, orginal_src2_alpha);
}
orginal_src2_alpha = src2[3];
src1_alpha = src1_rate * src1[3];
src2_alpha = src2_rate * orginal_src2_alpha;
result_alpha = src1_alpha + src2_alpha;
dest[3] = result_alpha;
}
if (result_alpha == 0)
/* helper function of gimp_gegl_smudge_with_paint() */
static void
gimp_gegl_smudge_with_paint_process (gfloat *accum,
const gfloat *canvas,
gfloat *paint,
gint count,
const gfloat *brush_color,
gfloat brush_a,
gboolean no_erasing,
gfloat flow,
gfloat rate)
{
while (count--)
{
/* blend accum_buffer and canvas_buffer to accum_buffer */
gimp_gegl_smudge_with_paint_blend (accum, rate, canvas, 1 - rate,
accum, no_erasing);
/* blend accum_buffer and brush color/pixmap to paint_buffer */
if (brush_a == 0) /* pure smudge */
{
memset (dest, 0, sizeof (gfloat) * 4);
return;
memcpy (paint, accum, sizeof (gfloat) * 4);
}
else
{
const gfloat *src1 = brush_color ? brush_color : paint;
for (b = 0; b < 3; b++)
dest[b] = (src1[b] * src1_alpha + src2[b] * src2_alpha) / result_alpha;
}
gimp_gegl_smudge_with_paint_blend (src1, flow, accum, 1 - flow,
paint, no_erasing);
}
if (no_erasing_src2)
{
result_alpha = MAX (result_alpha, orginal_src2_alpha);
accum += 4;
canvas += 4;
paint += 4;
}
dest[3] = result_alpha;
}
/* smudge painting calculation. Currently only smudge tool uses this function
......@@ -449,6 +451,8 @@ gimp_gegl_smudge_with_paint (GeglBuffer *accum_buffer,
GeglAccessMode paint_buffer_access_mode = (brush_color ?
GEGL_ACCESS_WRITE :
GEGL_ACCESS_READWRITE);
gboolean sse2 = (gimp_cpu_accel_get_support () &
GIMP_CPU_ACCEL_X86_SSE2);
iter = gegl_buffer_iterator_new (accum_buffer, accum_rect, 0,
babl_format ("RGBA float"),
......@@ -480,43 +484,27 @@ gimp_gegl_smudge_with_paint (GeglBuffer *accum_buffer,
const gfloat *canvas = iter->data[1];
gfloat *paint = iter->data[2];
gint count = iter->length;
gboolean sse_canvas = FALSE;
gboolean sse_brush = FALSE;
#if defined COMPILE_SSE2_INTRINISICS
if (gimp_cpu_accel_get_support () & GIMP_CPU_ACCEL_X86_SSE2)
#if COMPILE_SSE2_INTRINISICS
if (sse2 && ((guintptr) accum |
(guintptr) canvas |
(guintptr) (brush_color ? brush_color_float : paint) |
(guintptr) paint) % 16 == 0)
{
sse_canvas = ((guintptr) accum |
(guintptr) canvas) % 16 == 0;
sse_brush = ((guintptr) (brush_color ? brush_color_float : paint) |
(guintptr) accum |
(guintptr) paint) % 16 == 0;
gimp_gegl_smudge_with_paint_process_sse2 (accum, canvas, paint, count,
brush_color ? brush_color_float :
NULL,
brush_a,
no_erasing, flow, rate);
}
else
#endif
while (count--)
{
/* blend accum_buffer and canvas_buffer to accum_buffer */
gimp_gegl_smudge_with_paint_blend (accum, rate, canvas, 1 - rate,
accum, no_erasing, sse_canvas);
/* blend accum_buffer and brush color/pixmap to paint_buffer */
if (brush_a == 0) /* pure smudge */
{
memcpy (paint, accum, sizeof (gfloat) * 4);
}
else
{
gfloat *src1 = brush_color ? brush_color_float : paint;
gimp_gegl_smudge_with_paint_blend (src1, flow, accum, 1 - flow,
paint, no_erasing, sse_brush);
}
accum += 4;
canvas += 4;
paint += 4;
gimp_gegl_smudge_with_paint_process (accum, canvas, paint, count,
brush_color ? brush_color_float :
NULL,
brush_a,
no_erasing, flow, rate);
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment