Commit 8bdac41d authored by Helvetix Victorinox's avatar Helvetix Victorinox
Browse files

Updated with proper register clobber lists. Some code clean-up

(removing dead code, commented out code).  Regenerated installers
and testers.
parent d831bdb3
2005-05-13 Helvetix Victorinox <helvetix@gimp.org>
* app/composite/gimp-composite-sse2.c:
Updated with proper register clobber lists. Some code clean-up
(removing dead code, commented out code).
2005-05-14 Michael Natterer <mitch@gimp.org>
* app/pdb/procedural_db.h
......
......@@ -9,44 +9,11 @@
#include "gimp-composite-altivec.h"
/*
 * Installation table for the AltiVec compositing functions.
 *
 * Each entry pairs a compositing operation and three pixel formats with
 * the function that implements that combination.  The installer below
 * copies every entry into gimp_composite_function[mode][A][B][D].
 */
static struct install_table {
/* Compositing operation; first index into gimp_composite_function. */
GimpCompositeOperation mode;
/* Pixel formats used as the second, third and fourth indices.
 * NOTE(review): presumably the formats of the two inputs (A, B) and of
 * the destination (D) -- confirm against GimpCompositeContext.
 */
GimpPixelFormat A;
GimpPixelFormat B;
GimpPixelFormat D;
/* Implementation to register; NULL marks the end of the table. */
void (*function)(GimpCompositeContext *);
} _gimp_composite_altivec[] = {
/* The real entries are compiled in only when COMPILE_ALTIVEC_IS_OKAY is
 * defined; otherwise the table consists of the terminator alone.
 */
#if defined(COMPILE_ALTIVEC_IS_OKAY)
{ GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_multiply_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_screen_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_difference_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_addition_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_subtract_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_darken_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_lighten_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_DIVIDE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_divide_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_DODGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_dodge_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_grain_extract_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_GRAIN_MERGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_grain_merge_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_BLEND, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_blend_rgba8_rgba8_rgba8_altivec },
{ GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_swap_rgba8_rgba8_rgba8_altivec },
#endif
/* Terminator: the installer stops at the first entry whose function is NULL. */
{ 0, 0, 0, 0, NULL }
};
gboolean
gimp_composite_altivec_install (void)
{
static struct install_table *t = _gimp_composite_altivec;
if (gimp_composite_altivec_init ())
{
for (t = &_gimp_composite_altivec[0]; t->function != NULL; t++)
{
gimp_composite_function[t->mode][t->A][t->B][t->D] = t->function;
}
return (TRUE);
}
/* nothing to do */
return (FALSE);
}
......@@ -64,149 +64,6 @@ gimp_composite_altivec_test (int iterations, int n_pixels)
va8M[i].a = i;
}
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_addition_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("addition", &generic_ctx, &special_ctx))
{
printf("addition_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("addition_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_BLEND, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_BLEND, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_blend_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("blend", &generic_ctx, &special_ctx))
{
printf("blend_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("blend_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DARKEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_darken_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("darken", &generic_ctx, &special_ctx))
{
printf("darken_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("darken_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_difference_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("difference", &generic_ctx, &special_ctx))
{
printf("difference_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("difference_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIVIDE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIVIDE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_divide_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("divide", &generic_ctx, &special_ctx))
{
printf("divide_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("divide_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DODGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DODGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_dodge_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("dodge", &generic_ctx, &special_ctx))
{
printf("dodge_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("dodge_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_GRAIN_EXTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_grain_extract_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("grain_extract", &generic_ctx, &special_ctx))
{
printf("grain_extract_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("grain_extract_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_GRAIN_MERGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_GRAIN_MERGE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_grain_merge_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("grain_merge", &generic_ctx, &special_ctx))
{
printf("grain_merge_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("grain_merge_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_LIGHTEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_lighten_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("lighten", &generic_ctx, &special_ctx))
{
printf("lighten_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("lighten_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_multiply_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("multiply", &generic_ctx, &special_ctx))
{
printf("multiply_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("multiply_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_screen_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("screen", &generic_ctx, &special_ctx))
{
printf("screen_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("screen_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_subtract_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("subtract", &generic_ctx, &special_ctx))
{
printf("subtract_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("subtract_rgba8_rgba8_rgba8", ft0, ft1);
gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_swap_rgba8_rgba8_rgba8_altivec, &special_ctx);
if (gimp_composite_regression_compare_contexts ("swap", &generic_ctx, &special_ctx))
{
printf("swap_rgba8_rgba8_rgba8 failed\n");
return (1);
}
gimp_composite_regression_timer_report ("swap_rgba8_rgba8_rgba8", ft0, ft1);
#endif
return (0);
}
......
......@@ -186,7 +186,7 @@ gimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tmovdqu %%xmm2,%0\n"
: "=m" (*D)
: "m" (*A), "m" (*B)
: "%xmm1", "%xmm2", "%xmm3", "%xmm4");
: "%xmm2", "%xmm3");
A++;
B++;
D++;
......@@ -198,12 +198,13 @@ gimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
for (; n_pixels >= 2; n_pixels -= 2)
{
asm volatile (" movq %1, %%mm2\n"
"\tpminub %2, %%mm2\n"
"\tmovntq %%mm2, %0\n"
asm volatile (" movq %1,%%mm2\n"
"\tmovq %2,%%mm3\n"
"\tpminub %%mm3,%%mm2\n"
"\tmovntq %%mm2,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4");
: "%mm2", "%mm3");
a++;
b++;
d++;
......@@ -609,7 +610,6 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
*/
for (; op.n_pixels >= 16; op.n_pixels -= 16)
{
#ifdef __OPTIMIZE__
asm volatile (" movdqu %0,%%xmm0\n"
"\tmovdqu %1,%%xmm1\n"
"\tmovdqu %2,%%xmm2\n"
......@@ -618,40 +618,11 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tmovdqu %5,%%xmm5\n"
"\tmovdqu %6,%%xmm6\n"
"\tmovdqu %7,%%xmm7\n"
"\tmovdqu %%xmm0,%1\n"
"\tmovdqu %%xmm1,%0\n"
"\tmovdqu %%xmm2,%3\n"
"\tmovdqu %%xmm3,%2\n"
"\tmovdqu %%xmm4,%5\n"
"\tmovdqu %%xmm5,%4\n"
"\tmovdqu %%xmm6,%7\n"
"\tmovdqu %%xmm7,%6\n"
: "+m" (op.A[0]), "+m" (op.B[0]),
"+m" (op.A[1]), "+m" (op.B[1]),
"+m" (op.A[2]), "+m" (op.B[2]),
"+m" (op.A[3]), "+m" (op.B[3])
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
#else
asm volatile (" movdqu %0,%%xmm0\n"
"\tmovdqu %1,%%xmm1\n"
"\tmovdqu %2,%%xmm2\n"
"\tmovdqu %3,%%xmm3\n"
: "+m" (op.A[0]), "+m" (op.B[0]),
"+m" (op.A[1]), "+m" (op.B[1])
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
asm volatile ("\tmovdqu %4,%%xmm4\n"
"\tmovdqu %5,%%xmm5\n"
"\tmovdqu %6,%%xmm6\n"
"\tmovdqu %7,%%xmm7\n"
: "+m" (op.A[2]), "+m" (op.B[2]),
"+m" (op.A[3]), "+m" (op.B[3])
: /* empty */
:
: "m" (op.A[0]), "m" (op.B[0]),
"m" (op.A[1]), "m" (op.B[1]),
"m" (op.A[2]), "m" (op.B[2]),
"m" (op.A[3]), "m" (op.B[3])
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
......@@ -659,22 +630,16 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tmovdqu %%xmm1,%0\n"
"\tmovdqu %%xmm2,%3\n"
"\tmovdqu %%xmm3,%2\n"
: "+m" (op.A[0]), "+m" (op.B[0]),
"+m" (op.A[1]), "+m" (op.B[1])
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
asm volatile ("\tmovdqu %%xmm4,%5\n"
"\tmovdqu %%xmm4,%5\n"
"\tmovdqu %%xmm5,%4\n"
"\tmovdqu %%xmm6,%7\n"
"\tmovdqu %%xmm7,%6\n"
: "+m" (op.A[2]), "+m" (op.B[2]),
"+m" (op.A[3]), "+m" (op.B[3])
: "=m" (op.A[0]), "=m" (op.B[0]),
"=m" (op.A[1]), "=m" (op.B[1]),
"=m" (op.A[2]), "=m" (op.B[2]),
"=m" (op.A[3]), "=m" (op.B[3])
: /* empty */
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
);
#endif
op.A += 64;
op.B += 64;
}
......@@ -714,7 +679,7 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tmovd %%mm2,%1\n"
: "+m" (*op.A), "+m" (*op.B)
: /* empty */
: "%mm1", "%mm2", "%mm3", "%mm4");
: "%mm3", "%mm4");
}
asm("emms");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment