Commit 8a3bdf52 authored by Helvetix Victorinox

app/composite/gimp-composite-sse.c More updates to accommodate the clobber

* app/composite/gimp-composite-sse.c
* app/composite/gimp-composite-sse2.c: More updates to accommodate
  the clobber registers. Additional progress against bug #147013.

* app/composite/gimp-composite-sse.h: Fixed a bug where the wrong
  manifest constant definition caused sse2 instructions to never be
  compiled.
parent d230b228
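The recurring change in the diffs below is to the GCC extended-asm constraint lists: destination operands become plain "=m" outputs, stale operands (such as a *op.D input that the template no longer references) are dropped, and the clobber list names only the registers the template actually modifies; operand numbers such as "0", "1", "2" are not valid clobber entries. A minimal sketch of the idiom the diff converges on, built around a hypothetical byte-add kernel rather than any real GIMP operator:

    #include <stdint.h>

    /* Illustrative only, not GIMP code: add two 8-byte pixel groups
     * with MMX, using the constraint style this commit adopts. */
    static void
    add_bytes_mmx (uint64_t *d, const uint64_t *a, const uint64_t *b)
    {
      asm volatile ("  movq    %1, %%mm2\n"  /* %1 = *a                 */
                    "\tpaddusb %2, %%mm2\n"  /* %2 = *b, saturating add */
                    "\tmovq    %%mm2, %0\n"  /* %0 = *d                 */
                    : "=m" (*d)              /* write-only: "=m", not "+m"  */
                    : "m" (*a), "m" (*b)     /* everything the asm reads    */
                    : "%mm2");               /* registers only, no "0"/"1"  */
      asm volatile ("emms");                 /* MMX and x87 share registers */
    }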
+2004-08-28  Helvetix Victorinox  <helvetix@gimp.org>
+
+	* app/composite/gimp-composite-sse.c
+	* app/composite/gimp-composite-sse2.c: More updates to accommodate
+	the clobber registers.  Additional progress against bug #147013.
+
+	* app/composite/gimp-composite-sse.h: Fixed a bug where the wrong
+	manifest constant definition caused sse2 instructions to never be
+	compiled.
+
 2004-08-28  Sven Neumann  <sven@gimp.org>

	* plug-ins/common/vpropagate.c (run): fixed confusion about which
......
@@ -109,8 +109,7 @@
"movd %%eax,%%" #divisor ";" \
"psllq $32,%%" #divisor ";" \
"por %%" #divisor ",%%" #quotient ";"
#endif
#if 0
/* equivalent to the INT_MULT() macro in gimp-composite-generic.c */
/*
* opr2 = INT_MULT(opr1, opr2, t)
@@ -129,8 +128,7 @@
"\tpsrlw $8, %%"#opr2"; " \
"\tpaddw %%"#opr1", %%"#opr2"; " \
"\tpsrlw $8, %%"#opr2"\n"
#endif
#if 0
/* a = INT_MULT(a,b) */
#define mmx_int_mult(a,b,w128) \
"\tpmullw %%"#b", %%"#a"; " \
@@ -139,9 +137,7 @@
"\tpsrlw $8, %%"#b"; " \
"\tpaddw %%"#a", %%"#b"; " \
"\tpsrlw $8, %%"#b"\n"
#endif
#if 0
static const guint32 rgba8_alpha_mask_64[2] = { 0xFF000000, 0xFF000000 };
static const guint32 rgba8_b1_64[2] = { 0x01010101, 0x01010101 };
static const guint32 rgba8_b255_64[2] = { 0xFFFFFFFF, 0xFFFFFFFF };
@@ -334,29 +330,33 @@ gimp_composite_burn_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 void
 gimp_composite_darken_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;

-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
    {
-      asm volatile ("  movq %1, %%mm2\n"
-                    "\tpminub %2, %%mm2\n"
+      asm volatile ("  movq %1, %%mm2\n"
+                    "\tmovq %2, %%mm3\n"
+                    "\t" pminub(mm3, mm2, mm4) "\n"
                     "\tmovq %%mm2, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                     : "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
    }

-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
-      asm volatile ("  movd %1, %%mm2\n"
-                    "\tmovd %2, %%mm3\n"
-                    "\tpminub %%mm3, %%mm2\n"
+      asm volatile ("  movd %1, %%mm2\n"
+                    "\tmovd %2, %%mm3\n"
+                    "\t" pminub(mm3, mm2, mm4) "\n"
                     "\tmovd %%mm2, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                     : "%mm2", "%mm3", "%mm4");
    }
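The darken kernel above also replaces the bare pminub instruction, an SSE extension that plain MMX CPUs lack, with a pminub() macro from gimp-composite-x86.h that takes an explicit scratch register; that scratch is why %mm4 joins the clobber list. The macro's real definition is outside this diff, but one plausible MMX-only expansion of an unsigned byte minimum looks like this (an assumption for illustration, not necessarily the header's actual text):

    /* dst = min(dst, src) per byte, with scr as scratch:
     *   scr = max(dst - src, 0)   (saturating subtract)
     *   dst = dst - scr           (== min(dst, src))      */
    #define pminub(src,dst,scr) \
      "movq    %%"#dst", %%"#scr"\n" \
      "\tpsubusb %%"#src", %%"#scr"\n" \
      "\tpsubb   %%"#scr", %%"#dst"\n"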
@@ -366,11 +366,14 @@ gimp_composite_darken_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 void
 gimp_composite_difference_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;

  asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");

-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
    {
      asm volatile ("  movq %1, %%mm2\n"
                    "\tmovq %2, %%mm3\n"
@@ -385,15 +388,15 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpand %%mm0, %%mm2\n"
                    "\tpor %%mm2, %%mm1\n"
                    "\tmovq %%mm1, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                    : "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
    }

-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
      asm volatile ("  movd %1, %%mm2\n"
                    "\tmovd %2, %%mm3\n"
@@ -408,14 +411,15 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpand %%mm0, %%mm2\n"
                    "\tpor %%mm2, %%mm1\n"
                    "\tmovd %%mm1, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                    : "%mm1", "%mm2", "%mm3", "%mm4");
    }

  asm("emms");
}
#if 0
void
xxxgimp_composite_divide_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
{
@@ -515,7 +519,9 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
asm("emms");
}
#endif
#if 0
void
xxxgimp_composite_dodge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
{
@@ -606,11 +612,17 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
asm("emms");
}
#endif
void
gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
{
-  GimpCompositeContext op = *_op;
+  /* GimpCompositeContext op = *_op;*/
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;

  asm volatile ("  movq %0,%%mm0\n"
                "\tpxor %%mm6,%%mm6\n"
@@ -620,7 +632,7 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                : "%mm0", "%mm6", "%mm7");

-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
    {
      asm volatile ("  movq %1,%%mm2\n"
                    "\tmovq %2,%%mm3\n"
@@ -647,15 +659,15 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpor %%mm2,%%mm1\n"
                    "\tmovq %%mm1,%0\n"
-                    : "+m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                    : "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
    }

-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
      asm volatile ("  movd %1, %%mm2\n"
                    "\tmovd %2, %%mm3\n"
@@ -679,8 +691,8 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpor %%mm2, %%mm1\n"
                    "\tmovd %%mm1, %0\n"
-                    : "+m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                    : "%mm1", "%mm2", "%mm3", "%mm4");
    }
@@ -690,7 +702,10 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 void
 gimp_composite_grain_merge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;

  asm volatile ("movq %0, %%mm0\n"
                "pxor %%mm6, %%mm6\n"
@@ -699,7 +714,7 @@ gimp_composite_grain_merge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64)
                : "%mm0", "%mm6", "%mm7");

-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
    {
      asm volatile ("  movq %1, %%mm2\n"
                    "\tmovq %2, %%mm3\n"
@@ -723,15 +738,15 @@ gimp_composite_grain_merge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpandn %%mm4, %%mm1\n"
                    "\tpor %%mm2, %%mm1\n"
                    "\tmovq %%mm1, %0\n"
-                    : "+m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4");
+      a++;
+      b++;
+      d++;
    }

-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
      asm volatile ("  movd %1, %%mm2\n"
                    "\tmovd %2, %%mm3\n"
@@ -754,9 +769,9 @@ gimp_composite_grain_merge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpor %%mm2, %%mm1\n"
                    "\tmovd %%mm1, %0\n"
-                    : "+m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
+                    : "+m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4");
    }

  asm("emms");
@@ -765,11 +780,14 @@ gimp_composite_grain_merge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 void
 gimp_composite_lighten_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;

  asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");

-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
    {
      asm volatile ("  movq %1, %%mm2\n"
                    "\tmovq %2, %%mm3\n"
@@ -781,15 +799,15 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpand %%mm0, %%mm3\n"
                    "\tpor %%mm3, %%mm1\n"
                    "\tmovq %%mm1, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                    : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
    }

-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
      asm volatile ("  movd %1, %%mm2\n"
                    "\tmovd %2, %%mm3\n"
@@ -804,8 +822,8 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpand %%mm0, %%mm3\n"
                    "\tpor %%mm3, %%mm1\n"
                    "\tmovd %%mm1, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
}
@@ -815,16 +833,19 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 void
 gimp_composite_multiply_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;

  asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
  asm volatile ("movq %0,%%mm7" : : "m" (*rgba8_w128_64) : "%mm7");
  asm volatile ("pxor %%mm6,%%mm6" : : : "%mm6");

-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
    {
-      asm volatile ("  movq %0, %%mm2\n"
-                    "\tmovq %1, %%mm3\n"
+      asm volatile ("  movq %1, %%mm2\n"
+                    "\tmovq %2, %%mm3\n"
                    mmx_low_bytes_to_words(mm2,mm1,mm6)
                    mmx_low_bytes_to_words(mm3,mm5,mm6)
@@ -843,19 +864,19 @@ gimp_composite_multiply_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpand %%mm0, %%mm2\n"
                    "\tpor %%mm2, %%mm1\n"
-                    "\tmovq %%mm1, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm1, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+      a++;
+      b++;
+      d++;
    }

-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
-      asm volatile ("  movd %0, %%mm2\n"
-                    "\tmovd %1, %%mm3\n"
+      asm volatile ("  movd %1, %%mm2\n"
+                    "\tmovd %2, %%mm3\n"
                    mmx_low_bytes_to_words(mm2,mm1,mm6)
                    mmx_low_bytes_to_words(mm3,mm5,mm6)
@@ -870,15 +891,16 @@ gimp_composite_multiply_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpand %%mm0, %%mm2\n"
                    "\tpor %%mm2, %%mm1\n"
-                    "\tmovd %%mm1, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+                    "\tmovd %%mm1, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
}
asm("emms");
}
#if 0
static void
sse_op_overlay(void)
{
@@ -1045,12 +1067,15 @@ xxxgimp_composite_overlay_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
  asm("emms");
}
#endif

void
gimp_composite_scale_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
{
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  /*uint64 *b = (uint64 *) _op->B;*/
+  gulong n_pixels = _op->n_pixels;

  asm volatile ("pxor %%mm0,%%mm0\n"
                "\tmovl %0,%%eax\n"
@@ -1063,10 +1088,10 @@ gimp_composite_scale_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                "\tpor %%mm5,%%mm3\n"
                "\tmovq %1,%%mm7\n"
                : /* empty */
-                : "m" (op.scale.scale), "m" (*rgba8_w128_64)
+                : "m" (_op->scale.scale), "m" (*rgba8_w128_64)
                : "%eax", "%ebx", "%mm0", "%mm3", "%mm5", "%mm6", "%mm7");

-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
    {
      asm volatile ("movq %1,%%mm2\n"
                    "\tmovq %%mm2,%%mm1\n"
@@ -1084,14 +1109,14 @@ gimp_composite_scale_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpackuswb %%mm4,%%mm1\n"
                    "\tmovq %%mm1,%0\n"
-                    : "+m" (*op.D)
-                    : "m" (*op.A)
+                    : "=m" (*d)
+                    : "m" (*a)
                    : "%mm1", "%mm2", "%mm4", "%mm5", "%mm7");
-      op.A += 8;
-      op.D += 8;
+      a++;
+      d++;
    }

-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
      asm volatile ("  movd %1,%%mm2\n"
                    "\tmovq %%mm2,%%mm1\n"
@@ -1102,8 +1127,8 @@ gimp_composite_scale_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpackuswb %%mm0,%%mm1\n"
                    "\tmovd %%mm1,%0\n"
-                    : "+m" (*op.D)
-                    : "m" (*op.A)
+                    : "=m" (*d)
+                    : "m" (*a)
: "%mm1", "%mm2", "%mm4", "%mm5", "%mm6", "%mm7");
}
@@ -1113,16 +1138,19 @@ gimp_composite_scale_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 void
 gimp_composite_screen_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;

  asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0");
  asm volatile ("movq %0,%%mm7" : : "m" (*rgba8_w128_64) : "%mm7");
  asm volatile ("pxor %mm6, %mm6");

-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
    {
-      asm volatile ("  movq %0,%%mm2\n"
-                    "\tmovq %1,%%mm3\n"
+      asm volatile ("  movq %1,%%mm2\n"
+                    "\tmovq %2,%%mm3\n"
                    "\tpcmpeqb %%mm4,%%mm4\n"
                    "\tpsubb %%mm2,%%mm4\n"
@@ -1165,19 +1193,19 @@ gimp_composite_screen_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpor %%mm3,%%mm1\n"
-                    "\tmovq %%mm1,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm1,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+      a++;
+      b++;
+      d++;
    }

-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
-      asm volatile ("  movd %0,%%mm2\n"
-                    "\tmovd %1,%%mm3\n"
+      asm volatile ("  movd %1,%%mm2\n"
+                    "\tmovd %2,%%mm3\n"
                    "\tpcmpeqb %%mm4,%%mm4\n"
                    "\tpsubb %%mm2,%%mm4\n"
@@ -1220,10 +1248,10 @@ gimp_composite_screen_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                    "\tpor %%mm3,%%mm1\n"
-                    "\tmovd %%mm1,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+                    "\tmovd %%mm1,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
}
asm("emms");
......
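In gimp-composite-sse2.c, below, nearly every hunk is the same one-line change: destinations that are only ever written with movq/movd/movdqu had been declared "+m" (read-write) and become "=m" (write-only), the accurate constraint, which frees the compiler from assuming the old contents of *op.D are consumed. A reduced sketch of the distinction (store_q is a hypothetical helper, not GIMP API):

    #include <stdint.h>

    /* The template stores to *d and never reads it, so "=m" is correct;
     * "+m" would tell GCC the asm also consumes the previous value. */
    static void
    store_q (uint64_t *d, const uint64_t *a)
    {
      asm volatile ("  movq %1, %%mm1\n"
                    "\tmovq %%mm1, %0\n"
                    : "=m" (*d)          /* was "+m" before this commit */
                    : "m" (*a)
                    : "%mm1");
      asm volatile ("emms");
    }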
@@ -35,7 +35,7 @@
 #include "gimp-composite.h"
 #include "gimp-composite-sse2.h"

-#ifdef COMPILE_SSE2_OKAY
+#ifdef COMPILE_SSE2_IS_OKAY
#include "gimp-composite-x86.h"
@@ -109,7 +109,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%xmm0,%%xmm2\n"
                "\tpor %%xmm2,%%xmm1\n"
                "\tmovdqu %%xmm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
op.A += 16;
@@ -129,7 +129,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%mm0,%%mm2\n"
                "\tpor %%mm2,%%mm1\n"
                "\tmovq %%mm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
op.A += 8;
@@ -149,7 +149,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%mm0,%%mm2\n"
                "\tpor %%mm2,%%mm1\n"
                "\tmovd %%mm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
op.A += 4;
@@ -220,7 +220,7 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
  asm volatile ("  movq %0,%%mm0\n"
                "\tmovdqu %1,%%xmm0"
-                :
+                : /* */
: "m" (*rgba8_alpha_mask_64), "m" (*rgba8_alpha_mask_128)
: "%mm0", "%xmm0");
@@ -239,7 +239,7 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%xmm0,%%xmm2\n"
                "\tpor %%xmm2,%%xmm1\n"
                "\tmovdqu %%xmm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5");
op.A += 16;
@@ -262,7 +262,7 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%mm0, %%mm2\n"
                "\tpor %%mm2, %%mm1\n"
                "\tmovq %%mm1, %0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
op.A += 8;
@@ -285,7 +285,7 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%mm0, %%mm2\n"
                "\tpor %%mm2, %%mm1\n"
                "\tmovd %%mm1, %0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
}
@@ -438,7 +438,7 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
  asm volatile ("  movq %0,%%mm0\n"
                "\tpxor %%mm6,%%mm6\n"
                "\tmovq %1,%%mm7\n"
                "\tmovdqu %2,%%xmm0\n"
"\tpxor %%xmm6,%%xmm6\n"
"\tmovdqu %3,%%xmm7\n"
: /* empty */
@@ -472,7 +472,7 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpor %%xmm2,%%xmm1\n"
                "\tmovdqu %%xmm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%xmm1", "%xmm2", "%xmm3", "%xmm4");
op.A += 16;
@@ -507,7 +507,7 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpor %%mm2,%%mm1\n"
                "\tmovq %%mm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%mm1", "%mm2", "%mm3", "%mm4");
op.A += 8;
@@ -532,7 +532,7 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%mm0, %%mm2\n"
                "\tpor %%mm2, %%mm1\n"
                "\tmovd %%mm1, %0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%mm1", "%mm2", "%mm3", "%mm4");
}
@@ -631,7 +631,7 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%xmm0,%%xmm2\n"
                "\tpor %%xmm2,%%xmm1\n"
                "\tmovdqu %%xmm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%xmm1", "%xmm2", "%xmm3", "%xmm4");
op.A += 16;
@@ -651,7 +651,7 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%mm0,%%mm2\n"
                "\tpor %%mm2,%%mm1\n"
                "\tmovq %%mm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%mm1", "%mm2", "%mm3", "%mm4");
op.A += 8;
@@ -671,7 +671,7 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
                "\tpand %%mm0,%%mm2\n"
                "\tpor %%mm2,%%mm1\n"
                "\tmovd %%mm1,%0\n"
-                : "+m" (*op.D)
+                : "=m" (*op.D)
: "m" (*op.A), "m" (*op.B)
: "%mm1", "%mm2", "%mm3", "%mm4");
}
@@ -801,12 +801,12 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
  asm("emms");
}

-#endif /* COMPILE_SSE2_OKAY */
+#endif /* COMPILE_SSE2_IS_OKAY */
gboolean
gimp_composite_sse2_init (void)
{
-#ifdef COMPILE_SSE2_OKAY
+#ifdef COMPILE_SSE2_IS_OKAY
guint32 cpu = cpu_accel ();
if (cpu & CPU_ACCEL_X86_SSE2)
......
@@ -14,14 +14,14 @@ extern gboolean gimp_composite_sse2_install (void);
 #if defined(ARCH_X86)
 #if __GNUC__ >= 3
 #if defined(ARCH_X86_64) || !defined(PIC)
-#define COMPILE_SSE2_OKAY (1)
+#define COMPILE_SSE2_IS_OKAY (1)
#endif /* defined(ARCH_X86_64) || !defined(PIC) */
#endif /* __GNUC__ >= 3*/
#endif /* defined(ARCH_X86) */
#endif /* defined(USE_SSE) */
#endif /* !defined(__INTEL_COMPILER) */
-#ifdef COMPILE_SSE2_OKAY
+#ifdef COMPILE_SSE2_IS_OKAY
extern void gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *ctx);
extern void gimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *ctx);
extern void gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *ctx);
......
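The rename above (COMPILE_SSE2_OKAY to COMPILE_SSE2_IS_OKAY) addresses the ChangeLog's "wrong manifest constant" bug: when an #ifdef guard and the corresponding #define disagree on the name, the guarded block silently vanishes, because the preprocessor raises no diagnostic for testing an undefined macro. A contrived sketch of the failure mode (not GIMP code):

    #define COMPILE_SSE2_IS_OKAY (1)

    #ifdef COMPILE_SSE2_OKAY        /* wrong name: never defined ...   */
    void sse2_code (void) { }       /* ... so this is never compiled   */
    #endif

    #ifdef COMPILE_SSE2_IS_OKAY     /* matches the definition above */
    void sse2_code_built (void) { }
    #endif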