Commit 32ca225a authored by Helvetix Victorinox

Repaired a latent bug in gimp_composite_lighten_rgba8_rgba8_rgba8_sse2

* app/composite/gimp-composite-sse2.c: Repaired a latent bug in
gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 where the alpha masks
were not set up correctly.  A redundant (better?) fix to bug #164061.

Use movntq instructions to avoid processor cache pollution.

Some adjustments to register allocation specifications.
parent 2cec4583
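
For readers unfamiliar with non-temporal stores: movntq writes an MMX register straight to memory through the write-combining buffers, bypassing the cache hierarchy, so streaming composited pixels out does not evict data that is still hot. The hunks below replace the final "movq ..., %0" store of each loop body with movntq. What follows is a minimal, hedged sketch of that pattern, not code from this commit; the helper name and buffer layout are made up for illustration, and the sfence/emms cleanup reflects the usual requirements of weakly-ordered MMX stores.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical helper (not from the commit): stream 64-bit pixel pairs
 * to the destination with non-temporal stores.  movntq is an MMX-register
 * store introduced with SSE; sfence makes the weakly-ordered stores
 * globally visible, and emms releases the MMX state for later x87 code. */
static void
copy_pixels_nontemporal (uint64_t *dst, const uint64_t *src, size_t n)
{
  size_t i;

  for (i = 0; i < n; i++)
    asm volatile (" movq   %1, %%mm0\n"
                  "\tmovntq %%mm0, %0\n"   /* store bypasses the cache */
                  : "=m" (dst[i])
                  : "m" (src[i])
                  : "%mm0");

  asm volatile ("sfence\n\temms");
}

The rationale given in the message is cache pollution: the freshly composited destination pixels are not re-read immediately, so keeping them out of the cache leaves room for hotter data.
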
2005-05-09 Helvetix Victorinox <helvetix@gimp.org>
* app/composite/gimp-composite-sse2.c: Repaired a latent bug in
gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 where the alpha masks
were not set up correctly.  A redundant (better?) fix to bug #164061.
Use movntq instructions to avoid processor cache pollution.
Some adjustments to register allocation specifications.
2005-05-09 DindinX <dindinx@gimp.org>
* plug-ins/common/grid.c: use the correct bounds for the spinners.
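
The lighten fix itself is in the gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 hunk further down (@@ -590,7 +445,12 @@): the routine primes the alpha mask once before its pixel loops, and the old code issued a 128-bit movdqu load of %%xmm0 from the 64-bit rgba8_alpha_mask_64 constant, so the upper half of the register picked up whatever followed that table in memory; as far as the hunk shows, %%mm0, which the two-pixel and one-pixel MMX tails use as their mask, was never primed at all. The sketch below restates the corrected setup as a standalone function. The local mask tables are illustrative stand-ins for the constants defined elsewhere in gimp-composite-sse2.c, whose exact contents are assumed here.

#include <stdint.h>

/* Illustrative stand-ins for the mask tables (assumed layout: 0xFF in the
 * alpha byte of every little-endian RGBA8 pixel, zero in the colour bytes). */
static const uint8_t rgba8_alpha_mask_128[16] __attribute__ ((aligned (16))) =
  { 0, 0, 0, 0xFF,  0, 0, 0, 0xFF,  0, 0, 0, 0xFF,  0, 0, 0, 0xFF };
static const uint8_t rgba8_alpha_mask_64[8] __attribute__ ((aligned (8))) =
  { 0, 0, 0, 0xFF,  0, 0, 0, 0xFF };

/* Prime %xmm0 for the 4-pixel SSE2 loop and %mm0 for the MMX tail loops,
 * mirroring the fixed setup shown in the diff.  The old code did a 16-byte
 * movdqu from the 8-byte table, which is why the high half of %xmm0 held
 * garbage instead of the mask. */
static void
prime_alpha_masks (void)
{
  asm volatile (" movdqu %0, %%xmm0\n"
                "\tmovq   %1, %%mm0"
                : /* no outputs */
                : "m" (*rgba8_alpha_mask_128), "m" (*rgba8_alpha_mask_64)
                : "%xmm0", "%mm0");
}

The %%mm5 / %%xmm5 additions to several clobber lists appear to be the "adjustments to register allocation specifications" from the message: those asm bodies scratch the registers, and declaring them keeps GCC from holding live values there across the statements.
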
...
@@ -1315,11 +1315,7 @@ gimp_composite_addition_va8_va8_va8_mmx (GimpCompositeContext *_op)
"\t" pminub(mm3, mm2, mm4) "\n"
"\tpand %%mm0, %%mm2\n"
"\tpor %%mm2, %%mm1\n"
#if 1
"\tmovq %%mm1, %0\n"
#else
"\tmovntq %%mm1, %0\n"
#endif
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
...
@@ -85,7 +85,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
"\t" pminub(mm3, mm2, mm4) "\n"
"\tpand %%mm0, %%mm2\n"
"\tpor %%mm2, %%mm1\n"
"\tmovq %%mm1, %0\n"
"\tmovntq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4");
@@ -245,7 +245,7 @@ gimp_composite_darken_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
asm volatile (" movq %1, %%mm2\n"
"\tmovq %2, %%mm3\n"
"\t" pminub(mm3, mm2, mm4) "\n"
"\tmovq %%mm2, %0\n"
"\tmovntq %%mm2, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4");
@@ -717,12 +717,9 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
"\tmovd %2, %%mm3\n"
"\tmovq %%mm2, %%mm4\n"
"\tpmaxub %%mm3, %%mm4\n"
"\tmovq %%mm0, %%mm1\n"
"\tpandn %%mm4, %%mm1\n"
"\tpminub %%mm2,%%mm3\n"
"\tpminub %%mm2, %%mm3\n"
"\tpand %%mm0, %%mm3\n"
"\tpor %%mm3, %%mm1\n"
"\tmovd %%mm1, %0\n"
...
@@ -138,7 +138,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tpminub %%mm3,%%mm2\n"
"\tpand %%mm0,%%mm2\n"
"\tpor %%mm2,%%mm1\n"
"\tmovq %%mm1,%0\n"
"\tmovntq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
@@ -167,15 +167,6 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
asm("emms");
}
#if 0
void
xxxgimp_composite_burn_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
{
}
#endif
void
gimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
{
@@ -209,7 +200,7 @@ gimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
{
asm volatile (" movq %1, %%mm2\n"
"\tpminub %2, %%mm2\n"
"\tmovq %%mm2, %0\n"
"\tmovntq %%mm2, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4");
@@ -290,7 +281,7 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tpminub %%mm3, %%mm2\n"
"\tpand %%mm0, %%mm2\n"
"\tpor %%mm2, %%mm1\n"
"\tmovq %%mm1, %0\n"
"\tmovntq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
@@ -323,142 +314,6 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
}
#if 0
void
xxxgimp_composite_dodge_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
{
GimpCompositeContext op = *_op;
for (; op.n_pixels >= 4; op.n_pixels -= 4)
{
asm volatile (" movdqu %1,%%xmm0\n"
"\tmovdqu %2,%%xmm1\n"
"\tmovdqu %%xmm1,%%xmm3\n"
"\tpxor %%xmm2,%%xmm2\n"
"\tpunpcklbw %%xmm2,%%xmm3\n"
"\tpunpcklbw %%xmm0,%%xmm2\n"
"\tmovdqu %3,%%xmm4\n"
"\tpsubw %%xmm3,%%xmm4\n"
"\t" xmm_pdivwuqX(xmm2,xmm4,xmm5,xmm6) "\n"
"\tmovdqu %%xmm1,%%xmm3\n"
"\tpxor %%xmm2,%%xmm2\n"
"\tpunpckhbw %%xmm2,%%xmm3\n"
"\tpunpckhbw %%xmm0,%%xmm2\n"
"\tmovdqu %3,%%xmm4\n"
"\tpsubw %%xmm3,%%xmm4\n"
"\t" xmm_pdivwuqX(xmm2,xmm4,xmm6,xmm7) "\n"
"\tpackuswb %%xmm6,%%xmm5\n"
"\tmovdqu %4,%%xmm6\n"
"\tmovdqu %%xmm1,%%xmm7\n"
"\tpminub %%xmm0,%%xmm7\n"
"\tpand %%xmm6,%%xmm7\n"
"\tpandn %%xmm5,%%xmm6\n"
"\tpor %%xmm6,%%xmm7\n"
"\tmovdqu %%xmm7,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w256_128), "m" (*rgba8_alpha_mask_128)
: "%eax", "%ecx", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
op.A += 16;
op.B += 16;
op.D += 16;
}
for (; op.n_pixels >= 2; op.n_pixels -= 2)
{
asm volatile (" movq %1,%%mm0\n"
"\tmovq %2,%%mm1\n"
"\tmovq %%mm1,%%mm3\n"
"\tpxor %%mm2,%%mm2\n"
"\tpunpcklbw %%mm2,%%mm3\n"
"\tpunpcklbw %%mm0,%%mm2\n"
"\tmovq %3,%%mm4\n"
"\tpsubw %%mm3,%%mm4\n"
"\t" pdivwuqX(mm2,mm4,mm5) "\n"
"\tmovq %%mm1,%%mm3\n"
"\tpxor %%mm2,%%mm2\n"
"\tpunpckhbw %%mm2,%%mm3\n"
"\tpunpckhbw %%mm0,%%mm2\n"
"\tmovq %3,%%mm4\n"
"\tpsubw %%mm3,%%mm4\n"
"\t" pdivwuqX(mm2,mm4,mm6) "\n"
"\tpackuswb %%mm6,%%mm5\n"
"\tmovq %4,%%mm6\n"
"\tmovq %%mm1,%%mm7\n"
"\tpminub %%mm0,%%mm7\n"
"\tpand %%mm6,%%mm7\n"
"\tpandn %%mm5,%%mm6\n"
"\tpor %%mm6,%%mm7\n"
"\tmovq %%mm7,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
: "%eax", "%ecx", "%edx", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
op.A += 8;
op.B += 8;
op.D += 8;
}
if (op.n_pixels)
{
asm volatile (" movd %1,%%mm0\n"
"\tmovq %2,%%mm1\n"
"\tmovq %%mm1,%%mm3\n"
"\tpxor %%mm2,%%mm2\n"
"\tpunpcklbw %%mm2,%%mm3\n"
"\tpunpcklbw %%mm0,%%mm2\n"
"\tmovq %3,%%mm4\n"
"\tpsubw %%mm3,%%mm4\n"
"\t" pdivwuqX(mm2,mm4,mm5) "\n"
"\tmovq %%mm1,%%mm3\n"
"\tpxor %%mm2,%%mm2\n"
"\tpunpckhbw %%mm2,%%mm3\n"
"\tpunpckhbw %%mm0,%%mm2\n"
"\tmovq %3,%%mm4\n"
"\tpsubw %%mm3,%%mm4\n"
"\t" pdivwuqX(mm2,mm4,mm6) "\n"
"\tpackuswb %%mm6,%%mm5\n"
"\tmovq %4,%%mm6\n"
"\tmovq %%mm1,%%mm7\n"
"\tpminub %%mm0,%%mm7\n"
"\tpand %%mm6,%%mm7\n"
"\tpandn %%mm5,%%mm6\n"
"\tpor %%mm6,%%mm7\n"
"\tmovd %%mm7,%0\n"
: "=m" (*op.D)
: "m" (*op.A), "m" (*op.B), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
: "%eax", "%ecx", "%edx", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
}
asm("emms");
}
#endif
void
gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
{
@@ -509,7 +364,7 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tmovdqu %%xmm1,%0\n"
: "=m" (*D)
: "m" (*A), "m" (*B)
: "%xmm1", "%xmm2", "%xmm3", "%xmm4");
: "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5");
A++;
B++;
D++;
@@ -545,10 +400,10 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tpand %%mm0,%%mm2\n"
"\tpor %%mm2,%%mm1\n"
"\tmovq %%mm1,%0\n"
"\tmovntq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4");
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
a++;
b++;
d++;
@@ -573,7 +428,7 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tmovd %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4");
: "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
}
asm("emms");
@@ -590,7 +445,12 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
uint128 *B = (uint128 *) _op->B;
gulong n_pixels = _op->n_pixels;
asm volatile ("movdqu %0,%%xmm0" : : "m" (*rgba8_alpha_mask_64) : "%xmm0");
asm volatile (" movdqu %0,%%xmm0\n"
"\tmovq %1,%%mm0"
: /* empty */
: "m" (*rgba8_alpha_mask_128), "m" (*rgba8_alpha_mask_64)
: "%xmm0", "%mm0");
for (; n_pixels >= 4; n_pixels -= 4)
{
@@ -627,7 +487,7 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tpminub %%mm2, %%mm3\n"
"\tpand %%mm0, %%mm3\n"
"\tpor %%mm3, %%mm1\n"
"\tmovq %%mm1, %0\n"
"\tmovntq %%mm1, %0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4");
@@ -644,7 +504,7 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tpmaxub %%mm3, %%mm4\n"
"\tmovq %%mm0, %%mm1\n"
"\tpandn %%mm4, %%mm1\n"
"\tpminub %%mm2,%%mm3\n"
"\tpminub %%mm2, %%mm3\n"
"\tpand %%mm0, %%mm3\n"
"\tpor %%mm3, %%mm1\n"
"\tmovd %%mm1, %0\n"
@@ -709,7 +569,7 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
"\tpminub %%mm3,%%mm2\n"
"\tpand %%mm0,%%mm2\n"
"\tpor %%mm2,%%mm1\n"
"\tmovq %%mm1,%0\n"
"\tmovntq %%mm1,%0\n"
: "=m" (*d)
: "m" (*a), "m" (*b)
: "%mm1", "%mm2", "%mm3", "%mm4");
@@ -838,7 +698,7 @@ gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
asm volatile (" movq %0,%%mm2\n"
"\tmovq %1,%%mm3\n"
"\tmovq %%mm3,%0\n"
"\tmovq %%mm2,%1\n"
"\tmovntq %%mm2,%1\n"
: "+m" (*op.A), "+m" (*op.B)
: /* empty */
: "%mm2", "%mm3");
...