Commit 46c62c8f authored by David Monniaux's avatar David Monniaux

MMX paint_funcs Should autodetect whether the assembler handles MMX and

MMX paint_funcs
Should autodetect whether the assembler handles MMX and whether the C
compiler allows gcc-like __attribute function specifications.
Should detect MMX at run-time.
parent f9e34112
......@@ -16,6 +16,7 @@
#undef ENABLE_MP
#undef ENABLE_NLS
#undef HAVE_ASM_MMX
#undef HAVE_CATGETS
#undef HAVE_DIRENT_H
#undef HAVE_DOPRNT
......@@ -49,7 +50,6 @@
#undef SRAND_FUNC
#undef USE_PTHREADS
/* Leave that blank line there!! Autoheader needs it.
If you're adding to this file, keep in mind:
......
......@@ -314,7 +314,8 @@ gimp_SOURCES = \
marching_ants.h \
pixmaps.h \
pixmaps2.h \
wilber.h
wilber.h \
paint_funcs_simd.S
EXTRA_DIST = \
makefile.mingw \
......@@ -322,7 +323,8 @@ EXTRA_DIST = \
makefile.msc \
gimp.rc \
gimp.sym \
wilber.ico
wilber.ico \
arch/i386/mmx/paint_funcs_mmx.S
gimp_LDFLAGS = -export-dynamic -export-symbols $(srcdir)/gimp.sym
......
This diff is collapsed.
/*
MMX code to supplement some functions in paint_funcs.c
for the Gimp.
Copyright (C) 1999, 2001 David Monniaux
*/
.text
.align 4
/*
 * unsigned long intel_cpu_features(void)
 *
 * Returns the feature word that CPUID function 1 leaves in %edx, or 0 when
 * the processor does not support the CPUID instruction at all.
 * CPUID support is probed by trying to flip bit 21 (the ID flag) of EFLAGS.
 * %ebx is preserved around CPUID; %ecx and %edx are overwritten.
 * NOTE(review): EFLAGS is not restored, so the ID flag stays flipped on return.
 */
.globl intel_cpu_features
intel_cpu_features:
pushl %ebx
/* %eax = current EFLAGS with the ID flag inverted */
pushfl
popl %eax
xor $ 0x200000, %eax
/* try to install the modified value */
pushl %eax
popfl
/* %edx = the bits where EFLAGS now differs from what we tried to write */
pushfl
popl %edx
xor %eax, %edx
/* default return value: no features */
xor %eax, %eax
/* the ID flag did not take the new value: CPUID is unavailable */
test $ 0x200000, %edx
jnz .intel_cpu_features_end
/* CPUID function 1: the feature flags come back in %edx */
movl $ 1, %eax
cpuid
movl %edx, %eax
.intel_cpu_features_end:
popl %ebx
ret
/* 8-byte constants loaded with movq by the pixel routines below. */
/* 2-byte pixels: selects the second byte of every pixel */
.alpha_mask_1a: .int 0xFF00FF00, 0xFF00FF00
/* 0x0080 in each 16-bit lane; added to byte products before the two-step >> 8 */
.mult_shift: .int 0x00800080, 0x00800080
/* 4-byte pixels: selects the fourth byte of every pixel */
.alpha_mask_3a: .int 0xFF000000, 0xFF000000
/*
 * m4 template that emits the two MMX entry points of one blend operation:
 *
 *     <op>_pixels_3a_3a   4 bytes per pixel, two pixels per quadword
 *     <op>_pixels_1a_1a   2 bytes per pixel, four pixels per quadword
 *
 * Template arguments, in order:
 *   1. operation name, used as the prefix of the generated symbols
 *   2. one-time setup code run before any pixel is processed (may be empty)
 *   3. the per-quadword operation: src1 data arrives in %mm2, src2 data in
 *      %mm3, the alpha mask is in %mm0, and the result must be left in %mm1
 *   4. tear-down code run just before emms (may be empty)
 *
 * Calling convention of the generated routines (gcc regparm(3) on i386):
 *   %eax = src1, %edx = src2, %ecx = pixel count, dst = first stack argument.
 * %edi and %ebx are saved and restored; the MMX state is released with emms.
 *
 * Both entry points emit the setup code, and every tail path stores its
 * result, so all pixels of a row are written for every operation.
 */
define(`MMX_PIXEL_OP_3A_1A', `
.globl $1_pixels_3a_3a
.align 16
$1_pixels_3a_3a:
        pushl %edi
        pushl %ebx
        /* return address plus the two saved registers: dst sits at 12(%esp) */
        movl 12(%esp), %edi
        movq .alpha_mask_3a, %mm0
        $2
        /* main loop: two 4-byte pixels per iteration */
        subl $ 2, %ecx
        jl .$1_pixels_3a_3a_last
        movl $ 8, %ebx
.$1_pixels_3a_3a_loop:
        movq (%eax), %mm2
        movq (%edx), %mm3
        $3
        movq %mm1, (%edi)
        addl %ebx, %eax
        addl %ebx, %edx
        addl %ebx, %edi
        subl $ 2, %ecx
        jge .$1_pixels_3a_3a_loop
.$1_pixels_3a_3a_last:
        /* the count is now -2 or -1; bit 0 set means one pixel is left */
        test $ 1, %ecx
        jz .$1_pixels_3a_3a_end
        movd (%eax), %mm2
        movd (%edx), %mm3
        $3
        movd %mm1, (%edi)
.$1_pixels_3a_3a_end:
        $4
        emms
        popl %ebx
        popl %edi
        ret

.globl $1_pixels_1a_1a
.align 16
$1_pixels_1a_1a:
        pushl %edi
        pushl %ebx
        /* return address plus the two saved registers: dst sits at 12(%esp) */
        movl 12(%esp), %edi
        movq .alpha_mask_1a, %mm0
        /* same one-time setup as the 3a_3a entry point */
        $2
        /* main loop: four 2-byte pixels per iteration */
        subl $ 4, %ecx
        jl .$1_pixels_1a_1a_last3
        movl $ 8, %ebx
.$1_pixels_1a_1a_loop:
        movq (%eax), %mm2
        movq (%edx), %mm3
        $3
        movq %mm1, (%edi)
        addl %ebx, %eax
        addl %ebx, %edx
        addl %ebx, %edi
        subl $ 4, %ecx
        jge .$1_pixels_1a_1a_loop
.$1_pixels_1a_1a_last3:
        /* the count is now -4 .. -1; its two low bits give the 0 .. 3 pixels left */
        test $ 2, %ecx
        jz .$1_pixels_1a_1a_last1
        /* two pixels = one doubleword */
        movd (%eax), %mm2
        movd (%edx), %mm3
        $3
        /* write the two pixels out before moving on to the last one */
        movd %mm1, (%edi)
        addl $ 4, %eax
        addl $ 4, %edx
        addl $ 4, %edi
.$1_pixels_1a_1a_last1:
        test $ 1, %ecx
        jz .$1_pixels_1a_1a_end
        /* one pixel = one 16-bit word, moved through %bx; the upper half of
           %ebx is ignored because only the low word of the result is stored */
        movw (%eax), %bx
        movd %ebx, %mm2
        movw (%edx), %bx
        movd %ebx, %mm3
        $3
        movd %mm1, %ebx
        movw %bx, (%edi)
.$1_pixels_1a_1a_end:
        $4
        emms
        popl %ebx
        popl %edi
        ret')
/* min(a,b) = a - max(a-b, 0) */
/* add: non-alpha bytes = saturating src1 + src2, alpha bytes = min(src1, src2) */
MMX_PIXEL_OP_3A_1A(`add', `', `
/* %mm1 = saturated sum with the alpha bytes cleared */
movq %mm2, %mm4
paddusb %mm3, %mm4
movq %mm0, %mm1
pandn %mm4, %mm1
/* %mm2 = src1 - max(src1 - src2, 0) = min(src1, src2), kept for alpha only */
movq %mm2, %mm4
psubusb %mm3, %mm4
psubb %mm4, %mm2
pand %mm0, %mm2
por %mm2, %mm1', `')
/* substract: non-alpha bytes = max(src1 - src2, 0), alpha bytes = min(src1, src2) */
MMX_PIXEL_OP_3A_1A(`substract', `', `
/* %mm4 = saturating src1 - src2, reused below for the minimum */
movq %mm2, %mm4
psubusb %mm3, %mm4
movq %mm0, %mm1
pandn %mm4, %mm1
/* %mm2 = src1 - max(src1 - src2, 0) = min(src1, src2), kept for alpha only */
psubb %mm4, %mm2
pand %mm0, %mm2
por %mm2, %mm1', `')
/*
 * difference: every non-alpha byte becomes |src1 - src2| and every alpha
 * byte becomes min(src1, src2), as in the other operations of this file.
 * %mm4 keeps max(src1 - src2, 0) untouched so that it can be reused for the
 * minimum; the absolute difference is accumulated in %mm5 instead.
 */
MMX_PIXEL_OP_3A_1A(`difference', `', `
        /* %mm4 = max(src1 - src2, 0), %mm5 = max(src2 - src1, 0) */
        movq %mm2, %mm4
        movq %mm3, %mm5
        psubusb %mm3, %mm4
        psubusb %mm2, %mm5
        /* one of the two is zero, so their sum is the absolute difference */
        paddb %mm4, %mm5
        movq %mm0, %mm1
        pandn %mm5, %mm1
        /* %mm2 = src1 - max(src1 - src2, 0) = min(src1, src2), alpha only */
        psubb %mm4, %mm2
        pand %mm0, %mm2
        por %mm2, %mm1', `')
/*
 * multiply: non-alpha bytes = src1 * src2 / 255, alpha bytes = min(src1, src2).
 * The division by 255 is done per 16-bit lane as
 *     t = product + 0x80;  result = (t + (t >> 8)) >> 8
 */
MMX_PIXEL_OP_3A_1A(`multiply', `
/* setup: %mm7 = 0x0080 per lane, %mm6 = 0 for widening bytes to words */
movq .mult_shift, %mm7
pxor %mm6, %mm6',`
/* low four bytes: widen, multiply, divide by 255 -> %mm1 */
movq %mm2, %mm1
punpcklbw %mm6, %mm1
movq %mm3, %mm5
punpcklbw %mm6, %mm5
pmullw %mm5, %mm1
paddw %mm7, %mm1
movq %mm1, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm1
psrlw $ 8, %mm1
/* high four bytes: same computation -> %mm4 */
movq %mm2, %mm4
punpckhbw %mm6, %mm4
movq %mm3, %mm5
punpckhbw %mm6, %mm5
pmullw %mm5, %mm4
paddw %mm7, %mm4
movq %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm4
psrlw $ 8, %mm4
/* narrow both halves back to eight bytes and clear the alpha bytes */
packuswb %mm4, %mm1
movq %mm0, %mm4
pandn %mm1, %mm4
movq %mm4, %mm1
/* %mm2 = src1 - max(src1 - src2, 0) = min(src1, src2), kept for alpha only */
movq %mm2, %mm4
psubusb %mm3, %mm4
psubb %mm4, %mm2
pand %mm0, %mm2
por %mm2, %mm1', `')
/* Could be perhaps more optimized */
/* darken: every byte, alpha included, becomes min(src1, src2); the mask is not needed */
MMX_PIXEL_OP_3A_1A(`darken', `', `
/* %mm2 = src1 - max(src1 - src2, 0) = min(src1, src2) */
movq %mm2, %mm4
psubusb %mm3, %mm4
psubb %mm4, %mm2
movq %mm2, %mm1', `')
/* lighten: non-alpha bytes = max(src1, src2), alpha bytes = min(src1, src2) */
MMX_PIXEL_OP_3A_1A(`lighten', `', `
/* %mm4 = max(src1 - src2, 0) */
movq %mm2, %mm4
psubusb %mm3, %mm4
/* %mm3 = src2 + max(src1 - src2, 0) = max(src1, src2) */
paddb %mm4, %mm3
movq %mm0, %mm1
pandn %mm3, %mm1
/* %mm2 = src1 - max(src1 - src2, 0) = min(src1, src2), kept for alpha only */
psubb %mm4, %mm2
pand %mm0, %mm2
por %mm2, %mm1', `')
/*
 * screen: non-alpha bytes = 255 - (255 - src1) * (255 - src2) / 255,
 * alpha bytes = min(src1, src2).
 * The division by 255 is done per 16-bit lane as
 *     t = product + 0x80;  result = (t + (t >> 8)) >> 8
 * %mm2 is reused as scratch for the high half, so the alpha minimum is
 * derived from the complements held in %mm4 and %mm5:
 *     min(a, b) = 255 - max(255 - a, 255 - b)
 */
MMX_PIXEL_OP_3A_1A(`screen', `
        /* setup: %mm7 = 0x0080 per lane, %mm6 = 0 for widening bytes to words */
        movq .mult_shift, %mm7
        pxor %mm6, %mm6',`
        /* %mm4 = 255 - src1, %mm5 = 255 - src2 */
        pcmpeqb %mm4, %mm4
        psubb %mm2, %mm4
        pcmpeqb %mm5, %mm5
        psubb %mm3, %mm5
        /* low four bytes: widen, multiply, divide by 255 -> %mm1 */
        movq %mm4, %mm1
        punpcklbw %mm6, %mm1
        movq %mm5, %mm3
        punpcklbw %mm6, %mm3
        pmullw %mm3, %mm1
        paddw %mm7, %mm1
        movq %mm1, %mm3
        psrlw $ 8, %mm3
        paddw %mm3, %mm1
        psrlw $ 8, %mm1
        /* high four bytes: same computation -> %mm2 */
        movq %mm4, %mm2
        punpckhbw %mm6, %mm2
        movq %mm5, %mm3
        punpckhbw %mm6, %mm3
        pmullw %mm3, %mm2
        paddw %mm7, %mm2
        movq %mm2, %mm3
        psrlw $ 8, %mm3
        paddw %mm3, %mm2
        psrlw $ 8, %mm2
        /* narrow to bytes, complement, and clear the alpha bytes -> %mm1 */
        packuswb %mm2, %mm1
        pcmpeqb %mm3, %mm3
        psubb %mm1, %mm3
        movq %mm0, %mm1
        pandn %mm3, %mm1
        /* %mm5 = max(255 - src1, 255 - src2), starting from the complement of src1 */
        movq %mm4, %mm2
        psubusb %mm5, %mm2
        paddb %mm2, %mm5
        /* %mm3 = 255 - %mm5 = min(src1, src2), kept for alpha only */
        pcmpeqb %mm3, %mm3
        psubb %mm5, %mm3
        pand %mm0, %mm3
        por %mm3, %mm1', `')
/* 0x00FF in each 16-bit lane: the value 255 used by op_overlay */
.lower_ff: .int 0x00FF00FF, 0x00FF00FF
/*
 * overlay: the setup loads the rounding constant into %mm7 and zero into
 * %mm6; the per-quadword work is delegated to the subroutine op_overlay.
 */
MMX_PIXEL_OP_3A_1A(`overlay', `
movq .mult_shift, %mm7
pxor %mm6, %mm6 ',
`call op_overlay', `')
/*
 * op_overlay: per-quadword body of the overlay operation, reached through
 * call from the generated overlay routines.
 * In:  %mm2 = src1 bytes, %mm3 = src2 bytes, %mm0 = alpha mask,
 *      %mm6 = 0, %mm7 = 0x0080 per 16-bit lane.
 * Out: %mm1 = result; %mm2, %mm4 and %mm5 are overwritten.
 * Per non-alpha byte, with m = a*b/255 and s = 255 - (255-a)*(255-b)/255:
 *      result = m + a*(s - m)/255
 * Alpha bytes receive min(a, b).
 * Every division by 255 is done as t = x + 0x80; (t + (t >> 8)) >> 8.
 * Eight bytes of stack are used to park the low-half result.
 */
op_overlay:
/* ---- low four bytes ---- */
/* %mm1 = m = a*b/255 */
movq %mm2, %mm1
punpcklbw %mm6, %mm1
movq %mm3, %mm5
punpcklbw %mm6, %mm5
pmullw %mm5, %mm1
paddw %mm7, %mm1
movq %mm1, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm1
psrlw $ 8, %mm1
/* %mm4 = (255-a)*(255-b)/255 */
pcmpeqb %mm4, %mm4
psubb %mm2, %mm4
punpcklbw %mm6, %mm4
pcmpeqb %mm5, %mm5
psubb %mm3, %mm5
punpcklbw %mm6, %mm5
pmullw %mm5, %mm4
paddw %mm7, %mm4
movq %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm4
psrlw $ 8, %mm4
/* %mm5 = s - m = 255 - %mm4 - m */
movq .lower_ff, %mm5
psubw %mm4, %mm5
psubw %mm1, %mm5
/* %mm5 = a*(s - m)/255 + m */
movq %mm2, %mm4
punpcklbw %mm6, %mm4
pmullw %mm4, %mm5
paddw %mm7, %mm5
movq %mm5, %mm4
psrlw $ 8, %mm4
paddw %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm1, %mm5
/* park the low-half words on the stack while the high half is computed */
subl $ 8, %esp
movq %mm5, (%esp)
/* ---- high four bytes: same sequence with the high unpack ---- */
/* %mm1 = m = a*b/255 */
movq %mm2, %mm1
punpckhbw %mm6, %mm1
movq %mm3, %mm5
punpckhbw %mm6, %mm5
pmullw %mm5, %mm1
paddw %mm7, %mm1
movq %mm1, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm1
psrlw $ 8, %mm1
/* %mm4 = (255-a)*(255-b)/255 */
pcmpeqb %mm4, %mm4
psubb %mm2, %mm4
punpckhbw %mm6, %mm4
pcmpeqb %mm5, %mm5
psubb %mm3, %mm5
punpckhbw %mm6, %mm5
pmullw %mm5, %mm4
paddw %mm7, %mm4
movq %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm4
psrlw $ 8, %mm4
/* %mm5 = s - m */
movq .lower_ff, %mm5
psubw %mm4, %mm5
psubw %mm1, %mm5
/* %mm5 = a*(s - m)/255 + m */
movq %mm2, %mm4
punpckhbw %mm6, %mm4
pmullw %mm4, %mm5
paddw %mm7, %mm5
movq %mm5, %mm4
psrlw $ 8, %mm4
paddw %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm1, %mm5
/* reload the low half, release the stack slot, narrow both halves to bytes */
movq (%esp), %mm4
addl $ 8, %esp
packuswb %mm5, %mm4
/* %mm1 = overlay result with the alpha bytes cleared */
movq %mm0, %mm1
pandn %mm4, %mm1
/* %mm2 = src1 - max(src1 - src2, 0) = min(src1, src2), kept for alpha only */
movq %mm2, %mm4
psubusb %mm3, %mm4
psubb %mm4, %mm2
pand %mm0, %mm2
por %mm2, %mm1
ret
\ No newline at end of file
......@@ -78,6 +78,12 @@ gboolean use_debug_handler = FALSE;
gboolean console_messages = FALSE;
gboolean restore_session = FALSE;
gboolean double_speed = FALSE;
gboolean use_mmx = FALSE;
/* TODO: this should probably go into a header file */
/* Prototype of the assembly routine that reports the CPU feature flags.
 * It is declared whenever HAVE_ASM_MMX is defined, because that is the
 * condition under which main() calls it; the older USE_GCC_INTEL_MMX guard
 * is still honoured.
 */
#if defined (HAVE_ASM_MMX) || defined (USE_GCC_INTEL_MMX)
unsigned long intel_cpu_features(void);
#endif
MessageHandlerType message_handler = CONSOLE;
......@@ -149,6 +155,11 @@ main (int argc,
use_shm = TRUE;
#endif
#ifdef HAVE_ASM_MMX
use_mmx = (intel_cpu_features() & (1 << 23)) ? 1 : 0;
fprintf(stderr, "MMX : %s\n", use_mmx ? "yes" : "no");
#endif
batch_cmds = g_new (char *, argc);
batch_cmds[0] = NULL;
......
/* Build wrapper for the SIMD paint functions: the i386 MMX source is pulled
 * in only when HAVE_ASM_MMX is defined; otherwise this file assembles to
 * nothing.
 */
#include "config.h"
#ifdef HAVE_ASM_MMX
#include <arch/i386/mmx/paint_funcs_mmx.S>
#endif
\ No newline at end of file
......@@ -58,7 +58,6 @@
#define INT_BLEND(a,b,alpha,tmp) (INT_MULT((a)-(b), alpha, tmp) + (b))
typedef enum
{
MinifyX_MinifyY,
......@@ -153,7 +152,41 @@ static void apply_layer_mode_replace (guchar *src1,
gboolean *affect);
static void rotate_pointers (gpointer *p,
guint32 n);
/* MMX stuff */
extern gboolean use_mmx;

/* The hand-written MMX routines are only assembled when HAVE_ASM_MMX is
 * defined, and their prototypes need the gcc-style regparm attribute, so the
 * fast path is enabled only when both are available.  Defining
 * USE_GCC_INTEL_MMX by hand still forces it on.
 * NOTE(review): assumes config.h has been included before this point - confirm.
 */
#if defined (HAVE_ASM_MMX) && defined (__GNUC__) && !defined (USE_GCC_INTEL_MMX)
#define USE_GCC_INTEL_MMX
#endif

#ifdef USE_GCC_INTEL_MMX

/* Declares one assembly routine.  With regparm(3) the first three arguments
 * (src1, src2, count) travel in registers and dst is passed on the stack.
 */
#define MMX_PIXEL_OP(x) \
void \
x( \
  const unsigned char *src1, \
  const unsigned char *src2, \
  unsigned count, \
  unsigned char *dst) __attribute((regparm(3)));

/* Declares both variants of one operation: 4-byte and 2-byte pixels. */
#define MMX_PIXEL_OP_3A_1A(op) \
  MMX_PIXEL_OP(op##_pixels_3a_3a) \
  MMX_PIXEL_OP(op##_pixels_1a_1a)

/* Placed at the top of a void xxx_pixels() function: when MMX is usable and
 * both sources carry alpha with matching 2- or 4-byte pixels, run the
 * assembly routine and return; otherwise fall through to the C code.
 * Expects src1, src2, dest, length, bytes1, bytes2, has_alpha1 and
 * has_alpha2 to be in scope.
 */
#define USE_MMX_PIXEL_OP_3A_1A(op) \
  if (use_mmx && has_alpha1 && has_alpha2) \
    { \
      if (bytes1==2 && bytes2==2) \
        { \
          op##_pixels_1a_1a(src1, src2, length, dest); \
          return; \
        } \
      if (bytes1==4 && bytes2==4) \
        { \
          op##_pixels_3a_3a(src1, src2, length, dest); \
          return; \
        } \
    }
/* Debugging aid that used to follow the block above:
   fprintf(stderr, "non-MMX: %s(%d, %d, %d, %d)\n", #op,
           bytes1, bytes2, has_alpha1, has_alpha2); */

#else

/* No MMX support: both helpers expand to nothing. */
#define MMX_PIXEL_OP_3A_1A(op)
#define USE_MMX_PIXEL_OP_3A_1A(op)

#endif
void
......@@ -715,6 +748,7 @@ extract_alpha_pixels (const guchar *src,
}
}
MMX_PIXEL_OP_3A_1A(darken)
void
darken_pixels (const guchar *src1,
const guchar *src2,
......@@ -728,6 +762,8 @@ darken_pixels (const guchar *src1,
gint b, alpha;
guchar s1, s2;
USE_MMX_PIXEL_OP_3A_1A(darken)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length--)
......@@ -750,7 +786,7 @@ darken_pixels (const guchar *src1,
}
}
MMX_PIXEL_OP_3A_1A(lighten)
void
lighten_pixels (const guchar *src1,
const guchar *src2,
......@@ -764,6 +800,8 @@ lighten_pixels (const guchar *src1,
gint b, alpha;
guchar s1, s2;
USE_MMX_PIXEL_OP_3A_1A(lighten)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length--)
......@@ -881,6 +919,7 @@ color_only_pixels (const guchar *src1,
}
}
MMX_PIXEL_OP_3A_1A(multiply)
void
multiply_pixels (const guchar *src1,
const guchar *src2,
......@@ -894,6 +933,8 @@ multiply_pixels (const guchar *src1,
gint alpha, b;
gint tmp;
USE_MMX_PIXEL_OP_3A_1A(multiply)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
if (has_alpha1 && has_alpha2)
......@@ -973,6 +1014,8 @@ divide_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(screen)
void
screen_pixels (const guchar *src1,
const guchar *src2,
......@@ -986,6 +1029,8 @@ screen_pixels (const guchar *src1,
gint alpha, b;
gint tmp;
USE_MMX_PIXEL_OP_3A_1A(screen)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --)
......@@ -1005,6 +1050,8 @@ screen_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(overlay)
void
overlay_pixels (const guchar *src1,
const guchar *src2,
......@@ -1153,6 +1200,8 @@ hardlight_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(add)
void
add_pixels (const guchar *src1,
const guchar *src2,
......@@ -1165,6 +1214,8 @@ add_pixels (const guchar *src1,
{
gint alpha, b;
USE_MMX_PIXEL_OP_3A_1A(add)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --)
......@@ -1187,6 +1238,8 @@ add_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(substract)
void
subtract_pixels (const guchar *src1,
const guchar *src2,
......@@ -1200,6 +1253,8 @@ subtract_pixels (const guchar *src1,
gint alpha, b;
gint diff;
USE_MMX_PIXEL_OP_3A_1A(substract)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --)
......@@ -1222,6 +1277,8 @@ subtract_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(difference)
void
difference_pixels (const guchar *src1,
const guchar *src2,
......@@ -1235,6 +1292,8 @@ difference_pixels (const guchar *src1,
gint alpha, b;
gint diff;
USE_MMX_PIXEL_OP_3A_1A(difference)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --)
......
......@@ -58,7 +58,6 @@
#define INT_BLEND(a,b,alpha,tmp) (INT_MULT((a)-(b), alpha, tmp) + (b))
typedef enum
{
MinifyX_MinifyY,
......@@ -153,7 +152,41 @@ static void apply_layer_mode_replace (guchar *src1,
gboolean *affect);
static void rotate_pointers (gpointer *p,
guint32 n);
/* MMX stuff */
extern gboolean use_mmx;

/* The hand-written MMX routines are only assembled when HAVE_ASM_MMX is
 * defined, and their prototypes need the gcc-style regparm attribute, so the
 * fast path is enabled only when both are available.  Defining
 * USE_GCC_INTEL_MMX by hand still forces it on.
 * NOTE(review): assumes config.h has been included before this point - confirm.
 */
#if defined (HAVE_ASM_MMX) && defined (__GNUC__) && !defined (USE_GCC_INTEL_MMX)
#define USE_GCC_INTEL_MMX
#endif

#ifdef USE_GCC_INTEL_MMX

/* Declares one assembly routine.  With regparm(3) the first three arguments
 * (src1, src2, count) travel in registers and dst is passed on the stack.
 */
#define MMX_PIXEL_OP(x) \
void \
x( \
  const unsigned char *src1, \
  const unsigned char *src2, \
  unsigned count, \
  unsigned char *dst) __attribute((regparm(3)));

/* Declares both variants of one operation: 4-byte and 2-byte pixels. */
#define MMX_PIXEL_OP_3A_1A(op) \
  MMX_PIXEL_OP(op##_pixels_3a_3a) \
  MMX_PIXEL_OP(op##_pixels_1a_1a)

/* Placed at the top of a void xxx_pixels() function: when MMX is usable and
 * both sources carry alpha with matching 2- or 4-byte pixels, run the
 * assembly routine and return; otherwise fall through to the C code.
 * Expects src1, src2, dest, length, bytes1, bytes2, has_alpha1 and
 * has_alpha2 to be in scope.
 */
#define USE_MMX_PIXEL_OP_3A_1A(op) \
  if (use_mmx && has_alpha1 && has_alpha2) \
    { \
      if (bytes1==2 && bytes2==2) \
        { \
          op##_pixels_1a_1a(src1, src2, length, dest); \
          return; \
        } \
      if (bytes1==4 && bytes2==4) \
        { \
          op##_pixels_3a_3a(src1, src2, length, dest); \
          return; \
        } \
    }
/* Debugging aid that used to follow the block above:
   fprintf(stderr, "non-MMX: %s(%d, %d, %d, %d)\n", #op,
           bytes1, bytes2, has_alpha1, has_alpha2); */

#else

/* No MMX support: both helpers expand to nothing. */
#define MMX_PIXEL_OP_3A_1A(op)
#define USE_MMX_PIXEL_OP_3A_1A(op)

#endif
void
......@@ -715,6 +748,7 @@ extract_alpha_pixels (const guchar *src,
}
}
MMX_PIXEL_OP_3A_1A(darken)
void
darken_pixels (const guchar *src1,
const guchar *src2,
......@@ -728,6 +762,8 @@ darken_pixels (const guchar *src1,
gint b, alpha;
guchar s1, s2;
USE_MMX_PIXEL_OP_3A_1A(darken)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length--)
......@@ -750,7 +786,7 @@ darken_pixels (const guchar *src1,
}
}
MMX_PIXEL_OP_3A_1A(lighten)
void
lighten_pixels (const guchar *src1,
const guchar *src2,
......@@ -764,6 +800,8 @@ lighten_pixels (const guchar *src1,
gint b, alpha;
guchar s1, s2;
USE_MMX_PIXEL_OP_3A_1A(lighten)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length--)
......@@ -881,6 +919,7 @@ color_only_pixels (const guchar *src1,
}
}
MMX_PIXEL_OP_3A_1A(multiply)
void
multiply_pixels (const guchar *src1,
const guchar *src2,
......@@ -894,6 +933,8 @@ multiply_pixels (const guchar *src1,
gint alpha, b;
gint tmp;
USE_MMX_PIXEL_OP_3A_1A(multiply)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
if (has_alpha1 && has_alpha2)
......@@ -973,6 +1014,8 @@ divide_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(screen)
void
screen_pixels (const guchar *src1,
const guchar *src2,
......@@ -986,6 +1029,8 @@ screen_pixels (const guchar *src1,
gint alpha, b;
gint tmp;
USE_MMX_PIXEL_OP_3A_1A(screen)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --)
......@@ -1005,6 +1050,8 @@ screen_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(overlay)
void
overlay_pixels (const guchar *src1,
const guchar *src2,
......@@ -1153,6 +1200,8 @@ hardlight_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(add)
void
add_pixels (const guchar *src1,
const guchar *src2,
......@@ -1165,6 +1214,8 @@ add_pixels (const guchar *src1,
{
gint alpha, b;
USE_MMX_PIXEL_OP_3A_1A(add)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --)
......@@ -1187,6 +1238,8 @@ add_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(substract)
void
subtract_pixels (const guchar *src1,
const guchar *src2,
......@@ -1200,6 +1253,8 @@ subtract_pixels (const guchar *src1,
gint alpha, b;
gint diff;
USE_MMX_PIXEL_OP_3A_1A(substract)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --)
......@@ -1222,6 +1277,8 @@ subtract_pixels (const guchar *src1,
}
MMX_PIXEL_OP_3A_1A(difference)
void
difference_pixels (const guchar *src1,
const guchar *src2,
......@@ -1235,6 +1292,8 @@ difference_pixels (const guchar *src1,
gint alpha, b;
gint diff;
USE_MMX_PIXEL_OP_3A_1A(difference)
alpha = (has_alpha1 || has_alpha2) ? MAX (bytes1, bytes2) - 1 : bytes1;
while (length --)
......