Commit 54adf6ff authored by Sven Neumann's avatar Sven Neumann Committed by Sven Neumann

Redone target platform detection and MMX assembly checks. Basically copied from DirectFB.

2003-07-04  Sven Neumann  <sven@gimp.org>

	* configure.in: redone target platform detection and MMX assembly
	checks. Basically copied from DirectFB.

	* app/Makefile.am
	* app/arch/i386/mmx/detect_mmx.S
	* app/arch/i386/mmx/paint_funcs_mmx.S
	* app/paint-funcs/Makefile.am
	* app/paint-funcs/paint-funcs-mmx.h
	* app/paint-funcs/paint-funcs-simd.S: removed the old MMX routines.
	New ones will come instead.

	* app/base/Makefile.am
	* app/base/detect-mmx.[Sh]: removed these two files...

	* app/base/cpu-accel.[ch]: ... and added new ones that do more
	fine-grained cpu acceleration detection. Again taken from DirectFB
	but the code is originally from mpeg2dec.

	* app/base/base.c: print results from cpu feature tests.
parent 6bbec4dd
2003-07-04 Sven Neumann <sven@gimp.org>
* configure.in: redone target platform detection and MMX assembly
checks. Basically copied from DirectFB.
* app/Makefile.am
* app/arch/i386/mmx/detect_mmx.S
* app/arch/i386/mmx/paint_funcs_mmx.S
* app/paint-funcs/Makefile.am
* app/paint-funcs/paint-funcs-mmx.h
* app/paint-funcs/paint-funcs-simd.S: removed the old MMX routines.
New ones will come instead.
* app/base/Makefile.am
* app/base/detect-mmx.[Sh]: removed these two files...
* app/base/cpu-accel.[ch]: ... and added new ones that do more
fine-grained cpu acceleration detection. Again taken from DirectFB
but the code is originally from mpeg2dec.
* app/base/base.c: print results from cpu feature tests.
2003-07-04 Dave Neary <bolsh@gimp.org>
* app/core/gimpdrawable-bucket-fill.c: Add alpha channel
......@@ -26,24 +26,22 @@ scriptdata =
bin_PROGRAMS = gimp-1.3
gimp_1_3_SOURCES = \
appenv.h \
app_procs.c \
app_procs.h \
main.c \
batch.c \
batch.h \
errors.c \
errors.h \
gimp-intl.h \
libgimp_glue.c \
appenv.h \
app_procs.c \
app_procs.h \
main.c \
batch.c \
batch.h \
errors.c \
errors.h \
gimp-intl.h \
libgimp_glue.c \
libgimp_glue.h
EXTRA_DIST = \
makefile.msc \
gimp.rc \
wilber.ico \
arch/i386/mmx/detect_mmx.S \
arch/i386/mmx/paint_funcs_mmx.S
makefile.msc \
gimp.rc \
wilber.ico
if HAVE_GLIBC_REGEX
REGEXREPL =
......
/*
 * intel_cpu_features
 *
 * Returns the x86 feature-flag word reported by CPUID leaf 1 (its EDX
 * output), or 0 when CPUID cannot be used.
 *
 * Detection steps:
 *   1. Try to toggle EFLAGS bit 18 (0x040000).  If the bit does not
 *      change, the processor is a 386 and 0 is returned.
 *   2. Try to toggle EFLAGS bit 21 (0x200000).  If the bit does not
 *      change, CPUID is unavailable and 0 is returned.
 *   3. Execute CPUID with %eax = 1 and return the %edx result.
 *
 * Takes no arguments.
 * Out:       %eax = feature flags, or 0
 * Preserved: %ebx, %edx, %esi, %edi, %ebp; EFLAGS is restored to its
 *            entry value after each probe (only the arithmetic status
 *            flags are left modified)
 * Clobbers:  %ecx
 *
 * The result is kept in registers only: no static scratch storage is
 * written, so the routine is reentrant and contains no absolute data
 * reference.
 */
	.text
	.align 4
#ifndef __MINGW32__
	.globl intel_cpu_features
	.type intel_cpu_features,@function
intel_cpu_features:
#else
	.globl _intel_cpu_features
_intel_cpu_features:
#endif
	pushl	%ebx			/* CPUID overwrites %ebx */
	pushl	%edx			/* CPUID overwrites %edx */

	pushfl
	popl	%ecx			/* %ecx = EFLAGS on entry */

	/* Probe 1: can bit 18 be toggled? */
	movl	%ecx, %eax
	xorl	$0x040000, %eax
	pushl	%eax
	popfl
	pushfl
	popl	%eax			/* %eax = EFLAGS as actually written */
	pushl	%ecx
	popfl				/* put the entry EFLAGS back first ... */
	xorl	%ecx, %eax		/* ... then test: 0 means nothing changed */
	jz	.intel_cpu_features_end	/* Processor is 386; %eax is already 0 */

	/* Probe 2: can bit 21 be toggled? */
	movl	%ecx, %eax
	xorl	$0x200000, %eax
	pushl	%eax
	popfl
	pushfl
	popl	%eax
	pushl	%ecx
	popfl				/* restore the entry EFLAGS */
	xorl	%ecx, %eax
	jz	.intel_cpu_features_end	/* no CPUID; %eax is already 0 */

	movl	$1, %eax		/* CPUID leaf 1 */
	cpuid
	movl	%edx, %eax		/* return the EDX feature word */

.intel_cpu_features_end:
	popl	%edx
	popl	%ebx
	ret
/*
MMX code to supplement some functions in paint_funcs.c
for the Gimp.
Copyright (C) 1999, 2001 David Monniaux
*/
/*
 * 64-bit constants loaded with movq by the pixel routines in this file.
 * They are stored in .text next to the code that reads them.
 */
.text
.align 4
/* Selects byte 1 of every 2-byte group (0x00,0xFF repeating in memory);
   loaded into %mm0 by the *_1a_1a routines. */
.alpha_mask_1a: .int 0xFF00FF00, 0xFF00FF00
/* 0x0080 in each 16-bit lane; added to the 16-bit products in the
   multiply routines before they are shifted right by 8. */
.mult_shift: .int 0x00800080, 0x00800080
/* Selects byte 3 of every 4-byte group; loaded into %mm0 by the
   *_3a_3a routines. */
.alpha_mask_3a: .int 0xFF000000, 0xFF000000
/* Identity used by the routines below to build a per-byte minimum from
   the unsigned saturating subtract (psubusb):
   min(a,b) = a - max(a-b, 0) */
/*
 * add_pixels_3a_3a
 *
 * Per-byte unsigned saturating add of two buffers of 4-byte pixels.
 * Byte 3 of every pixel (the byte selected by .alpha_mask_3a) is not
 * added; it receives the smaller of the two source bytes instead.
 *
 * In:   %eax = first source buffer
 *       %edx = second source buffer
 *       %ecx = number of pixels
 *       first stack argument = destination buffer
 *       (this register layout looks like gcc regparm(3) -- confirm
 *       against the C prototype)
 * Clobbers: %eax, %ecx, %edx, flags, %mm0-%mm4; emms is executed before
 *           returning.
 *
 * Pixels are processed two at a time (8 bytes); an odd trailing pixel is
 * handled with 4-byte loads and a 4-byte store.
 */

/* %mm2 = a, %mm3 = b, %mm0 = mask  ->  %mm1 = result (%mm2, %mm4 destroyed) */
.macro add_3a_kernel
	movq	%mm2, %mm4
	paddusb	%mm3, %mm4		/* mm4 = saturate(a + b) */
	movq	%mm0, %mm1
	pandn	%mm4, %mm1		/* keep the sum outside the mask */
	movq	%mm2, %mm4
	psubusb	%mm3, %mm4		/* mm4 = max(a - b, 0) */
	psubb	%mm4, %mm2		/* mm2 = min(a, b) */
	pand	%mm0, %mm2		/* keep the minimum inside the mask */
	por	%mm2, %mm1
.endm

	.align 16
#ifndef __MINGW32__
	.globl add_pixels_3a_3a
	.type add_pixels_3a_3a,@function
add_pixels_3a_3a:
#else
	.globl _add_pixels_3a_3a
_add_pixels_3a_3a:
#endif
	pushl	%edi
	movl	8(%esp), %edi		/* destination: first stack argument */
	movq	.alpha_mask_3a, %mm0

	subl	$2, %ecx		/* at least one full pair left? */
	jl	.add_pixels_3a_3a_tail

.add_pixels_3a_3a_pair:
	movq	(%eax), %mm2
	movq	(%edx), %mm3
	add_3a_kernel
	movq	%mm1, (%edi)
	addl	$8, %eax
	addl	$8, %edx
	addl	$8, %edi
	subl	$2, %ecx
	jge	.add_pixels_3a_3a_pair

.add_pixels_3a_3a_tail:
	/* %ecx is now count-2k with value -2 or -1; bit 0 set means one
	   pixel is still pending */
	testl	$1, %ecx
	jz	.add_pixels_3a_3a_done
	movd	(%eax), %mm2
	movd	(%edx), %mm3
	add_3a_kernel
	movd	%mm1, (%edi)

.add_pixels_3a_3a_done:
	emms
	popl	%edi
	ret
/*
 * add_pixels_1a_1a
 *
 * Per-byte unsigned saturating add of two buffers of 2-byte pixels.
 * Byte 1 of every pixel (the byte selected by .alpha_mask_1a) is not
 * added; it receives the smaller of the two source bytes instead.
 *
 * In:   %eax = first source buffer
 *       %edx = second source buffer
 *       %ecx = number of pixels
 *       first stack argument = destination buffer
 *       (this register layout looks like gcc regparm(3) -- confirm
 *       against the C prototype)
 * Preserved: %ebx, %edi (saved and restored here)
 * Clobbers:  %eax, %ecx, %edx, flags, %mm0-%mm4; emms is executed before
 *            returning.
 *
 * Pixels are processed four at a time (8 bytes), then an optional group
 * of two (4 bytes), then an optional single pixel (2 bytes).
 *
 * Fix: the two-pixel tail now stores its result.  It previously computed
 * the pixels and advanced all three pointers without writing anything to
 * the destination.
 */

/* %mm2 = a, %mm3 = b, %mm0 = mask  ->  %mm1 = result (%mm2, %mm4 destroyed) */
.macro add_1a_kernel
	movq	%mm2, %mm4
	paddusb	%mm3, %mm4		/* mm4 = saturate(a + b) */
	movq	%mm0, %mm1
	pandn	%mm4, %mm1		/* keep the sum outside the mask */
	movq	%mm2, %mm4
	psubusb	%mm3, %mm4		/* mm4 = max(a - b, 0) */
	psubb	%mm4, %mm2		/* mm2 = min(a, b) */
	pand	%mm0, %mm2		/* keep the minimum inside the mask */
	por	%mm2, %mm1
.endm

	.align 16
#ifndef __MINGW32__
	.globl add_pixels_1a_1a
	.type add_pixels_1a_1a,@function
add_pixels_1a_1a:
#else
	.globl _add_pixels_1a_1a
_add_pixels_1a_1a:
#endif
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi		/* destination: first stack argument */
	movq	.alpha_mask_1a, %mm0

	subl	$4, %ecx		/* at least one full group of four? */
	jl	.add_pixels_1a_1a_last3

.add_pixels_1a_1a_loop:
	movq	(%eax), %mm2
	movq	(%edx), %mm3
	add_1a_kernel
	movq	%mm1, (%edi)
	addl	$8, %eax
	addl	$8, %edx
	addl	$8, %edi
	subl	$4, %ecx
	jge	.add_pixels_1a_1a_loop

.add_pixels_1a_1a_last3:
	/* %ecx is in -4..-1 here; its low two bits equal the number of
	   pixels still pending (0..3) */
	testl	$2, %ecx
	jz	.add_pixels_1a_1a_last1
	movd	(%eax), %mm2
	movd	(%edx), %mm3
	add_1a_kernel
	movd	%mm1, (%edi)		/* store the two pixels (was missing) */
	addl	$4, %eax
	addl	$4, %edx
	addl	$4, %edi

.add_pixels_1a_1a_last1:
	testl	$1, %ecx
	jz	.add_pixels_1a_1a_end
	movzwl	(%eax), %ebx		/* one 2-byte pixel, zero-extended */
	movd	%ebx, %mm2
	movzwl	(%edx), %ebx
	movd	%ebx, %mm3
	add_1a_kernel
	movd	%mm1, %ebx
	movw	%bx, (%edi)		/* write back exactly 2 bytes */

.add_pixels_1a_1a_end:
	emms
	popl	%ebx
	popl	%edi
	ret
/*
 * substract_pixels_3a_3a
 *
 * Per-byte unsigned saturating subtract (first source minus second
 * source, clamped at 0) of two buffers of 4-byte pixels.  Byte 3 of
 * every pixel (the byte selected by .alpha_mask_3a) receives the smaller
 * of the two source bytes instead.
 *
 * In:   %eax = first source buffer
 *       %edx = second source buffer
 *       %ecx = number of pixels
 *       first stack argument = destination buffer
 *       (this register layout looks like gcc regparm(3) -- confirm
 *       against the C prototype)
 * Clobbers: %eax, %ecx, %edx, flags, %mm0-%mm4; emms is executed before
 *           returning.
 *
 * Pixels are processed two at a time (8 bytes); an odd trailing pixel is
 * handled with 4-byte loads and a 4-byte store.
 */

/* %mm2 = a, %mm3 = b, %mm0 = mask  ->  %mm1 = result (%mm2, %mm4 destroyed) */
.macro sub_3a_kernel
	movq	%mm2, %mm4
	psubusb	%mm3, %mm4		/* mm4 = max(a - b, 0) */
	movq	%mm0, %mm1
	pandn	%mm4, %mm1		/* keep the difference outside the mask */
	psubb	%mm4, %mm2		/* mm2 = min(a, b) */
	pand	%mm0, %mm2		/* keep the minimum inside the mask */
	por	%mm2, %mm1
.endm

	.align 16
#ifndef __MINGW32__
	.globl substract_pixels_3a_3a
	.type substract_pixels_3a_3a,@function
substract_pixels_3a_3a:
#else
	.globl _substract_pixels_3a_3a
_substract_pixels_3a_3a:
#endif
	pushl	%edi
	movl	8(%esp), %edi		/* destination: first stack argument */
	movq	.alpha_mask_3a, %mm0

	subl	$2, %ecx		/* at least one full pair left? */
	jl	.substract_pixels_3a_3a_tail

.substract_pixels_3a_3a_pair:
	movq	(%eax), %mm2
	movq	(%edx), %mm3
	sub_3a_kernel
	movq	%mm1, (%edi)
	addl	$8, %eax
	addl	$8, %edx
	addl	$8, %edi
	subl	$2, %ecx
	jge	.substract_pixels_3a_3a_pair

.substract_pixels_3a_3a_tail:
	/* bit 0 of %ecx set means one pixel is still pending */
	testl	$1, %ecx
	jz	.substract_pixels_3a_3a_done
	movd	(%eax), %mm2
	movd	(%edx), %mm3
	sub_3a_kernel
	movd	%mm1, (%edi)

.substract_pixels_3a_3a_done:
	emms
	popl	%edi
	ret
/*
 * substract_pixels_1a_1a
 *
 * Per-byte unsigned saturating subtract (first source minus second
 * source, clamped at 0) of two buffers of 2-byte pixels.  Byte 1 of
 * every pixel (the byte selected by .alpha_mask_1a) receives the smaller
 * of the two source bytes instead.
 *
 * In:   %eax = first source buffer
 *       %edx = second source buffer
 *       %ecx = number of pixels
 *       first stack argument = destination buffer
 *       (this register layout looks like gcc regparm(3) -- confirm
 *       against the C prototype)
 * Preserved: %ebx, %edi (saved and restored here)
 * Clobbers:  %eax, %ecx, %edx, flags, %mm0-%mm4; emms is executed before
 *            returning.
 *
 * Pixels are processed four at a time (8 bytes), then an optional group
 * of two (4 bytes), then an optional single pixel (2 bytes).
 *
 * Fix: the two-pixel tail now stores its result.  It previously computed
 * the pixels and advanced all three pointers without writing anything to
 * the destination.
 */

/* %mm2 = a, %mm3 = b, %mm0 = mask  ->  %mm1 = result (%mm2, %mm4 destroyed) */
.macro sub_1a_kernel
	movq	%mm2, %mm4
	psubusb	%mm3, %mm4		/* mm4 = max(a - b, 0) */
	movq	%mm0, %mm1
	pandn	%mm4, %mm1		/* keep the difference outside the mask */
	psubb	%mm4, %mm2		/* mm2 = min(a, b) */
	pand	%mm0, %mm2		/* keep the minimum inside the mask */
	por	%mm2, %mm1
.endm

	.align 16
#ifndef __MINGW32__
	.globl substract_pixels_1a_1a
	.type substract_pixels_1a_1a,@function
substract_pixels_1a_1a:
#else
	.globl _substract_pixels_1a_1a
_substract_pixels_1a_1a:
#endif
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi		/* destination: first stack argument */
	movq	.alpha_mask_1a, %mm0

	subl	$4, %ecx		/* at least one full group of four? */
	jl	.substract_pixels_1a_1a_last3

.substract_pixels_1a_1a_loop:
	movq	(%eax), %mm2
	movq	(%edx), %mm3
	sub_1a_kernel
	movq	%mm1, (%edi)
	addl	$8, %eax
	addl	$8, %edx
	addl	$8, %edi
	subl	$4, %ecx
	jge	.substract_pixels_1a_1a_loop

.substract_pixels_1a_1a_last3:
	/* %ecx is in -4..-1 here; its low two bits equal the number of
	   pixels still pending (0..3) */
	testl	$2, %ecx
	jz	.substract_pixels_1a_1a_last1
	movd	(%eax), %mm2
	movd	(%edx), %mm3
	sub_1a_kernel
	movd	%mm1, (%edi)		/* store the two pixels (was missing) */
	addl	$4, %eax
	addl	$4, %edx
	addl	$4, %edi

.substract_pixels_1a_1a_last1:
	testl	$1, %ecx
	jz	.substract_pixels_1a_1a_end
	movzwl	(%eax), %ebx		/* one 2-byte pixel, zero-extended */
	movd	%ebx, %mm2
	movzwl	(%edx), %ebx
	movd	%ebx, %mm3
	sub_1a_kernel
	movd	%mm1, %ebx
	movw	%bx, (%edi)		/* write back exactly 2 bytes */

.substract_pixels_1a_1a_end:
	emms
	popl	%ebx
	popl	%edi
	ret
/*
 * difference_pixels_3a_3a
 *
 * Per-byte absolute difference |a - b| of two buffers of 4-byte pixels.
 * Byte 3 of every pixel (the byte selected by .alpha_mask_3a) receives
 * the smaller of the two source bytes instead.
 *
 * In:   %eax = first source buffer
 *       %edx = second source buffer
 *       %ecx = number of pixels
 *       first stack argument = destination buffer
 *       (this register layout looks like gcc regparm(3) -- confirm
 *       against the C prototype)
 * Clobbers: %eax, %ecx, %edx, flags, %mm0-%mm5; emms is executed before
 *           returning.
 *
 * Pixels are processed two at a time (8 bytes); an odd trailing pixel is
 * handled with 4-byte loads and a 4-byte store.
 *
 * Fix: the masked byte is now min(a, b).  The minimum is a - max(a-b, 0),
 * so it has to be taken from the one-sided difference; the previous code
 * subtracted the full absolute difference, which produced 2a - b
 * (mod 256) whenever a < b.
 */

/* %mm2 = a, %mm3 = b, %mm0 = mask  ->  %mm1 = result
   (%mm2, %mm4, %mm5 destroyed) */
.macro diff_3a_kernel
	movq	%mm2, %mm4
	movq	%mm3, %mm5
	psubusb	%mm3, %mm4		/* mm4 = max(a - b, 0) */
	psubusb	%mm2, %mm5		/* mm5 = max(b - a, 0) */
	psubb	%mm4, %mm2		/* mm2 = min(a, b); must precede the paddb */
	paddb	%mm5, %mm4		/* mm4 = |a - b| (one addend is always 0) */
	movq	%mm0, %mm1
	pandn	%mm4, %mm1		/* keep |a - b| outside the mask */
	pand	%mm0, %mm2		/* keep the minimum inside the mask */
	por	%mm2, %mm1
.endm

	.align 16
#ifndef __MINGW32__
	.globl difference_pixels_3a_3a
	.type difference_pixels_3a_3a,@function
difference_pixels_3a_3a:
#else
	.globl _difference_pixels_3a_3a
_difference_pixels_3a_3a:
#endif
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi		/* destination: first stack argument */
	movq	.alpha_mask_3a, %mm0

	subl	$2, %ecx		/* at least one full pair left? */
	jl	.difference_pixels_3a_3a_last

.difference_pixels_3a_3a_loop:
	movq	(%eax), %mm2
	movq	(%edx), %mm3
	diff_3a_kernel
	movq	%mm1, (%edi)
	addl	$8, %eax
	addl	$8, %edx
	addl	$8, %edi
	subl	$2, %ecx
	jge	.difference_pixels_3a_3a_loop

.difference_pixels_3a_3a_last:
	/* bit 0 of %ecx set means one pixel is still pending */
	testl	$1, %ecx
	jz	.difference_pixels_3a_3a_end
	movd	(%eax), %mm2
	movd	(%edx), %mm3
	diff_3a_kernel
	movd	%mm1, (%edi)

.difference_pixels_3a_3a_end:
	emms
	popl	%ebx
	popl	%edi
	ret
/*
 * difference_pixels_1a_1a
 *
 * Per-byte absolute difference |a - b| of two buffers of 2-byte pixels.
 * Byte 1 of every pixel (the byte selected by .alpha_mask_1a) receives
 * the smaller of the two source bytes instead.
 *
 * In:   %eax = first source buffer
 *       %edx = second source buffer
 *       %ecx = number of pixels
 *       first stack argument = destination buffer
 *       (this register layout looks like gcc regparm(3) -- confirm
 *       against the C prototype)
 * Preserved: %ebx, %edi (saved and restored here)
 * Clobbers:  %eax, %ecx, %edx, flags, %mm0-%mm5; emms is executed before
 *            returning.
 *
 * Pixels are processed four at a time (8 bytes), then an optional group
 * of two (4 bytes), then an optional single pixel (2 bytes).
 *
 * Fixes:
 *  - The masked byte is now min(a, b) = a - max(a-b, 0).  The previous
 *    code subtracted the full absolute difference, giving 2a - b
 *    (mod 256) whenever a < b.
 *  - The two-pixel tail now stores its result; it previously advanced
 *    the pointers without writing to the destination.
 */

/* %mm2 = a, %mm3 = b, %mm0 = mask  ->  %mm1 = result
   (%mm2, %mm4, %mm5 destroyed) */
.macro diff_1a_kernel
	movq	%mm2, %mm4
	movq	%mm3, %mm5
	psubusb	%mm3, %mm4		/* mm4 = max(a - b, 0) */
	psubusb	%mm2, %mm5		/* mm5 = max(b - a, 0) */
	psubb	%mm4, %mm2		/* mm2 = min(a, b); must precede the paddb */
	paddb	%mm5, %mm4		/* mm4 = |a - b| (one addend is always 0) */
	movq	%mm0, %mm1
	pandn	%mm4, %mm1		/* keep |a - b| outside the mask */
	pand	%mm0, %mm2		/* keep the minimum inside the mask */
	por	%mm2, %mm1
.endm

	.align 16
#ifndef __MINGW32__
	.globl difference_pixels_1a_1a
	.type difference_pixels_1a_1a,@function
difference_pixels_1a_1a:
#else
	.globl _difference_pixels_1a_1a
_difference_pixels_1a_1a:
#endif
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi		/* destination: first stack argument */
	movq	.alpha_mask_1a, %mm0

	subl	$4, %ecx		/* at least one full group of four? */
	jl	.difference_pixels_1a_1a_last3

.difference_pixels_1a_1a_loop:
	movq	(%eax), %mm2
	movq	(%edx), %mm3
	diff_1a_kernel
	movq	%mm1, (%edi)
	addl	$8, %eax
	addl	$8, %edx
	addl	$8, %edi
	subl	$4, %ecx
	jge	.difference_pixels_1a_1a_loop

.difference_pixels_1a_1a_last3:
	/* %ecx is in -4..-1 here; its low two bits equal the number of
	   pixels still pending (0..3) */
	testl	$2, %ecx
	jz	.difference_pixels_1a_1a_last1
	movd	(%eax), %mm2
	movd	(%edx), %mm3
	diff_1a_kernel
	movd	%mm1, (%edi)		/* store the two pixels (was missing) */
	addl	$4, %eax
	addl	$4, %edx
	addl	$4, %edi

.difference_pixels_1a_1a_last1:
	testl	$1, %ecx
	jz	.difference_pixels_1a_1a_end
	movzwl	(%eax), %ebx		/* one 2-byte pixel, zero-extended */
	movd	%ebx, %mm2
	movzwl	(%edx), %ebx
	movd	%ebx, %mm3
	diff_1a_kernel
	movd	%mm1, %ebx
	movw	%bx, (%edi)		/* write back exactly 2 bytes */

.difference_pixels_1a_1a_end:
	emms
	popl	%ebx
	popl	%edi
	ret
/*
 * multiply_pixels_3a_3a
 *
 * Per-byte multiply of two buffers of 4-byte pixels, scaled back to a
 * byte:  t = a*b + 0x80;  result = (t + (t >> 8)) >> 8.
 * Byte 3 of every pixel (the byte selected by .alpha_mask_3a) receives
 * the smaller of the two source bytes instead.
 *
 * In:   %eax = first source buffer
 *       %edx = second source buffer
 *       %ecx = number of pixels
 *       first stack argument = destination buffer
 *       (this register layout looks like gcc regparm(3) -- confirm
 *       against the C prototype)
 * Clobbers: %eax, %ecx, %edx, flags, %mm0-%mm7; emms is executed before
 *           returning.
 *
 * Register roles while running:
 *   %mm0 = .alpha_mask_3a, %mm6 = 0 (for byte->word unpacking),
 *   %mm7 = .mult_shift (0x0080 per 16-bit lane)
 *
 * Pixels are processed two at a time (8 bytes); an odd trailing pixel is
 * handled with 4-byte loads and a 4-byte store.
 */

/* %mm2 = a, %mm3 = b  ->  %mm1 = result (%mm2, %mm4, %mm5 destroyed) */
.macro mul_3a_kernel
	/* low four bytes, widened to 16-bit lanes */
	movq	%mm2, %mm1
	punpcklbw %mm6, %mm1
	movq	%mm3, %mm5
	punpcklbw %mm6, %mm5
	pmullw	%mm5, %mm1		/* a*b */
	paddw	%mm7, %mm1		/* t = a*b + 0x80 */
	movq	%mm1, %mm5
	psrlw	$8, %mm5
	paddw	%mm5, %mm1		/* t + (t >> 8) */
	psrlw	$8, %mm1

	/* high four bytes, same computation */
	movq	%mm2, %mm4
	punpckhbw %mm6, %mm4
	movq	%mm3, %mm5
	punpckhbw %mm6, %mm5
	pmullw	%mm5, %mm4
	paddw	%mm7, %mm4
	movq	%mm4, %mm5
	psrlw	$8, %mm5
	paddw	%mm5, %mm4
	psrlw	$8, %mm4

	packuswb %mm4, %mm1		/* back to eight bytes */
	movq	%mm0, %mm4
	pandn	%mm1, %mm4		/* keep the product outside the mask */
	movq	%mm4, %mm1

	movq	%mm2, %mm4
	psubusb	%mm3, %mm4		/* mm4 = max(a - b, 0) */
	psubb	%mm4, %mm2		/* mm2 = min(a, b) */
	pand	%mm0, %mm2		/* keep the minimum inside the mask */
	por	%mm2, %mm1
.endm

	.align 16
#ifndef __MINGW32__
	.globl multiply_pixels_3a_3a
	.type multiply_pixels_3a_3a,@function
multiply_pixels_3a_3a:
#else
	.globl _multiply_pixels_3a_3a
_multiply_pixels_3a_3a:
#endif
	pushl	%edi
	movl	8(%esp), %edi		/* destination: first stack argument */
	movq	.alpha_mask_3a, %mm0
	movq	.mult_shift, %mm7
	pxor	%mm6, %mm6

	subl	$2, %ecx		/* at least one full pair left? */
	jl	.multiply_pixels_3a_3a_tail

.multiply_pixels_3a_3a_pair:
	movq	(%eax), %mm2
	movq	(%edx), %mm3
	mul_3a_kernel
	movq	%mm1, (%edi)
	addl	$8, %eax
	addl	$8, %edx
	addl	$8, %edi
	subl	$2, %ecx
	jge	.multiply_pixels_3a_3a_pair

.multiply_pixels_3a_3a_tail:
	/* bit 0 of %ecx set means one pixel is still pending */
	testl	$1, %ecx
	jz	.multiply_pixels_3a_3a_done
	movd	(%eax), %mm2
	movd	(%edx), %mm3
	mul_3a_kernel
	movd	%mm1, (%edi)

.multiply_pixels_3a_3a_done:
	emms
	popl	%edi
	ret
#ifndef __MINGW32__
.globl multiply_pixels_1a_1a
.type multiply_pixels_1a_1a,@function
multiply_pixels_1a_1a:
#else
.globl _multiply_pixels_1a_1a
_multiply_pixels_1a_1a:
#endif
.align 16
pushl %edi
pushl %ebx
movl 12(%esp), %edi
movq .alpha_mask_1a, %mm0
subl $ 4, %ecx
jl .multiply_pixels_1a_1a_last3
movl $ 8, %ebx
.multiply_pixels_1a_1a_loop:
movq (%eax), %mm2
movq (%edx), %mm3
movq %mm2, %mm1
punpcklbw %mm6, %mm1
movq %mm3, %mm5
punpcklbw %mm6, %mm5
pmullw %mm5, %mm1
paddw %mm7, %mm1
movq %mm1, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm1
psrlw $ 8, %mm1
movq %mm2, %mm4
punpckhbw %mm6, %mm4
movq %mm3, %mm5
punpckhbw %mm6, %mm5
pmullw %mm5, %mm4
paddw %mm7, %mm4
movq %mm4, %mm5
psrlw $ 8, %mm5
paddw %mm5, %mm4
psrlw $ 8, %mm4
packuswb %mm4, %mm1
movq %mm0, %mm4
pandn %mm1, %mm4
movq %mm4, %mm1
movq %mm2, %mm4
psubusb %mm3, %mm4
psubb %mm4, %mm2
pand %mm0, %mm2
por %mm2, %mm1
movq %mm1, (%edi)