/* LIBGIMP - The GIMP Library
 * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
 *
 * This library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

/*
 * x86 bits Copyright (C) Manish Singh <yosh@gimp.org>
 */

/*
 * PPC CPU acceleration detection was taken from DirectFB but seems to be
 * originating from mpeg2dec with the following copyright:
 *
 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 */

#include "config.h"

#include <string.h>
#include <signal.h>
#include <setjmp.h>

#include <glib.h>

#include "gimpcpuaccel.h"


/**
 * SECTION: gimpcpuaccel
 * @title: gimpcpuaccel
 * @short_description: Functions to query and configure CPU acceleration.
 *
 * Functions to query and configure CPU acceleration.
 **/


static GimpCpuAccelFlags  cpu_accel (void) G_GNUC_CONST;


static gboolean  use_cpu_accel = TRUE;


/**
 * gimp_cpu_accel_get_support:
 *
 * Query for CPU acceleration support.
 *
 * Return value: #GimpCpuAccelFlags as supported by the CPU.
 *
 * Since: GIMP 2.4
 */
GimpCpuAccelFlags
gimp_cpu_accel_get_support (void)
{
  /* Acceleration can be globally disabled via gimp_cpu_accel_set_use() */
  if (! use_cpu_accel)
    return GIMP_CPU_ACCEL_NONE;

  return cpu_accel ();
}

/**
 * gimp_cpu_accel_set_use:
 * @use:  whether to use CPU acceleration features or not
 *
 * This function is for internal use only.
 *
 * Since: GIMP 2.4
 */
void
gimp_cpu_accel_set_use (gboolean use)
{
  /* Normalize any non-zero value to TRUE before storing */
  use_cpu_accel = (use != FALSE);
}

#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)

#define HAVE_ACCEL 1


/* x86 CPU vendors distinguishable by the 12-byte CPUID(0) vendor string.
 * The vendor matters because the extended (0x80000001) feature bits have
 * vendor-specific meanings.
 */
typedef enum
{
  ARCH_X86_VENDOR_NONE,
  ARCH_X86_VENDOR_INTEL,
  ARCH_X86_VENDOR_AMD,
  ARCH_X86_VENDOR_CENTAUR,
  ARCH_X86_VENDOR_CYRIX,
  ARCH_X86_VENDOR_NSC,
  ARCH_X86_VENDOR_TRANSMETA,
  ARCH_X86_VENDOR_NEXGEN,
  ARCH_X86_VENDOR_RISE,
  ARCH_X86_VENDOR_UMC,
  ARCH_X86_VENDOR_SIS,
  ARCH_X86_VENDOR_UNKNOWN    = 0xff
} X86Vendor;

/* CPUID feature bits.  The INTEL_* bits are from the EDX register of
 * CPUID(1); the vendor-specific bits are from the EDX register of the
 * extended leaf CPUID(0x80000001).
 *
 * NOTE(review): 1 << 31 overflows a signed int, which is undefined
 * behavior in ISO C; this relies on the usual GCC two's-complement
 * behavior — confirm against project compiler requirements.
 */
enum
{
  ARCH_X86_INTEL_FEATURE_MMX      = 1 << 23,
  ARCH_X86_INTEL_FEATURE_XMM      = 1 << 25,
  ARCH_X86_INTEL_FEATURE_XMM2     = 1 << 26,

  ARCH_X86_AMD_FEATURE_MMXEXT     = 1 << 22,
  ARCH_X86_AMD_FEATURE_3DNOW      = 1 << 31,

  ARCH_X86_CENTAUR_FEATURE_MMX    = 1 << 23,
  ARCH_X86_CENTAUR_FEATURE_MMXEXT = 1 << 24,
  ARCH_X86_CENTAUR_FEATURE_3DNOW  = 1 << 31,

  ARCH_X86_CYRIX_FEATURE_MMX      = 1 << 23,
  ARCH_X86_CYRIX_FEATURE_MMXEXT   = 1 << 24
};

/* CPUID(1) ECX feature bit: PNI ("Prescott New Instructions"), i.e. SSE3.
 * Kept in a separate enum because it is tested against ECX, not EDX. */
enum
{
  ARCH_X86_INTEL_FEATURE_PNI      = 1 << 0
};

/* cpuid(op, eax, ebx, ecx, edx): execute CPUID with EAX = op and store
 * the four result registers into the given lvalues.
 *
 * On ia32 PIC builds EBX holds the GOT pointer and must not be listed
 * as a clobbered/output register, so the first variant shelters it in
 * ESI around the CPUID instruction.  x86-64 and non-PIC builds can let
 * the compiler allocate EBX directly.
 */
#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx)  \
  __asm__ ("movl %%ebx, %%esi\n\t" \
           "cpuid\n\t"             \
           "xchgl %%ebx,%%esi"     \
           : "=a" (eax),           \
             "=S" (ebx),           \
             "=c" (ecx),           \
             "=d" (edx)            \
           : "0" (op))
#else
#define cpuid(op,eax,ebx,ecx,edx)  \
  __asm__ ("cpuid"                 \
           : "=a" (eax),           \
             "=b" (ebx),           \
             "=c" (ecx),           \
             "=d" (edx)            \
           : "0" (op))
#endif


/* Identify the x86 CPU vendor.
 *
 * On ia32 this first verifies that the CPUID instruction exists at all
 * by trying to toggle the ID bit (0x200000) in EFLAGS; if the bit cannot
 * be changed, there is no CPUID and detection is abandoned.  It then
 * reads the 12-byte vendor string returned by CPUID(0) in EBX:EDX:ECX
 * and matches it against the known vendor identifiers.  On x86-64 only
 * Intel and AMD are checked.
 */
static X86Vendor
arch_get_vendor (void)
{
  guint32 eax, ebx, ecx, edx;
  /* Union assembles the vendor string from the three registers without
   * pointer-cast aliasing; 16 bytes leaves room for the terminator. */
  union{
      gchar idaschar[16];
      int   idasint[4];
  }id;

#ifndef ARCH_X86_64
  /* Only need to check this on ia32 */
  __asm__ ("pushfl\n\t"
           "pushfl\n\t"
           "popl %0\n\t"
           "movl %0,%1\n\t"
           "xorl $0x200000,%0\n\t"
           "pushl %0\n\t"
           "popfl\n\t"
           "pushfl\n\t"
           "popl %0\n\t"
           "popfl"
           : "=a" (eax),
             "=c" (ecx)
           :
           : "cc");

  /* ID bit could not be toggled: CPUID is unsupported on this CPU */
  if (eax == ecx)
    return ARCH_X86_VENDOR_NONE;
#endif

  cpuid (0, eax, ebx, ecx, edx);

  /* Highest supported leaf is 0, so CPUID(1) could not be queried */
  if (eax == 0)
    return ARCH_X86_VENDOR_NONE;

  /* The vendor string is delivered in the register order EBX, EDX, ECX */
  id.idasint[0] = ebx;
  id.idasint[1] = edx;
  id.idasint[2] = ecx;

  id.idaschar[12] = '\0';

#ifdef ARCH_X86_64
  if (strcmp (id.idaschar, "AuthenticAMD") == 0)
    return ARCH_X86_VENDOR_AMD;
  else if (strcmp (id.idaschar, "GenuineIntel") == 0)
    return ARCH_X86_VENDOR_INTEL;
#else
  if (strcmp (id.idaschar, "GenuineIntel") == 0)
    return ARCH_X86_VENDOR_INTEL;
  else if (strcmp (id.idaschar, "AuthenticAMD") == 0)
    return ARCH_X86_VENDOR_AMD;
  else if (strcmp (id.idaschar, "CentaurHauls") == 0)
    return ARCH_X86_VENDOR_CENTAUR;
  else if (strcmp (id.idaschar, "CyrixInstead") == 0)
    return ARCH_X86_VENDOR_CYRIX;
  else if (strcmp (id.idaschar, "Geode by NSC") == 0)
    return ARCH_X86_VENDOR_NSC;
  else if (strcmp (id.idaschar, "GenuineTMx86") == 0 ||
           strcmp (id.idaschar, "TransmetaCPU") == 0)
    return ARCH_X86_VENDOR_TRANSMETA;
  else if (strcmp (id.idaschar, "NexGenDriven") == 0)
    return ARCH_X86_VENDOR_NEXGEN;
  else if (strcmp (id.idaschar, "RiseRiseRise") == 0)
    return ARCH_X86_VENDOR_RISE;
  else if (strcmp (id.idaschar, "UMC UMC UMC ") == 0)
    return ARCH_X86_VENDOR_UMC;
  else if (strcmp (id.idaschar, "SiS SiS SiS ") == 0)
    return ARCH_X86_VENDOR_SIS;
#endif

  return ARCH_X86_VENDOR_UNKNOWN;
}
/* Detect the Intel-specified feature bits (the baseline also honored by
 * most clones): MMX from CPUID(1) EDX and, when SSE support is compiled
 * in, SSE/SSE2 from EDX and SSE3 (PNI) from ECX.
 *
 * Returns 0 when MMX is absent, since everything else implies MMX.
 */
static guint32
arch_accel_intel (void)
{
  guint32 features = 0;

#ifdef USE_MMX
  {
    guint32 a, b, c, d;

    cpuid (1, a, b, c, d);

    /* No MMX means no acceleration at all */
    if (! (d & ARCH_X86_INTEL_FEATURE_MMX))
      return 0;

    features = GIMP_CPU_ACCEL_X86_MMX;

#ifdef USE_SSE
    if (d & ARCH_X86_INTEL_FEATURE_XMM)
      features |= GIMP_CPU_ACCEL_X86_SSE | GIMP_CPU_ACCEL_X86_MMXEXT;

    if (d & ARCH_X86_INTEL_FEATURE_XMM2)
      features |= GIMP_CPU_ACCEL_X86_SSE2;

    if (c & ARCH_X86_INTEL_FEATURE_PNI)
      features |= GIMP_CPU_ACCEL_X86_SSE3;
#endif /* USE_SSE */
  }
#endif /* USE_MMX */

  return features;
}
/* AMD: the Intel-compatible bits plus 3DNow! and the AMD MMX extensions,
 * both reported in EDX of the extended leaf CPUID(0x80000001). */
static guint32
arch_accel_amd (void)
{
  guint32 features = arch_accel_intel ();

#ifdef USE_MMX
  {
    guint32 a, b, c, d;

    /* CPUID(0x80000000) EAX holds the highest extended leaf available */
    cpuid (0x80000000, a, b, c, d);

    if (a < 0x80000001)
      return features;

#ifdef USE_SSE
    cpuid (0x80000001, a, b, c, d);

    if (d & ARCH_X86_AMD_FEATURE_3DNOW)
      features |= GIMP_CPU_ACCEL_X86_3DNOW;

    if (d & ARCH_X86_AMD_FEATURE_MMXEXT)
      features |= GIMP_CPU_ACCEL_X86_MMXEXT;
#endif /* USE_SSE */
  }
#endif /* USE_MMX */

  return features;
}

/* Centaur/VIA: the Intel-compatible bits plus the Centaur-specific MMX,
 * MMX-extension and 3DNow! bits from CPUID(0x80000001) EDX. */
static guint32
arch_accel_centaur (void)
{
  guint32 features = arch_accel_intel ();

#ifdef USE_MMX
  {
    guint32 a, b, c, d;

    /* Make sure the extended leaf exists before querying it */
    cpuid (0x80000000, a, b, c, d);

    if (a < 0x80000001)
      return features;

    cpuid (0x80000001, a, b, c, d);

    if (d & ARCH_X86_CENTAUR_FEATURE_MMX)
      features |= GIMP_CPU_ACCEL_X86_MMX;

#ifdef USE_SSE
    if (d & ARCH_X86_CENTAUR_FEATURE_3DNOW)
      features |= GIMP_CPU_ACCEL_X86_3DNOW;

    if (d & ARCH_X86_CENTAUR_FEATURE_MMXEXT)
      features |= GIMP_CPU_ACCEL_X86_MMXEXT;
#endif /* USE_SSE */
  }
#endif /* USE_MMX */

  return features;
}

/* Cyrix (and NSC Geode): the Intel-compatible bits plus the Cyrix MMX
 * and MMX-extension bits.  The extended leaf is only consulted when
 * CPUID(0) reports exactly 2, as the original code required. */
static guint32
arch_accel_cyrix (void)
{
  guint32 features = arch_accel_intel ();

#ifdef USE_MMX
  {
    guint32 a, b, c, d;

    cpuid (0, a, b, c, d);

    if (a != 2)
      return features;

    cpuid (0x80000001, a, b, c, d);

    if (d & ARCH_X86_CYRIX_FEATURE_MMX)
      features |= GIMP_CPU_ACCEL_X86_MMX;

#ifdef USE_SSE
    if (d & ARCH_X86_CYRIX_FEATURE_MMXEXT)
      features |= GIMP_CPU_ACCEL_X86_MMXEXT;
#endif /* USE_SSE */
  }
#endif /* USE_MMX */

  return features;
}

#ifdef USE_SSE
/* Jump target used to recover from the SIGILL raised when the OS does
 * not support SSE register state. */
static jmp_buf sigill_return;

static void
sigill_handler (gint n)
{
  /* NOTE(review): longjmp() out of a signal handler is formally
   * restricted; it works here because we jump straight back into
   * arch_accel_sse_os_support() below — confirm on target platforms. */
  longjmp (sigill_return, 1);
}

/* Check whether the operating system saves/restores SSE state: execute
 * a harmless SSE instruction and catch the SIGILL the kernel raises
 * when SSE has not been enabled.  Returns TRUE when SSE is usable. */
static gboolean
arch_accel_sse_os_support (void)
{
  if (setjmp (sigill_return))
    {
      /* Arrived here via longjmp: the SSE instruction trapped */
      return FALSE;
    }
  else
    {
      signal (SIGILL, sigill_handler);
      __asm__ __volatile__ ("xorps %xmm0, %xmm0");
      signal (SIGILL, SIG_DFL);
    }

  return TRUE;
}
#endif /* USE_SSE */

/* Detect x86 acceleration features: dispatch to the vendor-specific
 * probe, then mask out SSE/SSE2 when the OS does not preserve SSE
 * register state across context switches. */
static guint32
arch_accel (void)
{
  guint32 caps = 0;

  switch (arch_get_vendor ())
    {
    case ARCH_X86_VENDOR_NONE:
      break;

    case ARCH_X86_VENDOR_AMD:
      caps = arch_accel_amd ();
      break;

    case ARCH_X86_VENDOR_CENTAUR:
      caps = arch_accel_centaur ();
      break;

    case ARCH_X86_VENDOR_CYRIX:
    case ARCH_X86_VENDOR_NSC:
      caps = arch_accel_cyrix ();
      break;

    /* check for what Intel speced, even if UNKNOWN */
    default:
      caps = arch_accel_intel ();
      break;
    }

#ifdef USE_SSE
  /* SSE needs OS support too; drop SSE/SSE2 when it is missing */
  if ((caps & GIMP_CPU_ACCEL_X86_SSE) && ! arch_accel_sse_os_support ())
    caps &= ~(GIMP_CPU_ACCEL_X86_SSE | GIMP_CPU_ACCEL_X86_SSE2);
#endif

  return caps;
}

#endif /* ARCH_X86 && USE_MMX && __GNUC__ */


#if defined(ARCH_PPC) && defined (USE_ALTIVEC)

#if defined(HAVE_ALTIVEC_SYSCTL)

#include <sys/sysctl.h>

#define HAVE_ACCEL 1

/* Darwin-style detection: ask the kernel via sysctl(CTL_HW,
 * HW_VECTORUNIT) whether a vector unit (AltiVec) is present. */
static guint32
arch_accel (void)
{
  gint     sels[2] = { CTL_HW, HW_VECTORUNIT };
  gboolean has_vu  = FALSE;
  gsize    length  = sizeof (has_vu);

  /* Only trust the flag when the sysctl call itself succeeded */
  if (sysctl (sels, 2, &has_vu, &length, NULL, 0) == 0 && has_vu)
    return GIMP_CPU_ACCEL_PPC_ALTIVEC;

  return 0;
}

#elif defined(__GNUC__)

#define HAVE_ACCEL 1

/* Generic PPC detection: probe AltiVec by executing a vector
 * instruction and trapping the SIGILL delivered when the unit is
 * absent or disabled. */

static          sigjmp_buf   jmpbuf;
static volatile sig_atomic_t canjump = 0;

static void
sigill_handler (gint sig)
{
  /* A SIGILL before the probe was armed is genuine: restore the
   * default disposition and re-raise it. */
  if (!canjump)
    {
      signal (sig, SIG_DFL);
      raise (sig);
    }

  canjump = 0;
  siglongjmp (jmpbuf, 1);
}

static guint32
arch_accel (void)
{
  signal (SIGILL, sigill_handler);

  if (sigsetjmp (jmpbuf, 1))
    {
      /* Jumped back from the handler: the vector instruction trapped,
       * so there is no usable AltiVec unit. */
      signal (SIGILL, SIG_DFL);
      return 0;
    }

  canjump = 1;

  /* Write SPR 256 (VRSAVE) and execute one AltiVec instruction; this
   * raises SIGILL when AltiVec is unavailable. */
  asm volatile ("mtspr 256, %0\n\t"
                "vand %%v0, %%v0, %%v0"
                :
                : "r" (-1));

  signal (SIGILL, SIG_DFL);

  return GIMP_CPU_ACCEL_PPC_ALTIVEC;
}

#endif /* __GNUC__ */

#endif /* ARCH_PPC && USE_ALTIVEC */


static GimpCpuAccelFlags
494 495
cpu_accel (void)
{
496
#ifdef HAVE_ACCEL
497 498 499 500 501 502 503
  static guint32 accel = ~0U;

  if (accel != ~0U)
    return accel;

  accel = arch_accel ();

504
  return (GimpCpuAccelFlags) accel;
505

506
#else /* !HAVE_ACCEL */
507
  return GIMP_CPU_ACCEL_NONE;
508 509
#endif
}