blur-gauss-selective.c 26.8 KB
Newer Older
1
/* Selective gaussian blur filter for GIMP, version 0.1
Marc Lehmann's avatar
Marc Lehmann committed
2 3 4 5 6
 * Adapted from the original gaussian blur filter by Spencer Kimball and
 * Peter Mattis.
 *
 * Copyright (C) 1995 Spencer Kimball and Peter Mattis
 * Copyright (C) 1999 Thom van Os <thom@vanos.com>
7
 * Copyright (C) 2006 Loren Merritt
Marc Lehmann's avatar
Marc Lehmann committed
8
 *
9
 * This program is free software: you can redistribute it and/or modify
Marc Lehmann's avatar
Marc Lehmann committed
10
 * it under the terms of the GNU General Public License as published by
11
 * the Free Software Foundation; either version 3 of the License, or
Marc Lehmann's avatar
Marc Lehmann committed
12 13 14 15 16 17 18 19
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
20
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
Marc Lehmann's avatar
Marc Lehmann committed
21 22
 *
 * To do:
David Odin's avatar
David Odin committed
23 24 25 26 27 28
 *      - support for horizontal or vertical only blur
 *      - use memory more efficiently, smaller regions at a time
 *      - integrating with other convolution matrix based filters ?
 *      - create more selective and adaptive filters
 *      - threading
 *      - optimization
Marc Lehmann's avatar
Marc Lehmann committed
29
 */
30

Sven Neumann's avatar
Sven Neumann committed
31
#include "config.h"
32 33 34 35

#include <libgimp/gimp.h>
#include <libgimp/gimpui.h>

Sven Neumann's avatar
Sven Neumann committed
36
#include "libgimp/stdplugins-intl.h"
Marc Lehmann's avatar
Marc Lehmann committed
37

38

39
#define PLUG_IN_PROC   "plug-in-sel-gauss"
40
#define PLUG_IN_BINARY "blur-gauss-selective"
41
#define PLUG_IN_ROLE   "gimp-blur-gauss-selective"
42

43 44 45 46 47 48 49 50
#ifndef ALWAYS_INLINE
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#    define ALWAYS_INLINE __attribute__((always_inline)) inline
#else
#    define ALWAYS_INLINE inline
#endif
#endif

51

Marc Lehmann's avatar
Marc Lehmann committed
52 53
typedef struct
{
David Odin's avatar
David Odin committed
54 55
  gdouble  radius;
  gint     maxdelta;
Marc Lehmann's avatar
Marc Lehmann committed
56 57 58 59 60
} BlurValues;


/* Declare local functions.
 */
David Odin's avatar
David Odin committed
61
static void      query            (void);
62 63 64 65 66 67 68 69 70 71 72 73
static void      run              (const gchar      *name,
                                   gint              nparams,
                                   const GimpParam  *param,
                                   gint             *nreturn_vals,
                                   GimpParam       **return_vals);

static void      sel_gauss        (GimpDrawable     *drawable,
                                   gdouble           radius,
                                   gint              maxdelta);
static gboolean  sel_gauss_dialog (GimpDrawable     *drawable);
static void      preview_update   (GimpPreview      *preview);

Marc Lehmann's avatar
Marc Lehmann committed
74

75
const GimpPlugInInfo PLUG_IN_INFO =
Marc Lehmann's avatar
Marc Lehmann committed
76
{
77 78 79 80
  NULL,  /* init_proc  */
  NULL,  /* quit_proc  */
  query, /* query_proc */
  run,   /* run_proc   */
Marc Lehmann's avatar
Marc Lehmann committed
81 82 83 84
};

static BlurValues bvals =
{
85
  5.0, /* radius   */
Sven Neumann's avatar
Sven Neumann committed
86
  50   /* maxdelta */
Marc Lehmann's avatar
Marc Lehmann committed
87 88 89 90
};

MAIN ()

91 92
static void
query (void)
Marc Lehmann's avatar
Marc Lehmann committed
93
{
94
  static const GimpParamDef args[] =
Marc Lehmann's avatar
Marc Lehmann committed
95
  {
96
    { GIMP_PDB_INT32,    "run-mode",  "The run mode { RUN-INTERACTIVE (0), RUN-NONINTERACTIVE (1) }" },
97 98 99 100
    { GIMP_PDB_IMAGE,    "image",     "Input image (unused)"         },
    { GIMP_PDB_DRAWABLE, "drawable",  "Input drawable"               },
    { GIMP_PDB_FLOAT,    "radius",    "Radius of gaussian blur (in pixels, > 0.0)" },
    { GIMP_PDB_INT32,    "max-delta", "Maximum delta"                }
Marc Lehmann's avatar
Marc Lehmann committed
101
  };
Sven Neumann's avatar
Sven Neumann committed
102

103
  gimp_install_procedure (PLUG_IN_PROC,
104
                          N_("Blur neighboring pixels, but only in low-contrast areas"),
David Odin's avatar
David Odin committed
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
                          "This filter functions similar to the regular "
                          "gaussian blur filter except that neighbouring "
                          "pixels that differ more than the given maxdelta "
                          "parameter will not be blended with. This way with "
                          "the correct parameters, an image can be smoothed "
                          "out without losing details. However, this filter "
                          "can be rather slow.",
                          "Thom van Os",
                          "Thom van Os",
                          "1999",
                          N_("_Selective Gaussian Blur..."),
                          "RGB*, GRAY*",
                          GIMP_PLUGIN,
                          G_N_ELEMENTS (args), 0,
                          args, NULL);
120

121
  gimp_plugin_menu_register (PLUG_IN_PROC, "<Image>/Filters/Blur");
Marc Lehmann's avatar
Marc Lehmann committed
122 123
}

124
static void
125 126 127 128 129
run (const gchar      *name,
     gint              nparams,
     const GimpParam  *param,
     gint             *nreturn_vals,
     GimpParam       **return_vals)
Marc Lehmann's avatar
Marc Lehmann committed
130
{
131 132 133
  static GimpParam   values[1];
  GimpRunMode        run_mode;
  GimpPDBStatusType  status = GIMP_PDB_SUCCESS;
David Odin's avatar
David Odin committed
134 135
  GimpDrawable      *drawable;
  gdouble            radius;
136 137 138 139

  run_mode = param[0].data.d_int32;

  *nreturn_vals = 1;
140
  *return_vals  = values;
141

Sven Neumann's avatar
Sven Neumann committed
142
  INIT_I18N ();
143

144
  values[0].type          = GIMP_PDB_STATUS;
145 146
  values[0].data.d_status = status;

David Odin's avatar
David Odin committed
147 148
  /* Get the specified drawable */
  drawable = gimp_drawable_get (param[2].data.d_drawable);
149
  gimp_tile_cache_ntiles (2 * drawable->ntile_cols);
David Odin's avatar
David Odin committed
150

151 152
  switch (run_mode)
    {
Sven Neumann's avatar
Sven Neumann committed
153
    case GIMP_RUN_INTERACTIVE:
154
      /* Possibly retrieve data */
155
      gimp_get_data (PLUG_IN_PROC, &bvals);
156 157

      /* First acquire information with a dialog */
David Odin's avatar
David Odin committed
158 159
      if (! sel_gauss_dialog (drawable))
        return;
160 161
      break;

Sven Neumann's avatar
Sven Neumann committed
162
    case GIMP_RUN_NONINTERACTIVE:
163
      /* Make sure all the arguments are there! */
164
      if (nparams != 5)
David Odin's avatar
David Odin committed
165
        status = GIMP_PDB_CALLING_ERROR;
Sven Neumann's avatar
Sven Neumann committed
166
      if (status == GIMP_PDB_SUCCESS)
David Odin's avatar
David Odin committed
167 168 169
        {
          bvals.radius   = param[3].data.d_float;
          bvals.maxdelta = CLAMP (param[4].data.d_int32, 0, 255);
170

171
          if (bvals.radius <= 0.0)
172
            status = GIMP_PDB_CALLING_ERROR;
David Odin's avatar
David Odin committed
173
        }
174 175
      break;

Sven Neumann's avatar
Sven Neumann committed
176
    case GIMP_RUN_WITH_LAST_VALS:
177
      /* Possibly retrieve data */
178
      gimp_get_data (PLUG_IN_PROC, &bvals);
179 180 181 182 183 184
      break;

    default:
      break;
    }

Sven Neumann's avatar
Sven Neumann committed
185
  if (status != GIMP_PDB_SUCCESS)
186 187 188 189 190 191
    {
      values[0].data.d_status = status;
      return;
    }

  /* Make sure that the drawable is gray or RGB color */
192 193
  if (gimp_drawable_is_rgb (drawable->drawable_id) ||
      gimp_drawable_is_gray (drawable->drawable_id))
194
    {
195
      gimp_progress_init (_("Selective Gaussian Blur"));
196 197 198 199 200 201 202

      radius = fabs (bvals.radius) + 1.0;

      /* run the gaussian blur */
      sel_gauss (drawable, radius, bvals.maxdelta);

      /* Store data */
Sven Neumann's avatar
Sven Neumann committed
203
      if (run_mode == GIMP_RUN_INTERACTIVE)
204
        gimp_set_data (PLUG_IN_PROC, &bvals, sizeof (BlurValues));
205

Sven Neumann's avatar
Sven Neumann committed
206
      if (run_mode != GIMP_RUN_NONINTERACTIVE)
David Odin's avatar
David Odin committed
207
        gimp_displays_flush ();
208 209 210
    }
  else
    {
211
      gimp_message (_("Cannot operate on indexed color images."));
Sven Neumann's avatar
Sven Neumann committed
212
      status = GIMP_PDB_EXECUTION_ERROR;
213 214 215 216
    }

  gimp_drawable_detach (drawable);
  values[0].data.d_status = status;
Marc Lehmann's avatar
Marc Lehmann committed
217 218
}

Sven Neumann's avatar
Sven Neumann committed
219
static gboolean
David Odin's avatar
David Odin committed
220
sel_gauss_dialog (GimpDrawable *drawable)
Marc Lehmann's avatar
Marc Lehmann committed
221
{
222 223
  GtkWidget *dialog;
  GtkWidget *main_vbox;
David Odin's avatar
David Odin committed
224
  GtkWidget *preview;
225
  GtkWidget *table;
226 227
  GtkWidget *spinbutton;
  GtkObject *adj;
228
  gboolean   run;
229

230
  gimp_ui_init (PLUG_IN_BINARY, FALSE);
231

232
  dialog = gimp_dialog_new (_("Selective Gaussian Blur"), PLUG_IN_ROLE,
233
                            NULL, 0,
234
                            gimp_standard_help_func, PLUG_IN_PROC,
235

236 237
                            GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL,
                            GTK_STOCK_OK,     GTK_RESPONSE_OK,
238

239
                            NULL);
240

241
  gtk_dialog_set_alternative_button_order (GTK_DIALOG (dialog),
242 243 244
                                           GTK_RESPONSE_OK,
                                           GTK_RESPONSE_CANCEL,
                                           -1);
245

246
  gimp_window_set_transient (GTK_WINDOW (dialog));
247

Michael Natterer's avatar
Michael Natterer committed
248
  main_vbox = gtk_box_new (GTK_ORIENTATION_VERTICAL, 12);
249
  gtk_container_set_border_width (GTK_CONTAINER (main_vbox), 12);
250 251
  gtk_box_pack_start (GTK_BOX (gtk_dialog_get_content_area (GTK_DIALOG (dialog))),
                      main_vbox, TRUE, TRUE, 0);
252
  gtk_widget_show (main_vbox);
253

Sven Neumann's avatar
Sven Neumann committed
254
  preview = gimp_drawable_preview_new (drawable, NULL);
255
  gtk_box_pack_start (GTK_BOX (main_vbox), preview, TRUE, TRUE, 0);
David Odin's avatar
David Odin committed
256
  gtk_widget_show (preview);
257

David Odin's avatar
David Odin committed
258
  g_signal_connect (preview, "invalidated",
259 260
                    G_CALLBACK (preview_update),
                    NULL);
David Odin's avatar
David Odin committed
261

262
  table = gtk_table_new (2, 3, FALSE);
Sven Neumann's avatar
Sven Neumann committed
263 264
  gtk_table_set_col_spacings (GTK_TABLE (table), 6);
  gtk_table_set_row_spacings (GTK_TABLE (table), 6);
265 266
  gtk_box_pack_start (GTK_BOX (main_vbox), table, FALSE, FALSE, 0);
  gtk_widget_show (table);
267

268
  spinbutton = gimp_spin_button_new (&adj,
David Odin's avatar
David Odin committed
269 270
                                     bvals.radius, 0.0, G_MAXINT, 1.0, 5.0,
                                     0, 1, 2);
271
  gimp_table_attach_aligned (GTK_TABLE (table), 0, 0,
David Odin's avatar
David Odin committed
272 273
                             _("_Blur radius:"), 0.0, 0.5,
                             spinbutton, 1, TRUE);
274
  g_signal_connect (adj, "value-changed",
275 276
                    G_CALLBACK (gimp_double_adjustment_update),
                    &bvals.radius);
277
  g_signal_connect_swapped (spinbutton, "value-changed",
David Odin's avatar
David Odin committed
278 279
                            G_CALLBACK (gimp_preview_invalidate),
                            preview);
280 281

  adj = gimp_scale_entry_new (GTK_TABLE (table), 0, 1,
David Odin's avatar
David Odin committed
282 283 284
                              _("_Max. delta:"), 128, 0,
                              bvals.maxdelta, 0, 255, 1, 8, 0,
                              TRUE, 0, 0,
285
                              NULL, NULL);
286
  g_signal_connect (adj, "value-changed",
287 288
                    G_CALLBACK (gimp_int_adjustment_update),
                    &bvals.maxdelta);
289
  g_signal_connect_swapped (adj, "value-changed",
David Odin's avatar
David Odin committed
290 291
                            G_CALLBACK (gimp_preview_invalidate),
                            preview);
292

293
  gtk_widget_show (dialog);
294

295
  run = (gimp_dialog_run (GIMP_DIALOG (dialog)) == GTK_RESPONSE_OK);
Marc Lehmann's avatar
Marc Lehmann committed
296

297
  gtk_widget_destroy (dialog);
298

299
  return run;
300 301 302
}

static void
303 304 305
init_matrix (gdouble  radius,
             gdouble *mat,
             gint     num)
Marc Lehmann's avatar
Marc Lehmann committed
306
{
307
  gint    dx;
308 309 310 311
  gdouble sd, c1, c2;

  /* This formula isn't really correct, but it'll do */
  sd = radius / 3.329042969;
312
  c1 = 1.0 / sqrt (2.0 * G_PI * sd);
313 314
  c2 = -2.0 * (sd * sd);

315 316
  for (dx = 0; dx < num; dx++)
    mat[dx] = c1 * exp ((dx * dx)/ c2);
Marc Lehmann's avatar
Marc Lehmann committed
317 318
}

319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342

#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
#define HAVE_ACCEL 1

static ALWAYS_INLINE void
matrixmult_mmx (const guchar  *src,
                guchar        *dest,
                gint           width,
                gint           height,
                const gdouble *mat,
                gint           numrad,
                gint           bytes,
                gboolean       has_alpha,
                gint           maxdelta,
                gboolean       preview_mode)
{
  const gint       rowstride = width * bytes;
  const long long  maxdelta4 = maxdelta * 0x0001000100010001ULL;
  gushort         *imat;
  gdouble          fsum, fscale;
  gint             i, j, x, y, d;

  g_assert (has_alpha ? (bytes == 4) : (bytes == 3 || bytes == 1));

343
  imat = g_newa (gushort, 2 * numrad + 3);
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457

  fsum = 0.0;
  for (y = 1 - numrad; y < numrad; y++)
    fsum += mat[ABS(y)];

  /* Ensure that one pixel's product fits in 16bits,
   * and that the sum fits in 32bits.
   */
  fscale = MIN (0x100 / mat[0], 0x1000 / fsum);
  for (y = 0; y < numrad; y++)
    imat[numrad - y] = imat[numrad + y] = mat[y] * fscale;

  for (y = numrad; y < numrad + 3; y++)
    imat[numrad + y] = 0;

  for (y = 0; y < height; y++)
    {
      asm volatile (
        "pxor  %%mm7, %%mm7 \n\t":
      );

      for (x = 0; x < width; x++)
        {
          guint r, g, b, fr, fg, fb;
          gint  offset;
          gint  dix;

          r = g = b = fr = fg = fb = 0;

          dix = bytes * (width * y + x);

          if (has_alpha)
            {
              *(guint*) &dest[dix] = *(guint*) &src[dix];

              if (!src[dix + 3])
                continue;
            }

          asm volatile (
            "movd         %0, %%mm6 \n\t"
            "punpcklbw %%mm7, %%mm6 \n\t" /* center pixel */
            :: "m"(src[dix])
          );

          offset = rowstride * (y - numrad) + bytes * (x - numrad);

          if (bytes == 1)
            {
              asm volatile (
                "pshufw $0, %%mm6, %%mm6 \n\t": /* center pixel x4 */
              );
              for (j = 1 - numrad; j < numrad; j++)
                {
                  const guchar *src_b;
                  guint         rowsum  = 0;
                  guint         rowfact = 0;

                  offset += rowstride;

                  if (y + j < 0 || y + j >= height)
                    continue;

                  src_b = src + offset - 3;

                  asm volatile (
                    "pxor  %%mm5, %%mm5 \n\t" /* row fact */
                    "pxor  %%mm4, %%mm4 \n\t" /* row sum  */
                    :
                  );

                  for (i = 1 - numrad; i < numrad; i += 4)
                    {
                      src_b += 4;
                      if (x + i < 0 || x + i >= width)
                        continue;

                      asm volatile (
                        "movd         %0, %%mm0 \n\t"
                        "movq      %%mm6, %%mm1 \n\t"
                        "punpcklbw %%mm7, %%mm0 \n\t" /* one pixel      */
                        "psubusw   %%mm0, %%mm1 \n\t" /* diff           */
                        "movq      %%mm0, %%mm2 \n\t"
                        "psubusw   %%mm6, %%mm2 \n\t"
                        "por       %%mm2, %%mm1 \n\t" /* abs diff       */
                        "pcmpgtw      %1, %%mm1 \n\t" /* threshold      */
                        "pandn        %2, %%mm1 \n\t" /* weight         */
                        "pmullw    %%mm1, %%mm0 \n\t" /* pixel * weight */
                        "paddusw   %%mm1, %%mm5 \n\t" /* fact           */
                        "movq      %%mm0, %%mm2 \n\t"
                        "punpcklwd %%mm7, %%mm0 \n\t"
                        "punpckhwd %%mm7, %%mm2 \n\t"
                        "paddd     %%mm0, %%mm4 \n\t"
                        "paddd     %%mm2, %%mm4 \n\t" /* sum            */
                        :: "m"(*src_b), "m"(maxdelta4), "m"(imat[numrad + i])
                      );
                    }

                  asm volatile (
                    "pshufw $0xb1, %%mm5, %%mm3 \n\t"
                    "paddusw       %%mm3, %%mm5 \n\t"
                    "pshufw $0x0e, %%mm4, %%mm2 \n\t"
                    "pshufw $0x0e, %%mm5, %%mm3 \n\t"
                    "paddd         %%mm2, %%mm4 \n\t"
                    "paddusw       %%mm3, %%mm5 \n\t"
                    "movd          %%mm4, %0    \n\t"
                    "movd          %%mm5, %1    \n\t"
                    :"=g"(rowsum), "=g"(rowfact)
                  );
                  d = imat[numrad + j];
                  r += d * rowsum;
                  fr += d * (gushort) rowfact;
                }

458 459
              if (fr)
                dest[dix] = r / fr;
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
            }
          else
            {
              for (j = 1 - numrad; j < numrad; j++)
                {
                  const guchar *src_b;
                  gushort       rf[4];
                  guint         rr, rg, rb;

                  offset += rowstride;
                  if (y + j < 0 || y + j >= height)
                    continue;

                  src_b = src + offset;

                  asm volatile (
                    "pxor  %%mm5, %%mm5 \n\t" /* row fact   */
                    "pxor  %%mm4, %%mm4 \n\t" /* row sum RG */
                    "pxor  %%mm3, %%mm3 \n\t" /* row sum B  */
                    :
                  );

                  for (i = 1 - numrad; i < numrad; i++)
                    {
                      src_b += bytes;
                      if (x + i < 0 || x + i >= width)
                        continue;

                      if (has_alpha)
                        asm volatile (
                          "movd         %0, %%mm0 \n\t"
                          "movq      %%mm6, %%mm1 \n\t"
                          "punpcklbw %%mm7, %%mm0 \n\t" /* one pixel       */
                          "psubusw   %%mm0, %%mm1 \n\t" /* diff            */
                          "movq      %%mm0, %%mm2 \n\t"
                          "psubusw   %%mm6, %%mm2 \n\t"
                          "por       %%mm2, %%mm1 \n\t" /* abs diff        */
                          "pcmpgtw      %1, %%mm1 \n\t" /* threshold       */
                          "pshufw   $0, %2, %%mm2 \n\t" /* weight          */
                          "pandn     %%mm2, %%mm1 \n\t"
                          "pshufw $0xff, %%mm0, %%mm2 \n\t" /* alpha       */
                          "psllw        $8, %%mm2 \n\t"
                          "pmulhuw   %%mm2, %%mm1 \n\t" /* weight *= alpha */
                          "pmullw    %%mm1, %%mm0 \n\t" /* pixel * weight  */
                          "paddusw   %%mm1, %%mm5 \n\t" /* fact            */
                          "movq      %%mm0, %%mm2 \n\t"
                          "punpcklwd %%mm7, %%mm0 \n\t" /* RG              */
                          "punpckhwd %%mm7, %%mm2 \n\t" /* B               */
                          "paddd     %%mm0, %%mm4 \n\t"
                          "paddd     %%mm2, %%mm3 \n\t"
                          :: "m"(*src_b), "m"(maxdelta4), "m"(imat[numrad + i])
                        );
                      else
                        asm volatile (
                          "movd         %0, %%mm0 \n\t"
                          "movq      %%mm6, %%mm1 \n\t"
                          "punpcklbw %%mm7, %%mm0 \n\t" /* one pixel       */
                          "psubusw   %%mm0, %%mm1 \n\t" /* diff            */
                          "movq      %%mm0, %%mm2 \n\t"
                          "psubusw   %%mm6, %%mm2 \n\t"
                          "por       %%mm2, %%mm1 \n\t" /* abs diff        */
                          "pcmpgtw      %1, %%mm1 \n\t" /* threshold       */
                          "pshufw   $0, %2, %%mm2 \n\t" /* weight          */
                          "pandn     %%mm2, %%mm1 \n\t"
                          "pmullw    %%mm1, %%mm0 \n\t" /* pixel * weight  */
                          "paddusw   %%mm1, %%mm5 \n\t" /* fact            */
                          "movq      %%mm0, %%mm2 \n\t"
                          "punpcklwd %%mm7, %%mm0 \n\t" /* RG              */
                          "punpckhwd %%mm7, %%mm2 \n\t" /* B               */
                          "paddd     %%mm0, %%mm4 \n\t"
                          "paddd     %%mm2, %%mm3 \n\t"
                          :: "m"(*src_b), "m"(maxdelta4), "m"(imat[numrad + i])
                        );
                    }

                  asm volatile (
                    "movd    %%mm4, %0 \n\t"
                    "movd    %%mm3, %2 \n\t"
                    "psrlq  $32, %%mm4 \n\t"
                    "movq    %%mm5, %3 \n\t"
                    "movd    %%mm4, %1 \n\t"
                    :"=g"(rr), "=g"(rg), "=g"(rb), "=m"(*rf)
                    ::"memory"
                  );
                  d = imat[numrad + j];
                  r += d * rr;
                  g += d * rg;
                  b += d * rb;
                  fr += d * rf[0];
                  fg += d * rf[1];
                  fb += d * rf[2];
                }

              if (has_alpha)
                {
                  if (fr)
                    dest[dix+0] = r / fr;
                  if (fg)
                    dest[dix+1] = g / fg;
                  if (fb)
                    dest[dix+2] = b / fb;
                }
              else
                {
                  dest[dix+0] = r / fr;
                  dest[dix+1] = g / fg;
                  dest[dix+2] = b / fb;
                }
            }
        }

571
      if (!(y % 16) && !preview_mode)
572 573
        {
          asm volatile ("emms");
574
          gimp_progress_update ((gdouble) y / (gdouble) height);
575 576 577 578 579 580 581 582
        }
    }

  asm volatile ("emms");
}
#endif /* ARCH_X86 && USE_MMX && __GNUC__ */


583 584 585 586 587 588 589 590 591 592 593
static ALWAYS_INLINE void
matrixmult_int (const guchar  *src,
                guchar        *dest,
                gint           width,
                gint           height,
                const gdouble *mat,
                gint           numrad,
                gint           bytes,
                gboolean       has_alpha,
                gint           maxdelta,
                gboolean       preview_mode)
Marc Lehmann's avatar
Marc Lehmann committed
594
{
595 596
  const gint  nb        = bytes - (has_alpha ? 1 : 0);
  const gint  rowstride = width * bytes;
597 598 599 600 601
  gushort    *imat;
  gdouble     fsum, fscale;
  gint        i, j, b, x, y, d;

#ifdef HAVE_ACCEL
602 603 604
  if (has_alpha ? (bytes == 4) : (bytes == 3 || bytes == 1))
    {
      GimpCpuAccelFlags cpu = gimp_cpu_accel_get_support ();
605

606 607 608 609
      if (cpu & (GIMP_CPU_ACCEL_X86_MMXEXT | GIMP_CPU_ACCEL_X86_SSE))
        return matrixmult_mmx (src, dest, width, height, mat, numrad,
                               bytes, has_alpha, maxdelta, preview_mode);
    }
610
#endif
611

612
  imat = g_newa (gushort, 2 * numrad);
613

614 615 616 617 618 619 620 621
  fsum = 0.0;
  for (y = 1 - numrad; y < numrad; y++)
    fsum += mat[ABS(y)];

  /* Ensure that the sum fits in 32bits. */
  fscale = 0x1000 / fsum;
  for (y = 0; y < numrad; y++)
    imat[numrad - y] = imat[numrad + y] = mat[y] * fscale;
622

623
  for (y = 0; y < height; y++)
624
    {
625
      for (x = 0; x < width; x++)
David Odin's avatar
David Odin committed
626
        {
627 628
          gint dix = bytes * (width * y + x);

David Odin's avatar
David Odin committed
629 630 631 632 633
          if (has_alpha)
            dest[dix + nb] = src[dix + nb];

          for (b = 0; b < nb; b++)
            {
634 635 636 637
              const guchar *src_db = src + dix + b;
              guint         sum    = 0;
              guint         fact   = 0;
              gint          offset;
David Odin's avatar
David Odin committed
638 639 640

              offset = rowstride * (y - numrad) + bytes * (x - numrad);

641
              for (j = 1 - numrad; j < numrad; j++)
David Odin's avatar
David Odin committed
642
                {
643 644 645
                  const guchar *src_b;
                  guint         rowsum  = 0;
                  guint         rowfact = 0;
David Odin's avatar
David Odin committed
646

647 648 649
                  offset += rowstride;
                  if (y + j < 0 || y + j >= height)
                    continue;
David Odin's avatar
David Odin committed
650

651
                  src_b = src + offset + b;
David Odin's avatar
David Odin committed
652

653
                  for (i = 1 - numrad; i < numrad; i++)
David Odin's avatar
David Odin committed
654
                    {
655 656 657
                      gint tmp;

                      src_b += bytes;
David Odin's avatar
David Odin committed
658

659
                      if (x + i < 0 || x + i >= width)
David Odin's avatar
David Odin committed
660 661 662 663 664 665
                        continue;

                      tmp = *src_db - *src_b;
                      if (tmp > maxdelta || tmp < -maxdelta)
                        continue;

666
                      d = imat[numrad+i];
David Odin's avatar
David Odin committed
667
                      if (has_alpha)
668 669 670 671
                        d *= src_b[nb - b];

                      rowsum += d * *src_b;
                      rowfact += d;
David Odin's avatar
David Odin committed
672
                    }
673 674 675 676 677 678 679 680 681 682 683

                  d = imat[numrad+j];

                  if (has_alpha)
                    {
                      rowsum >>= 8;
                      rowfact >>= 8;
                    }

                  sum += d * rowsum;
                  fact += d * rowfact;
David Odin's avatar
David Odin committed
684
                }
685 686

              if (fact == 0)
David Odin's avatar
David Odin committed
687 688 689 690 691 692
                dest[dix + b] = *src_db;
              else
                dest[dix + b] = sum / fact;
            }
        }

693
      if (!(y % 16) && !preview_mode)
694
        gimp_progress_update ((gdouble) y / (gdouble) height);
695
    }
696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
}

/* Force compilation of several versions with inlined constants. */
static void
matrixmult (const guchar  *src,
            guchar        *dest,
            gint           width,
            gint           height,
            const gdouble *mat,
            gint           numrad,
            gint           bytes,
            gboolean       has_alpha,
            gint           maxdelta,
            gboolean       preview_mode)
{
  has_alpha = has_alpha ? 1 : 0;

#define EXPAND(BYTES, ALPHA)\
  if (bytes == BYTES && has_alpha == ALPHA)\
715 716 717 718 719 720
    {\
      matrixmult_int (src, dest, width, height, mat, numrad,\
                      BYTES, ALPHA, maxdelta, preview_mode);\
      return;\
    }

721 722 723 724 725 726
  EXPAND (1, 0)
  EXPAND (2, 1)
  EXPAND (3, 0)
  EXPAND (4, 1)
  EXPAND (bytes, has_alpha)
#undef EXPAND
Marc Lehmann's avatar
Marc Lehmann committed
727 728
}

729
static void
Sven Neumann's avatar
Sven Neumann committed
730
sel_gauss (GimpDrawable *drawable,
731 732
           gdouble       radius,
           gint          maxdelta)
Marc Lehmann's avatar
Marc Lehmann committed
733
{
Sven Neumann's avatar
Sven Neumann committed
734
  GimpPixelRgn src_rgn, dest_rgn;
735 736 737 738
  gint         bytes;
  gboolean     has_alpha;
  guchar      *dest;
  guchar      *src;
739 740
  gint         x, y;
  gint         width, height;
741
  gdouble     *mat;
742
  gint         numrad;
743

744 745 746
  if (! gimp_drawable_mask_intersect (drawable->drawable_id,
                                      &x, &y, &width, &height))
    return;
747

748 749
  bytes     = drawable->bpp;
  has_alpha = gimp_drawable_has_alpha (drawable->drawable_id);
750

751
  numrad = (gint) (radius + 1.0);
752
  mat = g_new (gdouble, numrad);
Sven Neumann's avatar
Sven Neumann committed
753
  init_matrix (radius, mat, numrad);
754

755 756 757
  /*  allocate with extra padding because MMX instructions may read
      more than strictly necessary  */
  src  = g_new (guchar, width * height * bytes + 16);
758
  dest = g_new (guchar, width * height * bytes);
759

760
  gimp_pixel_rgn_init (&src_rgn,
761 762
                       drawable, x, y, width, height, FALSE, FALSE);
  gimp_pixel_rgn_get_rect (&src_rgn, src, x, y, width, height);
763

764
  matrixmult (src, dest, width, height, mat, numrad,
David Odin's avatar
David Odin committed
765
              bytes, has_alpha, maxdelta, FALSE);
766
  gimp_progress_update (1.0);
767

768
  gimp_pixel_rgn_init (&dest_rgn,
769 770
                       drawable, x, y, width, height, TRUE, TRUE);
  gimp_pixel_rgn_set_rect (&dest_rgn, dest, x, y, width, height);
771 772 773

  /*  merge the shadow, update the drawable  */
  gimp_drawable_flush (drawable);
774
  gimp_drawable_merge_shadow (drawable->drawable_id, TRUE);
775
  gimp_drawable_update (drawable->drawable_id, x, y, width, height);
776 777 778 779 780

  /* free up buffers */
  g_free (src);
  g_free (dest);
  g_free (mat);
Marc Lehmann's avatar
Marc Lehmann committed
781
}
David Odin's avatar
David Odin committed
782 783

static void
784
preview_update (GimpPreview *preview)
David Odin's avatar
David Odin committed
785
{
786
  GimpDrawable  *drawable;
David Odin's avatar
David Odin committed
787
  glong          bytes;
788
  gint           x, y;
David Odin's avatar
David Odin committed
789 790 791 792 793 794 795
  guchar        *render_buffer;  /* Buffer to hold rendered image */
  gint           width;          /* Width of preview widget */
  gint           height;         /* Height of preview widget */

  GimpPixelRgn   srcPR;           /* Pixel region */
  guchar        *src;
  gboolean       has_alpha;
796 797
  gint           numrad;
  gdouble       *mat;
David Odin's avatar
David Odin committed
798
  gdouble       radius;
David Odin's avatar
David Odin committed
799 800

  /* Get drawable info */
801 802
  drawable =
    gimp_drawable_preview_get_drawable (GIMP_DRAWABLE_PREVIEW (preview));
803
  bytes = drawable->bpp;
David Odin's avatar
David Odin committed
804 805 806 807

  /*
   * Setup for filter...
   */
808
  gimp_preview_get_position (preview, &x, &y);
809
  gimp_preview_get_size (preview, &width, &height);
David Odin's avatar
David Odin committed
810 811

  /* initialize pixel regions */
812
  gimp_pixel_rgn_init (&srcPR, drawable,
813
                       x, y, width, height,
David Odin's avatar
David Odin committed
814 815
                       FALSE, FALSE);
  render_buffer = g_new (guchar, width * height * bytes);
David Odin's avatar
David Odin committed
816

817 818 819
  /*  allocate with extra padding because MMX instructions may read
      more than strictly necessary  */
  src = g_new (guchar, width * height * bytes + 16);
David Odin's avatar
David Odin committed
820

David Odin's avatar
David Odin committed
821
  /* render image */
822
  gimp_pixel_rgn_get_rect (&srcPR, src, x, y, width, height);
823
  has_alpha = gimp_drawable_has_alpha (drawable->drawable_id);
David Odin's avatar
David Odin committed
824 825 826 827

  radius = fabs (bvals.radius) + 1.0;
  numrad = (gint) (radius + 1.0);

828 829
  mat = g_new (gdouble, numrad);
  init_matrix (radius, mat, numrad);
David Odin's avatar
David Odin committed
830 831 832 833 834 835 836 837

  matrixmult (src, render_buffer,
              width, height,
              mat, numrad,
              bytes, has_alpha, bvals.maxdelta, TRUE);

  g_free (mat);
  g_free (src);
David Odin's avatar
David Odin committed
838 839 840 841

  /*
   * Draw the preview image on the screen...
   */
842
  gimp_preview_draw_buffer (preview, render_buffer, width * bytes);
David Odin's avatar
David Odin committed
843 844 845

  g_free (render_buffer);
}