blur-gauss-selective.c 26.8 KB
Newer Older
1
/* Selective gaussian blur filter for GIMP, version 0.1
Marc Lehmann's avatar
Marc Lehmann committed
2
3
4
5
6
 * Adapted from the original gaussian blur filter by Spencer Kimball and
 * Peter Mattis.
 *
 * Copyright (C) 1995 Spencer Kimball and Peter Mattis
 * Copyright (C) 1999 Thom van Os <thom@vanos.com>
7
 * Copyright (C) 2006 Loren Merritt
Marc Lehmann's avatar
Marc Lehmann committed
8
 *
9
 * This program is free software: you can redistribute it and/or modify
Marc Lehmann's avatar
Marc Lehmann committed
10
 * it under the terms of the GNU General Public License as published by
11
 * the Free Software Foundation; either version 3 of the License, or
Marc Lehmann's avatar
Marc Lehmann committed
12
13
14
15
16
17
18
19
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
20
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
Marc Lehmann's avatar
Marc Lehmann committed
21
22
 *
 * To do:
David Odin's avatar
David Odin committed
23
24
25
26
27
28
 *      - support for horizontal or vertical only blur
 *      - use memory more efficiently, smaller regions at a time
 *      - integrating with other convolution matrix based filters ?
 *      - create more selective and adaptive filters
 *      - threading
 *      - optimization
Marc Lehmann's avatar
Marc Lehmann committed
29
 */
30

Sven Neumann's avatar
Sven Neumann committed
31
#include "config.h"
32
33
34
35

#include <libgimp/gimp.h>
#include <libgimp/gimpui.h>

Sven Neumann's avatar
Sven Neumann committed
36
#include "libgimp/stdplugins-intl.h"
Marc Lehmann's avatar
Marc Lehmann committed
37

38

39
#define PLUG_IN_PROC   "plug-in-sel-gauss"
40
#define PLUG_IN_BINARY "blur-gauss-selective"
41
#define PLUG_IN_ROLE   "gimp-blur-gauss-selective"
42

43
44
45
46
47
48
49
50
#ifndef ALWAYS_INLINE
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#    define ALWAYS_INLINE __attribute__((always_inline)) inline
#else
#    define ALWAYS_INLINE inline
#endif
#endif

51

Marc Lehmann's avatar
Marc Lehmann committed
52
53
typedef struct
{
David Odin's avatar
David Odin committed
54
55
  gdouble  radius;
  gint     maxdelta;
Marc Lehmann's avatar
Marc Lehmann committed
56
57
58
59
60
} BlurValues;


/* Declare local functions.
 */
David Odin's avatar
David Odin committed
61
static void      query            (void);
62
63
64
65
66
67
68
69
70
71
72
73
static void      run              (const gchar      *name,
                                   gint              nparams,
                                   const GimpParam  *param,
                                   gint             *nreturn_vals,
                                   GimpParam       **return_vals);

static void      sel_gauss        (GimpDrawable     *drawable,
                                   gdouble           radius,
                                   gint              maxdelta);
static gboolean  sel_gauss_dialog (GimpDrawable     *drawable);
static void      preview_update   (GimpPreview      *preview);

Marc Lehmann's avatar
Marc Lehmann committed
74

75
const GimpPlugInInfo PLUG_IN_INFO =
Marc Lehmann's avatar
Marc Lehmann committed
76
{
77
78
79
80
  NULL,  /* init_proc  */
  NULL,  /* quit_proc  */
  query, /* query_proc */
  run,   /* run_proc   */
Marc Lehmann's avatar
Marc Lehmann committed
81
82
83
84
};

static BlurValues bvals =
{
85
  5.0, /* radius   */
Sven Neumann's avatar
Sven Neumann committed
86
  50   /* maxdelta */
Marc Lehmann's avatar
Marc Lehmann committed
87
88
89
90
};

MAIN ()

91
92
static void
query (void)
Marc Lehmann's avatar
Marc Lehmann committed
93
{
94
  static const GimpParamDef args[] =
Marc Lehmann's avatar
Marc Lehmann committed
95
  {
96
    { GIMP_PDB_INT32,    "run-mode",  "The run mode { RUN-INTERACTIVE (0), RUN-NONINTERACTIVE (1) }" },
97
98
99
100
    { GIMP_PDB_IMAGE,    "image",     "Input image (unused)"         },
    { GIMP_PDB_DRAWABLE, "drawable",  "Input drawable"               },
    { GIMP_PDB_FLOAT,    "radius",    "Radius of gaussian blur (in pixels, > 0.0)" },
    { GIMP_PDB_INT32,    "max-delta", "Maximum delta"                }
Marc Lehmann's avatar
Marc Lehmann committed
101
  };
Sven Neumann's avatar
Sven Neumann committed
102

103
  gimp_install_procedure (PLUG_IN_PROC,
104
                          N_("Blur neighboring pixels, but only in low-contrast areas"),
David Odin's avatar
David Odin committed
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
                          "This filter functions similar to the regular "
                          "gaussian blur filter except that neighbouring "
                          "pixels that differ more than the given maxdelta "
                          "parameter will not be blended with. This way with "
                          "the correct parameters, an image can be smoothed "
                          "out without losing details. However, this filter "
                          "can be rather slow.",
                          "Thom van Os",
                          "Thom van Os",
                          "1999",
                          N_("_Selective Gaussian Blur..."),
                          "RGB*, GRAY*",
                          GIMP_PLUGIN,
                          G_N_ELEMENTS (args), 0,
                          args, NULL);
120

121
  gimp_plugin_menu_register (PLUG_IN_PROC, "<Image>/Filters/Blur");
Marc Lehmann's avatar
Marc Lehmann committed
122
123
}

124
static void
125
126
127
128
129
run (const gchar      *name,
     gint              nparams,
     const GimpParam  *param,
     gint             *nreturn_vals,
     GimpParam       **return_vals)
Marc Lehmann's avatar
Marc Lehmann committed
130
{
131
132
133
  static GimpParam   values[1];
  GimpRunMode        run_mode;
  GimpPDBStatusType  status = GIMP_PDB_SUCCESS;
David Odin's avatar
David Odin committed
134
135
  GimpDrawable      *drawable;
  gdouble            radius;
136
137
138
139

  run_mode = param[0].data.d_int32;

  *nreturn_vals = 1;
140
  *return_vals  = values;
141

Sven Neumann's avatar
Sven Neumann committed
142
  INIT_I18N ();
143

144
  values[0].type          = GIMP_PDB_STATUS;
145
146
  values[0].data.d_status = status;

David Odin's avatar
David Odin committed
147
148
  /* Get the specified drawable */
  drawable = gimp_drawable_get (param[2].data.d_drawable);
149
  gimp_tile_cache_ntiles (2 * drawable->ntile_cols);
David Odin's avatar
David Odin committed
150

151
152
  switch (run_mode)
    {
Sven Neumann's avatar
Sven Neumann committed
153
    case GIMP_RUN_INTERACTIVE:
154
      /* Possibly retrieve data */
155
      gimp_get_data (PLUG_IN_PROC, &bvals);
156
157

      /* First acquire information with a dialog */
David Odin's avatar
David Odin committed
158
159
      if (! sel_gauss_dialog (drawable))
        return;
160
161
      break;

Sven Neumann's avatar
Sven Neumann committed
162
    case GIMP_RUN_NONINTERACTIVE:
163
      /* Make sure all the arguments are there! */
164
      if (nparams != 5)
David Odin's avatar
David Odin committed
165
        status = GIMP_PDB_CALLING_ERROR;
Sven Neumann's avatar
Sven Neumann committed
166
      if (status == GIMP_PDB_SUCCESS)
David Odin's avatar
David Odin committed
167
168
169
        {
          bvals.radius   = param[3].data.d_float;
          bvals.maxdelta = CLAMP (param[4].data.d_int32, 0, 255);
170

171
          if (bvals.radius <= 0.0)
172
            status = GIMP_PDB_CALLING_ERROR;
David Odin's avatar
David Odin committed
173
        }
174
175
      break;

Sven Neumann's avatar
Sven Neumann committed
176
    case GIMP_RUN_WITH_LAST_VALS:
177
      /* Possibly retrieve data */
178
      gimp_get_data (PLUG_IN_PROC, &bvals);
179
180
181
182
183
184
      break;

    default:
      break;
    }

Sven Neumann's avatar
Sven Neumann committed
185
  if (status != GIMP_PDB_SUCCESS)
186
187
188
189
190
191
    {
      values[0].data.d_status = status;
      return;
    }

  /* Make sure that the drawable is gray or RGB color */
192
193
  if (gimp_drawable_is_rgb (drawable->drawable_id) ||
      gimp_drawable_is_gray (drawable->drawable_id))
194
    {
195
      gimp_progress_init (_("Selective Gaussian Blur"));
196
197
198
199
200
201
202

      radius = fabs (bvals.radius) + 1.0;

      /* run the gaussian blur */
      sel_gauss (drawable, radius, bvals.maxdelta);

      /* Store data */
Sven Neumann's avatar
Sven Neumann committed
203
      if (run_mode == GIMP_RUN_INTERACTIVE)
204
        gimp_set_data (PLUG_IN_PROC, &bvals, sizeof (BlurValues));
205

Sven Neumann's avatar
Sven Neumann committed
206
      if (run_mode != GIMP_RUN_NONINTERACTIVE)
David Odin's avatar
David Odin committed
207
        gimp_displays_flush ();
208
209
210
    }
  else
    {
211
      gimp_message (_("Cannot operate on indexed color images."));
Sven Neumann's avatar
Sven Neumann committed
212
      status = GIMP_PDB_EXECUTION_ERROR;
213
214
215
216
    }

  gimp_drawable_detach (drawable);
  values[0].data.d_status = status;
Marc Lehmann's avatar
Marc Lehmann committed
217
218
}

Sven Neumann's avatar
Sven Neumann committed
219
static gboolean
David Odin's avatar
David Odin committed
220
sel_gauss_dialog (GimpDrawable *drawable)
Marc Lehmann's avatar
Marc Lehmann committed
221
{
222
223
  GtkWidget *dialog;
  GtkWidget *main_vbox;
David Odin's avatar
David Odin committed
224
  GtkWidget *preview;
225
  GtkWidget *table;
226
227
  GtkWidget *spinbutton;
  GtkObject *adj;
228
  gboolean   run;
229

230
  gimp_ui_init (PLUG_IN_BINARY, FALSE);
231

232
  dialog = gimp_dialog_new (_("Selective Gaussian Blur"), PLUG_IN_ROLE,
233
                            NULL, 0,
234
                            gimp_standard_help_func, PLUG_IN_PROC,
235

236
237
                            GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL,
                            GTK_STOCK_OK,     GTK_RESPONSE_OK,
238

239
                            NULL);
240

241
  gtk_dialog_set_alternative_button_order (GTK_DIALOG (dialog),
242
243
244
                                           GTK_RESPONSE_OK,
                                           GTK_RESPONSE_CANCEL,
                                           -1);
245

246
  gimp_window_set_transient (GTK_WINDOW (dialog));
247

Michael Natterer's avatar
Michael Natterer committed
248
  main_vbox = gtk_box_new (GTK_ORIENTATION_VERTICAL, 12);
249
  gtk_container_set_border_width (GTK_CONTAINER (main_vbox), 12);
250
251
  gtk_box_pack_start (GTK_BOX (gtk_dialog_get_content_area (GTK_DIALOG (dialog))),
                      main_vbox, TRUE, TRUE, 0);
252
  gtk_widget_show (main_vbox);
253

Sven Neumann's avatar
Sven Neumann committed
254
  preview = gimp_drawable_preview_new (drawable, NULL);
255
  gtk_box_pack_start (GTK_BOX (main_vbox), preview, TRUE, TRUE, 0);
David Odin's avatar
David Odin committed
256
  gtk_widget_show (preview);
257

David Odin's avatar
David Odin committed
258
  g_signal_connect (preview, "invalidated",
259
260
                    G_CALLBACK (preview_update),
                    NULL);
David Odin's avatar
David Odin committed
261

262
  table = gtk_table_new (2, 3, FALSE);
Sven Neumann's avatar
Sven Neumann committed
263
264
  gtk_table_set_col_spacings (GTK_TABLE (table), 6);
  gtk_table_set_row_spacings (GTK_TABLE (table), 6);
265
266
  gtk_box_pack_start (GTK_BOX (main_vbox), table, FALSE, FALSE, 0);
  gtk_widget_show (table);
267

268
  spinbutton = gimp_spin_button_new (&adj,
David Odin's avatar
David Odin committed
269
270
                                     bvals.radius, 0.0, G_MAXINT, 1.0, 5.0,
                                     0, 1, 2);
271
  gimp_table_attach_aligned (GTK_TABLE (table), 0, 0,
David Odin's avatar
David Odin committed
272
273
                             _("_Blur radius:"), 0.0, 0.5,
                             spinbutton, 1, TRUE);
274
  g_signal_connect (adj, "value-changed",
275
276
                    G_CALLBACK (gimp_double_adjustment_update),
                    &bvals.radius);
277
  g_signal_connect_swapped (spinbutton, "value-changed",
David Odin's avatar
David Odin committed
278
279
                            G_CALLBACK (gimp_preview_invalidate),
                            preview);
280
281

  adj = gimp_scale_entry_new (GTK_TABLE (table), 0, 1,
David Odin's avatar
David Odin committed
282
283
284
                              _("_Max. delta:"), 128, 0,
                              bvals.maxdelta, 0, 255, 1, 8, 0,
                              TRUE, 0, 0,
285
                              NULL, NULL);
286
  g_signal_connect (adj, "value-changed",
287
288
                    G_CALLBACK (gimp_int_adjustment_update),
                    &bvals.maxdelta);
289
  g_signal_connect_swapped (adj, "value-changed",
David Odin's avatar
David Odin committed
290
291
                            G_CALLBACK (gimp_preview_invalidate),
                            preview);
292

293
  gtk_widget_show (dialog);
294

295
  run = (gimp_dialog_run (GIMP_DIALOG (dialog)) == GTK_RESPONSE_OK);
Marc Lehmann's avatar
Marc Lehmann committed
296

297
  gtk_widget_destroy (dialog);
298

299
  return run;
300
301
302
}

static void
303
304
305
init_matrix (gdouble  radius,
             gdouble *mat,
             gint     num)
Marc Lehmann's avatar
Marc Lehmann committed
306
{
307
  gint    dx;
308
309
310
311
  gdouble sd, c1, c2;

  /* This formula isn't really correct, but it'll do */
  sd = radius / 3.329042969;
312
  c1 = 1.0 / sqrt (2.0 * G_PI * sd);
313
314
  c2 = -2.0 * (sd * sd);

315
316
  for (dx = 0; dx < num; dx++)
    mat[dx] = c1 * exp ((dx * dx)/ c2);
Marc Lehmann's avatar
Marc Lehmann committed
317
318
}

319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342

#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
#define HAVE_ACCEL 1

static ALWAYS_INLINE void
matrixmult_mmx (const guchar  *src,
                guchar        *dest,
                gint           width,
                gint           height,
                const gdouble *mat,
                gint           numrad,
                gint           bytes,
                gboolean       has_alpha,
                gint           maxdelta,
                gboolean       preview_mode)
{
  const gint       rowstride = width * bytes;
  const long long  maxdelta4 = maxdelta * 0x0001000100010001ULL;
  gushort         *imat;
  gdouble          fsum, fscale;
  gint             i, j, x, y, d;

  g_assert (has_alpha ? (bytes == 4) : (bytes == 3 || bytes == 1));

343
  imat = g_newa (gushort, 2 * numrad + 3);
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457

  fsum = 0.0;
  for (y = 1 - numrad; y < numrad; y++)
    fsum += mat[ABS(y)];

  /* Ensure that one pixel's product fits in 16bits,
   * and that the sum fits in 32bits.
   */
  fscale = MIN (0x100 / mat[0], 0x1000 / fsum);
  for (y = 0; y < numrad; y++)
    imat[numrad - y] = imat[numrad + y] = mat[y] * fscale;

  for (y = numrad; y < numrad + 3; y++)
    imat[numrad + y] = 0;

  for (y = 0; y < height; y++)
    {
      asm volatile (
        "pxor  %%mm7, %%mm7 \n\t":
      );

      for (x = 0; x < width; x++)
        {
          guint r, g, b, fr, fg, fb;
          gint  offset;
          gint  dix;

          r = g = b = fr = fg = fb = 0;

          dix = bytes * (width * y + x);

          if (has_alpha)
            {
              *(guint*) &dest[dix] = *(guint*) &src[dix];

              if (!src[dix + 3])
                continue;
            }

          asm volatile (
            "movd         %0, %%mm6 \n\t"
            "punpcklbw %%mm7, %%mm6 \n\t" /* center pixel */
            :: "m"(src[dix])
          );

          offset = rowstride * (y - numrad) + bytes * (x - numrad);

          if (bytes == 1)
            {
              asm volatile (
                "pshufw $0, %%mm6, %%mm6 \n\t": /* center pixel x4 */
              );
              for (j = 1 - numrad; j < numrad; j++)
                {
                  const guchar *src_b;
                  guint         rowsum  = 0;
                  guint         rowfact = 0;

                  offset += rowstride;

                  if (y + j < 0 || y + j >= height)
                    continue;

                  src_b = src + offset - 3;

                  asm volatile (
                    "pxor  %%mm5, %%mm5 \n\t" /* row fact */
                    "pxor  %%mm4, %%mm4 \n\t" /* row sum  */
                    :
                  );

                  for (i = 1 - numrad; i < numrad; i += 4)
                    {
                      src_b += 4;
                      if (x + i < 0 || x + i >= width)
                        continue;

                      asm volatile (
                        "movd         %0, %%mm0 \n\t"
                        "movq      %%mm6, %%mm1 \n\t"
                        "punpcklbw %%mm7, %%mm0 \n\t" /* one pixel      */
                        "psubusw   %%mm0, %%mm1 \n\t" /* diff           */
                        "movq      %%mm0, %%mm2 \n\t"
                        "psubusw   %%mm6, %%mm2 \n\t"
                        "por       %%mm2, %%mm1 \n\t" /* abs diff       */
                        "pcmpgtw      %1, %%mm1 \n\t" /* threshold      */
                        "pandn        %2, %%mm1 \n\t" /* weight         */
                        "pmullw    %%mm1, %%mm0 \n\t" /* pixel * weight */
                        "paddusw   %%mm1, %%mm5 \n\t" /* fact           */
                        "movq      %%mm0, %%mm2 \n\t"
                        "punpcklwd %%mm7, %%mm0 \n\t"
                        "punpckhwd %%mm7, %%mm2 \n\t"
                        "paddd     %%mm0, %%mm4 \n\t"
                        "paddd     %%mm2, %%mm4 \n\t" /* sum            */
                        :: "m"(*src_b), "m"(maxdelta4), "m"(imat[numrad + i])
                      );
                    }

                  asm volatile (
                    "pshufw $0xb1, %%mm5, %%mm3 \n\t"
                    "paddusw       %%mm3, %%mm5 \n\t"
                    "pshufw $0x0e, %%mm4, %%mm2 \n\t"
                    "pshufw $0x0e, %%mm5, %%mm3 \n\t"
                    "paddd         %%mm2, %%mm4 \n\t"
                    "paddusw       %%mm3, %%mm5 \n\t"
                    "movd          %%mm4, %0    \n\t"
                    "movd          %%mm5, %1    \n\t"
                    :"=g"(rowsum), "=g"(rowfact)
                  );
                  d = imat[numrad + j];
                  r += d * rowsum;
                  fr += d * (gushort) rowfact;
                }

458
459
              if (fr)
                dest[dix] = r / fr;
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
            }
          else
            {
              for (j = 1 - numrad; j < numrad; j++)
                {
                  const guchar *src_b;
                  gushort       rf[4];
                  guint         rr, rg, rb;

                  offset += rowstride;
                  if (y + j < 0 || y + j >= height)
                    continue;

                  src_b = src + offset;

                  asm volatile (
                    "pxor  %%mm5, %%mm5 \n\t" /* row fact   */
                    "pxor  %%mm4, %%mm4 \n\t" /* row sum RG */
                    "pxor  %%mm3, %%mm3 \n\t" /* row sum B  */
                    :
                  );

                  for (i = 1 - numrad; i < numrad; i++)
                    {
                      src_b += bytes;
                      if (x + i < 0 || x + i >= width)
                        continue;

                      if (has_alpha)
                        asm volatile (
                          "movd         %0, %%mm0 \n\t"
                          "movq      %%mm6, %%mm1 \n\t"
                          "punpcklbw %%mm7, %%mm0 \n\t" /* one pixel       */
                          "psubusw   %%mm0, %%mm1 \n\t" /* diff            */
                          "movq      %%mm0, %%mm2 \n\t"
                          "psubusw   %%mm6, %%mm2 \n\t"
                          "por       %%mm2, %%mm1 \n\t" /* abs diff        */
                          "pcmpgtw      %1, %%mm1 \n\t" /* threshold       */
                          "pshufw   $0, %2, %%mm2 \n\t" /* weight          */
                          "pandn     %%mm2, %%mm1 \n\t"
                          "pshufw $0xff, %%mm0, %%mm2 \n\t" /* alpha       */
                          "psllw        $8, %%mm2 \n\t"
                          "pmulhuw   %%mm2, %%mm1 \n\t" /* weight *= alpha */
                          "pmullw    %%mm1, %%mm0 \n\t" /* pixel * weight  */
                          "paddusw   %%mm1, %%mm5 \n\t" /* fact            */
                          "movq      %%mm0, %%mm2 \n\t"
                          "punpcklwd %%mm7, %%mm0 \n\t" /* RG              */
                          "punpckhwd %%mm7, %%mm2 \n\t" /* B               */
                          "paddd     %%mm0, %%mm4 \n\t"
                          "paddd     %%mm2, %%mm3 \n\t"
                          :: "m"(*src_b), "m"(maxdelta4), "m"(imat[numrad + i])
                        );
                      else
                        asm volatile (
                          "movd         %0, %%mm0 \n\t"
                          "movq      %%mm6, %%mm1 \n\t"
                          "punpcklbw %%mm7, %%mm0 \n\t" /* one pixel       */
                          "psubusw   %%mm0, %%mm1 \n\t" /* diff            */
                          "movq      %%mm0, %%mm2 \n\t"
                          "psubusw   %%mm6, %%mm2 \n\t"
                          "por       %%mm2, %%mm1 \n\t" /* abs diff        */
                          "pcmpgtw      %1, %%mm1 \n\t" /* threshold       */
                          "pshufw   $0, %2, %%mm2 \n\t" /* weight          */
                          "pandn     %%mm2, %%mm1 \n\t"
                          "pmullw    %%mm1, %%mm0 \n\t" /* pixel * weight  */
                          "paddusw   %%mm1, %%mm5 \n\t" /* fact            */
                          "movq      %%mm0, %%mm2 \n\t"
                          "punpcklwd %%mm7, %%mm0 \n\t" /* RG              */
                          "punpckhwd %%mm7, %%mm2 \n\t" /* B               */
                          "paddd     %%mm0, %%mm4 \n\t"
                          "paddd     %%mm2, %%mm3 \n\t"
                          :: "m"(*src_b), "m"(maxdelta4), "m"(imat[numrad + i])
                        );
                    }

                  asm volatile (
                    "movd    %%mm4, %0 \n\t"
                    "movd    %%mm3, %2 \n\t"
                    "psrlq  $32, %%mm4 \n\t"
                    "movq    %%mm5, %3 \n\t"
                    "movd    %%mm4, %1 \n\t"
                    :"=g"(rr), "=g"(rg), "=g"(rb), "=m"(*rf)
                    ::"memory"
                  );
                  d = imat[numrad + j];
                  r += d * rr;
                  g += d * rg;
                  b += d * rb;
                  fr += d * rf[0];
                  fg += d * rf[1];
                  fb += d * rf[2];
                }

              if (has_alpha)
                {
                  if (fr)
                    dest[dix+0] = r / fr;
                  if (fg)
                    dest[dix+1] = g / fg;
                  if (fb)
                    dest[dix+2] = b / fb;
                }
              else
                {
                  dest[dix+0] = r / fr;
                  dest[dix+1] = g / fg;
                  dest[dix+2] = b / fb;
                }
            }
        }

571
      if (!(y % 16) && !preview_mode)
572
573
        {
          asm volatile ("emms");
574
          gimp_progress_update ((gdouble) y / (gdouble) height);
575
576
577
578
579
580
581
582
        }
    }

  asm volatile ("emms");
}
#endif /* ARCH_X86 && USE_MMX && __GNUC__ */


583
584
585
586
587
588
589
590
591
592
593
static ALWAYS_INLINE void
matrixmult_int (const guchar  *src,
                guchar        *dest,
                gint           width,
                gint           height,
                const gdouble *mat,
                gint           numrad,
                gint           bytes,
                gboolean       has_alpha,
                gint           maxdelta,
                gboolean       preview_mode)
Marc Lehmann's avatar
Marc Lehmann committed
594
{
595
596
  const gint  nb        = bytes - (has_alpha ? 1 : 0);
  const gint  rowstride = width * bytes;
597
598
599
600
601
  gushort    *imat;
  gdouble     fsum, fscale;
  gint        i, j, b, x, y, d;

#ifdef HAVE_ACCEL
602
603
604
  if (has_alpha ? (bytes == 4) : (bytes == 3 || bytes == 1))
    {
      GimpCpuAccelFlags cpu = gimp_cpu_accel_get_support ();
605

606
607
608
609
      if (cpu & (GIMP_CPU_ACCEL_X86_MMXEXT | GIMP_CPU_ACCEL_X86_SSE))
        return matrixmult_mmx (src, dest, width, height, mat, numrad,
                               bytes, has_alpha, maxdelta, preview_mode);
    }
610
#endif
611

612
  imat = g_newa (gushort, 2 * numrad);
613

614
615
616
617
618
619
620
621
  fsum = 0.0;
  for (y = 1 - numrad; y < numrad; y++)
    fsum += mat[ABS(y)];

  /* Ensure that the sum fits in 32bits. */
  fscale = 0x1000 / fsum;
  for (y = 0; y < numrad; y++)
    imat[numrad - y] = imat[numrad + y] = mat[y] * fscale;
622

623
  for (y = 0; y < height; y++)
624
    {
625
      for (x = 0; x < width; x++)
David Odin's avatar
David Odin committed
626
        {
627
628
          gint dix = bytes * (width * y + x);

David Odin's avatar
David Odin committed
629
630
631
632
633
          if (has_alpha)
            dest[dix + nb] = src[dix + nb];

          for (b = 0; b < nb; b++)
            {
634
635
636
637
              const guchar *src_db = src + dix + b;
              guint         sum    = 0;
              guint         fact   = 0;
              gint          offset;
David Odin's avatar
David Odin committed
638
639
640

              offset = rowstride * (y - numrad) + bytes * (x - numrad);

641
              for (j = 1 - numrad; j < numrad; j++)
David Odin's avatar
David Odin committed
642
                {
643
644
645
                  const guchar *src_b;
                  guint         rowsum  = 0;
                  guint         rowfact = 0;
David Odin's avatar
David Odin committed
646

647
648
649
                  offset += rowstride;
                  if (y + j < 0 || y + j >= height)
                    continue;
David Odin's avatar
David Odin committed
650

651
                  src_b = src + offset + b;
David Odin's avatar
David Odin committed
652

653
                  for (i = 1 - numrad; i < numrad; i++)
David Odin's avatar
David Odin committed
654
                    {
655
656
657
                      gint tmp;

                      src_b += bytes;
David Odin's avatar
David Odin committed
658

659
                      if (x + i < 0 || x + i >= width)
David Odin's avatar
David Odin committed
660
661
662
663
664
665
                        continue;

                      tmp = *src_db - *src_b;
                      if (tmp > maxdelta || tmp < -maxdelta)
                        continue;

666
                      d = imat[numrad+i];
David Odin's avatar
David Odin committed
667
                      if (has_alpha)
668
669
670
671
                        d *= src_b[nb - b];

                      rowsum += d * *src_b;
                      rowfact += d;
David Odin's avatar
David Odin committed
672
                    }
673
674
675
676
677
678
679
680
681
682
683

                  d = imat[numrad+j];

                  if (has_alpha)
                    {
                      rowsum >>= 8;
                      rowfact >>= 8;
                    }

                  sum += d * rowsum;
                  fact += d * rowfact;
David Odin's avatar
David Odin committed
684
                }
685
686

              if (fact == 0)
David Odin's avatar
David Odin committed
687
688
689
690
691
692
                dest[dix + b] = *src_db;
              else
                dest[dix + b] = sum / fact;
            }
        }

693
      if (!(y % 16) && !preview_mode)
694
        gimp_progress_update ((gdouble) y / (gdouble) height);
695
    }
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
}

/* Force compilation of several versions with inlined constants. */
static void
matrixmult (const guchar  *src,
            guchar        *dest,
            gint           width,
            gint           height,
            const gdouble *mat,
            gint           numrad,
            gint           bytes,
            gboolean       has_alpha,
            gint           maxdelta,
            gboolean       preview_mode)
{
  has_alpha = has_alpha ? 1 : 0;

#define EXPAND(BYTES, ALPHA)\
  if (bytes == BYTES && has_alpha == ALPHA)\
715
716
717
718
719
720
    {\
      matrixmult_int (src, dest, width, height, mat, numrad,\
                      BYTES, ALPHA, maxdelta, preview_mode);\
      return;\
    }

721
722
723
724
725
726
  EXPAND (1, 0)
  EXPAND (2, 1)
  EXPAND (3, 0)
  EXPAND (4, 1)
  EXPAND (bytes, has_alpha)
#undef EXPAND
Marc Lehmann's avatar
Marc Lehmann committed
727
728
}

729
static void
Sven Neumann's avatar
Sven Neumann committed
730
sel_gauss (GimpDrawable *drawable,
731
732
           gdouble       radius,
           gint          maxdelta)
Marc Lehmann's avatar
Marc Lehmann committed
733
{
Sven Neumann's avatar
Sven Neumann committed
734
  GimpPixelRgn src_rgn, dest_rgn;
735
736
737
738
  gint         bytes;
  gboolean     has_alpha;
  guchar      *dest;
  guchar      *src;
739
740
  gint         x, y;
  gint         width, height;
741
  gdouble     *mat;
742
  gint         numrad;
743

744
745
746
  if (! gimp_drawable_mask_intersect (drawable->drawable_id,
                                      &x, &y, &width, &height))
    return;
747

748
749
  bytes     = drawable->bpp;
  has_alpha = gimp_drawable_has_alpha (drawable->drawable_id);
750

751
  numrad = (gint) (radius + 1.0);
752
  mat = g_new (gdouble, numrad);
Sven Neumann's avatar
Sven Neumann committed
753
  init_matrix (radius, mat, numrad);
754

755
756
757
  /*  allocate with extra padding because MMX instructions may read
      more than strictly necessary  */
  src  = g_new (guchar, width * height * bytes + 16);
758
  dest = g_new (guchar, width * height * bytes);
759

760
  gimp_pixel_rgn_init (&src_rgn,
761
762
                       drawable, x, y, width, height, FALSE, FALSE);
  gimp_pixel_rgn_get_rect (&src_rgn, src, x, y, width, height);
763

764
  matrixmult (src, dest, width, height, mat, numrad,
David Odin's avatar
David Odin committed
765
              bytes, has_alpha, maxdelta, FALSE);
766
  gimp_progress_update (1.0);
767

768
  gimp_pixel_rgn_init (&dest_rgn,
769
770
                       drawable, x, y, width, height, TRUE, TRUE);
  gimp_pixel_rgn_set_rect (&dest_rgn, dest, x, y, width, height);
771
772
773

  /*  merge the shadow, update the drawable  */
  gimp_drawable_flush (drawable);
774
  gimp_drawable_merge_shadow (drawable->drawable_id, TRUE);
775
  gimp_drawable_update (drawable->drawable_id, x, y, width, height);
776
777
778
779
780

  /* free up buffers */
  g_free (src);
  g_free (dest);
  g_free (mat);
Marc Lehmann's avatar
Marc Lehmann committed
781
}
David Odin's avatar
David Odin committed
782
783

static void
784
preview_update (GimpPreview *preview)
David Odin's avatar
David Odin committed
785
{
786
  GimpDrawable  *drawable;
David Odin's avatar
David Odin committed
787
  glong          bytes;
788
  gint           x, y;
David Odin's avatar
David Odin committed
789
790
791
792
793
794
795
  guchar        *render_buffer;  /* Buffer to hold rendered image */
  gint           width;          /* Width of preview widget */
  gint           height;         /* Height of preview widget */

  GimpPixelRgn   srcPR;           /* Pixel region */
  guchar        *src;
  gboolean       has_alpha;
796
797
  gint           numrad;
  gdouble       *mat;
David Odin's avatar
David Odin committed
798
  gdouble       radius;
David Odin's avatar
David Odin committed
799
800

  /* Get drawable info */
801
802
  drawable =
    gimp_drawable_preview_get_drawable (GIMP_DRAWABLE_PREVIEW (preview));
803
  bytes = drawable->bpp;
David Odin's avatar
David Odin committed
804
805
806
807

  /*
   * Setup for filter...
   */
808
  gimp_preview_get_position (preview, &x, &y);
809
  gimp_preview_get_size (preview, &width, &height);
David Odin's avatar
David Odin committed
810
811

  /* initialize pixel regions */
812
  gimp_pixel_rgn_init (&srcPR, drawable,
813
                       x, y, width, height,
David Odin's avatar
David Odin committed
814
815
                       FALSE, FALSE);
  render_buffer = g_new (guchar, width * height * bytes);
David Odin's avatar
David Odin committed
816

817
818
819
  /*  allocate with extra padding because MMX instructions may read
      more than strictly necessary  */
  src = g_new (guchar, width * height * bytes + 16);
David Odin's avatar
David Odin committed
820

David Odin's avatar
David Odin committed
821
  /* render image */
822
  gimp_pixel_rgn_get_rect (&srcPR, src, x, y, width, height);
823
  has_alpha = gimp_drawable_has_alpha (drawable->drawable_id);
David Odin's avatar
David Odin committed
824
825
826
827

  radius = fabs (bvals.radius) + 1.0;
  numrad = (gint) (radius + 1.0);

828
829
  mat = g_new (gdouble, numrad);
  init_matrix (radius, mat, numrad);
David Odin's avatar
David Odin committed
830
831
832
833
834
835
836
837

  matrixmult (src, render_buffer,
              width, height,
              mat, numrad,
              bytes, has_alpha, bvals.maxdelta, TRUE);

  g_free (mat);
  g_free (src);
David Odin's avatar
David Odin committed
838
839
840
841

  /*
   * Draw the preview image on the screen...
   */
842
  gimp_preview_draw_buffer (preview, render_buffer, width * bytes);
David Odin's avatar
David Odin committed
843
844
845

  g_free (render_buffer);
}