Commit 61e3edeb authored by Martin Guy's avatar Martin Guy Committed by Bastien Nocera
Browse files

pixops: Add two-step scaler optimization

As scaling becomes incredibly slow at high reduction factors, detect the
pathological cases and do the scaling in two steps instead.

https://bugzilla.gnome.org/show_bug.cgi?id=80925
parent 9afcd69e
......@@ -1745,6 +1745,139 @@ make_weights (PixopsFilter *filter,
}
}
/* Two-step scaler begins */
/* make_filter_table() bloats out in VM usage and consumes 100% CPU for
* tens of seconds when downscaling by a large factor.
* https://bugzilla.gnome.org/show_bug.cgi?id=80925
* We work round this by doing extreme reductions in two steps.
*
* The excessive CPU usage is accompanied by an excessive RAM usage because
* make_weights() allocates two arrays of weights proportional in size to
* n_x = (1 / scale_x + 3) and n_y = (1 / scale_y + 3), then make_filter_table()
* allocates and fills an array of SUBSAMPLE * SUBSAMPLE * n_x * n_y doubles.
* Empirically, on machines with plenty of RAM, the execution time slopes upward
* when n_filters > 1000.
* SUBSAMPLE is 16 so each filter takes 16 x 16 doubles (8 bytes) = 2kb RAM.
* Limiting it to 1000 filters limits the scaler's RAM consumption to about 2MB
* which should be OK on machines with relatively little memory.
*
* GDK_INTER_BILINEAR, GDK_INTERP_TILES and GDK_INTER_HYPER all have
* similar symptoms; only GDK_INTERP_NEAREST does not need this trick.
**/
#define MAX_FILTERS 1000
/* Check whether prescaling is necessary to avoid the bug */
static gboolean
need_to_prescale (double scale_x,
double scale_y,
PixopsInterpType interp_type)
{
int n_x, n_y; /* See make_weights() */
/* The testsuite sets this to compare the results with and without it. */
if (g_getenv ("GDK_PIXBUF_DISABLE_TWO_STEP_SCALER"))
return FALSE;
/* Calculate the number of weights created in make_weights() */
switch (interp_type) {
case PIXOPS_INTERP_HYPER:
n_x = ceil (1 / scale_x + 3);
n_y = ceil (1 / scale_y + 3);
break;
case PIXOPS_INTERP_TILES:
case PIXOPS_INTERP_BILINEAR:
n_x = ceil (1 / scale_x + 1);
n_y = ceil (1 / scale_y + 1);
break;
case PIXOPS_INTERP_NEAREST:
/* Doesn't need the optimization */
return FALSE;
}
/* Limit the number of filters created by make_filter_table(). */
return (n_x * n_y > MAX_FILTERS);
}
/* Prescale the source image.
* If successful, it changes the source buffer's parameters to reflect the
* half-scaled image and the scaling factors to reflect the scaling left to do.
* It returns a pointer to the new image data or NULL, so that the caller knows
* whether they have to free the temporary buffer or not.
*/
static guchar *
prescale (const guchar **src_bufp,
int *src_widthp,
int *src_heightp,
int *src_rowstridep,
int src_channels,
gboolean src_has_alpha,
double *scale_xp,
double *scale_yp,
PixopsInterpType interp_type)
{
/* Give local names to parameters that may be modified */
const guchar *src_buf = *src_bufp;
int src_width = *src_widthp;
int src_height = *src_heightp;
int src_rowstride = *src_rowstridep;
double scale_x = *scale_xp;
double scale_y = *scale_yp;
/* How much we prescale each axis by */
double prescale_x, prescale_y;
/* The prescaled image */
int tmp_width, tmp_height;
int tmp_rowstride;
int tmp_channels;
gboolean tmp_has_alpha;
guchar *tmp_buf;
/* The time taken by make_filter_table() is roughly proportional to
* 1/scale_x * 1/scale_y, i.e. to the area reduction factor, so we
* reduce the image in two steps, each of which reduces the total area
* by the same factor. */
prescale_x = sqrt (scale_x);
prescale_y = sqrt (scale_y);
/* Scale the whole source image into a top-left-aligned temporary pixbuf.
* render_[xy][01] are done in the final scaling, not here, as they are
* measured in the coordinate system of the scaled image. */
tmp_width = lrint (src_width * prescale_x);
tmp_height = lrint (src_height * prescale_y);
/* We are below the gdk_ interface, so create the temp image manually.
* Code copied from gdk_pixbuf_new() */
tmp_channels = src_channels;
tmp_has_alpha = src_has_alpha;
tmp_rowstride = ((tmp_width * tmp_channels) + 3) & ~3;
tmp_buf = g_try_malloc_n (tmp_height, tmp_rowstride);
if (!tmp_buf)
return NULL; /* Skip the prescaling */
/* Prescale to an intermediate size */
_pixops_scale (tmp_buf, tmp_width, tmp_height, tmp_rowstride,
tmp_channels, tmp_has_alpha, src_buf, src_width,
src_height, src_rowstride, src_channels, src_has_alpha,
0, 0, tmp_width, tmp_height, 0.0, 0.0,
prescale_x, prescale_y,
interp_type);
/* The second call to the scaler reads from the prescaled image */
*src_bufp = tmp_buf;
*src_widthp = tmp_width;
*src_heightp = tmp_height;
*src_rowstridep = tmp_rowstride;
/* Calculate how much scaling is left to do */
*scale_xp /= prescale_x;
*scale_yp /= prescale_y;
return tmp_buf;
}
/* End of two-step scaler */
static void
_pixops_composite_color_real (guchar *dest_buf,
int render_x0,
......@@ -1772,6 +1905,7 @@ _pixops_composite_color_real (guchar *dest_buf,
{
PixopsFilter filter;
PixopsLineFunc line_func;
guchar *tmp_buf = NULL;
#ifdef USE_MMX
gboolean found_mmx = _pixops_have_mmx ();
......@@ -1794,6 +1928,11 @@ _pixops_composite_color_real (guchar *dest_buf,
check_size, color1, color2);
return;
}
if (need_to_prescale (scale_x, scale_y, interp_type))
tmp_buf = prescale (&src_buf, &src_width, &src_height, &src_rowstride,
src_channels, src_has_alpha,
&scale_x, &scale_y, interp_type);
filter.overall_alpha = overall_alpha / 255.;
if (!make_weights (&filter, interp_type, scale_x, scale_y))
......@@ -1816,6 +1955,8 @@ _pixops_composite_color_real (guchar *dest_buf,
g_free (filter.x.weights);
g_free (filter.y.weights);
if (tmp_buf)
g_free (tmp_buf);
}
void
......@@ -1925,6 +2066,7 @@ _pixops_composite_real (guchar *dest_buf,
{
PixopsFilter filter;
PixopsLineFunc line_func;
guchar *tmp_buf = NULL;
#ifdef USE_MMX
gboolean found_mmx = _pixops_have_mmx ();
......@@ -1950,6 +2092,11 @@ _pixops_composite_real (guchar *dest_buf,
src_has_alpha, scale_x, scale_y, overall_alpha);
return;
}
if (need_to_prescale (scale_x, scale_y, interp_type))
tmp_buf = prescale (&src_buf, &src_width, &src_height, &src_rowstride,
src_channels, src_has_alpha,
&scale_x, &scale_y, interp_type);
filter.overall_alpha = overall_alpha / 255.;
if (!make_weights (&filter, interp_type, scale_x, scale_y))
......@@ -1976,6 +2123,8 @@ _pixops_composite_real (guchar *dest_buf,
g_free (filter.x.weights);
g_free (filter.y.weights);
if (tmp_buf)
g_free (tmp_buf);
}
void
......@@ -2337,6 +2486,7 @@ _pixops_scale_real (guchar *dest_buf,
{
PixopsFilter filter;
PixopsLineFunc line_func;
guchar *tmp_buf = NULL; /* Temporary image for two-step scaling */
#ifdef USE_MMX
gboolean found_mmx = _pixops_have_mmx ();
......@@ -2358,6 +2508,11 @@ _pixops_scale_real (guchar *dest_buf,
scale_x, scale_y);
return;
}
if (need_to_prescale (scale_x, scale_y, interp_type))
tmp_buf = prescale (&src_buf, &src_width, &src_height, &src_rowstride,
src_channels, src_has_alpha,
&scale_x, &scale_y, interp_type);
filter.overall_alpha = 1.0;
if (!make_weights (&filter, interp_type, scale_x, scale_y))
......@@ -2383,6 +2538,7 @@ _pixops_scale_real (guchar *dest_buf,
g_free (filter.x.weights);
g_free (filter.y.weights);
g_clear_pointer (&tmp_buf, g_free);
}
void
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment