buffer: use internal floorf/ceilf implementations

This avoids both function call overhead and extra checks for values that
are outside MININT/MAXINT range, for the uses inside GeglBuffer we only
care about coordinates that are representable as integers - thus these
inline-able shortcut versions are good enough for us.

The speedup seems to consistently make all samplers about 10% faster
according to the tests in perf/
parent e0d79197
Pipeline #82503 failed with stages
in 19 minutes and 23 seconds
...@@ -141,11 +141,6 @@ void _gegl_init_u8_lut (void) ...@@ -141,11 +141,6 @@ void _gegl_init_u8_lut (void)
lut_inited = 1; lut_inited = 1;
} }
static inline int int_floorf (float x)
{
int i = (int)x; /* truncate */
return i - ( i > x ); /* convert trunc to floor */
}
static void static void
...@@ -1247,14 +1242,14 @@ gegl_resample_nearest (guchar *dst, ...@@ -1247,14 +1242,14 @@ gegl_resample_nearest (guchar *dst,
for (x = 0; x < dst_rect->width; x++) for (x = 0; x < dst_rect->width; x++)
{ {
const gfloat sx = (dst_rect->x + .5 + x) / scale - src_rect->x; const gfloat sx = (dst_rect->x + .5 + x) / scale - src_rect->x;
jj[x] = int_floorf (sx + GEGL_SCALE_EPSILON) * bpp; jj[x] = int_floorf (sx ) * bpp;
} }
#define IMPL(...) do{ \ #define IMPL(...) do{ \
for (y = 0; y < dst_rect->height; y++)\ for (y = 0; y < dst_rect->height; y++)\
{\ {\
const gfloat sy = (dst_rect->y + .5 + y) / scale - src_rect->y;\ const gfloat sy = (dst_rect->y + .5 + y) / scale - src_rect->y;\
const gint ii = int_floorf (sy + GEGL_SCALE_EPSILON);\ const gint ii = int_floorf (sy);\
gint *ijj = &jj[0];\ gint *ijj = &jj[0];\
guchar *d = &dst[y*dst_stride];\ guchar *d = &dst[y*dst_stride];\
const guchar *s = &src[ii * src_stride];\ const guchar *s = &src[ii * src_stride];\
......
...@@ -215,6 +215,7 @@ void gegl_resample_nearest (guchar *dst, ...@@ -215,6 +215,7 @@ void gegl_resample_nearest (guchar *dst,
GeglDownscale2x2Fun gegl_downscale_2x2_get_fun (const Babl *format); GeglDownscale2x2Fun gegl_downscale_2x2_get_fun (const Babl *format);
G_END_DECLS G_END_DECLS
#endif /* __GEGL_ALGORITHMS_H__ */ #endif /* __GEGL_ALGORITHMS_H__ */
...@@ -1992,10 +1992,10 @@ _gegl_get_required_for_scale (const GeglRectangle *roi, ...@@ -1992,10 +1992,10 @@ _gegl_get_required_for_scale (const GeglRectangle *roi,
return *roi; return *roi;
else else
{ {
gint x1 = floorf (roi->x / scale + GEGL_SCALE_EPSILON); gint x1 = int_floorf (roi->x / scale + GEGL_SCALE_EPSILON);
gint x2 = ceilf ((roi->x + roi->width) / scale - GEGL_SCALE_EPSILON); gint x2 = int_ceilf ((roi->x + roi->width) / scale - GEGL_SCALE_EPSILON);
gint y1 = floorf (roi->y / scale + GEGL_SCALE_EPSILON); gint y1 = int_floorf (roi->y / scale + GEGL_SCALE_EPSILON);
gint y2 = ceilf ((roi->y + roi->height) / scale - GEGL_SCALE_EPSILON); gint y2 = int_ceilf ((roi->y + roi->height) / scale - GEGL_SCALE_EPSILON);
gint pad = (1.0 / scale > 1.0) ? ceilf (1.0 / scale) : 1; gint pad = (1.0 / scale > 1.0) ? ceilf (1.0 / scale) : 1;
...@@ -2098,8 +2098,8 @@ _gegl_buffer_get_unlocked (GeglBuffer *buffer, ...@@ -2098,8 +2098,8 @@ _gegl_buffer_get_unlocked (GeglBuffer *buffer,
float scale_orig = scale; float scale_orig = scale;
gint level = 0; gint level = 0;
void *sample_buf; void *sample_buf;
gint x1 = floorf (rect->x / scale_orig + GEGL_SCALE_EPSILON); gint x1 = int_floorf (rect->x / scale_orig + GEGL_SCALE_EPSILON);
gint x2 = ceilf ((rect->x + rect->width) / scale_orig - GEGL_SCALE_EPSILON); gint x2 = int_ceilf ((rect->x + rect->width) / scale_orig - GEGL_SCALE_EPSILON);
int max_bytes_per_row = ((rect->width+1) * bpp * 2); int max_bytes_per_row = ((rect->width+1) * bpp * 2);
int allocated = 0; int allocated = 0;
gint interpolation = (flags & GEGL_BUFFER_FILTER_ALL); gint interpolation = (flags & GEGL_BUFFER_FILTER_ALL);
...@@ -2118,8 +2118,8 @@ _gegl_buffer_get_unlocked (GeglBuffer *buffer, ...@@ -2118,8 +2118,8 @@ _gegl_buffer_get_unlocked (GeglBuffer *buffer,
{ {
GeglRectangle rect0; GeglRectangle rect0;
rect0.x = floorf (rect->x / scale_orig + GEGL_SCALE_EPSILON); rect0.x = int_floorf (rect->x / scale_orig + GEGL_SCALE_EPSILON);
rect0.y = floorf (rect->y / scale_orig + GEGL_SCALE_EPSILON); rect0.y = int_floorf (rect->y / scale_orig + GEGL_SCALE_EPSILON);
rect0.width = ceilf ((rect->x + rect->width) / scale_orig - rect0.width = ceilf ((rect->x + rect->width) / scale_orig -
GEGL_SCALE_EPSILON) - GEGL_SCALE_EPSILON) -
rect0.x; rect0.x;
...@@ -2170,8 +2170,8 @@ _gegl_buffer_get_unlocked (GeglBuffer *buffer, ...@@ -2170,8 +2170,8 @@ _gegl_buffer_get_unlocked (GeglBuffer *buffer,
{ {
GeglRectangle sample_rect; GeglRectangle sample_rect;
gint buf_width, buf_height; gint buf_width, buf_height;
gint y1 = floorf (rect2.y / scale_orig + GEGL_SCALE_EPSILON); gint y1 = int_floorf (rect2.y / scale_orig + GEGL_SCALE_EPSILON);
gint y2 = ceilf ((rect2.y + rect2.height) / scale_orig - GEGL_SCALE_EPSILON); gint y2 = int_ceilf ((rect2.y + rect2.height) / scale_orig - GEGL_SCALE_EPSILON);
scale = scale_orig; scale = scale_orig;
while (scale <= 0.5) while (scale <= 0.5)
......
...@@ -69,6 +69,17 @@ GEGL_CACHED_BABL(format, yA_linear_float, "YaA float") ...@@ -69,6 +69,17 @@ GEGL_CACHED_BABL(format, yA_linear_float, "YaA float")
#define GEGL_ALLOCA_THRESHOLD (1024*1024/2) #define GEGL_ALLOCA_THRESHOLD (1024*1024/2)
#endif #endif
static inline int int_floorf (float x)
{
int i = (int)x; /* truncate */
return i - ( i > x ); /* convert trunc to floor */
}
static inline int int_ceilf (float x)
{
return -int_floorf(-(x));
}
//G_END_DECLS //G_END_DECLS
#endif /* __GEGL_BUFFER_FORMATS_H__ */ #endif /* __GEGL_BUFFER_FORMATS_H__ */
...@@ -181,8 +181,8 @@ gegl_sampler_cubic_interpolate ( GeglSampler *self, ...@@ -181,8 +181,8 @@ gegl_sampler_cubic_interpolate ( GeglSampler *self,
const double iabsolute_x = (double) absolute_x - 0.5; const double iabsolute_x = (double) absolute_x - 0.5;
const double iabsolute_y = (double) absolute_y - 0.5; const double iabsolute_y = (double) absolute_y - 0.5;
const gint ix = floorf (iabsolute_x); const gint ix = int_floorf (iabsolute_x);
const gint iy = floorf (iabsolute_y); const gint iy = int_floorf (iabsolute_y);
/* /*
* x is the x-coordinate of the sampling point relative to the * x is the x-coordinate of the sampling point relative to the
......
...@@ -101,8 +101,8 @@ gegl_sampler_linear_interpolate ( GeglSampler *self, ...@@ -101,8 +101,8 @@ gegl_sampler_linear_interpolate ( GeglSampler *self,
const float iabsolute_x = (float) absolute_x - 0.5; const float iabsolute_x = (float) absolute_x - 0.5;
const float iabsolute_y = (float) absolute_y - 0.5; const float iabsolute_y = (float) absolute_y - 0.5;
const gint ix = floorf (iabsolute_x); const gint ix = int_floorf (iabsolute_x);
const gint iy = floorf (iabsolute_y); const gint iy = int_floorf (iabsolute_y);
/* /*
* Point the data tile pointer to the first channel of the top_left * Point the data tile pointer to the first channel of the top_left
......
...@@ -921,28 +921,28 @@ gegl_sampler_lohalo_get ( GeglSampler* restrict self, ...@@ -921,28 +921,28 @@ gegl_sampler_lohalo_get ( GeglSampler* restrict self,
const gint out_left_0 = const gint out_left_0 =
LOHALO_MAX LOHALO_MAX
( (
(gint) ceilf ( (double) ( x_0 - bounding_box_half_width ) ) (gint) int_ceilf ( (float) ( x_0 - bounding_box_half_width ) )
, ,
-LOHALO_OFFSET_0 -LOHALO_OFFSET_0
); );
const gint out_rite_0 = const gint out_rite_0 =
LOHALO_MIN LOHALO_MIN
( (
(gint) floorf ( (double) ( x_0 + bounding_box_half_width ) ) (gint) int_floorf ( (float) ( x_0 + bounding_box_half_width ) )
, ,
LOHALO_OFFSET_0 LOHALO_OFFSET_0
); );
const gint out_top_0 = const gint out_top_0 =
LOHALO_MAX LOHALO_MAX
( (
(gint) ceilf ( (double) ( y_0 - bounding_box_half_height ) ) (gint) int_ceilf ( (float) ( y_0 - bounding_box_half_height ) )
, ,
-LOHALO_OFFSET_0 -LOHALO_OFFSET_0
); );
const gint out_bot_0 = const gint out_bot_0 =
LOHALO_MIN LOHALO_MIN
( (
(gint) floorf ( (double) ( y_0 + bounding_box_half_height ) ) (gint) int_floorf ( (float) ( y_0 + bounding_box_half_height ) )
, ,
LOHALO_OFFSET_0 LOHALO_OFFSET_0
); );
......
...@@ -211,7 +211,7 @@ gegl_sampler_nearest_get ( GeglSampler* restrict sampler, ...@@ -211,7 +211,7 @@ gegl_sampler_nearest_get ( GeglSampler* restrict sampler,
GeglAbyssPolicy repeat_mode) GeglAbyssPolicy repeat_mode)
{ {
gegl_sampler_get_pixel (sampler, gegl_sampler_get_pixel (sampler,
floorf(absolute_x), floorf(absolute_y), int_floorf(absolute_x), int_floorf(absolute_y),
output, repeat_mode); output, repeat_mode);
} }
......
...@@ -1845,28 +1845,28 @@ gegl_sampler_nohalo_get ( GeglSampler* restrict self, ...@@ -1845,28 +1845,28 @@ gegl_sampler_nohalo_get ( GeglSampler* restrict self,
const gint out_left_0 = const gint out_left_0 =
NOHALO_MAX NOHALO_MAX
( (
(gint) ceilf ( (double) ( x_0 - bounding_box_half_width ) ) (gint) int_ceilf ( (float) ( x_0 - bounding_box_half_width ) )
, ,
-NOHALO_OFFSET_0 -NOHALO_OFFSET_0
); );
const gint out_rite_0 = const gint out_rite_0 =
NOHALO_MIN NOHALO_MIN
( (
(gint) floorf ( (double) ( x_0 + bounding_box_half_width ) ) (gint) int_floorf ( (float) ( x_0 + bounding_box_half_width ) )
, ,
NOHALO_OFFSET_0 NOHALO_OFFSET_0
); );
const gint out_top_0 = const gint out_top_0 =
NOHALO_MAX NOHALO_MAX
( (
(gint) ceilf ( (double) ( y_0 - bounding_box_half_height ) ) (gint) int_ceilf ( (float) ( y_0 - bounding_box_half_height ) )
, ,
-NOHALO_OFFSET_0 -NOHALO_OFFSET_0
); );
const gint out_bot_0 = const gint out_bot_0 =
NOHALO_MIN NOHALO_MIN
( (
(gint) floorf ( (double) ( y_0 + bounding_box_half_height ) ) (gint) int_floorf ( (float) ( y_0 + bounding_box_half_height ) )
, ,
NOHALO_OFFSET_0 NOHALO_OFFSET_0
); );
......
...@@ -179,7 +179,7 @@ gegl_sampler_get (GeglSampler *self, ...@@ -179,7 +179,7 @@ gegl_sampler_get (GeglSampler *self,
if (self->lvel) if (self->lvel)
{ {
double factor = 1.0 / (1 << self->lvel); double factor = 1.0 / (1 << self->lvel);
GeglRectangle rect={floorf (x * factor), floorf (y * factor),1,1}; GeglRectangle rect={int_floorf (x * factor), int_floorf (y * factor),1,1};
gegl_buffer_get (self->buffer, &rect, factor, self->format, output, GEGL_AUTO_ROWSTRIDE, repeat_mode); gegl_buffer_get (self->buffer, &rect, factor, self->format, output, GEGL_AUTO_ROWSTRIDE, repeat_mode);
return; return;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment