Commit 137e66e4 authored by Ell

buffer: add dedicated allocator for tile data

Add gegl-tile-alloc -- a dedicated allocator for tile data.  The
allocator amortizes the cost of memory allocation, which is
especially high on Windows, over multiple tiles, by allocating
tiles in bulk, in groups called blocks (currently, each block is
roughly 1% of the tile-cache size.)

The allocator is currently limited to tiles whose size is a power
of 2, possibly multiplied by 3 or 5 (to support tiles whose
dimensions are powers of 2, with up to 5 components).  The rest of
the tiles are allocated using the normal allocator.

Move the call to malloc_trim() from GeglBuffer's destructor to
gegl-tile-alloc.  We call it whenever tile data equivalent to ~10%
of the tile-cache size is freed.

Add a tile-alloc-total stat, which reports the total amount of
memory currently used by the tile allocator.
parent e99622d8
......@@ -1283,6 +1283,13 @@ AC_CHECK_FUNC(rint, AC_DEFINE(HAVE_RINT, 1,
[Define to 1 if you have the rint function.]), [
AC_CHECK_LIB(m, rint, [AC_DEFINE(HAVE_RINT)])])
#check for malloc_trim()
AC_CHECK_HEADER([malloc.h],
[AC_CHECK_FUNCS(malloc_trim)])
#check for __builtin_clz()
AX_GCC_BUILTIN(__builtin_clz)
#######################
# Enable extra warnings
......
......@@ -55,6 +55,7 @@ libbuffer_la_SOURCES = \
gegl-sampler-lohalo.c \
gegl-scratch.c \
gegl-tile.c \
gegl-tile-alloc.c \
gegl-tile-source.c \
gegl-tile-storage.c \
gegl-tile-backend.c \
......@@ -94,6 +95,7 @@ libbuffer_la_SOURCES = \
gegl-scratch.h \
gegl-scratch-private.h \
gegl-tile.h \
gegl-tile-alloc.h \
gegl-tile-source.h \
gegl-tile-storage.h \
gegl-tile-backend.h \
......
......@@ -53,10 +53,6 @@
#include <execinfo.h>
#endif
#ifdef __GLIBC__
#include <malloc.h>
#endif
G_DEFINE_TYPE (GeglBuffer, gegl_buffer, GEGL_TYPE_TILE_HANDLER)
......@@ -412,11 +408,6 @@ gegl_buffer_finalize (GObject *object)
g_free (GEGL_BUFFER (object)->path);
g_atomic_int_inc (&de_allocated_buffers);
G_OBJECT_CLASS (parent_class)->finalize (object);
#ifdef __GLIBC__
malloc_trim (1024 * 1024);
#endif
}
......
/* This file is part of GEGL
*
* GEGL is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* GEGL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with GEGL; if not, see <https://www.gnu.org/licenses/>.
*
* Copyright 2019 Ell
*/
#include "config.h"
#include <math.h>
#include <string.h>
#ifdef HAVE_MALLOC_TRIM
#include <malloc.h>
#endif
#include <glib-object.h>
#include "gegl-buffer-config.h"
#include "gegl-memory.h"
#include "gegl-memory-private.h"
#include "gegl-tile-alloc.h"
/* smallest data size handled by the allocator.  a free buffer stores the
 * pointer to the next free buffer of its block in its own data area, so the
 * data area must be able to hold a pointer.
 */
#define GEGL_TILE_MIN_SIZE sizeof (gpointer)
/* log2 of GEGL_TILE_MAX_SIZE; also the number of per-divisor entries in
 * gegl_tile_blocks[].
 */
#define GEGL_TILE_MAX_SIZE_LOG2 24
/* upper bound on the data size of buffers that are served from blocks */
#define GEGL_TILE_MAX_SIZE (1 << GEGL_TILE_MAX_SIZE_LOG2)
/* offset, in bytes, of a buffer's data from the start of its GeglTileBuffer
 * header.
 */
#define GEGL_TILE_BUFFER_DATA_OFFSET GEGL_ALIGNMENT
/* offset, in bytes, of the first buffer of a block from the start of its
 * GeglTileBlock header.
 */
#define GEGL_TILE_BLOCK_BUFFER_OFFSET GEGL_ALIGN (sizeof (GeglTileBlock))
/* size of the buffer area of a block, as a fraction of the configured
 * tile-cache size.
 */
#define GEGL_TILE_BLOCK_SIZE_RATIO 0.01
/* number of blocks, relative to the recorded maximum, that have to be freed
 * before malloc_trim() is called.
 */
#define GEGL_TILE_BLOCKS_PER_TRIM 10
/* value stored in a block-list head while a thread is modifying that list;
 * other threads spin until it is replaced by a real head (or NULL).
 */
#define GEGL_TILE_SENTINEL_BLOCK ((GeglTileBlock *) ~(guintptr) 0)
/* private types */
typedef struct _GeglTileBuffer GeglTileBuffer;
typedef struct _GeglTileBlock GeglTileBlock;
/* header stored GEGL_TILE_BUFFER_DATA_OFFSET bytes in front of the data of
 * every buffer handed out by the allocator.
 */
struct _GeglTileBuffer
{
/* the block this buffer was carved from, or NULL if the buffer was allocated
 * individually through gegl_tile_alloc_fallback().
 */
GeglTileBlock *block;
};
/* the header, plus two `int`s, must fit in the space reserved in front of
 * the data.
 */
G_STATIC_ASSERT (sizeof (GeglTileBuffer) + 2 * sizeof (gint) <=
GEGL_TILE_BUFFER_DATA_OFFSET);
/* header of a block: a single allocation holding many equally-sized buffers */
struct _GeglTileBlock
{
/* address of the list head (an element of gegl_tile_blocks[]) for buffers of
 * this block's size; the same location is used as the lock for the list.
 */
GeglTileBlock * volatile *block_ptr;
/* total size of the block allocation, in bytes, including this header */
guintptr size;
/* first free buffer of the block, or NULL if all buffers are in use; free
 * buffers are chained through a pointer stored in their data area.
 */
GeglTileBuffer *head;
/* number of buffers of this block that are currently handed out */
gint n_allocated;
/* links of the doubly-linked list of blocks that still have free buffers */
GeglTileBlock *next;
GeglTileBlock *prev;
};
/* local function prototypes */
static gint gegl_tile_log2i (guint n);
static GeglTileBlock * gegl_tile_block_new (GeglTileBlock * volatile *block_ptr,
gsize size);
static void gegl_tile_block_free (GeglTileBlock *block,
GeglTileBlock **head_block);
static inline gpointer gegl_tile_buffer_to_data (GeglTileBuffer *buffer);
static inline GeglTileBuffer * gegl_tile_buffer_from_data (gpointer data);
static gpointer gegl_tile_alloc_fallback (gsize size);
/* local variables */
/* odd factors supported by the allocator: only sizes of the form
 * `divisor * 2^j` are served from blocks.
 */
static const gint gegl_tile_divisors[] = {1, 3, 5};
/* gegl_tile_blocks[i][j] is the head of the list of blocks with free buffers
 * for the size class selected by divisor index `i` and exponent `j`.  while a
 * thread modifies a list, its head holds GEGL_TILE_SENTINEL_BLOCK.
 */
static GeglTileBlock *gegl_tile_blocks[G_N_ELEMENTS (gegl_tile_divisors)]
[GEGL_TILE_MAX_SIZE_LOG2];
/* number of blocks currently allocated, over all size classes */
static gint gegl_tile_n_blocks;
/* block-count mark used to decide when to call malloc_trim(): raised when the
 * block count reaches a multiple of GEGL_TILE_BLOCKS_PER_TRIM, and reset to
 * the current count whenever a trim is performed.
 */
static gint gegl_tile_max_n_blocks;
/* total size, in bytes, of all currently allocated blocks */
static guintptr gegl_tile_alloc_total;
/* private functions */
#ifdef HAVE___BUILTIN_CLZ

/* Returns the index of the most significant set bit of `n`, i.e.
 * floor (log2 (n)).  `n` must be nonzero, since __builtin_clz (0) is
 * undefined.
 */
static gint
gegl_tile_log2i (guint n)
{
  return (gint) (8 * sizeof (guint)) - 1 - __builtin_clz (n);
}

#else /* ! HAVE___BUILTIN_CLZ */

/* Returns the index of the most significant set bit of `n`, i.e.
 * floor (log2 (n)), using a binary search over the bit positions: at each
 * step, if anything is left above the lower `shift` bits, those bits are
 * dropped and `shift` is added to the result.  Returns 0 for `n == 0`.
 */
static gint
gegl_tile_log2i (guint n)
{
  guint exponent = 0;
  gint  shift    = 8 * sizeof (guint) / 2;

  while (shift)
    {
      guint upper = n >> shift;

      if (upper)
        {
          exponent += shift;
          n         = upper;
        }

      shift /= 2;
    }

  return exponent;
}

#endif /* HAVE___BUILTIN_CLZ */
/* Allocates a new block of buffers, each with room for `size` bytes of data.
 *
 * `block_ptr` is the address of the list head the block belongs to; it is
 * recorded in the block so that gegl_tile_free() can find the list (and its
 * lock) from any buffer.  The buffer area of the block is
 * GEGL_TILE_BLOCK_SIZE_RATIO of the configured tile-cache size, rounded down
 * to a whole number of buffers.
 *
 * Returns the new block, with all its buffers chained into its free list and
 * with NULL `prev`/`next` links (the caller is responsible for publishing it
 * through `*block_ptr`), or NULL if a block would not hold at least two
 * buffers.
 *
 * The global block counters are shared by all size classes, while the caller
 * only holds the lock of a single list head, so they are updated atomically.
 */
static GeglTileBlock *
gegl_tile_block_new (GeglTileBlock * volatile *block_ptr,
                     gsize                     size)
{
  GeglTileBlock   *block;
  GeglTileBuffer  *buffer;
  GeglTileBuffer **next_buffer;
  gsize            block_size;
  gsize            buffer_size;
  gint             n_blocks;
  gint             i;

  buffer_size = GEGL_ALIGNMENT + GEGL_ALIGN (size);
  block_size  = floor (gegl_buffer_config ()->tile_cache_size *
                       GEGL_TILE_BLOCK_SIZE_RATIO);
  block_size -= block_size % buffer_size;

  if (block_size <= buffer_size)
    return NULL;

  block = gegl_malloc (GEGL_TILE_BLOCK_BUFFER_OFFSET + block_size);

  block->block_ptr   = block_ptr;
  block->size        = GEGL_TILE_BLOCK_BUFFER_OFFSET + block_size;
  block->head        = (GeglTileBuffer *) ((guint8 *) block +
                                           GEGL_TILE_BLOCK_BUFFER_OFFSET);
  block->n_allocated = 0;
  block->prev        = NULL;
  block->next        = NULL;

  /* chain all the buffers of the block into its free list; the link to the
   * next free buffer lives in the (otherwise unused) data area of a buffer.
   */
  buffer = block->head;

  for (i = block_size / buffer_size; i; i--)
    {
      buffer->block = block;

      next_buffer = gegl_tile_buffer_to_data (buffer);

      if (i > 1)
        buffer = (GeglTileBuffer *) ((guint8 *) buffer + buffer_size);
      else
        buffer = NULL;

      *next_buffer = buffer;
    }

  g_atomic_pointer_add (&gegl_tile_alloc_total, (gssize) block->size);

  n_blocks = g_atomic_int_add (&gegl_tile_n_blocks, +1) + 1;

  if (n_blocks % GEGL_TILE_BLOCKS_PER_TRIM == 0)
    {
      gint max_n_blocks;

      /* atomic equivalent of `max_n_blocks = MAX (max_n_blocks, n_blocks)` */
      do
        {
          max_n_blocks = g_atomic_int_get (&gegl_tile_max_n_blocks);
        }
      while (max_n_blocks < n_blocks &&
             ! g_atomic_int_compare_and_exchange (&gegl_tile_max_n_blocks,
                                                  max_n_blocks,
                                                  n_blocks));
    }

  return block;
}

/* Unlinks `block` from its list of blocks and frees it.
 *
 * `head_block` points to the caller's copy of the list head; it is updated
 * if `block` was the first block of the list.  When the number of blocks
 * drops GEGL_TILE_BLOCKS_PER_TRIM below the recorded maximum, the maximum is
 * reset to the current count and, where available, malloc_trim() is called.
 *
 * The global block counters are shared by all size classes, while the caller
 * only holds the lock of a single list head, so they are updated atomically.
 */
static void
gegl_tile_block_free (GeglTileBlock  *block,
                      GeglTileBlock **head_block)
{
  guintptr block_size = block->size;
  gint     n_blocks;

  if (block->prev)
    block->prev->next = block->next;
  else
    *head_block = block->next;

  if (block->next)
    block->next->prev = block->prev;

  gegl_free (block);

  n_blocks = g_atomic_int_add (&gegl_tile_n_blocks, -1) - 1;

  g_atomic_pointer_add (&gegl_tile_alloc_total, -(gssize) block_size);

#ifdef HAVE_MALLOC_TRIM
  for (;;)
    {
      gint max_n_blocks = g_atomic_int_get (&gegl_tile_max_n_blocks);

      if (max_n_blocks - n_blocks < GEGL_TILE_BLOCKS_PER_TRIM)
        break;

      /* only the thread that wins the exchange performs the trim */
      if (g_atomic_int_compare_and_exchange (&gegl_tile_max_n_blocks,
                                             max_n_blocks,
                                             n_blocks))
        {
          malloc_trim (block_size);

          break;
        }
    }
#else
  (void) n_blocks;
#endif
}
/* Returns the address of the data area that belongs to `buffer`, which
 * starts GEGL_TILE_BUFFER_DATA_OFFSET bytes past the buffer header.
 */
static inline gpointer
gegl_tile_buffer_to_data (GeglTileBuffer *buffer)
{
  guint8 *base = (guint8 *) buffer;

  return base + GEGL_TILE_BUFFER_DATA_OFFSET;
}
/* Returns the buffer header that precedes the data area `data`; the inverse
 * of gegl_tile_buffer_to_data().
 */
static inline GeglTileBuffer *
gegl_tile_buffer_from_data (gpointer data)
{
  guint8 *bytes = (guint8 *) data;

  bytes -= GEGL_TILE_BUFFER_DATA_OFFSET;

  return (GeglTileBuffer *) bytes;
}
/* Allocates a stand-alone buffer with room for `size` bytes of data, using
 * gegl_malloc() directly rather than a block.  The buffer header's `block`
 * field is set to NULL, which is how gegl_tile_free() recognizes such
 * buffers.  Returns the address of the data area.
 */
static gpointer
gegl_tile_alloc_fallback (gsize size)
{
  GeglTileBuffer *header;

  header        = gegl_malloc (GEGL_TILE_BUFFER_DATA_OFFSET + size);
  header->block = NULL;

  return gegl_tile_buffer_to_data (header);
}
/* public functions */
/* Allocates a buffer with room for at least `size` bytes of tile data.
 *
 * Sizes of the form `d * 2^j`, with `d` one of gegl_tile_divisors[], that are
 * below GEGL_TILE_MAX_SIZE are served from blocks of equally-sized buffers;
 * every other size, and any size for which a block can't be created, is
 * allocated individually through gegl_tile_alloc_fallback().
 *
 * The returned pointer must be released with gegl_tile_free().
 */
gpointer
gegl_tile_alloc (gsize size)
{
  GeglTileBlock * volatile  *block_ptr;
  GeglTileBlock             *block;
  GeglTileBuffer            *buffer;
  GeglTileBuffer           **next_buffer;
  gint                       n;
  gint                       i;
  gint                       j;

  /* each row of gegl_tile_blocks[] has GEGL_TILE_MAX_SIZE_LOG2 entries, so
   * the valid exponents are 0 .. GEGL_TILE_MAX_SIZE_LOG2 - 1.  a request for
   * exactly GEGL_TILE_MAX_SIZE bytes would yield an exponent of
   * GEGL_TILE_MAX_SIZE_LOG2, indexing past the end of the row, hence `>=`
   * rather than `>`.
   */
  if (size >= GEGL_TILE_MAX_SIZE)
    return gegl_tile_alloc_fallback (size);

  size = MAX (size, GEGL_TILE_MIN_SIZE);

  /* find the largest supported divisor of `size`, leaving its index in `i`
   * (0, i.e. a divisor of 1, if neither 5 nor 3 divides it), and the
   * quotient in `n`.
   */
  n = size;

  for (i = G_N_ELEMENTS (gegl_tile_divisors) - 1; i; i--)
    {
      if (size % gegl_tile_divisors[i] == 0)
        {
          n /= gegl_tile_divisors[i];

          break;
        }
    }

  /* the quotient must be a power of 2 */
  if (n & (n - 1))
    return gegl_tile_alloc_fallback (size);

  j = gegl_tile_log2i (n);

  block_ptr = &gegl_tile_blocks[i][j];

  /* lock the list: spin until the head is not the sentinel, and we manage to
   * replace it with the sentinel.  `block` is the head we took over.
   */
  do
    {
      block = *block_ptr;
    }
  while (block == GEGL_TILE_SENTINEL_BLOCK ||
         ! g_atomic_pointer_compare_and_exchange (block_ptr,
                                                  block,
                                                  GEGL_TILE_SENTINEL_BLOCK));

  if (! block)
    {
      block = gegl_tile_block_new (block_ptr, size);

      if (! block)
        {
          /* unlock the (still empty) list before falling back */
          g_atomic_pointer_set (block_ptr, block);

          return gegl_tile_alloc_fallback (size);
        }
    }

  /* pop the first free buffer of the head block */
  buffer      = block->head;
  next_buffer = gegl_tile_buffer_to_data (buffer);

  block->head = *next_buffer;
  block->n_allocated++;

  /* a block with no free buffers left is dropped from the list; it is linked
   * back by gegl_tile_free() once one of its buffers is released.
   */
  if (! block->head)
    {
      if (block->next)
        block->next->prev = NULL;

      block = block->next;
    }

  /* publish the new head, which also unlocks the list */
  g_atomic_pointer_set (block_ptr, block);

  return gegl_tile_buffer_to_data (buffer);
}
/* Same as gegl_tile_alloc(), but the first `size` bytes of the returned
 * buffer are cleared to zero.
 */
gpointer
gegl_tile_alloc0 (gsize size)
{
  gpointer data;

  data = gegl_tile_alloc (size);

  return memset (data, 0, size);
}
/* Releases a buffer previously returned by gegl_tile_alloc() or
 * gegl_tile_alloc0().  Passing NULL is a no-op.
 *
 * Fallback buffers (whose header has a NULL block) are freed directly.
 * Buffers that belong to a block are pushed back onto the block's free list;
 * the block itself is freed once none of its buffers remain in use.
 */
void
gegl_tile_free (gpointer ptr)
{
GeglTileBlock * volatile *block_ptr;
GeglTileBlock *block;
GeglTileBlock *head_block;
GeglTileBuffer *buffer;
if (! ptr)
return;
buffer = gegl_tile_buffer_from_data (ptr);
/* stand-alone buffer, allocated by gegl_tile_alloc_fallback() */
if (! buffer->block)
{
gegl_free (buffer);
return;
}
block = buffer->block;
block_ptr = block->block_ptr;
/* lock the list the block belongs to: spin until the head is not the
 * sentinel, and we manage to replace it with the sentinel.  `head_block` is
 * our private copy of the head while the list is locked.
 */
do
{
head_block = *block_ptr;
}
while (head_block == GEGL_TILE_SENTINEL_BLOCK ||
! g_atomic_pointer_compare_and_exchange (block_ptr,
head_block,
GEGL_TILE_SENTINEL_BLOCK));
block->n_allocated--;
if (block->n_allocated == 0)
{
/* last buffer in use: unlink and free the whole block; this updates
 * `head_block` if the block was the head of the list.
 */
gegl_tile_block_free (block, &head_block);
}
else
{
/* push the buffer onto the block's free list; the link is stored in the
 * buffer's data area.
 */
GeglTileBuffer **next = gegl_tile_buffer_to_data (buffer);
*next = block->head;
/* the block had no free buffers, which means gegl_tile_alloc() dropped it
 * from the list; link it back as the new head.
 */
if (! block->head)
{
block->prev = NULL;
block->next = head_block;
if (head_block)
head_block->prev = block;
head_block = block;
}
block->head = buffer;
}
/* publish the (possibly new) head, which also unlocks the list */
g_atomic_pointer_set (block_ptr, head_block);
}
/* public functions (stats) */
/* Returns the total size, in bytes, of the blocks currently held by the tile
 * allocator, as tracked in gegl_tile_alloc_total.
 */
guint64
gegl_tile_alloc_get_total (void)
{
  guint64 total = gegl_tile_alloc_total;

  return total;
}
/* This file is part of GEGL
*
* GEGL is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* GEGL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with GEGL; if not, see <https://www.gnu.org/licenses/>.
*
* Copyright 2019 Ell
*/
#ifndef __GEGL_TILE_ALLOC_H__
#define __GEGL_TILE_ALLOC_H__
/* the buffer returned by gegl_tile_alloc() and gegl_tile_alloc0() is
* guaranteed to have room for two `int`s in front of the buffer.
*/
/* allocates a buffer with room for at least `size` bytes of tile data.  the
 * result must be released with gegl_tile_free().
 */
gpointer gegl_tile_alloc (gsize size) G_GNUC_MALLOC;
/* same as gegl_tile_alloc(), with the first `size` bytes cleared to zero. */
gpointer gegl_tile_alloc0 (gsize size) G_GNUC_MALLOC;
/* releases a buffer returned by gegl_tile_alloc() or gegl_tile_alloc0().
 * passing NULL is a no-op.
 */
void gegl_tile_free (gpointer ptr);
/* returns the total size, in bytes, of the blocks currently held by the
 * allocator.  buffers that were allocated individually, outside of a block,
 * are not included.
 */
guint64 gegl_tile_alloc_get_total (void);
#endif /* __GEGL_TILE_ALLOC_H__ */
......@@ -24,6 +24,7 @@
#include "gegl-types-internal.h"
#include "buffer/gegl-buffer-types.h"
#include "buffer/gegl-scratch-private.h"
#include "buffer/gegl-tile-alloc.h"
#include "buffer/gegl-tile-handler-cache.h"
#include "buffer/gegl-tile-backend-swap.h"
#include "buffer/gegl-tile-handler-zoom.h"
......@@ -50,6 +51,7 @@ enum
PROP_SWAP_WRITING,
PROP_SWAP_WRITE_TOTAL,
PROP_ZOOM_TOTAL,
PROP_TILE_ALLOC_TOTAL,
PROP_SCRATCH_TOTAL
};
......@@ -199,6 +201,13 @@ gegl_stats_class_init (GeglStatsClass *klass)
0, G_MAXUINT64, 0,
G_PARAM_READABLE));
g_object_class_install_property (object_class, PROP_TILE_ALLOC_TOTAL,
g_param_spec_uint64 ("tile-alloc-total",
"Tile allocator total",
"Total size of tile-allocator memory",
0, G_MAXUINT64, 0,
G_PARAM_READABLE));
g_object_class_install_property (object_class, PROP_SCRATCH_TOTAL,
g_param_spec_uint64 ("scratch-total",
"Scratch total",
......@@ -302,6 +311,10 @@ gegl_stats_get_property (GObject *object,
g_value_set_uint64 (value, gegl_tile_handler_zoom_get_total ());
break;
case PROP_TILE_ALLOC_TOTAL:
g_value_set_uint64 (value, gegl_tile_alloc_get_total ());
break;
case PROP_SCRATCH_TOTAL:
g_value_set_uint64 (value, gegl_scratch_get_total ());
break;
......
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_gcc_builtin.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_BUILTIN(BUILTIN)
#
# DESCRIPTION
#
# This macro checks if the compiler supports one of GCC's built-in
# functions; many other compilers also provide those same built-ins.
#
# The BUILTIN parameter is the name of the built-in function.
#
# If BUILTIN is supported define HAVE_<BUILTIN>. Keep in mind that since
# builtins usually start with two underscores they will be copied over
# into the HAVE_<BUILTIN> definition (e.g. HAVE___BUILTIN_EXPECT for
# __builtin_expect()).
#
# The macro caches its result in the ax_cv_have_<BUILTIN> variable (e.g.
# ax_cv_have___builtin_expect).
#
# The macro currently supports the following built-in functions:
#
# __builtin_assume_aligned
# __builtin_bswap16
# __builtin_bswap32
# __builtin_bswap64
# __builtin_choose_expr
# __builtin___clear_cache
# __builtin_clrsb
# __builtin_clrsbl
# __builtin_clrsbll
# __builtin_clz
# __builtin_clzl
# __builtin_clzll
# __builtin_complex
# __builtin_constant_p
# __builtin_ctz
# __builtin_ctzl
# __builtin_ctzll
# __builtin_expect
# __builtin_ffs
# __builtin_ffsl
# __builtin_ffsll
# __builtin_fpclassify
# __builtin_huge_val
# __builtin_huge_valf
# __builtin_huge_vall
# __builtin_inf
# __builtin_infd128
# __builtin_infd32
# __builtin_infd64
# __builtin_inff
# __builtin_infl
# __builtin_isinf_sign
# __builtin_nan
# __builtin_nand128
# __builtin_nand32
# __builtin_nand64
# __builtin_nanf
# __builtin_nanl
# __builtin_nans
# __builtin_nansf
# __builtin_nansl
# __builtin_object_size
# __builtin_parity
# __builtin_parityl
# __builtin_parityll
# __builtin_popcount
# __builtin_popcountl
# __builtin_popcountll
# __builtin_powi
# __builtin_powif
# __builtin_powil
# __builtin_prefetch
# __builtin_trap
# __builtin_types_compatible_p
# __builtin_unreachable
#
# Unsupported built-ins will be tested with an empty parameter set and the
# result of the check might be wrong or meaningless so use with care.
#
# LICENSE
#
# Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 6
dnl AX_GCC_BUILTIN takes the name of a compiler built-in as its only argument,
dnl link-tests a call to it, and defines the matching HAVE_ symbol on success.
AC_DEFUN([AX_GCC_BUILTIN], [
dnl Alias ac_var to the cache variable named after the built-in.
AS_VAR_PUSHDEF([ac_var], [ax_cv_have_$1])
dnl Link a program whose body is a single call to the built-in, with dummy
dnl arguments picked from the table below, and cache yes or no.
AC_CACHE_CHECK([for $1], [ac_var], [
AC_LINK_IFELSE([AC_LANG_PROGRAM([], [
m4_case([$1],
[__builtin_assume_aligned], [$1("", 0)],
[__builtin_bswap16], [$1(0)],
[__builtin_bswap32], [$1(0)],
[__builtin_bswap64], [$1(0)],
[__builtin_choose_expr], [$1(0, 0, 0)],
[__builtin___clear_cache], [$1("", "")],
[__builtin_clrsb], [$1(0)],
[__builtin_clrsbl], [$1(0)],
[__builtin_clrsbll], [$1(0)],
[__builtin_clz], [$1(0)],
[__builtin_clzl], [$1(0)],
[__builtin_clzll], [$1(0)],
[__builtin_complex], [$1(0.0, 0.0)],
[__builtin_constant_p], [$1(0)],
[__builtin_ctz], [$1(0)],
[__builtin_ctzl], [$1(0)],
[__builtin_ctzll], [$1(0)],
[__builtin_expect], [$1(0, 0)],
[__builtin_ffs], [$1(0)],
[__builtin_ffsl], [$1(0)],
[__builtin_ffsll], [$1(0)],
[__builtin_fpclassify], [$1(0, 1, 2, 3, 4, 0.0)],
[__builtin_huge_val], [$1()],
[__builtin_huge_valf], [$1()],
[__builtin_huge_vall], [$1()],
[__builtin_inf], [$1()],
[__builtin_infd128], [$1()],
[__builtin_infd32], [$1()],
[__builtin_infd64], [$1()],
[__builtin_inff], [$1()],
[__builtin_infl], [$1()],
[__builtin_isinf_sign], [$1(0.0)],
[__builtin_nan], [$1("")],
[__builtin_nand128], [$1("")],
[__builtin_nand32], [$1("")],
[__builtin_nand64], [$1("")],
[__builtin_nanf], [$1("")],
[__builtin_nanl], [$1("")],
[__builtin_nans], [$1("")],
[__builtin_nansf], [$1("")],
[__builtin_nansl], [$1("")],
[__builtin_object_size], [$1("", 0)],
[__builtin_parity], [$1(0)],
[__builtin_parityl], [$1(0)],
[__builtin_parityll], [$1(0)],
[__builtin_popcount], [$1(0)],
[__builtin_popcountl], [$1(0)],
[__builtin_popcountll], [$1(0)],
[__builtin_powi], [$1(0, 0)],
[__builtin_powif], [$1(0, 0)],
[__builtin_powil], [$1(0, 0)],
[__builtin_prefetch], [$1("")],
[__builtin_trap], [$1()],
[__builtin_types_compatible_p], [$1(int, int)],
[__builtin_unreachable], [$1()],
[m4_warn([syntax], [Unsupported built-in $1, the test may fail])
$1()]
)
])],
[AS_VAR_SET([ac_var], [yes])],
[AS_VAR_SET([ac_var], [no])])
])
dnl Define the preprocessor symbol only when the link test succeeded.
AS_IF([test yes = AS_VAR_GET([ac_var])],
[AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_$1), 1,
[Define to 1 if the system has the `$1' built-in function])], [])
AS_VAR_POPDEF([ac_var])
])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment