Commit e1d01be8 authored by Ell's avatar Ell

gegl-parallel: improve optimal thread-count calculation

Previously, the number of threads used by
gegl_parallel_distribute_{range,area}() was proportional to the
number of elements to be processed, such that each thread processed
at least a user-provided minimal number of elements.  This,
however, fails to take into account the fact that each additional
thread lowers the effective cost of processing each additional
element, since processing is spread over more threads, and
therefore more elements are necessary to justify each additional
thread.

To find the optimal number of threads to use, we assume that the
cost of processing the elements is proportional to the number of
elements to be processed by each thread, and that each thread
additional incurs a fixed cost.  This cost is specified by a user-
provided parameter, relative to the cost of processing a single
element, replacing the old minimal per-thread element-count
paramter (it is expected, however, that this parameter will take
the same value as the old parameter; in particular, the minimal
number of per-thread elements for using two threads equals the the
cost.)

In other words, the cost of processing n elements, using t threads,
with a fixed realtive per-thread cost c, is assumed to be
proportional to:

  n / t + c * t

The number of threads, t, that minimize this cost, for a given per-
thread cost, c, is roughly proportional to the square root of the
number of elements, n.
parent f4128b16
......@@ -18,6 +18,8 @@
#include "config.h"
#include <math.h>
#include <glib.h>
#include "gegl.h"
......@@ -51,13 +53,16 @@ typedef struct
/* local function prototypes */
static void gegl_parallel_notify_threads (GeglConfig *config);
static void gegl_parallel_notify_threads (GeglConfig *config);
static void gegl_parallel_set_n_threads (gint n_threads,
gboolean finish_tasks);
static void gegl_parallel_set_n_threads (gint n_threads,
gboolean finish_tasks);
static void gegl_parallel_distribute_set_n_threads (gint n_threads);
static gpointer gegl_parallel_distribute_thread_func (GeglParallelDistributeThread *thread);
static void gegl_parallel_distribute_set_n_threads (gint n_threads);
static gpointer gegl_parallel_distribute_thread_func (GeglParallelDistributeThread *thread);
static inline gint gegl_parallel_distribute_get_optimal_n_threads (gdouble n_elements,
gdouble thread_cost);
/* local variables */
......@@ -184,29 +189,35 @@ gegl_parallel_distribute_range_func (gint i,
void
gegl_parallel_distribute_range (gsize size,
gsize min_sub_size,
gdouble thread_cost,
GeglParallelDistributeRangeFunc func,
gpointer user_data)
{
GeglParallelDistributeRangeData data;
gsize n = size;
gint n_threads;
g_return_if_fail (func != NULL);
if (size == 0)
return;
if (min_sub_size > 1)
n /= min_sub_size;
n_threads = gegl_parallel_distribute_get_optimal_n_threads (
size,
thread_cost);
if (n_threads == 1)
{
func (0, size, user_data);
n = CLAMP (n, 1, gegl_parallel_distribute_n_threads);
return;
}
data.size = size;
data.func = func;
data.user_data = user_data;
gegl_parallel_distribute (
n,
n_threads,
(GeglParallelDistributeFunc) gegl_parallel_distribute_range_func,
&data);
}
......@@ -261,13 +272,13 @@ gegl_parallel_distribute_area_func (gint i,
void
gegl_parallel_distribute_area (const GeglRectangle *area,
gsize min_sub_area,
gdouble thread_cost,
GeglSplitStrategy split_strategy,
GeglParallelDistributeAreaFunc func,
gpointer user_data)
{
GeglParallelDistributeAreaData data;
gsize n;
gint n_threads;
g_return_if_fail (area != NULL);
g_return_if_fail (func != NULL);
......@@ -275,6 +286,17 @@ gegl_parallel_distribute_area (const GeglRectangle *area,
if (area->width <= 0 || area->height <= 0)
return;
n_threads = gegl_parallel_distribute_get_optimal_n_threads (
(gdouble) area->width * (gdouble) area->height,
thread_cost);
if (n_threads == 1)
{
func (area, user_data);
return;
}
if (split_strategy == GEGL_SPLIT_STRATEGY_AUTO)
{
if (area->width > area->height)
......@@ -283,20 +305,13 @@ gegl_parallel_distribute_area (const GeglRectangle *area,
split_strategy = GEGL_SPLIT_STRATEGY_HORIZONTAL;
}
n = (gsize) area->width * (gsize) area->height;
if (min_sub_area > 1)
n /= min_sub_area;
n = CLAMP (n, 1, gegl_parallel_distribute_n_threads);
data.area = area;
data.split_strategy = split_strategy;
data.func = func;
data.user_data = user_data;
gegl_parallel_distribute (
n,
n_threads,
(GeglParallelDistributeFunc) gegl_parallel_distribute_area_func,
&data);
}
......@@ -416,3 +431,37 @@ gegl_parallel_distribute_thread_func (GeglParallelDistributeThread *thread)
return NULL;
}
/* calculates the optimal number of threads, n_threads, to process n_elements
* elements, assuming the cost of processing the elements is proportional to
* the number of elements to be processed by each thread, and assuming that
* each thread additionally incurs a fixed cost of thread_cost, relative to the
* cost of processing a single element.
*
* in other words, the assumption is that the total cost of processing the
* elements is proportional to:
*
* n_elements / n_threads + thread_cost * n_threads
*/
static inline gint
gegl_parallel_distribute_get_optimal_n_threads (gdouble n_elements,
gdouble thread_cost)
{
gint n_threads;
if (n_elements > 0 && thread_cost > 0.0)
{
gdouble n = n_elements;
gdouble c = thread_cost;
n_threads = floor ((c + sqrt (c * (c + 4.0 * n))) / (2.0 * c));
n_threads = CLAMP (n_threads, 1, gegl_parallel_distribute_n_threads);
}
else
{
n_threads = n_elements;
n_threads = CLAMP (n_threads, 0, gegl_parallel_distribute_n_threads);
}
return n_threads;
}
......@@ -45,11 +45,11 @@ typedef void (* GeglParallelDistributeFunc) (gint i,
* @size: the current data size
* @user_data: user data pointer
*
* Specifies the type of function passed to gegl_parallel_distribute_range().
* Specifies the type of function passed to
* gegl_parallel_distribute_range().
*
* The function should process @size elements of the data, starting
* at @offset. @size may be greater-than or equal-to the @min_sub_size
* argument passed to gegl_parallel_distribute_range().
* at @offset.
*/
typedef void (* GeglParallelDistributeRangeFunc) (gsize offset,
gsize size,
......@@ -57,14 +57,13 @@ typedef void (* GeglParallelDistributeRangeFunc) (gsize offset,
/**
* GeglParallelDistributeAreaFunc:
* @area: the current sub-region
* @area: the current sub-area
* @user_data: user data pointer
*
* Specifies the type of function passed to gegl_parallel_distribute_area().
* Specifies the type of function passed to
* gegl_parallel_distribute_area().
*
* The function should process the sub-region specified by @area, whose
* area may be greater-than or equal-to the @min_sub_area argument passed
* to gegl_parallel_distribute_area().
* The function should process the sub-area specified by @area.
*
*/
typedef void (* GeglParallelDistributeAreaFunc) (const GeglRectangle *area,
......@@ -87,7 +86,8 @@ void gegl_parallel_distribute (gint max_n,
/**
* gegl_parallel_distribute_range:
* @size: the total size of the data
* @min_sub_size: the minimal data size to be processed by each thread
* @thread_cost: the cost of using each additional thread, relative
* to the cost of processing a single data element
* @func: (closure user_data) (scope call): the function to call
* @user_data: user data to pass to the function
*
......@@ -96,24 +96,25 @@ void gegl_parallel_distribute (gint max_n,
* sub-ranges on different threads.
*/
void gegl_parallel_distribute_range (gsize size,
gsize min_sub_size,
gdouble thread_cost,
GeglParallelDistributeRangeFunc func,
gpointer user_data);
/**
* gegl_parallel_distribute_area:
* @area: the region to process
* @min_sub_area: the minimal area to be processed by each thread
* @split_strategy: the strategy to use for dividing the region
* @area: the area to process
* @thread_cost: the cost of using each additional thread, relative
* to the cost of processing a single data element
* @split_strategy: the strategy to use for dividing the area
* @func: (closure user_data) (scope call): the function to call
* @user_data: user data to pass to the function
*
* Distributes the processing of a planar data-structure across
* multiple threads, by calling the given function with different
* sub-regions on different threads.
* sub-areas on different threads.
*/
void gegl_parallel_distribute_area (const GeglRectangle *area,
gsize min_sub_area,
gdouble thread_cost,
GeglSplitStrategy split_strategy,
GeglParallelDistributeAreaFunc func,
gpointer user_data);
......@@ -145,10 +146,10 @@ gegl_parallel_distribute (gint max_n,
template <class ParallelDistributeRangeFunc>
inline void
gegl_parallel_distribute_range (gsize size,
gsize min_sub_size,
gdouble thread_cost,
ParallelDistributeRangeFunc func)
{
gegl_parallel_distribute_range (size, min_sub_size,
gegl_parallel_distribute_range (size, thread_cost,
[] (gsize offset,
gsize size,
gpointer user_data)
......@@ -164,11 +165,11 @@ gegl_parallel_distribute_range (gsize size,
template <class ParallelDistributeAreaFunc>
inline void
gegl_parallel_distribute_area (const GeglRectangle *area,
gsize min_sub_area,
gdouble thread_cost,
GeglSplitStrategy split_strategy,
ParallelDistributeAreaFunc func)
{
gegl_parallel_distribute_area (area, min_sub_area, split_strategy,
gegl_parallel_distribute_area (area, thread_cost, split_strategy,
[] (const GeglRectangle *area,
gpointer user_data)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment