From 66dd0a3300ebed87ad6037049711aa3eb0619889 Mon Sep 17 00:00:00 2001 From: Ivan Molodetskikh Date: Fri, 3 Aug 2018 21:54:21 +0300 Subject: [PATCH] Parallelize box_blur_loop() --- .../src/surface_utils/shared_surface.rs | 311 +++++++++++++----- 1 file changed, 229 insertions(+), 82 deletions(-) diff --git a/rsvg_internals/src/surface_utils/shared_surface.rs b/rsvg_internals/src/surface_utils/shared_surface.rs index c1eccbdd4..fbf58dee1 100644 --- a/rsvg_internals/src/surface_utils/shared_surface.rs +++ b/rsvg_internals/src/surface_utils/shared_surface.rs @@ -1,4 +1,5 @@ //! Shared access to Cairo image surfaces. +use std::marker::PhantomData; use std::ptr::NonNull; use cairo::prelude::SurfaceExt; @@ -6,6 +7,7 @@ use cairo::{self, ImageSurface}; use cairo_sys; use glib::translate::{Stash, ToGlibPtr}; use nalgebra::{storage::Storage, Dim, Matrix}; +use rayon; use filters::context::IRect; use srgb; @@ -450,9 +452,128 @@ impl SharedImageSurface { assert_ne!(kernel_size, 0); assert!(target < kernel_size); - let output_stride = output_surface.get_stride() as usize; { - let mut output_data = output_surface.get_data().unwrap(); + // The following code is needed for a parallel implementation of the blur loop. The + // blurring is done either for each row or for each column of pixels, depending on the + // value of `vertical`, independently of the others. Naturally, we want to run the + // outer loop on a thread pool. + // + // The case of `vertical == false` is simple since the input image slice can be + // partitioned into chunks for each row of pixels and processed in parallel with rayon. + // The case of `verical == true`, however, is more involved because we can't just make + // mutable slices for all pixel columns (they would be overlapping which is forbidden + // by the aliasing rules). + // + // This is where the following struct comes into play: it stores a sub-slice of the + // pixel data and can be split at any row or column into two parts (similar to + // slice::split_at_mut()). + struct UnsafeSendPixelData<'a> { + width: u32, + height: u32, + stride: isize, + ptr: NonNull, + _marker: PhantomData<&'a mut ()>, + } + + unsafe impl<'a> Send for UnsafeSendPixelData<'a> {} + + impl<'a> UnsafeSendPixelData<'a> { + /// Creates a new `UnsafeSendPixelData`. + /// + /// # Safety + /// You must call cairo_surface_mark_dirty() on the surface once all instances of + /// `UnsafeSendPixelData` are dropped to make sure the pixel changes are comitted + /// to Cairo. + #[inline] + unsafe fn new(surface: &mut cairo::ImageSurface) -> Self { + assert_eq!(surface.get_format(), cairo::Format::ARgb32); + let ptr = surface.get_data().unwrap().as_mut_ptr(); + + Self { + width: surface.get_width() as u32, + height: surface.get_height() as u32, + stride: surface.get_stride() as isize, + ptr: NonNull::new(ptr).unwrap(), + _marker: PhantomData, + } + } + + /// Sets a pixel value at the given coordinates. + #[inline] + fn set_pixel(&mut self, pixel: Pixel, x: u32, y: u32) { + assert!(x < self.width); + assert!(y < self.height); + + let value = pixel.to_u32(); + + unsafe { + let ptr = self + .ptr + .as_ptr() + .offset(y as isize * self.stride + x as isize * 4) + as *mut u32; + *ptr = value; + } + } + + /// Splits this `UnsafeSendPixelData` into two at the given row. + /// + /// The first one contains rows `0..index` (index not included) and the second one + /// contains rows `index..height`. + #[inline] + fn split_at_row(self, index: u32) -> (Self, Self) { + assert!(index <= self.height); + + ( + UnsafeSendPixelData { + width: self.width, + height: index, + stride: self.stride, + ptr: self.ptr, + _marker: PhantomData, + }, + UnsafeSendPixelData { + width: self.width, + height: self.height - index, + stride: self.stride, + ptr: NonNull::new(unsafe { + self.ptr.as_ptr().offset(index as isize * self.stride) + }).unwrap(), + _marker: PhantomData, + }, + ) + } + + /// Splits this `UnsafeSendPixelData` into two at the given column. + /// + /// The first one contains columns `0..index` (index not included) and the second + /// one contains columns `index..width`. + #[inline] + fn split_at_column(self, index: u32) -> (Self, Self) { + assert!(index <= self.width); + + ( + UnsafeSendPixelData { + width: index, + height: self.height, + stride: self.stride, + ptr: self.ptr, + _marker: PhantomData, + }, + UnsafeSendPixelData { + width: self.width - index, + height: self.height, + stride: self.stride, + ptr: NonNull::new(unsafe { + self.ptr.as_ptr().offset(index as isize * 4) + }).unwrap(), + _marker: PhantomData, + }, + ) + } + } + + let output_data = unsafe { UnsafeSendPixelData::new(output_surface) }; // Shift is target into the opposite direction. let shift = (kernel_size - target) as i32; @@ -463,103 +584,129 @@ impl SharedImageSurface { let compute = |x: u32| (f64::from(x) / kernel_size_f64 + 0.5) as u8; // Depending on `vertical`, we're blurring either horizontally line-by-line, or - // vertically column-by-column. In the code below, the main axis is the - // axis along which the blurring happens (so if `vertical` is false, the - // main axis is the horizontal axis). The other axis is the outer loop - // axis. The code uses `i` and `j` for the other axis and main axis - // coordinates, respectively. + // vertically column-by-column. In the code below, the main axis is the axis along + // which the blurring happens (so if `vertical` is false, the main axis is the + // horizontal axis). The other axis is the outer loop axis. The code uses `i` and `j` + // for the other axis and main axis coordinates, respectively. let (main_axis_min, main_axis_max, other_axis_min, other_axis_max) = if vertical { (bounds.y0, bounds.y1, bounds.x0, bounds.x1) } else { (bounds.x0, bounds.x1, bounds.y0, bounds.y1) }; - // Helper functions for getting and setting the pixels. + // Helper function for getting the pixels. let pixel = |i, j| { let (x, y) = if vertical { (i, j) } else { (j, i) }; self.get_pixel_or_transparent(bounds, x, y) }; - let mut set_pixel = |i, j, pixel| { - let (x, y) = if vertical { (i, j) } else { (j, i) }; - - output_data.set_pixel(output_stride, pixel, x, y); + // The following loop assumes the first row or column of `output_data` is the first row + // or column inside `bounds`. + let mut output_data = if vertical { + output_data.split_at_column(bounds.x0 as u32).1 + } else { + output_data.split_at_row(bounds.y0 as u32).1 }; - for i in other_axis_min..other_axis_max { - // The idea is that since all weights of the box blur kernel are equal, for each - // step along the main axis, instead of recomputing the full sum, - // we can take the previous sum, subtract the "oldest" pixel value - // and add the "newest" pixel value. - // - // The sum is u32 so that it can fit MAXIMUM_KERNEL_SIZE * 255. - let mut sum_r = 0; - let mut sum_g = 0; - let mut sum_b = 0; - let mut sum_a = 0; - - // The whole sum needs to be computed for the first pixel. However, we know that - // values outside of bounds are transparent, so the loop starts on - // the first pixel in bounds. - for j in main_axis_min..main_axis_min + shift { - let Pixel { r, g, b, a } = pixel(i, j); - - if !self.is_alpha_only() { - sum_r += u32::from(r); - sum_g += u32::from(g); - sum_b += u32::from(b); - } - - sum_a += u32::from(a); - } - - set_pixel( - i as u32, - main_axis_min as u32, - Pixel { - r: compute(sum_r), - g: compute(sum_g), - b: compute(sum_b), - a: compute(sum_a), - }, - ); - - // Now, go through all the other pixels. - for j in main_axis_min + 1..main_axis_max { - let old_pixel = pixel(i, j - target - 1); - - if !self.is_alpha_only() { - sum_r -= u32::from(old_pixel.r); - sum_g -= u32::from(old_pixel.g); - sum_b -= u32::from(old_pixel.b); - } - - sum_a -= u32::from(old_pixel.a); - - let new_pixel = pixel(i, j + shift - 1); - - if !self.is_alpha_only() { - sum_r += u32::from(new_pixel.r); - sum_g += u32::from(new_pixel.g); - sum_b += u32::from(new_pixel.b); - } - - sum_a += u32::from(new_pixel.a); + rayon::scope(|s| { + for i in other_axis_min..other_axis_max { + // Split off one row or column and launch its processing on another thread. + // Thanks to the initial split before the loop, there's no special case for the + // very first split. + let (mut current, remaining) = if vertical { + output_data.split_at_column(1) + } else { + output_data.split_at_row(1) + }; - set_pixel( - i as u32, - j as u32, - Pixel { - r: compute(sum_r), - g: compute(sum_g), - b: compute(sum_b), - a: compute(sum_a), - }, - ); + output_data = remaining; + + s.spawn(move |_| { + // Helper function for setting the pixels. + let mut set_pixel = |j, pixel| { + // We're processing rows or columns one-by-one, so the other coordinate + // is always 0. + let (x, y) = if vertical { (0, j) } else { (j, 0) }; + current.set_pixel(pixel, x, y); + }; + + // The idea is that since all weights of the box blur kernel are equal, for + // each step along the main axis, instead of recomputing the full sum, we + // can take the previous sum, subtract the "oldest" pixel value and add the + // "newest" pixel value. + // + // The sum is u32 so that it can fit MAXIMUM_KERNEL_SIZE * 255. + let mut sum_r = 0; + let mut sum_g = 0; + let mut sum_b = 0; + let mut sum_a = 0; + + // The whole sum needs to be computed for the first pixel. However, we know + // that values outside of bounds are transparent, so the loop starts on the + // first pixel in bounds. + for j in main_axis_min..main_axis_min + shift { + let Pixel { r, g, b, a } = pixel(i, j); + + if !self.is_alpha_only() { + sum_r += u32::from(r); + sum_g += u32::from(g); + sum_b += u32::from(b); + } + + sum_a += u32::from(a); + } + + set_pixel( + main_axis_min as u32, + Pixel { + r: compute(sum_r), + g: compute(sum_g), + b: compute(sum_b), + a: compute(sum_a), + }, + ); + + // Now, go through all the other pixels. + for j in main_axis_min + 1..main_axis_max { + let old_pixel = pixel(i, j - target - 1); + + if !self.is_alpha_only() { + sum_r -= u32::from(old_pixel.r); + sum_g -= u32::from(old_pixel.g); + sum_b -= u32::from(old_pixel.b); + } + + sum_a -= u32::from(old_pixel.a); + + let new_pixel = pixel(i, j + shift - 1); + + if !self.is_alpha_only() { + sum_r += u32::from(new_pixel.r); + sum_g += u32::from(new_pixel.g); + sum_b += u32::from(new_pixel.b); + } + + sum_a += u32::from(new_pixel.a); + + set_pixel( + j as u32, + Pixel { + r: compute(sum_r), + g: compute(sum_g), + b: compute(sum_b), + a: compute(sum_a), + }, + ); + } + }); } - } + }); } + + // Don't forget to manually mark the surface as dirty (due to usage of + // `UnsafeSendPixelData`). + unsafe { cairo_sys::cairo_surface_mark_dirty(output_surface.to_raw_none()) } } /// Performs a horizontal or vertical box blur. -- GitLab