From 6c547a24cea3422d934547bc9baf28a5f9ecf139 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Tue, 10 Jan 2023 21:40:02 +0100 Subject: considerably speed up the rendering process Most of the time was spent doing hashmap lookups because all of our operations were done pixel by pixel, and layer.get_pixel_mut always went through the hashmap lookup. This was true for render_circle, render_line *and* merge_heat_counter - the last of which iterated over the full layer every time. The biggest change now is that we try to do accesses tile-by-tile. For the drawing functions, this means that we render the image on a small patch locally, and then blit the image onto the base - tile by tile, instead of pixel by pixel. For merge_heat_counters, we do the same: We iterate over tiles first, keeping a reference, and then iterate over the tile's pixels - that way we get a *huge* speedup. I can now render level 19 in 9 seconds, compared to before when it took 20s for level 17. Another benefit now is that we save the heatmap as u8 instead of u32. For a single track, we could even use a single bit (though that brings other problems with it). For the complete heatmap, u8 is probably too small (having 256 tracks is realistic), but we can change the merged one to be u16 later. This allows us to cut down on the RAM the program needs considerably, as we basically only use a fourth of the space now. A bit of noise is introduced in this patch since I ran cargo fmt. Side note: The bottleneck now seems to be the PNG compression, so that would be the next area to improve upon. Either by toning down the compression ratio (at the cost of higher storage needs), or by leveraging multithreading to deal with that. --- src/gpx.rs | 19 +++++++++---- src/layer.rs | 86 +++++++++++++++++++++++++++++++++++++++++++++++++-------- src/renderer.rs | 58 +++++++++++++++----------------------- 3 files changed, 111 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/gpx.rs b/src/gpx.rs index 7760ca5..337a2a9 100644 --- a/src/gpx.rs +++ b/src/gpx.rs @@ -22,7 +22,8 @@ impl Coordinates { let lambda = self.longitude.to_radians(); let phi = self.latitude.to_radians(); let x = 2u64.pow(zoom) as f64 / (2.0 * PI) * 256.0 * (lambda + PI); - let y = 2u64.pow(zoom) as f64 / (2.0 * PI) * 256.0 * (PI - (PI / 4.0 + phi / 2.0).tan().ln()); + let y = + 2u64.pow(zoom) as f64 / (2.0 * PI) * 256.0 * (PI - (PI / 4.0 + phi / 2.0).tan().ln()); (x.floor() as u64, y.floor() as u64) } } @@ -45,13 +46,18 @@ pub fn extract_from_str(input: &str) -> Result> { for node in document.root_element().children().filter(is_track_node) { for segment in node.children().filter(is_track_segment) { for point in segment.children().filter(is_track_point) { - let latitude = point.attribute("lat") + let latitude = point + .attribute("lat") .and_then(|l| l.parse::().ok()) .ok_or_else(|| eyre!("Invalid latitude"))?; - let longitude = point.attribute("lon") + let longitude = point + .attribute("lon") .and_then(|l| l.parse::().ok()) .ok_or_else(|| eyre!("Invalid longitude"))?; - result.push(Coordinates { latitude, longitude }); + result.push(Coordinates { + latitude, + longitude, + }); } } } @@ -63,7 +69,10 @@ pub enum Compression { None, } -pub fn extract_from_file>(path: P, _compression: Compression) -> Result> { +pub fn extract_from_file>( + path: P, + _compression: Compression, +) -> Result> { let content = fs::read_to_string(path)?; extract_from_str(&content) } diff --git a/src/layer.rs b/src/layer.rs index 1c14df3..74c36e0 100644 --- a/src/layer.rs +++ b/src/layer.rs @@ -3,7 +3,11 @@ //! This supports OSM-style "tiled" images, but not all of the tiles have to be present. If a tile //! is not present, a default pixel is returned. The tile is allocated with the first call to a //! mutating operation. -use std::{fs::{self, File}, io::BufWriter, path::Path}; +use std::{ + fs::{self, File}, + io::BufWriter, + path::Path, +}; use color_eyre::eyre::{bail, Result}; use fnv::FnvHashMap; @@ -11,6 +15,7 @@ use image::{ codecs::png::{CompressionType, FilterType, PngEncoder}, ColorType, ImageBuffer, ImageEncoder, Pixel, Rgba, RgbaImage, }; +use num_traits::Zero; pub const TILE_HEIGHT: u64 = 256; pub const TILE_WIDTH: u64 = 256; @@ -45,18 +50,23 @@ impl TileLayer

{ fn index(&self, x: u64, y: u64) -> ((u64, u64), (u32, u32)) { ( (x / TILE_WIDTH, y / TILE_HEIGHT), - ((x % TILE_WIDTH).try_into().unwrap(), (y % TILE_HEIGHT).try_into().unwrap()), + ( + (x % TILE_WIDTH).try_into().unwrap(), + (y % TILE_HEIGHT).try_into().unwrap(), + ), ) } - pub fn enumerate_tiles(&self) -> impl Iterator>)> { + pub fn enumerate_tiles( + &self, + ) -> impl Iterator>)> { self.tiles.iter().map(|((x, y), t)| (*x, *y, t)) } pub fn tile_mut(&mut self, tile_x: u64, tile_y: u64) -> &mut ImageBuffer> { - self.tiles - .entry((tile_x, tile_y)) - .or_insert_with(|| ImageBuffer::from_pixel(TILE_WIDTH as u32, TILE_HEIGHT as u32, self.default_pixel)) + self.tiles.entry((tile_x, tile_y)).or_insert_with(|| { + ImageBuffer::from_pixel(TILE_WIDTH as u32, TILE_HEIGHT as u32, self.default_pixel) + }) } pub fn tile_for_mut(&mut self, x: u64, y: u64) -> &mut ImageBuffer> { @@ -73,7 +83,7 @@ impl TileLayer

{ self.tiles .get(&outer_idx) .map(|tile| tile.get_pixel(inner_x, inner_y)) - .or_else(|| Some(&self.default_pixel)) + .or(Some(&self.default_pixel)) } pub fn get_pixel(&self, x: u64, y: u64) -> &P { @@ -101,16 +111,26 @@ impl TileLayer

{ /// Enumerate all pixels that are explicitely set in this layer. pub fn enumerate_pixels(&self) -> impl Iterator { self.tiles.iter().flat_map(|((tx, ty), tile)| { - tile.enumerate_pixels() - .map(move |(x, y, p)| (u64::from(x) + tx * TILE_WIDTH, u64::from(y) + ty * TILE_HEIGHT, p)) + tile.enumerate_pixels().map(move |(x, y, p)| { + ( + u64::from(x) + tx * TILE_WIDTH, + u64::from(y) + ty * TILE_HEIGHT, + p, + ) + }) }) } /// Mutably enumerate all pixels that are explicitely set in this layer. pub fn enumerate_pixels_mut(&mut self) -> impl Iterator { self.tiles.iter_mut().flat_map(|((tx, ty), tile)| { - tile.enumerate_pixels_mut() - .map(move |(x, y, p)| (u64::from(x) + tx * TILE_WIDTH, u64::from(y) + ty * TILE_HEIGHT, p)) + tile.enumerate_pixels_mut().map(move |(x, y, p)| { + ( + u64::from(x) + tx * TILE_WIDTH, + u64::from(y) + ty * TILE_HEIGHT, + p, + ) + }) }) } @@ -125,6 +145,43 @@ impl TileLayer

{ pub fn tile_count(&self) -> usize { self.tiles.len() } + + /// Copies the non-zero pixels from `source` to `self`. + /// + /// A zero-pixel is identified by comparing all its channels' values with `Zero::zero()`. If + /// any channel is non-zero, the pixel is considered non-zero and is copied. + /// + /// The top-left pixel of `source` is copied to `(x, y)`. + /// + /// This method is more efficient than repeatedly calling [`get_pixel_mut`], as it groups + /// pixels by tile and only does one tile lookup. + pub fn blit_nonzero(&mut self, x: u64, y: u64, source: &ImageBuffer>) { + let zero = zero_pixel::

(); + let source_width = u64::from(source.width()); + let source_height = u64::from(source.height()); + for tx in x / TILE_WIDTH..=(x + source_width) / TILE_WIDTH { + for ty in y / TILE_HEIGHT..=(y + source_height) / TILE_HEIGHT { + let tile = self.tile_mut(tx, ty); + let offset_x = (tx * TILE_WIDTH).saturating_sub(x); + let offset_y = (ty * TILE_HEIGHT).saturating_sub(y); + let local_min_x = x.saturating_sub(tx * TILE_WIDTH); + let local_min_y = y.saturating_sub(ty * TILE_HEIGHT); + let local_max_x = TILE_WIDTH.min(x + source_width - tx * TILE_WIDTH); + let local_max_y = TILE_HEIGHT.min(y + source_height - ty * TILE_HEIGHT); + // Keep x in the inner loop for better cache locality! + for (y, source_y) in (local_min_y..local_max_y).zip(offset_y..) { + for (x, source_x) in (local_min_x..local_max_x).zip(offset_x..) { + let pixel = source + .get_pixel(source_x.try_into().unwrap(), source_y.try_into().unwrap()); + if pixel.channels() != zero.channels() { + *tile.get_pixel_mut(x.try_into().unwrap(), y.try_into().unwrap()) = + *pixel; + } + } + } + } + } + } } impl TileLayer> { @@ -137,7 +194,7 @@ impl TileLayer> { match metadata { Err(_) => fs::create_dir(&folder)?, Ok(m) if !m.is_dir() => bail!("Output path is not a directory"), - _ => {}, + _ => {} } let file = folder.join(&format!("{y}.png")); compress_png(tile, file)?; @@ -156,3 +213,8 @@ pub fn compress_png>(image: &RgbaImage, path: P) -> Result<()> { Ok(()) } + +fn zero_pixel() -> P { + let zeroes = vec![Zero::zero(); P::CHANNEL_COUNT as usize]; + *P::from_slice(&zeroes) +} diff --git a/src/renderer.rs b/src/renderer.rs index 757019c..1346ed1 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -1,8 +1,8 @@ use std::{fs, mem, path::Path}; use color_eyre::eyre::{bail, Result}; -use indicatif::ProgressBar; use image::{ImageBuffer, Luma, Pixel, Rgba, RgbaImage}; +use indicatif::ProgressBar; use nalgebra::{vector, Vector2}; use num_traits::identities::Zero; @@ -11,32 +11,21 @@ use super::{ layer::{self, TileLayer}, }; -pub type HeatCounter = TileLayer>; +pub type HeatCounter = TileLayer>; pub type HeatMap = TileLayer>; -/// Returns (a - b)**2, but ensures that no underflow happens (if b > a). -fn diff_squared(a: u64, b: u64) -> u64 { - if a > b { - (a - b).pow(2) - } else { - (b - a).pow(2) - } -} - fn render_circle(layer: &mut TileLayer

, center: (u64, u64), radius: u64, pixel: P) { - let x_lower = center.0.saturating_sub(radius); - let x_upper = (layer.width() - 1).min(center.0 + radius); - let y_lower = center.1.saturating_sub(radius); - let y_upper = (layer.height() - 1).min(center.1 + radius); - - for x in x_lower..=x_upper { - for y in y_lower..=y_upper { - if diff_squared(center.0, x) + diff_squared(center.1, y) <= radius * radius { - *layer.get_pixel_mut(x, y) = pixel; - } - } - } + let topleft = (center.0 - radius, center.1 - radius); + let rad_32: u32 = radius.try_into().unwrap(); + let mut circle = ImageBuffer::>::new(rad_32 * 2 + 1, rad_32 * 2 + 1); + imageproc::drawing::draw_filled_circle_mut( + &mut circle, + (i32::try_from(radius).unwrap(), i32::try_from(radius).unwrap()), + radius.try_into().unwrap(), + pixel, + ); + layer.blit_nonzero(topleft.0, topleft.1, &circle); } fn direction_vector(a: (u64, u64), b: (u64, u64)) -> Vector2 { @@ -112,21 +101,20 @@ fn render_line( .collect::>(); imageproc::drawing::draw_polygon_mut(&mut overlay, &adjusted_poly, pixel); - for (x, y, pixel) in overlay.enumerate_pixels() { - if pixel.channels()[0] > Zero::zero() { - *layer.get_pixel_mut(u64::from(x) + min_x, u64::from(y) + min_y) = *pixel; - } - } + layer.blit_nonzero(min_x, min_y, &overlay); } fn merge_heat_counter(base: &mut HeatCounter, overlay: &HeatCounter) { - for (x, y, source) in overlay.enumerate_pixels() { - let target = base.get_pixel_mut(x, y); - target[0] += source[0]; + for (tx, ty, source) in overlay.enumerate_tiles() { + let target = base.tile_mut(tx, ty); + for (x, y, source) in source.enumerate_pixels() { + let target = target.get_pixel_mut(x, y); + target[0] += source[0]; + } } } -fn colorize_tile(tile: &ImageBuffer, Vec>, max: u32) -> RgbaImage { +fn colorize_tile(tile: &ImageBuffer, Vec>, max: u32) -> RgbaImage { let gradient = colorgrad::yl_or_rd(); let mut result = ImageBuffer::from_pixel(tile.width(), tile.height(), [0, 0, 0, 0].into()); for (x, y, pixel) in tile.enumerate_pixels() { @@ -147,7 +135,7 @@ pub fn colorize_heatcounter(layer: &HeatCounter) -> HeatMap { return result; } for (tile_x, tile_y, tile) in layer.enumerate_tiles() { - let colorized = colorize_tile(&tile, max); + let colorized = colorize_tile(&tile, max.into()); *result.tile_mut(tile_x, tile_y) = colorized; } result @@ -167,7 +155,7 @@ pub fn lazy_colorization>(layer: &HeatCounter, base_dir: P) -> Re let bar = ProgressBar::new(layer.tile_count().try_into().unwrap()); for (tile_x, tile_y, tile) in layer.enumerate_tiles() { - let colorized = colorize_tile(&tile, max); + let colorized = colorize_tile(&tile, max.into()); let folder = base_dir.join(&tile_x.to_string()); let metadata = folder.metadata(); match metadata { @@ -200,7 +188,7 @@ pub fn render_heatcounter(zoom: u32, tracks: &[Vec]) -> HeatCounter .collect::>(); for point in points.iter() { - render_circle(&mut layer, *point, (zoom as u64 / 4).max(1), [1].into()); + render_circle(&mut layer, *point, (zoom as u64 / 4).max(2) - 1, [1].into()); } for (a, b) in points.iter().zip(points.iter().skip(1)) { -- cgit v1.2.3