diff options
author | Daniel Schadt <kingdread@gmx.de> | 2023-01-10 21:40:02 +0100 |
---|---|---|
committer | Daniel Schadt <kingdread@gmx.de> | 2023-01-10 21:46:13 +0100 |
commit | 6c547a24cea3422d934547bc9baf28a5f9ecf139 (patch) | |
tree | 0c7b43c23fb051632937602955898c2210bc08de /src/renderer.rs | |
parent | 4b30bda722668bef4ba362b3e578f02dede0099b (diff) | |
download | hittekaart-6c547a24cea3422d934547bc9baf28a5f9ecf139.tar.gz hittekaart-6c547a24cea3422d934547bc9baf28a5f9ecf139.tar.bz2 hittekaart-6c547a24cea3422d934547bc9baf28a5f9ecf139.zip |
considerably speed up the rendering process
Most of the time was spent doing hashmap lookups because all of our
operations were done pixel by pixel, and layer.get_pixel_mut always went
through the hashmap lookup. This was true for render_circle, render_line
*and* merge_heat_counter - the last of which iterated over the full
layer every time.
The biggest change now is that we try to do accesses tile-by-tile. For
the drawing functions, this means that we render the image on a small
patch locally, and then blit the image onto the base - tile by tile,
instead of pixel by pixel.
For merge_heat_counter, we do the same: We iterate over tiles first,
keeping a reference, and then iterate over the tile's pixels - that way
we get a *huge* speedup. I can now render level 19 in 9 seconds,
compared to before when it took 20s for level 17.
Another benefit now is that we save the heat counter as u8 instead of u32.
For a single track, we could even use a single bit (though that brings
other problems with it). For the complete heatmap, u8 is probably too
small (having 256 tracks is realistic), but we can change the merged one
to be u16 later. This allows us to cut down on the RAM the program needs
considerably, as we basically only use a fourth of the space now.
A bit of noise is introduced in this patch since I ran cargo fmt.
Side note: The bottleneck now seems to be the PNG compression, so that
would be the next area to improve upon. Either by toning down the
compression ratio (at the cost of higher storage needs), or by
leveraging multithreading to deal with that.
Diffstat (limited to 'src/renderer.rs')
-rw-r--r-- | src/renderer.rs | 58 |
1 files changed, 23 insertions, 35 deletions
diff --git a/src/renderer.rs b/src/renderer.rs index 757019c..1346ed1 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -1,8 +1,8 @@ use std::{fs, mem, path::Path}; use color_eyre::eyre::{bail, Result}; -use indicatif::ProgressBar; use image::{ImageBuffer, Luma, Pixel, Rgba, RgbaImage}; +use indicatif::ProgressBar; use nalgebra::{vector, Vector2}; use num_traits::identities::Zero; @@ -11,32 +11,21 @@ use super::{ layer::{self, TileLayer}, }; -pub type HeatCounter = TileLayer<Luma<u32>>; +pub type HeatCounter = TileLayer<Luma<u8>>; pub type HeatMap = TileLayer<Rgba<u8>>; -/// Returns (a - b)**2, but ensures that no underflow happens (if b > a). -fn diff_squared(a: u64, b: u64) -> u64 { - if a > b { - (a - b).pow(2) - } else { - (b - a).pow(2) - } -} - fn render_circle<P: Pixel>(layer: &mut TileLayer<P>, center: (u64, u64), radius: u64, pixel: P) { - let x_lower = center.0.saturating_sub(radius); - let x_upper = (layer.width() - 1).min(center.0 + radius); - let y_lower = center.1.saturating_sub(radius); - let y_upper = (layer.height() - 1).min(center.1 + radius); - - for x in x_lower..=x_upper { - for y in y_lower..=y_upper { - if diff_squared(center.0, x) + diff_squared(center.1, y) <= radius * radius { - *layer.get_pixel_mut(x, y) = pixel; - } - } - } + let topleft = (center.0 - radius, center.1 - radius); + let rad_32: u32 = radius.try_into().unwrap(); + let mut circle = ImageBuffer::<P, Vec<P::Subpixel>>::new(rad_32 * 2 + 1, rad_32 * 2 + 1); + imageproc::drawing::draw_filled_circle_mut( + &mut circle, + (i32::try_from(radius).unwrap(), i32::try_from(radius).unwrap()), + radius.try_into().unwrap(), + pixel, + ); + layer.blit_nonzero(topleft.0, topleft.1, &circle); } fn direction_vector(a: (u64, u64), b: (u64, u64)) -> Vector2<f64> { @@ -112,21 +101,20 @@ fn render_line<P: Pixel>( .collect::<Vec<_>>(); imageproc::drawing::draw_polygon_mut(&mut overlay, &adjusted_poly, pixel); - for (x, y, pixel) in overlay.enumerate_pixels() { - if pixel.channels()[0] > 
Zero::zero() { - *layer.get_pixel_mut(u64::from(x) + min_x, u64::from(y) + min_y) = *pixel; - } - } + layer.blit_nonzero(min_x, min_y, &overlay); } fn merge_heat_counter(base: &mut HeatCounter, overlay: &HeatCounter) { - for (x, y, source) in overlay.enumerate_pixels() { - let target = base.get_pixel_mut(x, y); - target[0] += source[0]; + for (tx, ty, source) in overlay.enumerate_tiles() { + let target = base.tile_mut(tx, ty); + for (x, y, source) in source.enumerate_pixels() { + let target = target.get_pixel_mut(x, y); + target[0] += source[0]; + } } } -fn colorize_tile(tile: &ImageBuffer<Luma<u32>, Vec<u32>>, max: u32) -> RgbaImage { +fn colorize_tile(tile: &ImageBuffer<Luma<u8>, Vec<u8>>, max: u32) -> RgbaImage { let gradient = colorgrad::yl_or_rd(); let mut result = ImageBuffer::from_pixel(tile.width(), tile.height(), [0, 0, 0, 0].into()); for (x, y, pixel) in tile.enumerate_pixels() { @@ -147,7 +135,7 @@ pub fn colorize_heatcounter(layer: &HeatCounter) -> HeatMap { return result; } for (tile_x, tile_y, tile) in layer.enumerate_tiles() { - let colorized = colorize_tile(&tile, max); + let colorized = colorize_tile(&tile, max.into()); *result.tile_mut(tile_x, tile_y) = colorized; } result @@ -167,7 +155,7 @@ pub fn lazy_colorization<P: AsRef<Path>>(layer: &HeatCounter, base_dir: P) -> Re let bar = ProgressBar::new(layer.tile_count().try_into().unwrap()); for (tile_x, tile_y, tile) in layer.enumerate_tiles() { - let colorized = colorize_tile(&tile, max); + let colorized = colorize_tile(&tile, max.into()); let folder = base_dir.join(&tile_x.to_string()); let metadata = folder.metadata(); match metadata { @@ -200,7 +188,7 @@ pub fn render_heatcounter(zoom: u32, tracks: &[Vec<Coordinates>]) -> HeatCounter .collect::<Vec<_>>(); for point in points.iter() { - render_circle(&mut layer, *point, (zoom as u64 / 4).max(1), [1].into()); + render_circle(&mut layer, *point, (zoom as u64 / 4).max(2) - 1, [1].into()); } for (a, b) in points.iter().zip(points.iter().skip(1)) { |