about | summary | refs | log | tree | commit | diff
path: root/src/renderer.rs
diff options
context:
space:
mode:
author: Daniel Schadt <kingdread@gmx.de> 2023-01-10 21:40:02 +0100
committer: Daniel Schadt <kingdread@gmx.de> 2023-01-10 21:46:13 +0100
commit: 6c547a24cea3422d934547bc9baf28a5f9ecf139 (patch)
tree: 0c7b43c23fb051632937602955898c2210bc08de /src/renderer.rs
parent: 4b30bda722668bef4ba362b3e578f02dede0099b (diff)
download: hittekaart-6c547a24cea3422d934547bc9baf28a5f9ecf139.tar.gz
hittekaart-6c547a24cea3422d934547bc9baf28a5f9ecf139.tar.bz2
hittekaart-6c547a24cea3422d934547bc9baf28a5f9ecf139.zip
considerably speed up the rendering process
Most of the time was spent doing hashmap lookups because all of our operations were done pixel by pixel, and layer.get_pixel_mut always went through the hashmap lookup. This was true for render_circle, render_line *and* merge_heat_counter - the last of which iterated over the full layer every time. The biggest change now is that we try to do accesses tile-by-tile. For the drawing functions, this means that we render the image on a small patch locally, and then blit the image onto the base - tile by tile, instead of pixel by pixel. For merge_heat_counter, we do the same: We iterate over tiles first, keeping a reference, and then iterate over the tile's pixels - that way we get a *huge* speedup. I can now render level 19 in 9 seconds, compared to before when it took 20s for level 17. Another benefit now is that we save the heatmap as u8 instead of u32. For a single track, we could even use a single bit (though that brings other problems with it). For the complete heatmap, u8 is probably too small (having 256 tracks is realistic), but we can change the merged one to be u16 later. This allows us to cut down on the RAM the program needs considerably, as we basically only use a fourth of the space now. A bit of noise is introduced in this patch since I ran cargo fmt. Side note: The bottleneck now seems to be the PNG compression, so that would be the next area to improve upon. Either by toning down the compression ratio (at the cost of higher storage needs), or by leveraging multithreading to deal with that.
Diffstat (limited to 'src/renderer.rs')
-rw-r--r-- src/renderer.rs 58
1 files changed, 23 insertions, 35 deletions
diff --git a/src/renderer.rs b/src/renderer.rs
index 757019c..1346ed1 100644
--- a/src/renderer.rs
+++ b/src/renderer.rs
@@ -1,8 +1,8 @@
use std::{fs, mem, path::Path};
use color_eyre::eyre::{bail, Result};
-use indicatif::ProgressBar;
use image::{ImageBuffer, Luma, Pixel, Rgba, RgbaImage};
+use indicatif::ProgressBar;
use nalgebra::{vector, Vector2};
use num_traits::identities::Zero;
@@ -11,32 +11,21 @@ use super::{
layer::{self, TileLayer},
};
-pub type HeatCounter = TileLayer<Luma<u32>>;
+pub type HeatCounter = TileLayer<Luma<u8>>;
pub type HeatMap = TileLayer<Rgba<u8>>;
-/// Returns (a - b)**2, but ensures that no underflow happens (if b > a).
-fn diff_squared(a: u64, b: u64) -> u64 {
- if a > b {
- (a - b).pow(2)
- } else {
- (b - a).pow(2)
- }
-}
-
fn render_circle<P: Pixel>(layer: &mut TileLayer<P>, center: (u64, u64), radius: u64, pixel: P) {
- let x_lower = center.0.saturating_sub(radius);
- let x_upper = (layer.width() - 1).min(center.0 + radius);
- let y_lower = center.1.saturating_sub(radius);
- let y_upper = (layer.height() - 1).min(center.1 + radius);
-
- for x in x_lower..=x_upper {
- for y in y_lower..=y_upper {
- if diff_squared(center.0, x) + diff_squared(center.1, y) <= radius * radius {
- *layer.get_pixel_mut(x, y) = pixel;
- }
- }
- }
+ let topleft = (center.0 - radius, center.1 - radius);
+ let rad_32: u32 = radius.try_into().unwrap();
+ let mut circle = ImageBuffer::<P, Vec<P::Subpixel>>::new(rad_32 * 2 + 1, rad_32 * 2 + 1);
+ imageproc::drawing::draw_filled_circle_mut(
+ &mut circle,
+ (i32::try_from(radius).unwrap(), i32::try_from(radius).unwrap()),
+ radius.try_into().unwrap(),
+ pixel,
+ );
+ layer.blit_nonzero(topleft.0, topleft.1, &circle);
}
fn direction_vector(a: (u64, u64), b: (u64, u64)) -> Vector2<f64> {
@@ -112,21 +101,20 @@ fn render_line<P: Pixel>(
.collect::<Vec<_>>();
imageproc::drawing::draw_polygon_mut(&mut overlay, &adjusted_poly, pixel);
- for (x, y, pixel) in overlay.enumerate_pixels() {
- if pixel.channels()[0] > Zero::zero() {
- *layer.get_pixel_mut(u64::from(x) + min_x, u64::from(y) + min_y) = *pixel;
- }
- }
+ layer.blit_nonzero(min_x, min_y, &overlay);
}
fn merge_heat_counter(base: &mut HeatCounter, overlay: &HeatCounter) {
- for (x, y, source) in overlay.enumerate_pixels() {
- let target = base.get_pixel_mut(x, y);
- target[0] += source[0];
+ for (tx, ty, source) in overlay.enumerate_tiles() {
+ let target = base.tile_mut(tx, ty);
+ for (x, y, source) in source.enumerate_pixels() {
+ let target = target.get_pixel_mut(x, y);
+ target[0] += source[0];
+ }
}
}
-fn colorize_tile(tile: &ImageBuffer<Luma<u32>, Vec<u32>>, max: u32) -> RgbaImage {
+fn colorize_tile(tile: &ImageBuffer<Luma<u8>, Vec<u8>>, max: u32) -> RgbaImage {
let gradient = colorgrad::yl_or_rd();
let mut result = ImageBuffer::from_pixel(tile.width(), tile.height(), [0, 0, 0, 0].into());
for (x, y, pixel) in tile.enumerate_pixels() {
@@ -147,7 +135,7 @@ pub fn colorize_heatcounter(layer: &HeatCounter) -> HeatMap {
return result;
}
for (tile_x, tile_y, tile) in layer.enumerate_tiles() {
- let colorized = colorize_tile(&tile, max);
+ let colorized = colorize_tile(&tile, max.into());
*result.tile_mut(tile_x, tile_y) = colorized;
}
result
@@ -167,7 +155,7 @@ pub fn lazy_colorization<P: AsRef<Path>>(layer: &HeatCounter, base_dir: P) -> Re
let bar = ProgressBar::new(layer.tile_count().try_into().unwrap());
for (tile_x, tile_y, tile) in layer.enumerate_tiles() {
- let colorized = colorize_tile(&tile, max);
+ let colorized = colorize_tile(&tile, max.into());
let folder = base_dir.join(&tile_x.to_string());
let metadata = folder.metadata();
match metadata {
@@ -200,7 +188,7 @@ pub fn render_heatcounter(zoom: u32, tracks: &[Vec<Coordinates>]) -> HeatCounter
.collect::<Vec<_>>();
for point in points.iter() {
- render_circle(&mut layer, *point, (zoom as u64 / 4).max(1), [1].into());
+ render_circle(&mut layer, *point, (zoom as u64 / 4).max(2) - 1, [1].into());
}
for (a, b) in points.iter().zip(points.iter().skip(1)) {