From 5bfe77d93315111f80e3b56aee929fc33a14cf1f Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 23 Oct 2020 14:41:20 +0200 Subject: add some basic parsing benchmarks This adds some simple benchmarks to test the speed of evtclib::process_{file,stream}, just so we can quickly assess on a high-level if changes make a big impact. I'd like to add some more benchmarks in the future as well, mostly - on the higher level when we go from RawEvtc to a log, benchmarking the process function itself (thus not benchmarking all the byte-twiddling deserialization code). - on the Analyzer level --- Cargo.toml | 5 ++++ benches/parsing_benchmark.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 benches/parsing_benchmark.rs diff --git a/Cargo.toml b/Cargo.toml index 252fd30..f09dc1e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,8 +17,13 @@ include = [ [features] default = [] +[[bench]] +name = "parsing_benchmark" +harness = false + [dev-dependencies] serde_json = "1.0" +criterion = "0.3" [dependencies] num-traits = "0.2" diff --git a/benches/parsing_benchmark.rs b/benches/parsing_benchmark.rs new file mode 100644 index 0000000..e245f77 --- /dev/null +++ b/benches/parsing_benchmark.rs @@ -0,0 +1,63 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use std::{fs, io, io::Read}; +use zip::ZipArchive; + +const QADIM_LOG: &str = "tests/logs/qadim-20200427.zevtc"; + +/// This benchmark tests the overall performance of process_file. +/// +/// This is important because for many applications that just want to read a log, this is the +/// easiest and preferred way to do it. We want to ensure that we internally use a fast method +/// (such as a buffered reader or a memory mapped file) so that the downstream application will +/// receive the log fast. 
+fn zipped_qadim_benchmark(c: &mut Criterion) {
+    // `run` captures nothing, so it is `Copy` and can be handed to `iter` by value every time.
+    let run = || evtclib::process_file(black_box(QADIM_LOG), evtclib::Compression::Zip).unwrap();
+    c.bench_function("on-disk zipped Qadim", |bencher| bencher.iter(run));
+}
+
+/// Benchmarks `process_stream` on the zipped log after it has been read into memory.
+///
+/// Disk access happens once, outside the measured closure, so what remains is decompression
+/// plus parsing. That makes this the most realistic target to strive for with `process_file`:
+/// I/O can be worked around (prefetching, memory mapped files, ...), but the log probably
+/// has to be decompressed at some point.
+fn zipped_qadim_ram_benchmark(c: &mut Criterion) {
+    let zipped = fs::read(QADIM_LOG).unwrap();
+    c.bench_function("in-memory zipped Qadim", |bencher| {
+        bencher.iter(|| {
+            let stream = io::Cursor::new(&zipped);
+            evtclib::process_stream(stream, evtclib::Compression::Zip).unwrap()
+        })
+    });
+}
+
+/// Benchmarks `process_stream` on a log that has already been extracted from its archive.
+///
+/// Both file I/O and decompression are done up front, outside the measured closure. This
+/// probably measures our parsing performance most accurately: with all data readily
+/// available, the only slowdown that remains is our parsing and processing algorithm.
+fn unzipped_qadim_benchmark(c: &mut Criterion) {
+    // Setup, not measured: load the archive and unpack its first entry (index 0) into
+    // `raw`, so the measured closure below only has to parse an in-memory buffer.
+    let zipped = io::Cursor::new(fs::read(QADIM_LOG).unwrap());
+    let mut archive = ZipArchive::new(zipped).unwrap();
+    let mut raw = Vec::new();
+    archive.by_index(0).unwrap().read_to_end(&mut raw).unwrap();
+
+    c.bench_function("in-memory unzipped Qadim", |bencher| {
+        bencher.iter(|| {
+            let stream = io::Cursor::new(&raw);
+            evtclib::process_stream(stream, evtclib::Compression::None).unwrap()
+        })
+    });
+}
+
+// Register all three benchmarks in one group; the group collects 30 samples per
+// benchmark instead of using Criterion's default sample size.
+criterion_group! {
+    name = benches;
+    config = Criterion::default().sample_size(30);
+    targets = zipped_qadim_benchmark, zipped_qadim_ram_benchmark, unzipped_qadim_benchmark
+}
+criterion_main!(benches);
-- 
cgit v1.2.3