author    Daniel Schadt <kingdread@gmx.de>  2020-10-23 14:41:20 +0200
committer Daniel Schadt <kingdread@gmx.de>  2020-10-23 14:41:20 +0200
commit    5bfe77d93315111f80e3b56aee929fc33a14cf1f (patch)
tree      b060be4d5fce46dcbaa1c5f3d8c329f9a89b4605
parent    c9e03333f94385cc35bdf3b8d4eb00cd77c3ffb7 (diff)
add some basic parsing benchmarks
This adds some simple benchmarks to test the speed of evtclib::process_{file,stream}, just so we can quickly assess at a high level whether changes make a big impact. I'd like to add some more benchmarks in the future as well, mostly

- on the higher level, when we go from a RawEvtc to a Log, benchmarking the process function itself (thus not benchmarking all the byte-twiddling deserialization code); a rough sketch of this follows below.
- on the Analyzer level.
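Not part of this commit, but a rough sketch of the first of those future benchmarks: unzip and parse the raw evtc once up front, then measure only the RawEvtc-to-Log step. The names evtclib::raw::parse_file and evtclib::process are assumptions about the crate's API here (this patch only exercises process_file and process_stream), so treat this as an outline rather than working code:

    use criterion::{black_box, criterion_group, criterion_main, Criterion};
    use std::{fs, io, io::Read};
    use zip::ZipArchive;

    const QADIM_LOG: &str = "tests/logs/qadim-20200427.zevtc";

    /// Benchmark only the RawEvtc -> Log step, with deserialization done up front.
    fn process_only_benchmark(c: &mut Criterion) {
        // Unzip the log into memory, same as in the unzipped benchmark below.
        let mut evtc_data = Vec::new();
        let zip_data = fs::read(QADIM_LOG).unwrap();
        let mut archive = ZipArchive::new(io::Cursor::new(zip_data)).unwrap();
        archive
            .by_index(0)
            .unwrap()
            .read_to_end(&mut evtc_data)
            .unwrap();

        // Assumed API: a raw parsing entry point that yields a RawEvtc, and a
        // process function that turns it into a Log. Adjust to the real names.
        let raw = evtclib::raw::parse_file(io::Cursor::new(&evtc_data)).unwrap();

        c.bench_function("process RawEvtc into Log", |b| {
            b.iter(|| evtclib::process(black_box(&raw)).unwrap())
        });
    }

    criterion_group!(process_benches, process_only_benchmark);
    criterion_main!(process_benches);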
-rw-r--r--  Cargo.toml                     5
-rw-r--r--  benches/parsing_benchmark.rs  63
2 files changed, 68 insertions(+), 0 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 252fd30..f09dc1e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,8 +17,13 @@ include = [
[features]
default = []
+[[bench]]
+name = "parsing_benchmark"
+harness = false
+
[dev-dependencies]
serde_json = "1.0"
+criterion = "0.3"
[dependencies]
num-traits = "0.2"
diff --git a/benches/parsing_benchmark.rs b/benches/parsing_benchmark.rs
new file mode 100644
index 0000000..e245f77
--- /dev/null
+++ b/benches/parsing_benchmark.rs
@@ -0,0 +1,63 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use std::{fs, io, io::Read};
+use zip::ZipArchive;
+
+const QADIM_LOG: &str = "tests/logs/qadim-20200427.zevtc";
+
+/// This benchmark tests the overall performance of process_file.
+///
+/// This is important because for many applications that just want to read a log, this is the
+/// easiest and preferred way to do it. We want to ensure that we internally use a fast method
+/// (such as a buffered reader or a memory mapped file) so that the downstream application will
+/// receive the log fast.
+fn zipped_qadim_benchmark(c: &mut Criterion) {
+ c.bench_function("on-disk zipped Qadim", |b| {
+ b.iter(|| evtclib::process_file(black_box(QADIM_LOG), evtclib::Compression::Zip).unwrap())
+ });
+}
+
+/// This benchmark tests the process_stream on a pre-loaded zipped log.
+///
+/// This is important because it contains the slowdown imposed by decompression, but without I/O
+/// slowdowns. This is the most realistic target to strive for with our process_file function, as
+/// we can try to work around I/O (assuming prefetching, memory mapped files, ...), but we probably
+/// have to de-compress logs at some point.
+fn zipped_qadim_ram_benchmark(c: &mut Criterion) {
+ let log_data = &fs::read(QADIM_LOG).unwrap();
+
+ c.bench_function("in-memory zipped Qadim", |b| {
+ b.iter(|| {
+ evtclib::process_stream(io::Cursor::new(log_data), evtclib::Compression::Zip).unwrap()
+ })
+ });
+}
+
+/// This benchmark tests the process_stream on a pre-extracted log.
+///
+/// This is important because it gets rid of any I/O and decompression slowdown. This probably
+/// measures our parsing performance most accurately: assuming that all data is readily available,
+/// the only slowdown that remains is our parsing and processing algorithm.
+fn unzipped_qadim_benchmark(c: &mut Criterion) {
+    let mut log_data = Vec::new();
+    let zip_data = fs::read(QADIM_LOG).unwrap();
+    let mut archive = ZipArchive::new(io::Cursor::new(zip_data)).unwrap();
+    archive
+        .by_index(0)
+        .unwrap()
+        .read_to_end(&mut log_data)
+        .unwrap();
+    let log_data = &log_data;
+
+    c.bench_function("in-memory unzipped Qadim", |b| {
+        b.iter(|| {
+            evtclib::process_stream(io::Cursor::new(log_data), evtclib::Compression::None).unwrap()
+        })
+    });
+}
+
+criterion_group! {
+    name = benches;
+    config = Criterion::default().sample_size(30);
+    targets = zipped_qadim_benchmark, zipped_qadim_ram_benchmark, unzipped_qadim_benchmark
+}
+criterion_main!(benches);
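With the [[bench]] section (harness = false, so criterion can supply its own main) and the criterion dev-dependency added in Cargo.toml above, these benchmarks run through the normal Cargo workflow; anything after -- is passed to criterion as a name filter:

    cargo bench
    cargo bench -- "in-memory"

The sample_size(30) in the group config lowers criterion's default of 100 samples per benchmark, which keeps the total runtime reasonable given that every iteration parses a full Qadim log.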