From 5bfe77d93315111f80e3b56aee929fc33a14cf1f Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 23 Oct 2020 14:41:20 +0200
Subject: add some basic parsing benchmarks

This adds some simple benchmarks to test the speed of
evtclib::process_{file,stream}, just so we can quickly assess at a
high level whether changes make a big impact.

I'd like to add some more benchmarks in the future as well, mostly:
- at the higher level, where we go from RawEvtc to a log, benchmarking the
  process function itself (thus not benchmarking all the byte-twiddling
  deserialization code).
- at the Analyzer level
---
 Cargo.toml                   |  5 ++++
 benches/parsing_benchmark.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 benches/parsing_benchmark.rs

diff --git a/Cargo.toml b/Cargo.toml
index 252fd30..f09dc1e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,8 +17,13 @@ include = [
 [features]
 default = []
 
+[[bench]]
+name = "parsing_benchmark"
+harness = false
+
 [dev-dependencies]
 serde_json = "1.0"
+criterion = "0.3"
 
 [dependencies]
 num-traits = "0.2"
diff --git a/benches/parsing_benchmark.rs b/benches/parsing_benchmark.rs
new file mode 100644
index 0000000..e245f77
--- /dev/null
+++ b/benches/parsing_benchmark.rs
@@ -0,0 +1,63 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use std::{fs, io, io::Read};
+use zip::ZipArchive;
+
+const QADIM_LOG: &str = "tests/logs/qadim-20200427.zevtc";
+
+/// Measures `process_file` end to end on the zipped Qadim log stored on disk.
+///
+/// Every iteration goes through the path-based entry point, so the figure includes whatever file
+/// access and decompression `process_file` performs internally. This is the number a downstream
+/// application sees when it simply hands evtclib a path, which is the easiest way to read a log,
+/// so the reading strategy used internally should show up here.
+fn zipped_qadim_benchmark(c: &mut Criterion) {
+    // The path goes through `black_box` inside the closure, exactly once per timed call.
+    let parse = || evtclib::process_file(black_box(QADIM_LOG), evtclib::Compression::Zip).unwrap();
+    c.bench_function("on-disk zipped Qadim", |bencher| bencher.iter(parse));
+}
+
+/// Measures `process_stream` on the zipped Qadim log after it has been loaded into memory.
+///
+/// The archive bytes are read before the benchmark starts, so the timed part still pays for
+/// decompression but not for reading the file. That makes it the most realistic target for
+/// `process_file`: I/O can be worked around (prefetching, memory mapped files, ...), but the log
+/// probably has to be decompressed at some point.
+fn zipped_qadim_ram_benchmark(c: &mut Criterion) {
+    // Read the archive once, outside the timed closure, so no file I/O is measured.
+    let zipped = fs::read(QADIM_LOG).unwrap();
+    let parse = || {
+        let stream = io::Cursor::new(&zipped);
+        evtclib::process_stream(stream, evtclib::Compression::Zip).unwrap()
+    };
+    c.bench_function("in-memory zipped Qadim", |bencher| bencher.iter(parse));
+}
+
+/// Measures `process_stream` on the Qadim log after it has been extracted into memory.
+///
+/// With neither file I/O nor decompression inside the timed part, this is probably the most
+/// accurate measure of parsing performance: assuming all data is readily available, the only
+/// cost that remains is the parsing and processing algorithm itself.
+fn unzipped_qadim_benchmark(c: &mut Criterion) {
+    // Setup, not timed: load the archive and unpack its first entry into memory.
+    let zipped = io::Cursor::new(fs::read(QADIM_LOG).unwrap());
+    let mut archive = ZipArchive::new(zipped).unwrap();
+    let mut raw_log = Vec::new();
+    {
+        let mut entry = archive.by_index(0).unwrap();
+        entry.read_to_end(&mut raw_log).unwrap();
+    }
+
+    // Timed: parse the already-decompressed bytes, wrapped in a fresh cursor each iteration.
+    let parse = || {
+        let stream = io::Cursor::new(&raw_log);
+        evtclib::process_stream(stream, evtclib::Compression::None).unwrap()
+    };
+    c.bench_function("in-memory unzipped Qadim", |bencher| bencher.iter(parse));
+}
+
+criterion_group! {
+    name = benches; // group identifier, handed to `criterion_main!` below
+    config = Criterion::default().sample_size(30); // collect 30 samples per benchmark
+    targets = zipped_qadim_benchmark, zipped_qadim_ram_benchmark, unzipped_qadim_benchmark
+}
+criterion_main!(benches); // provides the bench binary's `main` (Cargo.toml sets `harness = false`)
-- 
cgit v1.2.3