From f3af2cf6966b6033563d315492fc6b84f433f780 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 1 May 2020 14:09:30 +0200 Subject: enable date parsing from filenames This allows the date-based filters to work much faster. --- src/filters/log.rs | 107 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/filters/log.rs b/src/filters/log.rs index 60e6912..1728d4b 100644 --- a/src/filters/log.rs +++ b/src/filters/log.rs @@ -7,12 +7,17 @@ use super::{ Filter, Inclusion, }; -use std::collections::HashSet; +use std::{collections::HashSet, ffi::OsStr}; use evtclib::Boss; -use chrono::{DateTime, Datelike, Utc, Weekday}; +use chrono::{DateTime, Datelike, Local, TimeZone, Utc, Weekday}; use num_traits::FromPrimitive as _; +use once_cell::sync::Lazy; +use regex::Regex; + +/// The regular expression used to extract datetimes from filenames. +static DATE_REGEX: Lazy = Lazy::new(|| Regex::new(r"\d{8}-\d{6}").unwrap()); /// Filter trait used for filters that operate on complete logs. pub trait LogFilter = Filter; @@ -90,20 +95,53 @@ pub fn weekday(weekdays: HashSet) -> Box { struct TimeFilter(Option>, Option>); impl Filter for TimeFilter { + fn filter_early(&self, early_log: &EarlyLogResult) -> Inclusion { + early_log + .log_file + .file_name() + .and_then(datetime_from_filename) + .map(|d| time_is_between(d, self.0, self.1)) + .map(Into::into) + .unwrap_or(Inclusion::Unknown) + } + fn filter(&self, log: &LogResult) -> bool { - let after_ok = match self.0 { - Some(time) => time <= log.time, - None => true, - }; - let before_ok = match self.1 { - Some(time) => time >= log.time, - None => true, - }; - - after_ok && before_ok + time_is_between(log.time, self.0, self.1) } } +/// Check if the given time lies between `after` and `before`, both bounds inclusive. +/// +/// If one of the bounds is `None`, the time is always in bounds w.r.t. that bound. 
+fn time_is_between( + time: DateTime, + after: Option>, + before: Option>, +) -> bool { + let after_ok = match after { + Some(after) => after <= time, + None => true, + }; + let before_ok = match before { + Some(before) => before >= time, + None => true, + }; + + after_ok && before_ok +} + +/// Try to extract the log time from the filename. +/// +/// This expects the filename to have the datetime in the pattern `YYYYmmdd-HHMMSS` somewhere in +/// it. The match is parsed as local time and converted to UTC; any failure yields `None`. +fn datetime_from_filename(name: &OsStr) -> Option> { + let date_match = DATE_REGEX.find(name.to_str()?)?; + let local_time = Local + .datetime_from_str(date_match.as_str(), "%Y%m%d-%H%M%S") + .ok()?; + Some(local_time.with_timezone(&Utc)) +} + /// A `LogFilter` that only accepts logs in the given time frame. /// /// If a bound is not given, -Infinity is assumed for the lower bound, and Infinity for the upper @@ -125,3 +163,48 @@ pub fn after(when: DateTime) -> Box { pub fn before(when: DateTime) -> Box { time(None, Some(when)) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_time_is_between() { + assert!(time_is_between( + Utc.ymd(1955, 11, 5).and_hms(6, 15, 0), + None, + None, + )); + assert!(time_is_between( + Utc.ymd(1955, 11, 5).and_hms(6, 15, 0), + Some(Utc.ymd(1955, 11, 5).and_hms(5, 0, 0)), + None, + )); + assert!(time_is_between( + Utc.ymd(1955, 11, 5).and_hms(6, 15, 0), + None, + Some(Utc.ymd(1955, 11, 5).and_hms(7, 0, 0)), + )); + assert!(time_is_between( + Utc.ymd(1955, 11, 5).and_hms(6, 15, 0), + Some(Utc.ymd(1955, 11, 5).and_hms(5, 0, 0)), + Some(Utc.ymd(1955, 11, 5).and_hms(7, 0, 0)), + )); + + assert!(!time_is_between( + Utc.ymd(1955, 11, 5).and_hms(6, 15, 0), + Some(Utc.ymd(1955, 11, 5).and_hms(7, 0, 0)), + None, + )); + assert!(!time_is_between( + Utc.ymd(1955, 11, 5).and_hms(6, 15, 0), + None, + Some(Utc.ymd(1955, 11, 5).and_hms(5, 0, 0)), + )); + assert!(!time_is_between( + Utc.ymd(1955, 11, 5).and_hms(6, 15, 0), + Some(Utc.ymd(1955, 11, 5).and_hms(5, 0, 0)), + 
+ Some(Utc.ymd(1955, 11, 5).and_hms(6, 0, 0)), + )); + } +} -- cgit v1.2.3