From d4a24eef7fd410c147de201d776089e0601317d5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 12 Jun 2020 00:48:18 +0200 Subject: initial work on comparison based filters This enables filters such as -time > 2020-01-01 -time < 2020-02-03 ... for time and duration, and later possibly also for more things (such as a COUNT(...) construct). This work tries to integrate them into the existing filter system as seamlessly as possible, by providing a Comparator which implements LogFilter. The "type checking" is done at parse time, so nonsensical comparisons like -time > 12s flat out give a parse error. This however might be changed to a more dynamic system with run-time type checking, in which case we could do away with the type parameter on Producer and simply work with a generic Value. The comparator would then return an error if two non-identical types were compared. Note that the system does not support arithmetic expressions, only simple comparisons to constant values. --- src/filters/log.rs | 2 +- src/filters/mod.rs | 1 + src/filters/values.rs | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 src/filters/values.rs (limited to 'src/filters') diff --git a/src/filters/log.rs b/src/filters/log.rs index 8cfdcb4..9ca7d3c 100644 --- a/src/filters/log.rs +++ b/src/filters/log.rs @@ -137,7 +137,7 @@ fn time_is_between( /// /// This expects the filename to have the datetime in the pattern `YYYYmmdd-HHMMSS` somewhere in /// it. 
-fn datetime_from_filename(name: &OsStr) -> Option> { +pub(crate) fn datetime_from_filename(name: &OsStr) -> Option> { let date_match = DATE_REGEX.find(name.to_str()?)?; let local_time = Local .datetime_from_str(date_match.as_str(), "%Y%m%d-%H%M%S") diff --git a/src/filters/mod.rs b/src/filters/mod.rs index 162b6f8..e966851 100644 --- a/src/filters/mod.rs +++ b/src/filters/mod.rs @@ -5,6 +5,7 @@ use num_traits::FromPrimitive as _; pub mod log; pub mod player; +pub mod values; /// Early filtering result. /// diff --git a/src/filters/values.rs b/src/filters/values.rs new file mode 100644 index 0000000..543b59c --- /dev/null +++ b/src/filters/values.rs @@ -0,0 +1,144 @@ +use std::{ + cmp::Ordering, + fmt::{self, Debug}, +}; + +use chrono::{DateTime, Duration, Utc}; + +use super::{log::LogFilter, Filter}; +use crate::{EarlyLogResult, LogResult}; + +pub trait Producer: Send + Sync + Debug { + type Output; + + fn produce_early(&self, _early_log: &EarlyLogResult) -> Option { + None + } + + fn produce(&self, log: &LogResult) -> Self::Output; +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum CompOp { + Less, + LessEqual, + Equal, + GreaterEqual, + Greater, +} + +impl fmt::Display for CompOp { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let symbol = match self { + CompOp::Less => "<", + CompOp::LessEqual => "<=", + CompOp::Equal => "=", + CompOp::GreaterEqual => ">=", + CompOp::Greater => ">", + }; + f.pad(symbol) + } +} + +impl CompOp { + pub fn matches(self, cmp: Ordering) -> bool { + match cmp { + Ordering::Less => self == CompOp::Less || self == CompOp::LessEqual, + Ordering::Equal => { + self == CompOp::LessEqual || self == CompOp::Equal || self == CompOp::GreaterEqual + } + Ordering::Greater => self == CompOp::Greater || self == CompOp::GreaterEqual, + } + } +} + +struct Comparator( + Box>, + CompOp, + Box>, +); + +impl Debug for Comparator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "({:?} {} {:?})", self.0, 
self.1, self.2) + } +} + +impl Filter for Comparator +where + V: Ord, +{ + fn filter(&self, log: &LogResult) -> bool { + let lhs = self.0.produce(log); + let rhs = self.2.produce(log); + self.1.matches(lhs.cmp(&rhs)) + } +} + +pub fn comparison( + lhs: Box>, + op: CompOp, + rhs: Box>, +) -> Box +where + V: Ord, +{ + Box::new(Comparator(lhs, op, rhs)) +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct ConstantProducer(V); + +impl Producer for ConstantProducer { + type Output = V; + fn produce_early(&self, _: &EarlyLogResult) -> Option { + Some(self.0.clone()) + } + + fn produce(&self, _: &LogResult) -> Self::Output { + self.0.clone() + } +} + +pub fn constant( + value: V, +) -> Box> { + Box::new(ConstantProducer(value)) +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +struct TimeProducer; + +impl Producer for TimeProducer { + type Output = DateTime; + + fn produce_early(&self, early_log: &EarlyLogResult) -> Option { + early_log + .log_file + .file_name() + .and_then(super::log::datetime_from_filename) + } + + fn produce(&self, log: &LogResult) -> Self::Output { + log.time + } +} + +pub fn time() -> Box>> { + Box::new(TimeProducer) +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +struct DurationProducer; + +impl Producer for DurationProducer { + type Output = Duration; + + fn produce(&self, log: &LogResult) -> Self::Output { + log.duration + } +} + +pub fn duration() -> Box> { + Box::new(DurationProducer) +} -- cgit v1.2.3 From c3d6e1bd3f5c793dc5df6b668a07ccc4d81455a9 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 12 Jun 2020 01:03:14 +0200 Subject: more documentation for filters::values --- src/filters/values.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'src/filters') diff --git a/src/filters/values.rs b/src/filters/values.rs index 543b59c..f64d6dd 100644 --- a/src/filters/values.rs +++ b/src/filters/values.rs @@ -1,3 +1,16 @@ +//! Value extractor system for raidgrep filters. +//! +//! 
[`Comparators`][Comparator] are special filters that work by first producing a value from a +//! given log (via the [`Producer`][Producer] trait) and then applying a comparison operator +//! ([`CompOp`][CompOp]) between the results. This type of filter gives a lot of flexibility, as it +//! can reduce the number of hard-coded filters one has to create (for example, `-before` and +//! `-after` can be merged). +//! +//! A [`Comparator`][Comparator] can only compare producers which produce the same type of value, +//! and that value must define a total order (i.e. it must implement [`Ord`][Ord]). Note that the +//! actual comparison is done at filter time, that is, a [`Comparator`][Comparator] is basically a +//! filter that first uses the producers to produce a value from the given log, and then compares +//! the two resulting values with the given comparison operator. use std::{ cmp::Ordering, fmt::{self, Debug}, @@ -8,22 +21,41 @@ use chrono::{DateTime, Duration, Utc}; use super::{log::LogFilter, Filter}; use crate::{EarlyLogResult, LogResult}; +/// A producer for a given value. +/// +/// A producer is something that produces a value of a certain type from a log, which can then be +/// used by [`Comparators`][Comparator] to do the actual comparison. pub trait Producer: Send + Sync + Debug { + /// Type of the value that will be produced. type Output; + /// Early production. + /// + /// This function should be implemented if the value can already be produced without the + /// complete log being parsed. This can speed up filtering if a lot of logs end up being thrown + /// away. + /// + /// If a value cannot be produced early, `None` should be returned. fn produce_early(&self, _early_log: &EarlyLogResult) -> Option { None } + /// Produce the value from the given log. fn produce(&self, log: &LogResult) -> Self::Output; } +/// The comparison operator to be used. 
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum CompOp { + /// The first value must be strictly less than the second value. Less, + /// The first value must be less than or equal to the second value. LessEqual, + /// Both values must be equal. Equal, + /// The first value must be greater than or equal to the second value. GreaterEqual, + /// The first value must be strictly greater than the second value. Greater, } @@ -41,6 +73,7 @@ impl fmt::Display for CompOp { } impl CompOp { + /// Check whether the comparison operator matches the given ordering. pub fn matches(self, cmp: Ordering) -> bool { match cmp { Ordering::Less => self == CompOp::Less || self == CompOp::LessEqual, @@ -75,6 +108,12 @@ where } } +/// Create a log filter that works by comparing two values. +/// +/// The values will be produced by the given producers. +/// +/// This function acts as a "bridge" between the value producers and the log filter system by +/// actually evaluating the comparison. pub fn comparison( lhs: Box>, op: CompOp, @@ -100,6 +139,7 @@ impl Producer for ConstantProducer { } } +/// A producer that always produces the given constant, regardless of the log. pub fn constant( value: V, ) -> Box> { @@ -124,6 +164,7 @@ impl Producer for TimeProducer { } } +/// A producer that produces the time at which a log was created. pub fn time() -> Box>> { Box::new(TimeProducer) } @@ -139,6 +180,7 @@ impl Producer for DurationProducer { } } +/// A producer that produces the duration that a fight lasted in the log. 
pub fn duration() -> Box> { Box::new(DurationProducer) } -- cgit v1.2.3 From e7449ee3883709320978526637720d46f16ff09d Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 12 Jun 2020 01:06:08 +0200 Subject: implement filter_early for Comparator --- src/filters/values.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/filters') diff --git a/src/filters/values.rs b/src/filters/values.rs index f64d6dd..3109ef4 100644 --- a/src/filters/values.rs +++ b/src/filters/values.rs @@ -18,7 +18,7 @@ use std::{ use chrono::{DateTime, Duration, Utc}; -use super::{log::LogFilter, Filter}; +use super::{log::LogFilter, Filter, Inclusion}; use crate::{EarlyLogResult, LogResult}; /// A producer for a given value. @@ -101,6 +101,15 @@ impl Filter for Comparator where V: Ord, { + fn filter_early(&self, early_log: &EarlyLogResult) -> Inclusion { + self.0 + .produce_early(early_log) + .and_then(|lhs| self.2.produce_early(early_log).map(|rhs| lhs.cmp(&rhs))) + .map(|ordering| self.1.matches(ordering)) + .map(Into::into) + .unwrap_or(Inclusion::Unknown) + } + fn filter(&self, log: &LogResult) -> bool { let lhs = self.0.produce(log); let rhs = self.2.produce(log); -- cgit v1.2.3 From 8d1aece508b76497daa04a90c00967b543d1741b Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 12 Jun 2020 01:11:51 +0200 Subject: add tests for CompOp::matches --- src/filters/values.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'src/filters') diff --git a/src/filters/values.rs b/src/filters/values.rs index 3109ef4..141aecd 100644 --- a/src/filters/values.rs +++ b/src/filters/values.rs @@ -193,3 +193,31 @@ impl Producer for DurationProducer { pub fn duration() -> Box> { Box::new(DurationProducer) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_compop_matches() { + assert!(CompOp::Less.matches(Ordering::Less)); + assert!(!CompOp::Less.matches(Ordering::Equal)); + assert!(!CompOp::Less.matches(Ordering::Greater)); + + 
assert!(CompOp::LessEqual.matches(Ordering::Less)); + assert!(CompOp::LessEqual.matches(Ordering::Equal)); + assert!(!CompOp::LessEqual.matches(Ordering::Greater)); + + assert!(!CompOp::Equal.matches(Ordering::Less)); + assert!(CompOp::Equal.matches(Ordering::Equal)); + assert!(!CompOp::Equal.matches(Ordering::Greater)); + + assert!(!CompOp::GreaterEqual.matches(Ordering::Less)); + assert!(CompOp::GreaterEqual.matches(Ordering::Equal)); + assert!(CompOp::GreaterEqual.matches(Ordering::Greater)); + + assert!(!CompOp::Greater.matches(Ordering::Less)); + assert!(!CompOp::Greater.matches(Ordering::Equal)); + assert!(CompOp::Greater.matches(Ordering::Greater)); + } +} -- cgit v1.2.3 From e23af286b81f4c9df0e0ca9d71113caeb909cb0f Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 12 Jun 2020 13:21:35 +0200 Subject: implement count(player: ...) construct --- src/filters/values.rs | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'src/filters') diff --git a/src/filters/values.rs b/src/filters/values.rs index 141aecd..df5b805 100644 --- a/src/filters/values.rs +++ b/src/filters/values.rs @@ -13,12 +13,14 @@ //! the two resulting values with the given comparison operator. use std::{ cmp::Ordering, + convert::TryFrom, fmt::{self, Debug}, }; use chrono::{DateTime, Duration, Utc}; +use evtclib::Agent; -use super::{log::LogFilter, Filter, Inclusion}; +use super::{log::LogFilter, player::PlayerFilter, Filter, Inclusion}; use crate::{EarlyLogResult, LogResult}; /// A producer for a given value. 
@@ -194,6 +196,44 @@ pub fn duration() -> Box> { Box::new(DurationProducer) } +#[derive(Debug)] +struct PlayerCountProducer(Box); + +impl Producer for PlayerCountProducer { + type Output = u8; + + fn produce_early(&self, early_log: &EarlyLogResult) -> Option { + let mut count = 0; + for agent in &early_log.evtc.agents { + if !agent.is_player() { + continue; + } + + let agent = Agent::try_from(agent); + if let Ok(agent) = agent { + let result = self.0.filter_early(&agent); + match result { + Inclusion::Include => count += 1, + Inclusion::Exclude => (), + Inclusion::Unknown => return None, + } + } else { + return None; + } + } + Some(count) + } + + fn produce(&self, log: &LogResult) -> Self::Output { + log.players.iter().filter(|p| self.0.filter(p)).count() as u8 + } +} + +/// A producer that counts the players matching the given filter. +pub fn player_count(filter: Box) -> Box> { + Box::new(PlayerCountProducer(filter)) +} + #[cfg(test)] mod tests { use super::*; -- cgit v1.2.3 From 4a3e7137334601828f56a3ee614f01d84bada4ce Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 12 Jun 2020 15:55:19 +0200 Subject: implement -after/-before in terms of -time It makes sense to unify this implementation to avoid code duplication and bugs that might be hidden. -after and -before can stay for now, as shortcuts for -time < and -time >, the same way we have other shortcuts as well. 
--- src/filters/log.rs | 65 +++++---------------------------------------------- src/filters/values.rs | 22 +++++++++++++++-- 2 files changed, 26 insertions(+), 61 deletions(-) (limited to 'src/filters') diff --git a/src/filters/log.rs b/src/filters/log.rs index 9ca7d3c..bde02e5 100644 --- a/src/filters/log.rs +++ b/src/filters/log.rs @@ -7,17 +7,12 @@ use super::{ Filter, Inclusion, }; -use std::{collections::HashSet, ffi::OsStr}; +use std::collections::HashSet; use evtclib::Boss; -use chrono::{DateTime, Datelike, Local, TimeZone, Utc, Weekday}; +use chrono::{DateTime, Datelike, Utc, Weekday}; use num_traits::FromPrimitive as _; -use once_cell::sync::Lazy; -use regex::Regex; - -/// The regular expression used to extract datetimes from filenames. -static DATE_REGEX: Lazy = Lazy::new(|| Regex::new(r"\d{8}-\d{6}").unwrap()); /// Filter trait used for filters that operate on complete logs. pub trait LogFilter = Filter; @@ -91,23 +86,9 @@ pub fn weekday(weekdays: HashSet) -> Box { } #[derive(Debug, Clone)] -struct TimeFilter(Option>, Option>, bool); +struct TimeFilter(Option>, Option>); impl Filter for TimeFilter { - fn filter_early(&self, early_log: &EarlyLogResult) -> Inclusion { - // Ignore the filename heuristic if the user wishes so. - if !self.2 { - return Inclusion::Unknown; - } - early_log - .log_file - .file_name() - .and_then(datetime_from_filename) - .map(|d| time_is_between(d, self.0, self.1)) - .map(Into::into) - .unwrap_or(Inclusion::Unknown) - } - fn filter(&self, log: &LogResult) -> bool { time_is_between(log.time, self.0, self.1) } @@ -133,46 +114,12 @@ fn time_is_between( after_ok && before_ok } -/// Try to extract the log time from the filename. -/// -/// This expects the filename to have the datetime in the pattern `YYYYmmdd-HHMMSS` somewhere in -/// it. 
-pub(crate) fn datetime_from_filename(name: &OsStr) -> Option> { - let date_match = DATE_REGEX.find(name.to_str()?)?; - let local_time = Local - .datetime_from_str(date_match.as_str(), "%Y%m%d-%H%M%S") - .ok()?; - Some(local_time.with_timezone(&Utc)) -} - -/// A `LogFilter` that only accepts logs in the given time frame. -/// -/// If a bound is not given, -Infinity is assumed for the lower bound, and Infinity for the upper -/// bound. -pub fn time(lower: Option>, upper: Option>) -> Box { - Box::new(TimeFilter(lower, upper, true)) -} - -/// A `LogFilter` that only accepts logs after the given date. -/// -/// Also see [`time`][time] and [`before`][before]. -pub fn after(when: DateTime) -> Box { - time(Some(when), None) -} - -/// A `LogFilter` that only accepts logs before the given date. -/// -/// Also see [`time`][time] and [`after`][after]. -pub fn before(when: DateTime) -> Box { - time(None, Some(when)) -} - /// A `LogFilter` that only accepts logs in the given time frame. /// -/// Compared to [`time`][time], this filter ignores the file name. This can result in more accurate -/// results if you renamed logs, but if also leads to a worse runtime. +/// Compared to [`-time`][super::values::time], this filter ignores the file name. This can result +/// in more accurate results if you renamed logs, but it also leads to a worse runtime. pub fn log_time(lower: Option>, upper: Option>) -> Box { - Box::new(TimeFilter(lower, upper, false)) + Box::new(TimeFilter(lower, upper)) } /// Like [`after`][after], but ignores the file name for date calculations. 
diff --git a/src/filters/values.rs b/src/filters/values.rs index df5b805..a523dad 100644 --- a/src/filters/values.rs +++ b/src/filters/values.rs @@ -14,15 +14,21 @@ use std::{ cmp::Ordering, convert::TryFrom, + ffi::OsStr, fmt::{self, Debug}, }; -use chrono::{DateTime, Duration, Utc}; +use chrono::{DateTime, Duration, Local, TimeZone, Utc}; use evtclib::Agent; +use once_cell::sync::Lazy; +use regex::Regex; use super::{log::LogFilter, player::PlayerFilter, Filter, Inclusion}; use crate::{EarlyLogResult, LogResult}; +/// The regular expression used to extract datetimes from filenames. +static DATE_REGEX: Lazy = Lazy::new(|| Regex::new(r"\d{8}-\d{6}").unwrap()); + /// A producer for a given value. /// /// A producer is something that produces a value of a certain type from a log, which can then be @@ -167,7 +173,7 @@ impl Producer for TimeProducer { early_log .log_file .file_name() - .and_then(super::log::datetime_from_filename) + .and_then(datetime_from_filename) } fn produce(&self, log: &LogResult) -> Self::Output { @@ -175,6 +181,18 @@ impl Producer for TimeProducer { } } +/// Try to extract the log time from the filename. +/// +/// This expects the filename to have the datetime in the pattern `YYYYmmdd-HHMMSS` somewhere in +/// it. +fn datetime_from_filename(name: &OsStr) -> Option> { + let date_match = DATE_REGEX.find(name.to_str()?)?; + let local_time = Local + .datetime_from_str(date_match.as_str(), "%Y%m%d-%H%M%S") + .ok()?; + Some(local_time.with_timezone(&Utc)) +} + /// A producer that produces the time at which a log was created. 
pub fn time() -> Box>> { Box::new(TimeProducer) -- cgit v1.2.3 From 918f1e4eeab94ee8850aefd7c17b48ea2adcd6d2 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 12 Jun 2020 16:04:17 +0200 Subject: fix up doc comments and tests --- src/filters/log.rs | 5 +++-- src/filters/mod.rs | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'src/filters') diff --git a/src/filters/log.rs b/src/filters/log.rs index bde02e5..30cf0cb 100644 --- a/src/filters/log.rs +++ b/src/filters/log.rs @@ -122,12 +122,12 @@ pub fn log_time(lower: Option>, upper: Option>) -> B Box::new(TimeFilter(lower, upper)) } -/// Like [`after`][after], but ignores the file name for date calculations. +/// Only include logs after the given date, but ignore the file name for date calculations. pub fn log_after(when: DateTime) -> Box { log_time(Some(when), None) } -/// Like [`before`][before], but ignores the file name for date calculations. +/// Only include logs before the given date, but ignore the file name for date calculations. pub fn log_before(when: DateTime) -> Box { log_time(None, Some(when)) } @@ -148,6 +148,7 @@ pub fn challenge_mote() -> Box { #[cfg(test)] mod tests { + use chrono::TimeZone; use super::*; #[test] diff --git a/src/filters/mod.rs b/src/filters/mod.rs index e966851..7ab0d42 100644 --- a/src/filters/mod.rs +++ b/src/filters/mod.rs @@ -63,8 +63,8 @@ pub trait Filter: Send + Sync + fmt::Debug { /// Determine early (before processing all events) whether the log stands a chance to be /// included. /// - /// Note that you can return [Inclusion::Unkown] if this filter cannot determine yet a definite - /// answer. + /// Note that you can return [Inclusion::Unknown] if this filter cannot determine yet a + /// definite answer. 
fn filter_early(&self, _: &Early) -> Inclusion { Inclusion::Unknown } -- cgit v1.2.3 From e22b79adaaa69761fc520d6cd57baee1025fa926 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Fri, 26 Jun 2020 16:47:15 +0200 Subject: fix formatting --- src/filters/log.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/filters') diff --git a/src/filters/log.rs b/src/filters/log.rs index 30cf0cb..10258a0 100644 --- a/src/filters/log.rs +++ b/src/filters/log.rs @@ -148,8 +148,8 @@ pub fn challenge_mote() -> Box { #[cfg(test)] mod tests { - use chrono::TimeZone; use super::*; + use chrono::TimeZone; #[test] fn test_time_is_between() { -- cgit v1.2.3