diff options
author | Daniel <kingdread@gmx.de> | 2019-05-24 17:50:53 +0200 |
---|---|---|
committer | Daniel <kingdread@gmx.de> | 2019-05-24 17:50:53 +0200 |
commit | 9243e9c6cc6cdefe1565291a2933cc58556ebe9b (patch) | |
tree | c03c6117e81016b1548d9d83c31f03b2acdb14d9 | |
parent | 54e29430b3a668e9e98d3fc6e1a107fd36af8af4 (diff) | |
download | raidgrep-9243e9c6cc6cdefe1565291a2933cc58556ebe9b.tar.gz raidgrep-9243e9c6cc6cdefe1565291a2933cc58556ebe9b.tar.bz2 raidgrep-9243e9c6cc6cdefe1565291a2933cc58556ebe9b.zip |
lazily parse log events
A lot of time is spent parsing the actual log events, especially when
they are zipped, as they have to be decompressed first. This results in
huge run-time hits, especially for files where we could determine very
early whether we actually need them.
For example, player names are saved in the header, which can be examined
very quickly. If we can determine at that stage that a log file will not
appear in the result set, we don't need to parse all the log events.
This patch relies on the partial parsing support of evtclib to do
exactly that. It parses only the header with the player names, and only
if there's a match, it will proceed to parse the events and do more
filtering.
In the future, we can extend this even more; for example, we can also
check the boss ID that way, since we can also access that in the header.
On the downside, we now have the zip handling logic replicated in
raidgrep, as we want a "common" interface to extract the actual data
stream. But this logic could be pushed back to evtclib after polishing
it a bit. There are some problems with Rust's borrow checking though,
which is why it looks a bit convoluted.
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | src/errors.rs | 5 | ||||
-rw-r--r-- | src/filters.rs | 19 | ||||
-rw-r--r-- | src/main.rs | 62 |
4 files changed, 67 insertions, 20 deletions
@@ -15,3 +15,4 @@ chrono = "0.4" rayon = "1" num-traits = "0.2" humantime = "1.1" +zip = "0.5" diff --git a/src/errors.rs b/src/errors.rs index 7cb1886..286a92e 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -14,5 +14,10 @@ quick_error! { cause(err) display("I/O error: {}", err) } + Parsing(err: evtclib::raw::ParseError) { + from() + cause(err) + display("Parsing error: {}", err) + } } } diff --git a/src/filters.rs b/src/filters.rs index d8d43ea..8433690 100644 --- a/src/filters.rs +++ b/src/filters.rs @@ -1,16 +1,21 @@ -use evtclib::{AgentName, Log}; +use evtclib::{Agent, AgentName}; +use evtclib::raw::parser::PartialEvtc; use super::{SearchField, LogResult, Opt}; use chrono::Datelike; /// Do filtering based on the character or account name. -pub fn filter_name(log: &Log, opt: &Opt) -> bool { - for player in log.players() { - match player.name() { +pub fn filter_name(evtc: &PartialEvtc, opt: &Opt) -> bool { + for player in &evtc.agents { + let fancy = Agent::from_raw(player); + if fancy.is_err() { + continue; + } + match fancy.unwrap().name() { AgentName::Player { - account_name, - character_name, + ref account_name, + ref character_name, .. 
} => { if (opt.field.contains(&SearchField::Account) && opt.expression.is_match(account_name)) @@ -19,7 +24,7 @@ pub fn filter_name(log: &Log, opt: &Opt) -> bool { return true; } } - _ => unreachable!(), + _ => (), } } false diff --git a/src/main.rs b/src/main.rs index 099262c..87ebbb0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,9 +9,10 @@ extern crate num_traits; extern crate rayon; extern crate regex; extern crate walkdir; +extern crate zip; use std::fs::File; -use std::io::{self, BufReader}; +use std::io::{self, BufReader, Read, Seek}; use std::path::PathBuf; use std::str::FromStr; @@ -220,6 +221,30 @@ fn try_from_str_simple_error<T: FromStr>(input: &str) -> Result<T, String> T::from_str(input).map_err(|_| format!("'{}' is an invalid value", input)) } + +enum ZipWrapper<R: Read + Seek> { + Raw(Option<R>), + Zipped(zip::ZipArchive<R>), +} + +impl<R: Read + Seek> ZipWrapper<R> { + pub fn raw(input: R) -> Self { + ZipWrapper::Raw(Some(input)) + } + + pub fn zipped(input: R) -> Self { + ZipWrapper::Zipped(zip::ZipArchive::new(input).unwrap()) + } + + pub fn get_stream<'a>(&'a mut self) -> Box<(dyn Read + 'a)> { + match *self { + ZipWrapper::Raw(ref mut o) => Box::new(o.take().unwrap()), + ZipWrapper::Zipped(ref mut z) => Box::new(z.by_index(0).unwrap()), + } + } +} + + fn main() { let opt = Opt::from_args(); @@ -258,8 +283,13 @@ fn grep(opt: &Opt) -> Result<(), RuntimeError> { let entry = entry?; s.spawn(move |_| { if is_log_file(&entry) { - if let Some(result) = search_log(&entry, opt).unwrap() { - output::output(io::stdout(), opt, &result).unwrap(); + let search = search_log(&entry, opt); + match search { + Ok(None) => (), + Ok(Some(result)) => output::output(io::stdout(), opt, &result).unwrap(), + Err(err) => { + debug!("Runtime error while scanning {:?}: {}", entry.path(), err); + } } } }); @@ -274,18 +304,25 @@ fn grep(opt: &Opt) -> Result<(), RuntimeError> { /// If the log doesn't match, returns `Ok(None)`. 
/// If there was a fatal error, returns `Err(..)`. fn search_log(entry: &DirEntry, opt: &Opt) -> Result<Option<LogResult>, RuntimeError> { - let mut input = BufReader::new(File::open(entry.path())?); - let raw = if entry + let file_stream = BufReader::new(File::open(entry.path())?); + let is_zip = entry .file_name() .to_str() .map(|n| n.ends_with(".zip") || n.ends_with(".zevtc")) - .unwrap_or(false) - { - evtclib::raw::parse_zip(&mut input) - } else { - evtclib::raw::parse_file(&mut input) + .unwrap_or(false); + let mut wrapper = match is_zip { + false => ZipWrapper::raw(file_stream), + true => ZipWrapper::zipped(file_stream), }; - let parsed = raw.ok().and_then(|m| evtclib::process(&m).ok()); + let mut stream = wrapper.get_stream(); + let partial = evtclib::raw::parser::parse_partial_file(&mut stream)?; + + if filters::filter_name(&partial, opt) == opt.invert { + return Ok(None) + } + + let raw = evtclib::raw::parser::finish_parsing(partial, &mut stream)?; + let parsed = evtclib::process(&raw).ok(); let log = if let Some(e) = parsed { e } else { @@ -295,8 +332,7 @@ fn search_log(entry: &DirEntry, opt: &Opt) -> Result<Option<LogResult>, RuntimeE let info = extract_info(entry, &log); - let take_log = filters::filter_name(&log, opt) == !opt.invert - && filters::filter_outcome(&info, opt) + let take_log = filters::filter_outcome(&info, opt) && filters::filter_weekday(&info, opt) && filters::filter_time(&info, opt); |