From 9243e9c6cc6cdefe1565291a2933cc58556ebe9b Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 24 May 2019 17:50:53 +0200 Subject: lazily parse log events A lot of time is spent parsing the actual log events, especially when they are zipped, as they have to be decompressed first. This results in huge run-time hits, especially for files where we could determine very early whether we actually need them. For example, player names are saved in the header, which can be examined very quickly. If we can determine at that stage that a log file will not appear in the result set, we don't need to parse all the log events. This patch relies on the partial parsing support of evtclib to do exactly that. It parses only the header with the player names, and only if there's a match, it will proceed to parse the events and do more filtering. In the future, we can extend this even more; for example, we can also check the boss ID that way, since we can also access that in the header. On the downside, we now have the zip handling logic replicated in raidgrep, as we want a "common" interface to extract the actual data stream. But this logic could be pushed back to evtclib after polishing it a bit. There are some problems with Rust's borrow checking though, which is why it looks a bit convoluted. --- src/errors.rs | 5 +++++ src/filters.rs | 19 +++++++++++------- src/main.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 66 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/errors.rs b/src/errors.rs index 7cb1886..286a92e 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -14,5 +14,10 @@ quick_error! 
{ cause(err) display("I/O error: {}", err) } + Parsing(err: evtclib::raw::ParseError) { + from() + cause(err) + display("Parsing error: {}", err) + } } } diff --git a/src/filters.rs b/src/filters.rs index d8d43ea..8433690 100644 --- a/src/filters.rs +++ b/src/filters.rs @@ -1,16 +1,21 @@ -use evtclib::{AgentName, Log}; +use evtclib::{Agent, AgentName}; +use evtclib::raw::parser::PartialEvtc; use super::{SearchField, LogResult, Opt}; use chrono::Datelike; /// Do filtering based on the character or account name. -pub fn filter_name(log: &Log, opt: &Opt) -> bool { - for player in log.players() { - match player.name() { +pub fn filter_name(evtc: &PartialEvtc, opt: &Opt) -> bool { + for player in &evtc.agents { + let fancy = Agent::from_raw(player); + if fancy.is_err() { + continue; + } + match fancy.unwrap().name() { AgentName::Player { - account_name, - character_name, + ref account_name, + ref character_name, .. } => { if (opt.field.contains(&SearchField::Account) && opt.expression.is_match(account_name)) @@ -19,7 +24,7 @@ pub fn filter_name(log: &Log, opt: &Opt) -> bool { return true; } } - _ => unreachable!(), + _ => (), } } false diff --git a/src/main.rs b/src/main.rs index 099262c..87ebbb0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,9 +9,10 @@ extern crate num_traits; extern crate rayon; extern crate regex; extern crate walkdir; +extern crate zip; use std::fs::File; -use std::io::{self, BufReader}; +use std::io::{self, BufReader, Read, Seek}; use std::path::PathBuf; use std::str::FromStr; @@ -220,6 +221,30 @@ fn try_from_str_simple_error(input: &str) -> Result T::from_str(input).map_err(|_| format!("'{}' is an invalid value", input)) } + +enum ZipWrapper { + Raw(Option), + Zipped(zip::ZipArchive), +} + +impl ZipWrapper { + pub fn raw(input: R) -> Self { + ZipWrapper::Raw(Some(input)) + } + + pub fn zipped(input: R) -> Self { + ZipWrapper::Zipped(zip::ZipArchive::new(input).unwrap()) + } + + pub fn get_stream<'a>(&'a mut self) -> Box<(dyn Read + 'a)> { + 
match *self { + ZipWrapper::Raw(ref mut o) => Box::new(o.take().unwrap()), + ZipWrapper::Zipped(ref mut z) => Box::new(z.by_index(0).unwrap()), + } + } +} + + fn main() { let opt = Opt::from_args(); @@ -258,8 +283,13 @@ fn grep(opt: &Opt) -> Result<(), RuntimeError> { let entry = entry?; s.spawn(move |_| { if is_log_file(&entry) { - if let Some(result) = search_log(&entry, opt).unwrap() { - output::output(io::stdout(), opt, &result).unwrap(); + let search = search_log(&entry, opt); + match search { + Ok(None) => (), + Ok(Some(result)) => output::output(io::stdout(), opt, &result).unwrap(), + Err(err) => { + debug!("Runtime error while scanning {:?}: {}", entry.path(), err); + } } } }); @@ -274,18 +304,25 @@ fn grep(opt: &Opt) -> Result<(), RuntimeError> { /// If the log doesn't match, returns `Ok(None)`. /// If there was a fatal error, returns `Err(..)`. fn search_log(entry: &DirEntry, opt: &Opt) -> Result, RuntimeError> { - let mut input = BufReader::new(File::open(entry.path())?); - let raw = if entry + let file_stream = BufReader::new(File::open(entry.path())?); + let is_zip = entry .file_name() .to_str() .map(|n| n.ends_with(".zip") || n.ends_with(".zevtc")) - .unwrap_or(false) - { - evtclib::raw::parse_zip(&mut input) - } else { - evtclib::raw::parse_file(&mut input) + .unwrap_or(false); + let mut wrapper = match is_zip { + false => ZipWrapper::raw(file_stream), + true => ZipWrapper::zipped(file_stream), }; - let parsed = raw.ok().and_then(|m| evtclib::process(&m).ok()); + let mut stream = wrapper.get_stream(); + let partial = evtclib::raw::parser::parse_partial_file(&mut stream)?; + + if filters::filter_name(&partial, opt) == opt.invert { + return Ok(None) + } + + let raw = evtclib::raw::parser::finish_parsing(partial, &mut stream)?; + let parsed = evtclib::process(&raw).ok(); let log = if let Some(e) = parsed { e } else { @@ -295,8 +332,7 @@ fn search_log(entry: &DirEntry, opt: &Opt) -> Result, RuntimeE let info = extract_info(entry, &log); - let take_log = 
filters::filter_name(&log, opt) == !opt.invert - && filters::filter_outcome(&info, opt) + let take_log = filters::filter_outcome(&info, opt) && filters::filter_weekday(&info, opt) && filters::filter_time(&info, opt); -- cgit v1.2.3