about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel <kingdread@gmx.de> 2019-05-24 17:50:53 +0200
committer: Daniel <kingdread@gmx.de> 2019-05-24 17:50:53 +0200
commit: 9243e9c6cc6cdefe1565291a2933cc58556ebe9b (patch)
tree: c03c6117e81016b1548d9d83c31f03b2acdb14d9
parent: 54e29430b3a668e9e98d3fc6e1a107fd36af8af4 (diff)
download: raidgrep-9243e9c6cc6cdefe1565291a2933cc58556ebe9b.tar.gz
download: raidgrep-9243e9c6cc6cdefe1565291a2933cc58556ebe9b.tar.bz2
download: raidgrep-9243e9c6cc6cdefe1565291a2933cc58556ebe9b.zip
lazily parse log events
A lot of time is spent parsing the actual log events, especially when they are zipped, as they have to be decompressed first. This results in huge run-time hits, especially for files where we could determine very early whether we actually need them.

For example, player names are saved in the header, which can be examined very quickly. If we can determine at that stage that a log file will not appear in the result set, we don't need to parse all the log events.

This patch relies on the partial parsing support of evtclib to do exactly that. It parses only the header with the player names, and only if there's a match does it proceed to parse the events and do more filtering.

In the future, we can extend this even more; for example, we can also check the boss ID that way, since we can also access that in the header.

On the downside, we now have the zip handling logic replicated in raidgrep, as we want a "common" interface to extract the actual data stream. But this logic could be pushed back to evtclib after polishing it a bit. There are some problems with Rust's borrow checking though, which is why the zip handling code looks a bit convoluted.
-rw-r--r--  Cargo.toml      1
-rw-r--r--  src/errors.rs   5
-rw-r--r--  src/filters.rs  19
-rw-r--r--  src/main.rs     62
4 files changed, 67 insertions, 20 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 576e7d5..ec3f50a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,3 +15,4 @@ chrono = "0.4"
rayon = "1"
num-traits = "0.2"
humantime = "1.1"
+zip = "0.5"
diff --git a/src/errors.rs b/src/errors.rs
index 7cb1886..286a92e 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -14,5 +14,10 @@ quick_error! {
cause(err)
display("I/O error: {}", err)
}
+ Parsing(err: evtclib::raw::ParseError) {
+ from()
+ cause(err)
+ display("Parsing error: {}", err)
+ }
}
}
diff --git a/src/filters.rs b/src/filters.rs
index d8d43ea..8433690 100644
--- a/src/filters.rs
+++ b/src/filters.rs
@@ -1,16 +1,21 @@
-use evtclib::{AgentName, Log};
+use evtclib::{Agent, AgentName};
+use evtclib::raw::parser::PartialEvtc;
use super::{SearchField, LogResult, Opt};
use chrono::Datelike;
/// Do filtering based on the character or account name.
-pub fn filter_name(log: &Log, opt: &Opt) -> bool {
- for player in log.players() {
- match player.name() {
+pub fn filter_name(evtc: &PartialEvtc, opt: &Opt) -> bool {
+ for player in &evtc.agents {
+ let fancy = Agent::from_raw(player);
+ if fancy.is_err() {
+ continue;
+ }
+ match fancy.unwrap().name() {
AgentName::Player {
- account_name,
- character_name,
+ ref account_name,
+ ref character_name,
..
} => {
if (opt.field.contains(&SearchField::Account) && opt.expression.is_match(account_name))
@@ -19,7 +24,7 @@ pub fn filter_name(log: &Log, opt: &Opt) -> bool {
return true;
}
}
- _ => unreachable!(),
+ _ => (),
}
}
false
diff --git a/src/main.rs b/src/main.rs
index 099262c..87ebbb0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -9,9 +9,10 @@ extern crate num_traits;
extern crate rayon;
extern crate regex;
extern crate walkdir;
+extern crate zip;
use std::fs::File;
-use std::io::{self, BufReader};
+use std::io::{self, BufReader, Read, Seek};
use std::path::PathBuf;
use std::str::FromStr;
@@ -220,6 +221,30 @@ fn try_from_str_simple_error<T: FromStr>(input: &str) -> Result<T, String>
T::from_str(input).map_err(|_| format!("'{}' is an invalid value", input))
}
+
+enum ZipWrapper<R: Read + Seek> {
+ Raw(Option<R>),
+ Zipped(zip::ZipArchive<R>),
+}
+
+impl<R: Read + Seek> ZipWrapper<R> {
+ pub fn raw(input: R) -> Self {
+ ZipWrapper::Raw(Some(input))
+ }
+
+ pub fn zipped(input: R) -> Self {
+ ZipWrapper::Zipped(zip::ZipArchive::new(input).unwrap())
+ }
+
+ pub fn get_stream<'a>(&'a mut self) -> Box<(dyn Read + 'a)> {
+ match *self {
+ ZipWrapper::Raw(ref mut o) => Box::new(o.take().unwrap()),
+ ZipWrapper::Zipped(ref mut z) => Box::new(z.by_index(0).unwrap()),
+ }
+ }
+}
+
+
fn main() {
let opt = Opt::from_args();
@@ -258,8 +283,13 @@ fn grep(opt: &Opt) -> Result<(), RuntimeError> {
let entry = entry?;
s.spawn(move |_| {
if is_log_file(&entry) {
- if let Some(result) = search_log(&entry, opt).unwrap() {
- output::output(io::stdout(), opt, &result).unwrap();
+ let search = search_log(&entry, opt);
+ match search {
+ Ok(None) => (),
+ Ok(Some(result)) => output::output(io::stdout(), opt, &result).unwrap(),
+ Err(err) => {
+ debug!("Runtime error while scanning {:?}: {}", entry.path(), err);
+ }
}
}
});
@@ -274,18 +304,25 @@ fn grep(opt: &Opt) -> Result<(), RuntimeError> {
/// If the log doesn't match, returns `Ok(None)`.
/// If there was a fatal error, returns `Err(..)`.
fn search_log(entry: &DirEntry, opt: &Opt) -> Result<Option<LogResult>, RuntimeError> {
- let mut input = BufReader::new(File::open(entry.path())?);
- let raw = if entry
+ let file_stream = BufReader::new(File::open(entry.path())?);
+ let is_zip = entry
.file_name()
.to_str()
.map(|n| n.ends_with(".zip") || n.ends_with(".zevtc"))
- .unwrap_or(false)
- {
- evtclib::raw::parse_zip(&mut input)
- } else {
- evtclib::raw::parse_file(&mut input)
+ .unwrap_or(false);
+ let mut wrapper = match is_zip {
+ false => ZipWrapper::raw(file_stream),
+ true => ZipWrapper::zipped(file_stream),
};
- let parsed = raw.ok().and_then(|m| evtclib::process(&m).ok());
+ let mut stream = wrapper.get_stream();
+ let partial = evtclib::raw::parser::parse_partial_file(&mut stream)?;
+
+ if filters::filter_name(&partial, opt) == opt.invert {
+ return Ok(None)
+ }
+
+ let raw = evtclib::raw::parser::finish_parsing(partial, &mut stream)?;
+ let parsed = evtclib::process(&raw).ok();
let log = if let Some(e) = parsed {
e
} else {
@@ -295,8 +332,7 @@ fn search_log(entry: &DirEntry, opt: &Opt) -> Result<Option<LogResult>, RuntimeE
let info = extract_info(entry, &log);
- let take_log = filters::filter_name(&log, opt) == !opt.invert
- && filters::filter_outcome(&info, opt)
+ let take_log = filters::filter_outcome(&info, opt)
&& filters::filter_weekday(&info, opt)
&& filters::filter_time(&info, opt);