From 4425f230e7f3fae010a9316886eab7f960d3669c Mon Sep 17 00:00:00 2001
From: Daniel Schadt <kingdread@gmx.de>
Date: Fri, 23 Oct 2020 15:18:10 +0200
Subject: speed up agent-by-addr search

When this function was written, it was done under the assumption that
a) There are not a lot of agents, so linear search is fast enough
and
b) We just want it to work for now

However, it turns out that there can be a lot of agents, close to 1000
for the Qadim log for example. This means that there is quite some time
saving that we can do here, as get_agent_by_addr is used a lot in
set_agent_awares and set_agent_masters, so speeding this part up is
good!

We could build a HashMap, mapping the address to the agent (index), but
that would mean that we have to carry the hash map around. This patch
provides a simpler yet already good improvement: We invest a bit of time
after converting all agents to sort them by their address (as the agent
order is implementation defined anyway), so we can later use a binary
search to get the right agent. It's not O(1), as a hash map would be,
but it works in logarithmic time and already provides a big benefit:

Before
process Qadim           time:   [39.444 ms 39.501 ms 39.561 ms]

After
process Qadim           time:   [18.699 ms 18.744 ms 18.788 ms]
                        change: [-52.672% -52.546% -52.413%] (p = 0.00 < 0.05)

That is half of the processing time saved by a 3 line patch!
---
 src/processing.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/processing.rs b/src/processing.rs
index 3f72b39..a152d9e 100644
--- a/src/processing.rs
+++ b/src/processing.rs
@@ -23,6 +23,9 @@ use super::{raw, Agent, Event, EvtcError, Log};
 pub fn process(data: &raw::Evtc) -> Result<Log, EvtcError> {
     // Prepare "augmented" agents
     let mut agents = setup_agents(data)?;
+    // We sort the agents so we can do a binary search later in get_agent_by_addr. The order is not
+    // really defined or important anyway, so we can just choose whatever works best here.
+    agents.sort_by_key(Agent::addr);
     // Do the first aware/last aware field
     set_agent_awares(data, &mut agents)?;
 
@@ -103,9 +106,9 @@ fn setup_agents(data: &raw::Evtc) -> Result<Vec<Agent>, EvtcError> {
     data.agents.iter().map(Agent::try_from).collect()
 }
 
-#[inline]
 fn get_agent_by_addr(agents: &mut [Agent], addr: u64) -> Option<&mut Agent> {
-    agents.iter_mut().find(|agent| agent.addr() == addr)
+    let pos = agents.binary_search_by_key(&addr, Agent::addr).ok()?;
+    Some(&mut agents[pos])
 }
 
 fn set_agent_awares(data: &raw::Evtc, agents: &mut [Agent]) -> Result<(), EvtcError> {
-- 
cgit v1.2.3