From 0009a24bfb76fe425844c99769148d66c23a7225 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Thu, 10 Apr 2025 21:06:56 +0200 Subject: pre-multiply keys This can give a speedup of between 17% and 66%, depending on the input size (larger speedup for larger inputs). It seems that even the "optimized" multiply is slow enough to cause a real slowdown, especially for large inputs, where it is called many times. --- src/lib.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d1e0cab..102e254 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -694,6 +694,7 @@ fn aez_prf(key: &Key, tweaks: Tweak, buffer: &mut [u8]) { /// temporary values and makes it much faster to compute E_K^{j, i+1}, E_K^{j, i+2}, ... struct E { key_l: Block, + key_ls: [Block; 8], state: Estate, aes: aesround::AesImpl, } @@ -726,7 +727,22 @@ impl E { ki_p_i: key_i.exp(exponent), } }; - E { key_l, state, aes } + let key_ls = [ + key_l * 0, + key_l * 1, + key_l * 2, + key_l * 3, + key_l * 4, + key_l * 5, + key_l * 6, + key_l * 7, + ]; + E { + key_l, + key_ls, + state, + aes, + } } /// Complete this computation to evaluate E_K^{j,i}(block). @@ -737,7 +753,7 @@ impl E { self.aes.aes10(block ^ delta) } Estate::Pos { i, kj_t_j, ki_p_i } => { - let delta = kj_t_j ^ ki_p_i ^ (self.key_l * (i % 8)); + let delta = kj_t_j ^ ki_p_i ^ self.key_ls[i as usize % 8]; self.aes.aes4(block ^ delta) } } -- cgit v1.2.3