From 2d2525f28aadcb149e085dbcc5fa421109457c54 Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Thu, 10 Apr 2025 13:38:36 +0200 Subject: unroll Block::mul We only ever use this function for small factors, either 2 (in Block::exp), or 0-7 (in e, after the modulo 8). Therefore, for those small values, we hard-code how they are computed by manually unrolling the loop/recursion. This gives around 30% more throughput. --- src/block.rs | 76 ++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/block.rs b/src/block.rs index 3a9eea2..28ef0b3 100644 --- a/src/block.rs +++ b/src/block.rs @@ -128,24 +128,70 @@ impl IndexMut for Block { impl Mul for Block { type Output = Block; - fn mul(mut self, mut rhs: u32) -> Block { - loop { - match rhs { - 0 => break Block::NULL, - 1 => break self, - 2 => { - let mut result = self << 1; - if self[0] & 0x80 != 0 { - result[15] ^= 135; - } - break result; + fn mul(self, rhs: u32) -> Block { + match rhs { + 0 => Block::NULL, + 1 => self, + 2 => { + let mut result = self << 1; + if self[0] & 0x80 != 0 { + result[15] ^= 0x87; } - _ if rhs % 2 == 0 => { - self = self * 2; - rhs = rhs / 2; + result + } + 3 => { + let mut result = self << 1; + if self[0] & 0x80 != 0 { + result[15] ^= 0x87; + } + result ^ self + } + 4 => { + let mut result = self << 2; + if self[0] & 0x80 != 0 { + result[15] ^= 0x0E; + result[14] ^= 0x01; + } + if self[0] & 0x40 != 0 { + result[15] ^= 0x87; + } + result + } + 5 => { + let mut result = self << 2; + if self[0] & 0x80 != 0 { + result[15] ^= 0x0E; + result[14] ^= 0x01; + } + if self[0] & 0x40 != 0 { + result[15] ^= 0x87; + } + result ^ self + } + 6 => { + let mut result = self << 1; + if self[0] & 0x80 != 0 { + result[15] ^= 0x87; + } + result = result ^ self; + let mask = if result[0] & 0x80 != 0 { 0x87 } else { 0 }; + result = result << 1; + result[15] ^= mask; + result + } + 7 => { + let mut result = self << 1; + if self[0] & 0x80 != 0 { + result[15] ^= 0x87; } - _ => break self * (rhs - 1) ^ self, + result = result ^ self; + let mask = if result[0] & 0x80 != 0 { 0x87 } else { 0 }; + result = result << 1; + result[15] ^= mask; + result ^ self } + _ if rhs % 2 == 0 => self * 2 * (rhs / 2), + _ => self * (rhs - 1) ^ self, } } } -- cgit v1.2.3