diff options
| author | Daniel Schadt <kingdread@gmx.de> | 2025-04-10 13:38:36 +0200 | 
|---|---|---|
| committer | Daniel Schadt <kingdread@gmx.de> | 2025-04-10 18:22:46 +0200 | 
| commit | 2d2525f28aadcb149e085dbcc5fa421109457c54 (patch) | |
| tree | ad7b67d0b78f2bb33cb6f4957b036158ab84c662 /src/block.rs | |
| parent | a259cff1894c546647f0b975e2d061327a92f879 (diff) | |
| download | zears-2d2525f28aadcb149e085dbcc5fa421109457c54.tar.gz zears-2d2525f28aadcb149e085dbcc5fa421109457c54.tar.bz2 zears-2d2525f28aadcb149e085dbcc5fa421109457c54.zip | |
unroll Block::mul
We only ever use this function for small factors: either 2 (in
`Block::exp`) or 0-7 (in `e`, after the modulo 8). Therefore, for those
small values, we hard-code how they are computed by manually unrolling
the loop/recursion.
This gives around 30% more throughput.
Diffstat (limited to 'src/block.rs')
| -rw-r--r-- | src/block.rs | 76 | 
1 files changed, 61 insertions, 15 deletions
| diff --git a/src/block.rs b/src/block.rs index 3a9eea2..28ef0b3 100644 --- a/src/block.rs +++ b/src/block.rs @@ -128,24 +128,70 @@ impl IndexMut<usize> for Block {  impl Mul<u32> for Block {      type Output = Block; -    fn mul(mut self, mut rhs: u32) -> Block { -        loop { -            match rhs { -                0 => break Block::NULL, -                1 => break self, -                2 => { -                    let mut result = self << 1; -                    if self[0] & 0x80 != 0 { -                        result[15] ^= 135; -                    } -                    break result; +    fn mul(self, rhs: u32) -> Block { +        match rhs { +            0 => Block::NULL, +            1 => self, +            2 => { +                let mut result = self << 1; +                if self[0] & 0x80 != 0 { +                    result[15] ^= 0x87;                  } -                _ if rhs % 2 == 0 => { -                    self = self * 2; -                    rhs = rhs / 2; +                result +            } +            3 => { +                let mut result = self << 1; +                if self[0] & 0x80 != 0 { +                    result[15] ^= 0x87; +                } +                result ^ self +            } +            4 => { +                let mut result = self << 2; +                if self[0] & 0x80 != 0 { +                    result[15] ^= 0x0E; +                    result[14] ^= 0x01; +                } +                if self[0] & 0x40 != 0 { +                    result[15] ^= 0x87; +                } +                result +            } +            5 => { +                let mut result = self << 2; +                if self[0] & 0x80 != 0 { +                    result[15] ^= 0x0E; +                    result[14] ^= 0x01; +                } +                if self[0] & 0x40 != 0 { +                    result[15] ^= 0x87; +                } +                result ^ self +            } +            6 => { +                let 
mut result = self << 1; +                if self[0] & 0x80 != 0 { +                    result[15] ^= 0x87; +                } +                result = result ^ self; +                let mask = if result[0] & 0x80 != 0 { 0x87 } else { 0 }; +                result = result << 1; +                result[15] ^= mask; +                result +            } +            7 => { +                let mut result = self << 1; +                if self[0] & 0x80 != 0 { +                    result[15] ^= 0x87;                  } -                _ => break self * (rhs - 1) ^ self, +                result = result ^ self; +                let mask = if result[0] & 0x80 != 0 { 0x87 } else { 0 }; +                result = result << 1; +                result[15] ^= mask; +                result ^ self              } +            _ if rhs % 2 == 0 => self * 2 * (rhs / 2), +            _ => self * (rhs - 1) ^ self,          }      }  } | 
