From 5e05745e93a126673f2ba96d1746637fe0a3f52d Mon Sep 17 00:00:00 2001 From: Daniel Schadt Date: Tue, 15 Apr 2025 22:29:06 +0200 Subject: slightly speed up aez_prf This rarely matters, because we hardly ever expect tau > 16, but it helps if somebody decides to use aez as a way to generate a lot of pseudorandom bytes. With this change, full 16-byte chunks are XORed as whole blocks, which makes better use of SIMD block XORing where available; only a trailing partial chunk is still XORed byte by byte. --- benches/zears.rs | 5 +++++ src/lib.rs | 13 +++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/benches/zears.rs b/benches/zears.rs index 8ea78ba..1144087 100644 --- a/benches/zears.rs +++ b/benches/zears.rs @@ -23,6 +23,11 @@ fn bench(c: &mut Criterion) { b.iter(|| aez.encrypt_inplace(&[0], &[], 16, &mut out)) }); + group.bench_function(BenchmarkId::new("aez_prf", size), |b| { + let mut out = vec![0u8; size]; + b.iter(|| aez.encrypt_inplace(&[0], &[], size as u32, &mut out)) + }); + let buf = aez.encrypt(&[0], &[], 16, &buf); group.bench_function(BenchmarkId::new("decrypt", size), |b| { diff --git a/src/lib.rs b/src/lib.rs index 8985c2e..49a83d6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -626,13 +626,18 @@ fn aez_hash(aez: &Aez, tweaks: Tweak) -> Block { fn aez_prf(aez: &Aez, tweaks: Tweak, buffer: &mut [u8]) { let mut index = 0u128; let delta = aez_hash(aez, tweaks); - for chunk in buffer.chunks_mut(16) { + for chunk in buffer.chunks_exact_mut(16) { + let chunk: &mut [u8; 16] = chunk.try_into().unwrap(); let block = e(-1, 3, aez, delta ^ Block::from_int(index)); - for (a, b) in chunk.iter_mut().zip(block.bytes().iter()) { - *a ^= b; - } + (block ^ Block::from(*chunk)).write_to(chunk); index += 1; } + let suffix_start = buffer.len() - buffer.len() % 16; + let chunk = &mut buffer[suffix_start..]; + let block = e(-1, 3, aez, delta ^ Block::from_int(index)); + for (a, b) in chunk.iter_mut().zip(block.bytes().iter()) { + *a ^= *b; + } } /// Represents a computation of E_K^{j,i}. -- cgit v1.2.3