author     Daniel Schadt <kingdread@gmx.de>   2025-04-10 19:34:07 +0200
committer  Daniel Schadt <kingdread@gmx.de>   2025-04-10 19:34:07 +0200
commit     d77f4adf04f2878853d0919f908d1b110f3c94f2 (patch)
tree       81d9ab0f9c1ba1bfb9ebe74d8f20362e47bad82b /src
parent     2d2525f28aadcb149e085dbcc5fa421109457c54 (diff)
implement aes4 and aes10 with native instructions
Even though aes::hazmat::cipher_round uses AES-NI instructions under the hood, simply loading the data (and the keys!) takes a significant amount of time. Sadly, the aes crate exposes no way to re-use the "loaded" keys.

By implementing aes4/aes10 directly with _mm_aesenc_si128, we can keep the keys properly aligned. We still keep the software backend as a fallback, using the software implementation of the aes crate.

This gives a ~70% speedup.
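The gist, as a minimal sketch (hypothetical type and method names, not this crate's actual API): load each round key into an __m128i once at construction time, so every subsequent round costs a single _mm_aesenc_si128 on an already-loaded key instead of a byte-array conversion per call.

    // x86_64-only sketch of the key-caching idea; the real implementation
    // is in src/aesround.rs below.
    use core::arch::x86_64::*;

    struct CachedRound {
        key: __m128i, // loaded once via _mm_loadu_si128
    }

    impl CachedRound {
        fn new(key_bytes: &[u8; 16]) -> Self {
            // SAFETY: _mm_loadu_si128 tolerates unaligned input.
            let key = unsafe { _mm_loadu_si128(key_bytes.as_ptr() as *const _) };
            Self { key }
        }

        // SAFETY: caller must have verified AES-NI support at runtime.
        unsafe fn round(&self, block: __m128i) -> __m128i {
            // One AES round; no per-call key load or conversion.
            _mm_aesenc_si128(block, self.key)
        }
    }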
Diffstat (limited to 'src')
-rw-r--r--  src/aesround.rs | 133
-rw-r--r--  src/lib.rs      |  47
2 files changed, 141 insertions(+), 39 deletions(-)
diff --git a/src/aesround.rs b/src/aesround.rs
new file mode 100644
index 0000000..0a06192
--- /dev/null
+++ b/src/aesround.rs
@@ -0,0 +1,133 @@
+use super::block::Block;
+
+#[cfg(target_arch = "x86_64")]
+pub type AesImpl = x86_64::AesNi;
+
+#[cfg(not(target_arch = "x86_64"))]
+pub type AesImpl = AesSoft;
+
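+/// Common interface over the hardware and software implementations of the
+/// aes4 and aes10 primitives.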
+pub trait AesRound {
+    fn new(key_i: Block, key_j: Block, key_l: Block) -> Self;
+    fn aes4(&self, value: Block) -> Block;
+    fn aes10(&self, value: Block) -> Block;
+}
+
+/// Implementation of aes4 and aes10 in software.
+///
+/// Always available.
+///
+/// Uses the `aes` crate under the hood.
+pub struct AesSoft {
+    key_i: aes::Block,
+    key_j: aes::Block,
+    key_l: aes::Block,
+}
+
+impl AesRound for AesSoft {
+    fn new(key_i: Block, key_j: Block, key_l: Block) -> Self {
+        Self {
+            key_i: key_i.0.into(),
+            key_j: key_j.0.into(),
+            key_l: key_l.0.into(),
+        }
+    }
+
+    fn aes4(&self, value: Block) -> Block {
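+        // Four AES rounds, keyed in order with J, I, L and the zero block.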
+        let mut block: aes::Block = value.0.into();
+        ::aes::hazmat::cipher_round(&mut block, &self.key_j);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_l);
+        ::aes::hazmat::cipher_round(&mut block, &Block::NULL.0.into());
+        Block(block.into())
+    }
+
+    fn aes10(&self, value: Block) -> Block {
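+        // Ten AES rounds, cycling through I, J, L and finishing on I.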
+        let mut block: aes::Block = value.0.into();
+        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_j);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_l);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_j);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_l);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_j);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_l);
+        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
+        Block(block.into())
+    }
+}
+
+#[cfg(target_arch = "x86_64")]
+pub mod x86_64 {
+    use super::*;
+    use core::arch::x86_64::*;
+
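+    // Runtime detection of the `aes` CPU feature; the result is computed
+    // once and cached behind the InitToken.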
+    cpufeatures::new!(cpuid_aes, "aes");
+
+    pub struct AesNi {
+        support: cpuid_aes::InitToken,
+        fallback: AesSoft,
+        key_i: __m128i,
+        key_j: __m128i,
+        key_l: __m128i,
+        null: __m128i,
+    }
+
+    impl AesRound for AesNi {
+        fn new(key_i: Block, key_j: Block, key_l: Block) -> Self {
+            // SAFETY: loadu can load from unaligned memory
+            unsafe {
+                Self {
+                    support: cpuid_aes::init(),
+                    fallback: AesSoft::new(key_i, key_j, key_l),
+                    key_i: _mm_loadu_si128(key_i.0.as_ptr() as *const _),
+                    key_j: _mm_loadu_si128(key_j.0.as_ptr() as *const _),
+                    key_l: _mm_loadu_si128(key_l.0.as_ptr() as *const _),
+                    null: _mm_loadu_si128(Block::NULL.0.as_ptr() as *const _),
+                }
+            }
+        }
+
+        fn aes4(&self, value: Block) -> Block {
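+            // Same J, I, L, zero key order as the software path, but the
+            // keys are already sitting in XMM values.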
+            if !self.support.get() {
+                return self.fallback.aes4(value);
+            }
+
+            // SAFETY: loadu can load from unaligned memory
+            unsafe {
+                let mut block = _mm_loadu_si128(value.0.as_ptr() as *const _);
+                block = _mm_aesenc_si128(block, self.key_j);
+                block = _mm_aesenc_si128(block, self.key_i);
+                block = _mm_aesenc_si128(block, self.key_l);
+                block = _mm_aesenc_si128(block, self.null);
+                let mut result = Block::default();
+                _mm_storeu_si128(result.0.as_mut_ptr() as *mut _, block);
+                result
+            }
+        }
+
+        fn aes10(&self, value: Block) -> Block {
+            if !self.support.get() {
+                return self.fallback.aes10(value);
+            }
+
+            // SAFETY: loadu can load from unaligned memory
+            unsafe {
+                let mut block = _mm_loadu_si128(value.0.as_ptr() as *const _);
+                block = _mm_aesenc_si128(block, self.key_i);
+                block = _mm_aesenc_si128(block, self.key_j);
+                block = _mm_aesenc_si128(block, self.key_l);
+                block = _mm_aesenc_si128(block, self.key_i);
+                block = _mm_aesenc_si128(block, self.key_j);
+                block = _mm_aesenc_si128(block, self.key_l);
+                block = _mm_aesenc_si128(block, self.key_i);
+                block = _mm_aesenc_si128(block, self.key_j);
+                block = _mm_aesenc_si128(block, self.key_l);
+                block = _mm_aesenc_si128(block, self.key_i);
+                let mut result = Block::default();
+                _mm_storeu_si128(result.0.as_mut_ptr() as *mut _, block);
+                result
+            }
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index b00612f..d1e0cab 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -86,11 +86,14 @@
use constant_time_eq::constant_time_eq;

mod accessor;
+mod aesround;
mod block;
+
#[cfg(test)]
mod testvectors;
use accessor::BlockAccessor;
+use aesround::AesRound;
use block::Block;

type Key = [u8; 48];
type Tweak<'a> = &'a [&'a [u8]];
@@ -247,34 +250,6 @@ impl Aez {
    }
}

-fn aesenc(block: &mut Block, key: &Block) {
-    aes::hazmat::cipher_round((&mut block.0).into(), &key.0.into());
-}
-
-fn aes4(key_i: &Block, key_j: &Block, key_l: &Block, block: &Block) -> Block {
-    let mut r = *block;
-    aesenc(&mut r, key_j);
-    aesenc(&mut r, key_i);
-    aesenc(&mut r, key_l);
-    aesenc(&mut r, &Block::NULL);
-    r
-}
-
-fn aes10(key_i: &Block, key_j: &Block, key_l: &Block, block: &Block) -> Block {
-    let mut r = *block;
-    aesenc(&mut r, key_i);
-    aesenc(&mut r, key_j);
-    aesenc(&mut r, key_l);
-    aesenc(&mut r, key_i);
-    aesenc(&mut r, key_j);
-    aesenc(&mut r, key_l);
-    aesenc(&mut r, key_i);
-    aesenc(&mut r, key_j);
-    aesenc(&mut r, key_l);
-    aesenc(&mut r, key_i);
-    r
-}
-
fn extract(key: &[u8]) -> [u8; 48] {
    if key.len() == 48 {
        key.try_into().unwrap()
@@ -717,12 +692,10 @@ fn aez_prf(key: &Key, tweaks: Tweak, buffer: &mut [u8]) {
///
/// As we usually need multiple values with a fixed j and ascending i, this struct saves the
/// temporary values and makes it much faster to compute E_K^{j, i+1}, E_K^{j, i+2}, ...
-#[derive(Clone, Debug)]
struct E {
-    key_i: Block,
-    key_j: Block,
    key_l: Block,
    state: Estate,
+    aes: aesround::AesImpl,
}

#[derive(Clone, Debug)]
@@ -741,6 +714,7 @@ impl E {
    /// Create a new "suspended" computation of E_K^{j,i}.
    fn new(j: i32, i: u32, key: &Key) -> Self {
        let (key_i, key_j, key_l) = split_key(key);
+        let aes = aesround::AesImpl::new(key_i, key_j, key_l);
        let state = if j == -1 {
            Estate::Neg { i }
        } else {
@@ -752,12 +726,7 @@
                ki_p_i: key_i.exp(exponent),
            }
        };
-        E {
-            key_i,
-            key_j,
-            key_l,
-            state,
-        }
+        E { key_l, state, aes }
    }

    /// Complete this computation to evaluate E_K^{j,i}(block).
@@ -765,11 +734,11 @@ impl E {
        match self.state {
            Estate::Neg { i } => {
                let delta = self.key_l * i;
-                aes10(&self.key_i, &self.key_j, &self.key_l, &(block ^ delta))
+                self.aes.aes10(block ^ delta)
            }
            Estate::Pos { i, kj_t_j, ki_p_i } => {
                let delta = kj_t_j ^ ki_p_i ^ (self.key_l * (i % 8));
-                aes4(&self.key_i, &self.key_j, &self.key_l, &(block ^ delta))
+                self.aes.aes4(block ^ delta)
            }
        }
    }