use super::block::Block;

#[cfg(target_arch = "x86_64")]
pub type AesImpl = x86_64::AesNi;

#[cfg(not(target_arch = "x86_64"))]
pub type AesImpl = AesSoft;

/// Provides the `aes4` and `aes10` round functions, keyed with the three blocks I, J and L.
pub trait AesRound {
    fn new(key_i: Block, key_j: Block, key_l: Block) -> Self;
    fn aes4(&self, value: Block) -> Block;
    fn aes10(&self, value: Block) -> Block;
}

/// Implementation of aes4 and aes10 in software.
///
/// Always available.
///
/// Uses the `aes` crate under the hood.
pub struct AesSoft {
    key_i: aes::Block,
    key_j: aes::Block,
    key_l: aes::Block,
}

impl AesRound for AesSoft {
    fn new(key_i: Block, key_j: Block, key_l: Block) -> Self {
        Self {
            key_i: key_i.bytes().into(),
            key_j: key_j.bytes().into(),
            key_l: key_l.bytes().into(),
        }
    }

    fn aes4(&self, value: Block) -> Block {
        // Four AES rounds, keyed with J, I, L and the zero block.
        let mut block: aes::Block = value.bytes().into();
        ::aes::hazmat::cipher_round(&mut block, &self.key_j);
        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
        ::aes::hazmat::cipher_round(&mut block, &self.key_l);
        ::aes::hazmat::cipher_round(&mut block, &Block::null().bytes().into());
        <Block as From<[u8; 16]>>::from(block.into())
    }

    fn aes10(&self, value: Block) -> Block {
        // Ten AES rounds, cycling through the keys I, J, L.
        let mut block: aes::Block = value.bytes().into();
        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
        ::aes::hazmat::cipher_round(&mut block, &self.key_j);
        ::aes::hazmat::cipher_round(&mut block, &self.key_l);
        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
        ::aes::hazmat::cipher_round(&mut block, &self.key_j);
        ::aes::hazmat::cipher_round(&mut block, &self.key_l);
        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
        ::aes::hazmat::cipher_round(&mut block, &self.key_j);
        ::aes::hazmat::cipher_round(&mut block, &self.key_l);
        ::aes::hazmat::cipher_round(&mut block, &self.key_i);
        <Block as From<[u8; 16]>>::from(block.into())
    }
}

// It feels silly re-implementing the native AES instruction (especially since the `aes` crate
// already uses it under the hood), but there is a big benefit here:
// First, we save time by loading the keys only once as __m128i values, which makes the whole
// thing a bit faster.
// More importantly though, when using target-cpu=native, we get nicely vectorized AES instructions
// (VAESENC), which we don't get if we go through aes::hazmat::cipher_round. This is a *huge*
// speedup, which we don't want to miss.
#[cfg(target_arch = "x86_64")]
pub mod x86_64 {
    use super::*;
    use core::arch::x86_64::*;

    cpufeatures::new!(cpuid_aes, "aes");

    /// Implementation of aes4 and aes10 using the AES-NI instructions.
    ///
    /// Falls back to [`AesSoft`] at runtime if the CPU does not support AES.
    pub struct AesNi {
        support: cpuid_aes::InitToken,
        fallback: AesSoft,
        key_i: __m128i,
        key_j: __m128i,
        key_l: __m128i,
        null: __m128i,
    }

    #[cfg(feature = "simd")]
    fn to_simd(block: Block) -> __m128i {
        block.simd().into()
    }

    #[cfg(not(feature = "simd"))]
    fn to_simd(block: Block) -> __m128i {
        let bytes = block.bytes();
        // SAFETY: loadu can load from unaligned memory
        unsafe { _mm_loadu_si128(bytes.as_ptr() as *const _) }
    }

    #[cfg(feature = "simd")]
    fn from_simd(simd: __m128i) -> Block {
        Block::from_simd(simd.into())
    }

    #[cfg(not(feature = "simd"))]
    fn from_simd(simd: __m128i) -> Block {
        let mut bytes = [0; 16];
        // SAFETY: storeu can store to unaligned memory
        unsafe {
            _mm_storeu_si128(bytes.as_mut_ptr() as *mut _, simd);
        }
        Block::from(bytes)
    }

    impl AesRound for AesNi {
        fn new(key_i: Block, key_j: Block, key_l: Block) -> Self {
            Self {
                support: cpuid_aes::init(),
                fallback: AesSoft::new(key_i, key_j, key_l),
                key_i: to_simd(key_i),
                key_j: to_simd(key_j),
                key_l: to_simd(key_l),
                null: to_simd(Block::null()),
            }
        }

        fn aes4(&self, value: Block) -> Block {
            if !self.support.get() {
                return self.fallback.aes4(value);
            }

            // SAFETY: Nothing should go wrong when calling AESENC
            unsafe {
                let mut block = to_simd(value);
                block = _mm_aesenc_si128(block, self.key_j);
                block = _mm_aesenc_si128(block, self.key_i);
                block = _mm_aesenc_si128(block, self.key_l);
                block = _mm_aesenc_si128(block, self.null);
                from_simd(block)
            }
        }

        fn aes10(&self, value: Block) -> Block {
            if !self.support.get() {
                return self.fallback.aes10(value);
            }

            // SAFETY: Nothing should go wrong when calling AESENC
            unsafe {
                let mut block = to_simd(value);
                block = _mm_aesenc_si128(block, self.key_i);
                block = _mm_aesenc_si128(block, self.key_j);
                block = _mm_aesenc_si128(block, self.key_l);
                block = _mm_aesenc_si128(block, self.key_i);
                block = _mm_aesenc_si128(block, self.key_j);
                block = _mm_aesenc_si128(block, self.key_l);
                block = _mm_aesenc_si128(block, self.key_i);
                block = _mm_aesenc_si128(block, self.key_j);
                block = _mm_aesenc_si128(block, self.key_l);
                block = _mm_aesenc_si128(block, self.key_i);
                from_simd(block)
            }
        }
    }
}
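
// A minimal sanity-check sketch: it assumes `Block` is `Copy`, constructible from `[u8; 16]`
// (as `from_simd` above already relies on), and that its raw value can be read back via
// `bytes()`. On x86_64 it checks that the AES-NI path agrees with the software fallback.
#[cfg(test)]
mod tests {
    use super::*;

    // Arbitrary, fixed test keys; any byte patterns would do here.
    fn test_keys() -> (Block, Block, Block) {
        (
            Block::from([0x11; 16]),
            Block::from([0x22; 16]),
            Block::from([0x33; 16]),
        )
    }

    #[test]
    fn aes_impl_is_deterministic() {
        let (i, j, l) = test_keys();
        let aes = AesImpl::new(i, j, l);
        let input = Block::from([0x44; 16]);
        assert_eq!(aes.aes4(input).bytes(), aes.aes4(input).bytes());
        assert_eq!(aes.aes10(input).bytes(), aes.aes10(input).bytes());
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn aesni_matches_software() {
        let (i, j, l) = test_keys();
        let soft = AesSoft::new(i, j, l);
        let ni = x86_64::AesNi::new(i, j, l);
        let input = Block::from([0x44; 16]);
        assert_eq!(soft.aes4(input).bytes(), ni.aes4(input).bytes());
        assert_eq!(soft.aes10(input).bytes(), ni.aes10(input).bytes());
    }
}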