1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
use super::block::Block;
/// AES round implementation selected for the compilation target.
///
/// On x86_64 this is [`x86_64::AesNi`], which uses the hardware AES instruction when the CPU
/// reports support for it at runtime and otherwise delegates to [`AesSoft`].
#[cfg(target_arch = "x86_64")]
pub type AesImpl = x86_64::AesNi;
/// AES round implementation selected for the compilation target.
///
/// On every target other than x86_64 this is the software implementation [`AesSoft`].
#[cfg(not(target_arch = "x86_64"))]
pub type AesImpl = AesSoft;
/// Keyed functions that apply a fixed number of AES encryption rounds to a [`Block`].
///
/// An implementor is constructed from three round keys and then applies them in a fixed order;
/// the order used by the implementations in this file is described on each method.
pub trait AesRound {
/// Creates an instance from the three round keys `key_i`, `key_j` and `key_l`.
fn new(key_i: Block, key_j: Block, key_l: Block) -> Self;
/// Applies four AES encryption rounds to `value` and returns the result.
///
/// The implementations in this file use the round keys `key_j`, `key_i`, `key_l` and
/// `Block::null()`, in that order.
fn aes4(&self, value: Block) -> Block;
/// Applies ten AES encryption rounds to `value` and returns the result.
///
/// The implementations in this file cycle through `key_i`, `key_j`, `key_l`, starting and
/// ending with `key_i`.
fn aes10(&self, value: Block) -> Block;
}
/// Implementation of aes4 and aes10 in software.
///
/// Always available.
///
/// Uses the `aes` crate under the hood: the keys are stored as `aes::Block` so that they can be
/// handed to `aes::hazmat::cipher_round` without converting them on every call.
pub struct AesSoft {
// Round key built from the `key_i` argument of `new`.
key_i: aes::Block,
// Round key built from the `key_j` argument of `new`.
key_j: aes::Block,
// Round key built from the `key_l` argument of `new`.
key_l: aes::Block,
}
impl AesRound for AesSoft {
    /// Converts the three round keys into `aes::Block`s once, so the round functions can pass
    /// them to `aes::hazmat::cipher_round` directly.
    fn new(key_i: Block, key_j: Block, key_l: Block) -> Self {
        let key_i: aes::Block = key_i.bytes().into();
        let key_j: aes::Block = key_j.bytes().into();
        let key_l: aes::Block = key_l.bytes().into();
        Self { key_i, key_j, key_l }
    }

    /// Runs four AES cipher rounds over `value`, keyed with `key_j`, `key_i`, `key_l` and
    /// `Block::null()`, in that order.
    fn aes4(&self, value: Block) -> Block {
        let null_key: aes::Block = Block::null().bytes().into();
        let schedule = [&self.key_j, &self.key_i, &self.key_l, &null_key];

        let mut state: aes::Block = value.bytes().into();
        for round_key in schedule {
            ::aes::hazmat::cipher_round(&mut state, round_key);
        }

        let bytes: [u8; 16] = state.into();
        <Block as From<[u8; 16]>>::from(bytes)
    }

    /// Runs ten AES cipher rounds over `value`, cycling through `key_i`, `key_j`, `key_l` and
    /// finishing on `key_i`.
    fn aes10(&self, value: Block) -> Block {
        let (i, j, l) = (&self.key_i, &self.key_j, &self.key_l);
        // Spelled out in full so the ten-round key order is visible at a glance.
        let schedule = [i, j, l, i, j, l, i, j, l, i];

        let mut state: aes::Block = value.bytes().into();
        for round_key in schedule {
            ::aes::hazmat::cipher_round(&mut state, round_key);
        }

        let bytes: [u8; 16] = state.into();
        <Block as From<[u8; 16]>>::from(bytes)
    }
}
// It may feel silly to wrap the native AES instruction ourselves (especially since the `aes`
// crate already uses it under the hood), but there are two benefits here:
// First, we can save time by only loading the keys once as a __m128i, which makes the whole thing
// a bit faster.
// More importantly though, when using target-cpu=native, we get nicely vectorized AES instructions
// (VAESENC), which we don't get if we go through aes::hazmat::cipher_round. This is a *huge*
// speedup, which we don't want to miss.
#[cfg(target_arch = "x86_64")]
pub mod x86_64 {
    use super::*;
    use core::arch::x86_64::*;

    // Generates the `cpuid_aes` module used for runtime detection of the "aes" CPU feature.
    cpufeatures::new!(cpuid_aes, "aes");

    /// Implementation of aes4 and aes10 on top of the x86_64 `AESENC` instruction.
    ///
    /// CPU support is detected when the value is constructed. Every call checks the detection
    /// result and delegates to the embedded [`AesSoft`] when the feature is not available.
    pub struct AesNi {
        /// Token from the runtime detection of the "aes" CPU feature.
        support: cpuid_aes::InitToken,
        /// Software implementation holding the same keys, used when `support` reports false.
        fallback: AesSoft,
        /// Round keys, kept as `__m128i` so they are converted only once.
        key_i: __m128i,
        key_j: __m128i,
        key_l: __m128i,
        /// `Block::null()` converted to `__m128i`; round key of the last `aes4` round.
        null: __m128i,
    }

    impl AesRound for AesNi {
        /// Runs the CPU feature detection, builds the software fallback and converts the keys
        /// (plus `Block::null()`) to `__m128i`.
        fn new(key_i: Block, key_j: Block, key_l: Block) -> Self {
            let support = cpuid_aes::init();
            let fallback = AesSoft::new(key_i, key_j, key_l);
            Self {
                support,
                fallback,
                key_i: key_i.simd().into(),
                key_j: key_j.simd().into(),
                key_l: key_l.simd().into(),
                null: Block::null().simd().into(),
            }
        }

        /// Runs four `AESENC` rounds over `value`, keyed with `key_j`, `key_i`, `key_l` and the
        /// null block, in that order. Delegates to the software fallback without CPU support.
        fn aes4(&self, value: Block) -> Block {
            if self.support.get() {
                // SAFETY: `self.support.get()` returned true, i.e. the runtime check generated
                // by `cpufeatures::new!(cpuid_aes, "aes")` found the "aes" CPU feature that
                // `_mm_aesenc_si128` requires.
                unsafe {
                    let state: __m128i = value.simd().into();
                    let state = _mm_aesenc_si128(state, self.key_j);
                    let state = _mm_aesenc_si128(state, self.key_i);
                    let state = _mm_aesenc_si128(state, self.key_l);
                    let state = _mm_aesenc_si128(state, self.null);
                    Block::from_simd(state.into())
                }
            } else {
                self.fallback.aes4(value)
            }
        }

        /// Runs ten `AESENC` rounds over `value`, cycling through `key_i`, `key_j`, `key_l` and
        /// finishing on `key_i`. Delegates to the software fallback without CPU support.
        fn aes10(&self, value: Block) -> Block {
            if self.support.get() {
                let (i, j, l) = (self.key_i, self.key_j, self.key_l);
                // SAFETY: `self.support.get()` returned true, i.e. the runtime check generated
                // by `cpufeatures::new!(cpuid_aes, "aes")` found the "aes" CPU feature that
                // `_mm_aesenc_si128` requires.
                unsafe {
                    let state: __m128i = value.simd().into();
                    // Rounds 1-3
                    let state = _mm_aesenc_si128(state, i);
                    let state = _mm_aesenc_si128(state, j);
                    let state = _mm_aesenc_si128(state, l);
                    // Rounds 4-6
                    let state = _mm_aesenc_si128(state, i);
                    let state = _mm_aesenc_si128(state, j);
                    let state = _mm_aesenc_si128(state, l);
                    // Rounds 7-9
                    let state = _mm_aesenc_si128(state, i);
                    let state = _mm_aesenc_si128(state, j);
                    let state = _mm_aesenc_si128(state, l);
                    // Round 10
                    let state = _mm_aesenc_si128(state, i);
                    Block::from_simd(state.into())
                }
            } else {
                self.fallback.aes10(value)
            }
        }
    }
}
|