diff options
author | Daniel Schadt <kingdread@gmx.de> | 2025-06-18 12:00:39 +0200 |
---|---|---|
committer | Daniel Schadt <kingdread@gmx.de> | 2025-06-18 12:06:16 +0200 |
commit | 942f522a68adfb9c436a05e736bdaf07c8aa7db8 (patch) | |
tree | 4dae5f6c203b6f9593480a704fde08494c38d2dc /src/aesround.rs | |
parent | 79e1a87f789cbb40d2041063c674817bb0484e5f (diff) | |
download | zears-942f522a68adfb9c436a05e736bdaf07c8aa7db8.tar.gz zears-942f522a68adfb9c436a05e736bdaf07c8aa7db8.tar.bz2 zears-942f522a68adfb9c436a05e736bdaf07c8aa7db8.zip |
update rationale for implementing AESNI ourselves
Diffstat (limited to 'src/aesround.rs')
-rw-r--r-- | src/aesround.rs | 8 |
1 file changed, 5 insertions, 3 deletions
diff --git a/src/aesround.rs b/src/aesround.rs index 6f63243..4ae3f6f 100644 --- a/src/aesround.rs +++ b/src/aesround.rs @@ -61,9 +61,11 @@ impl AesRound for AesSoft { // under the hood), but there is a big benefit here: // First, we can save time by only loading the keys once as a __m128i, which makes the whole thing // a bit faster. -// More importantly though, when using target-cpu=native, we get nicely vectorized AES instructions -// (VAESENC), which we don't get if we go through aes::hazmat::cipher_round. This is a *huge* -// speedup, which we don't want to miss. +// More importantly though, the compiler does not inline the call to cipher_round, even when using +// target-cpu=native. I guess this is because it crosses a crate boundary (and cross-crate inlining +// only happens with LTO). In fact, compiling with lto=true does inline the call, but we don't want +// to force that to all library users. Anyway, by re-implementing the AES instruction here, we get +// nice inlining without relying on LTO and therefore a huge speedup, as AES is called a lot. #[cfg(target_arch = "x86_64")] pub mod x86_64 { use super::*; |