Diffstat (limited to 'aezref/aezv5/aesni')
-rw-r--r--  aezref/aezv5/aesni/api.h       4
-rw-r--r--  aezref/aezv5/aesni/encrypt.c   944
2 files changed, 948 insertions, 0 deletions
diff --git a/aezref/aezv5/aesni/api.h b/aezref/aezv5/aesni/api.h
new file mode 100644
index 0000000..abcd511
--- /dev/null
+++ b/aezref/aezv5/aesni/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_KEYBYTES 48
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 12
+#define CRYPTO_ABYTES 16
diff --git a/aezref/aezv5/aesni/encrypt.c b/aezref/aezv5/aesni/encrypt.c
new file mode 100644
index 0000000..9c86bcf
--- /dev/null
+++ b/aezref/aezv5/aesni/encrypt.c
@@ -0,0 +1,944 @@
+/*
+ // AEZ v5 AES-NI version. AEZ info: http://www.cs.ucdavis.edu/~rogaway/aez
+ //
+ // REQUIREMENTS: - Intel or ARM CPU supporting AES instructions
+ // - Faster if all pointers are 16-byte aligned.
+ // - Max 16 byte nonce, 16 byte authenticator
+ // - Single AD (AEZ spec allows vector AD but this code doesn't)
+ // - Max 2^32-1 byte buffers allowed (due to using unsigned int)
+ //
+ // Written by Ted Krovetz (ted@krovetz.net). Last modified 21 March 2017.
+ //
+ // This is free and unencumbered software released into the public domain.
+ //
+ // Anyone is free to copy, modify, publish, use, compile, sell, or
+ // distribute this software, either in source code form or as a compiled
+ // binary, for any purpose, commercial or non-commercial, and by any
+ // means.
+ //
+ // In jurisdictions that recognize copyright laws, the author or authors
+ // of this software dedicate any and all copyright interest in the
+ // software to the public domain. We make this dedication for the benefit
+ // of the public at large and to the detriment of our heirs and
+ // successors. We intend this dedication to be an overt act of
+ // relinquishment in perpetuity of all present and future rights to this
+ // software under copyright law.
+ //
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ // OTHER DEALINGS IN THE SOFTWARE.
+ //
+ // For more information, please refer to <http://unlicense.org/>
+ */
+
+#include "crypto_aead.h"
+#include <stdint.h>
+#include <stddef.h>
+
+/* ------------------------------------------------------------------------- */
+#if __AES__ /* Defined by gcc/clang when compiling for AES-NI */
+/* ------------------------------------------------------------------------- */
+
+#include <smmintrin.h>
+#include <wmmintrin.h>
+#define block __m128i
+
+/* ------------------------------------------------------------------------- */
+
+#define zero _mm_setzero_si128()
+#define vadd(x,y) _mm_add_epi8(x,y)
+#define vand(x,y) _mm_and_si128(x,y)
+#define vandnot(x,y) _mm_andnot_si128(x,y) /* (~x)&y */
+#define vor(x,y) _mm_or_si128(x,y)
+#define vxor(x,y) _mm_xor_si128(x,y)
+
+static int is_zero(block x) { return _mm_testz_si128(x,x); } /* 0 or 1 */
+
+static block sll4(block x) {
+ return vor(_mm_srli_epi64(x, 4), _mm_slli_epi64(_mm_srli_si128(x, 8), 60));
+}
+
+static block srl4(block x) {
+ return vor(_mm_slli_epi64(x, 4), _mm_srli_epi64(_mm_slli_si128(x, 8), 60));
+}
+
+static __m128i bswap16(__m128i b) {
+ const __m128i t = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
+ return _mm_shuffle_epi8(b,t);
+}
+
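+/* GF(2^128) doubling. Callers pass the block through bswap16 first so the
+   __m128i holds the value in native integer order; the value is shifted
+   left one bit and 135 (x^7+x^2+x+1) is xored into the low byte whenever
+   the top bit falls off. */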
+static __m128i double_block(__m128i bl) {
+ const __m128i mask = _mm_set_epi32(135,1,1,1);
+ __m128i tmp = _mm_srai_epi32(bl, 31);
+ tmp = _mm_and_si128(tmp, mask);
+ tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2,1,0,3));
+ bl = _mm_slli_epi32(bl, 1);
+ return _mm_xor_si128(bl,tmp);
+}
+
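+/* AES10 as used by AEZ: whiten with first_key, then ten aesenc rounds keyed
+   by I, J, L repeated (key points at the aez_ctx_t, so key[0]=I, key[2]=J,
+   key[5]=L). aes4 below is the four-round variant; its last argument is
+   xored onto the output, letting callers fold an accumulator into the call. */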
+static __m128i aes(__m128i *key, __m128i in, __m128i first_key) {
+ in = vxor(in, first_key);
+ in = _mm_aesenc_si128 (in,key[0]);
+ in = _mm_aesenc_si128 (in,key[2]);
+ in = _mm_aesenc_si128 (in,key[5]);
+ in = _mm_aesenc_si128 (in,key[0]);
+ in = _mm_aesenc_si128 (in,key[2]);
+ in = _mm_aesenc_si128 (in,key[5]);
+ in = _mm_aesenc_si128 (in,key[0]);
+ in = _mm_aesenc_si128 (in,key[2]);
+ in = _mm_aesenc_si128 (in,key[5]);
+ return _mm_aesenc_si128 (in,key[0]);
+}
+
+static __m128i aes4(__m128i in, __m128i a, __m128i b,
+ __m128i c, __m128i d, __m128i e) {
+ in = _mm_aesenc_si128(vxor(in,a),b);
+ in = _mm_aesenc_si128(in,c);
+ in = _mm_aesenc_si128(in,d);
+ return _mm_aesenc_si128 (in,e);
+}
+
+#define aes4pre(in,a,b,c,d) aes4(in,a,b,c,d,zero)
+
+static __m128i loadu(const void *p) { return _mm_loadu_si128((__m128i*)p); }
+static void storeu(void *p, __m128i x) {_mm_storeu_si128((__m128i*)p,x);}
+
+#define load loadu /* Intel with AES-NI has fast unaligned loads/stores */
+#define store storeu
+
+/* ------------------------------------------------------------------------- */
+#elif __ARM_FEATURE_CRYPTO
+/* ------------------------------------------------------------------------- */
+
+#include <arm_neon.h>
+#define block uint8x16_t
+
+#define zero vmovq_n_u8(0)
+#define vadd(x,y) vaddq_u8(x,y)
+#define vand(x,y) vandq_u8(x,y)
+#define vandnot(x,y) vbicq_u8(y,x) /* (~x)&y */
+#define vor(x,y) vorrq_u8(x,y)
+#define vxor(x,y) veorq_u8(x,y)
+
+static int is_zero(block x) { /* 0 or 1 */
+ uint8x8_t t = vorr_u8(vget_high_u8(x), vget_low_u8(x));
+ return vget_lane_u64(vreinterpret_u64_u8(t),0) == 0;
+}
+
+static block srl4(block x) {
+ const block mask = {15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,0};
+ uint8x16_t tmp = vandq_u8(vshrq_n_u8(vextq_u8(x, x, 1),4),mask);
+ return veorq_u8(tmp,vshlq_n_u8(x,4));
+}
+
+static block sll4(block x) {
+ const block mask = {0,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15};
+ uint8x16_t tmp = vshlq_n_u8(vandq_u8(vextq_u8(x, x, 15),mask),4);
+ return veorq_u8(tmp,vshrq_n_u8(x,4));
+}
+
+/* Identity: this port's double_block works on memory byte order directly */
+static uint8x16_t bswap16(uint8x16_t b) { return b; }
+
+static block double_block(block b) {
+ const block mask = {135,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+ block tmp = (block)vshrq_n_s8((int8x16_t)b,7);
+ tmp = vandq_u8(tmp, mask);
+ tmp = vextq_u8(tmp, tmp, 1); /* Rotate high byte to low end */
+ b = vshlq_n_u8(b,1);
+ return veorq_u8(tmp,b);
+}
+
+static uint8x16_t aes(uint8x16_t *key, uint8x16_t in, uint8x16_t first_key) {
+ in = vaesmcq_u8(vaeseq_u8(in, first_key));
+ in = vaesmcq_u8(vaeseq_u8(in, key[0]));
+ in = vaesmcq_u8(vaeseq_u8(in, key[2]));
+ in = vaesmcq_u8(vaeseq_u8(in, key[5]));
+ in = vaesmcq_u8(vaeseq_u8(in, key[0]));
+ in = vaesmcq_u8(vaeseq_u8(in, key[2]));
+ in = vaesmcq_u8(vaeseq_u8(in, key[5]));
+ in = vaesmcq_u8(vaeseq_u8(in, key[0]));
+ in = vaesmcq_u8(vaeseq_u8(in, key[2]));
+ in = vaesmcq_u8(vaeseq_u8(in, key[5]));
+ return vxor(in, key[0]);
+}
+
+static uint8x16_t aes4pre(uint8x16_t in, uint8x16_t a, uint8x16_t b,
+ uint8x16_t c, uint8x16_t d) {
+ in = vaesmcq_u8(vaeseq_u8(in, a));
+ in = vaesmcq_u8(vaeseq_u8(in, b));
+ in = vaesmcq_u8(vaeseq_u8(in, c));
+ return vaesmcq_u8(vaeseq_u8(in, d));
+}
+
+#define aes4(in,a,b,c,d,e) vxor(aes4pre(in,a,b,c,d),e)
+
+static uint8x16_t load(const void *p) { return *(uint8x16_t *)p; }
+static void store(void *p, uint8x16_t x) { *(uint8x16_t *)p = x; }
+
+#define loadu load /* ARMv8 allows unaligned loads/stores */
+#define storeu store /* ARMv8 allows unaligned stores */
+
+/* ------------------------------------------------------------------------- */
+#else
+#error - This implementation requires __AES__ or __ARM_FEATURE_CRYPTO
+#endif
+/* ------------------------------------------------------------------------- */
+
+#define vxor3(x,y,z) vxor(vxor(x,y),z)
+#define vxor4(w,x,y,z) vxor(vxor(w,x),vxor(y,z))
+#define load_partial(p,n) loadu(p)
+
+/*
+A version like this may be needed when, for example, a 12-byte nonce must be
+loaded into a 16-byte block: the plain loadu above reads 16 bytes regardless
+of n, which is only safe when the load cannot stray into an unmapped page.
+
+static block load_partial(const void *p, unsigned n) {
+ if ((intptr_t)p % 16 == 0) return load(p);
+ else {
+ block tmp; unsigned i;
+ for (i=0; i<n; i++) ((char*)&tmp)[i] = ((char*)p)[i];
+ return tmp;
+ }
+}
+*/
+
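+/* Sliding pad table: loading 16 bytes at offset i (0 <= i <= 16) gives a
+   mask of 16-i 0xff bytes followed by zeros. zero_pad uses this to clear
+   the last zero_bytes bytes of a block; one_zero_pad additionally sets the
+   byte just after the kept bytes to 0x80 (the 10* padding). loadu(pad+32)
+   is the block 0x80 00...00. */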
+static const unsigned char pad[] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+
+static block zero_pad(block x, unsigned zero_bytes) {
+ return vand(x, loadu(pad + zero_bytes));
+}
+
+static block one_zero_pad(block x, unsigned one_zero_bytes) {
+ block *p = (block*)(pad + one_zero_bytes);
+ return vor(vand(x, loadu(p)), loadu(p+1));
+}
+
+static block zero_set_byte(char val, unsigned idx) {
+ block tmp = zero; ((char *)&tmp)[idx] = val; return tmp;
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* All fields are kept in memory byte order except I[1] (2I), which stays in
+   the byte-swapped order that double_block operates on. */
+typedef struct {
+ block I[2]; /* 1I, 2I */
+ block J[3]; /* 1J,2J,4J */
+ block L[3]; /* 1L,2L,4L */
+ block delta3_cache;
+} aez_ctx_t;
+
+/* ------------------------------------------------------------------------- */
+
+static int blake2b(void *out, size_t outlen,
+ const void *key, size_t keylen,
+ const void *in, size_t inlen);
+
+/* ------------------------------------------------------------------------- */
+
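+/* Key setup: a 48-byte key is used directly as I || J || L; any other key
+   length is first extracted to 48 bytes with BLAKE2b. The remaining table
+   entries (2I, 2J, 4J, 2L, 4L) are filled in by doubling. */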
+void aez_setup(unsigned char *key, unsigned keylen, aez_ctx_t *ctx) {
+ block tmp;
+ if (keylen==48) {
+ ctx->I[0] = loadu(key);
+ ctx->J[0] = loadu(key+16);
+ ctx->L[0] = loadu(key+32);
+ } else {
+ blake2b(ctx, 48, 0, 0, key, keylen); /* Puts IJL into ctx */
+ ctx->L[0] = ctx->J[0]; /* Rearrange. */
+ ctx->J[0] = ctx->I[1]; /* Rearrange. */
+ }
+ /* Fill remaining ctx locations with doublings */
+ ctx->I[1] = double_block(bswap16(ctx->I[0])); /* No post-bswap */
+ ctx->J[1] = bswap16(tmp = double_block(bswap16(ctx->J[0])));
+ ctx->J[2] = bswap16(double_block(tmp));
+ ctx->L[1] = bswap16(tmp = double_block(bswap16(ctx->L[0])));
+ ctx->L[2] = bswap16(double_block(tmp));
+ ctx->delta3_cache = zero;
+}
+
+/* ------------------------------------------------------------------------- */
+
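+/* AEZ-hash: computes the tweak block from the authenticator length, nonce
+   and (single-string) AD. The AD contribution is kept in delta3_cache, so a
+   caller passing ad==NULL reuses the AD hash from the previous call. */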
+/* !! Warning !! Only handles nbytes <= 16 and abytes <= 16 */
+static block aez_hash(aez_ctx_t *ctx, char *n, unsigned nbytes, char *ad,
+ unsigned adbytes, unsigned abytes) {
+ block o1, o2, o3, o4, o5, o6, o7, o8, sum, offset, tmp;
+ block I=ctx->I[0], Ifordoubling = ctx->I[1], I2 = bswap16(Ifordoubling);
+ block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
+ block J=ctx->J[0], J2 = ctx->J[1], J4 = ctx->J[2], J5 = vxor(J,J4);
+
+ /* Process abytes and nonce */
+ offset = vxor4(J, J2, I2, L);
+ tmp = zero_set_byte((char)(8*abytes),15);
+ sum = aes4pre(offset,tmp,J,I,L);
+
+ if (nbytes==16) sum = aes4(vxor(loadu(n), J4), vxor(I2, L),J,I,L,sum);
+ else sum = aes4(vxor(J4, I),
+ one_zero_pad(load_partial(n,nbytes),16-nbytes),J,I,L,sum);
+
+ if (ad) { /* Possible easy misuse: ad==null && adbytes==0 */
+ if (adbytes==0) {
+ ctx->delta3_cache = aes4pre(vxor(J5, I), loadu(pad+32),J,I,L);
+ } else {
+ block delta3 = zero;
+ offset = vxor(J5, I2);
+ while (adbytes >= 8*16) {
+ o1 = vxor(offset,L);
+ o2 = vxor(offset,L2);
+ o3 = vxor(o1,L2);
+ o4 = vxor(offset,L4);
+ o5 = vxor(o1,L4);
+ o6 = vxor(o2,L4);
+ o7 = vxor(o3,L4);
+ o8 = offset;
+ Ifordoubling = double_block(Ifordoubling);
+ offset = vxor(J5, bswap16(Ifordoubling));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 32), o3, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 48), o4, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 64), o5, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 80), o6, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 96), o7, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+112), o8, J, I, L));
+ adbytes-=8*16; ad+=8*16;
+ }
+ if (adbytes >= 4*16) {
+ o1 = vxor(offset,L);
+ o2 = vxor(offset,L2);
+ o3 = vxor(o1,L2);
+ o4 = offset = vxor(offset,L4);
+ delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 32), o3, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 48), o4, J, I, L));
+ adbytes-=4*16; ad+=4*16;
+ }
+ if (adbytes >= 2*16) {
+ o1 = vxor(offset,L);
+ o2 = offset = vxor(offset,L2);
+ delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
+ delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
+ adbytes-=2*16; ad+=2*16;
+ }
+ if (adbytes >= 1*16) {
+ o1 = vxor(offset,L);
+ delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
+ adbytes-=1*16; ad+=1*16;
+ }
+ if (adbytes) {
+ delta3 = aes4(vxor(J5, I), one_zero_pad(load(ad),16-adbytes),
+ J, I, L, delta3);
+ }
+ ctx->delta3_cache = delta3;
+ }
+ }
+ return vxor(sum,ctx->delta3_cache);
+}
+
+/* ------------------------------------------------------------------------- */
+
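+/* First pass of AEZ-core: processes full 32-byte pairs of blocks, leaving
+   intermediate values in dst and returning the running xor that the caller
+   uses as X. Unrolled for 256-, 128-, 64- and 32-byte strides. */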
+static block pass_one(aez_ctx_t *ctx, block *src, unsigned bytes, block *dst) {
+ block o1, o2, o3, o4, o5, o6, o7, o8, offset, tmp, sum=zero;
+ block I=ctx->I[0], Ifordoubling = ctx->I[1];
+ block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
+ block J=ctx->J[0];
+ offset = vxor(J, bswap16(Ifordoubling));
+ while (bytes >= 16*16) {
+ o1 = vxor(offset,L);
+ o2 = vxor(offset,L2);
+ o3 = vxor(o1,L2);
+ o4 = vxor(offset,L4);
+ o5 = vxor(o1,L4);
+ o6 = vxor(o2,L4);
+ o7 = vxor(o3,L4);
+ o8 = offset;
+ Ifordoubling = double_block(Ifordoubling);
+ offset = vxor(J,bswap16(Ifordoubling));
+ store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
+ store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
+ store(dst+ 4, aes4(load(src + 5),o3, J, I, L, load(src+ 4)));
+ store(dst+ 6, aes4(load(src + 7),o4, J, I, L, load(src+ 6)));
+ store(dst+ 8, aes4(load(src + 9),o5, J, I, L, load(src+ 8)));
+ store(dst+10, aes4(load(src +11),o6, J, I, L, load(src+10)));
+ store(dst+12, aes4(load(src +13),o7, J, I, L, load(src+12)));
+ store(dst+14, aes4(load(src +15),o8, J, I, L, load(src+14)));
+ tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));store(dst+ 1,tmp);
+ sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
+ store(dst+ 3,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+ 4),J,I,L,load(src+ 5));
+ store(dst+ 5,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+ 6),J,I,L,load(src+ 7));
+ store(dst+ 7,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+ 8),J,I,L,load(src+ 9));
+ store(dst+ 9,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+10),J,I,L,load(src+11));
+ store(dst+11,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+12),J,I,L,load(src+13));
+ store(dst+13,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+14),J,I,L,load(src+15));
+ store(dst+15,tmp);sum=vxor(sum,tmp);
+ bytes -= 16*16; dst += 16; src += 16;
+ }
+ if (bytes >= 8*16) {
+ o1 = vxor(offset,L);
+ o2 = vxor(offset,L2);
+ o3 = vxor(o1,L2);
+ o4 = offset = vxor(offset,L4);
+ store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
+ store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
+ store(dst+ 4, aes4(load(src + 5),o3, J, I, L, load(src+ 4)));
+ store(dst+ 6, aes4(load(src + 7),o4, J, I, L, load(src+ 6)));
+ tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
+ store(dst+ 1,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
+ store(dst+ 3,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+ 4),J,I,L,load(src+ 5));
+ store(dst+ 5,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+ 6),J,I,L,load(src+ 7));
+ store(dst+ 7,tmp);sum=vxor(sum,tmp);
+ bytes -= 8*16; dst += 8; src += 8;
+ }
+ if (bytes >= 4*16) {
+ o1 = vxor(offset,L);
+ o2 = offset = vxor(offset,L2);
+ store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
+ store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
+ tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
+ store(dst+ 1,tmp);sum=vxor(sum,tmp);
+ tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
+ store(dst+ 3,tmp);sum=vxor(sum,tmp);
+ bytes -= 4*16; dst += 4; src += 4;
+ }
+ if (bytes) {
+ o1 = vxor(offset,L);
+ store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
+ tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
+ store(dst+ 1,tmp);sum=vxor(sum,tmp);
+ }
+ return sum;
+}
+
+/* ------------------------------------------------------------------------- */
+
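+/* Second pass of AEZ-core: derives a mask from s for each pair, xors it
+   through the values left in dst by pass_one, applies the remaining AES4
+   layers in place, and returns the running xor that the caller uses as Y. */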
+static block pass_two(aez_ctx_t *ctx, block s, unsigned bytes, block *dst) {
+ block o1, o2, o3, o4, o5, o6, o7, o8, sum=zero, offset, fs[8], tmp[8];
+ block I=ctx->I[0], Ifordoubling = ctx->I[1];
+ block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
+ block J=ctx->J[0], J2=ctx->J[1], J3=vxor(J,J2);
+ offset = vxor(J2, bswap16(Ifordoubling));
+ while (bytes >= 16*16) {
+ o1 = vxor(offset,L);
+ o2 = vxor(offset,L2);
+ o3 = vxor(o1,L2);
+ o4 = vxor(offset,L4);
+ o5 = vxor(o1,L4);
+ o6 = vxor(o2,L4);
+ o7 = vxor(o3,L4);
+ o8 = offset;
+ Ifordoubling = double_block(Ifordoubling);
+ offset = vxor(J2, bswap16(Ifordoubling));
+ fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
+ fs[2] = aes4pre(s,o3,J,I,L); fs[3] = aes4pre(s,o4,J,I,L);
+ fs[4] = aes4pre(s,o5,J,I,L); fs[5] = aes4pre(s,o6,J,I,L);
+ fs[6] = aes4pre(s,o7,J,I,L); fs[7] = aes4pre(s,o8,J,I,L);
+ o1 = vxor(J3,o1); o2 = vxor(J3,o2);
+ o3 = vxor(J3,o3); o4 = vxor(J3,o4);
+ o5 = vxor(J3,o5); o6 = vxor(J3,o6);
+ o7 = vxor(J3,o7); o8 = vxor(J3,o8);
+ tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
+ store(dst+ 0,vxor(load(dst+ 1),fs[0]));
+ tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
+ store(dst+ 2,vxor(load(dst+ 3),fs[1]));
+ tmp[2] = vxor(load(dst+ 4),fs[2]); sum = vxor(sum,tmp[2]);
+ store(dst+ 4,vxor(load(dst+ 5),fs[2]));
+ tmp[3] = vxor(load(dst+ 6),fs[3]); sum = vxor(sum,tmp[3]);
+ store(dst+ 6,vxor(load(dst+ 7),fs[3]));
+ tmp[4] = vxor(load(dst+ 8),fs[4]); sum = vxor(sum,tmp[4]);
+ store(dst+ 8,vxor(load(dst+ 9),fs[4]));
+ tmp[5] = vxor(load(dst+10),fs[5]); sum = vxor(sum,tmp[5]);
+ store(dst+10,vxor(load(dst+11),fs[5]));
+ tmp[6] = vxor(load(dst+12),fs[6]); sum = vxor(sum,tmp[6]);
+ store(dst+12,vxor(load(dst+13),fs[6]));
+ tmp[7] = vxor(load(dst+14),fs[7]); sum = vxor(sum,tmp[7]);
+ store(dst+14,vxor(load(dst+15),fs[7]));
+ store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
+ store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
+ store(dst+ 5, aes4(I,load(dst+ 4), J, I, L, tmp[2]));
+ store(dst+ 7, aes4(I,load(dst+ 6), J, I, L, tmp[3]));
+ store(dst+ 9, aes4(I,load(dst+ 8), J, I, L, tmp[4]));
+ store(dst+11, aes4(I,load(dst+10), J, I, L, tmp[5]));
+ store(dst+13, aes4(I,load(dst+12), J, I, L, tmp[6]));
+ store(dst+15, aes4(I,load(dst+14), J, I, L, tmp[7]));
+ store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
+ store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
+ store(dst+ 4, aes4(load(dst+ 5),o3, J, I, L, load(dst+ 4)));
+ store(dst+ 6, aes4(load(dst+ 7),o4, J, I, L, load(dst+ 6)));
+ store(dst+ 8, aes4(load(dst+ 9),o5, J, I, L, load(dst+ 8)));
+ store(dst+10, aes4(load(dst+11),o6, J, I, L, load(dst+10)));
+ store(dst+12, aes4(load(dst+13),o7, J, I, L, load(dst+12)));
+ store(dst+14, aes4(load(dst+15),o8, J, I, L, load(dst+14)));
+ bytes -= 16*16; dst += 16;
+ }
+ if (bytes >= 8*16) {
+ o1 = vxor(offset,L);
+ o2 = vxor(offset,L2);
+ o3 = vxor(o1,L2);
+ o4 = offset = vxor(offset,L4);
+ fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
+ fs[2] = aes4pre(s,o3,J,I,L); fs[3] = aes4pre(s,o4,J,I,L);
+ o1 = vxor(J3,o1); o2 = vxor(J3,o2);
+ o3 = vxor(J3,o3); o4 = vxor(J3,o4);
+ tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
+ store(dst+ 0,vxor(load(dst+ 1),fs[0]));
+ tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
+ store(dst+ 2,vxor(load(dst+ 3),fs[1]));
+ tmp[2] = vxor(load(dst+ 4),fs[2]); sum = vxor(sum,tmp[2]);
+ store(dst+ 4,vxor(load(dst+ 5),fs[2]));
+ tmp[3] = vxor(load(dst+ 6),fs[3]); sum = vxor(sum,tmp[3]);
+ store(dst+ 6,vxor(load(dst+ 7),fs[3]));
+ store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
+ store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
+ store(dst+ 5, aes4(I,load(dst+ 4), J, I, L, tmp[2]));
+ store(dst+ 7, aes4(I,load(dst+ 6), J, I, L, tmp[3]));
+ store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
+ store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
+ store(dst+ 4, aes4(load(dst+ 5),o3, J, I, L, load(dst+ 4)));
+ store(dst+ 6, aes4(load(dst+ 7),o4, J, I, L, load(dst+ 6)));
+ bytes -= 8*16; dst += 8;
+ }
+ if (bytes >= 4*16) {
+ o1 = vxor(offset,L);
+ o2 = offset = vxor(offset,L2);
+ fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
+ o1 = vxor(J3,o1); o2 = vxor(J3,o2);
+ tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
+ store(dst+ 0,vxor(load(dst+ 1),fs[0]));
+ tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
+ store(dst+ 2,vxor(load(dst+ 3),fs[1]));
+ store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
+ store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
+ store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
+ store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
+ bytes -= 4*16; dst += 4;
+ }
+ if (bytes) {
+ o1 = vxor(offset,L);
+ fs[0] = aes4pre(s,o1,J,I,L);
+ o1 = vxor(J3,o1);
+ tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
+ store(dst+ 0,vxor(load(dst+ 1),fs[0]));
+ store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
+ store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
+ }
+ return sum;
+}
+
+/* ------------------------------------------------------------------------- */
+
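+/* AEZ-core: enciphering for strings of 32 or more bytes (plaintext plus the
+   abytes of appended zeros when encrypting). d selects direction: 0 to
+   encipher, nonzero to decipher; deciphering returns -1 if the appended
+   zero bytes do not verify. */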
+static int cipher_aez_core(aez_ctx_t *ctx, block t, int d, char *src,
+ unsigned bytes, unsigned abytes, char *dst) {
+ block s, x, y, frag0, frag1, final0, final1;
+ block I=ctx->I[0], L=ctx->L[0], J=ctx->J[0];
+ block L4=ctx->L[2], I2 = bswap16(ctx->I[1]);
+ unsigned i, frag_bytes, initial_bytes;
+
+ if (!d) bytes += abytes;
+ frag_bytes = bytes % 32;
+ initial_bytes = bytes - frag_bytes - 32;
+
+ /* Compute x and store intermediate results */
+ x = pass_one(ctx, (block*)src, initial_bytes, (block*)dst);
+ if (frag_bytes >= 16) {
+ frag0 = load(src + initial_bytes);
+ frag1 = one_zero_pad(load(src + initial_bytes + 16), 32-frag_bytes);
+ x = aes4(frag0, vxor(L4, I2), J, I, L, x);
+ x = vxor(x, aes4pre(frag1, vxor3(I2, L4, L), J, I, L));
+ } else if (frag_bytes) {
+ frag0 = one_zero_pad(load(src + initial_bytes), 16-frag_bytes);
+ x = aes4(frag0, vxor(L4, I2), J, I, L, x);
+ }
+
+ /* Calculate s and final block values (y xor'd to final1 later) */
+ final0 = vxor3(loadu(src + (bytes - 32)), x, t);
+ if (d || !abytes) final1 = loadu(src+(bytes-32)+16);
+ else final1 = zero_pad(loadu(src+(bytes-32)+16), abytes);
+ final0 = aes4(final1, vxor(I2, ctx->L[d]), J, I, L, final0);
+ final1 = vxor(final1, aes((block*)ctx, final0, ctx->L[d]));
+ s = vxor(final0, final1);
+ final0 = vxor(final0, aes((block*)ctx, final1, ctx->L[d^1]));
+ /* Decryption: final0 should hold abytes zero bytes. If not, failure */
+ if (d && !is_zero(vandnot(loadu(pad+abytes),final0))) return -1;
+ final1 = aes4(final0, vxor(I2, ctx->L[d^1]), J, I, L, final1);
+
+ /* Compute y and store final results */
+ y = pass_two(ctx, s, initial_bytes, (block*)dst);
+ if (frag_bytes >= 16) {
+ frag0 = vxor(frag0, aes((block*)ctx, s, L4));
+ frag1 = vxor(frag1, aes((block*)ctx, s, vxor(L4, L)));
+ frag1 = one_zero_pad(frag1, 32-frag_bytes);
+ y = aes4(frag0, vxor(I2, L4), J, I, L, y);
+ y = vxor(y, aes4pre(frag1, vxor3(I2, L4, L), J, I, L));
+ store(dst + initial_bytes, frag0);
+ store(dst + initial_bytes + 16, frag1);
+ } else if (frag_bytes) {
+ frag0 = vxor(frag0, aes((block*)ctx, s, L4));
+ frag0 = one_zero_pad(frag0, 16-frag_bytes);
+ y = aes4(frag0, vxor(I2, L4), J, I, L, y);
+ store(dst + initial_bytes, frag0);
+ }
+
+ storeu(dst + (bytes - 32), vxor3(final1, y, t));
+ if (!d || !abytes)
+ storeu(dst + (bytes - 32) + 16, final0);
+ else {
+ for (i=0; i<16-abytes; i++)
+ ((char*)dst + (bytes - 16))[i] = ((char*)&final0)[i];
+ }
+ return 0;
+}
+
+/* ------------------------------------------------------------------------- */
+
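+/* AEZ-tiny: a Feistel construction for strings shorter than 32 bytes
+   (including the appended zero bytes when encrypting). The round count
+   depends on length: 8 rounds for 16-31 bytes, 10 for 3-15, 16 for 2 and
+   24 for 1; odd lengths are split at a nibble boundary. */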
+static int cipher_aez_tiny(aez_ctx_t *ctx, block t, int d, char *src,
+ unsigned bytes, unsigned abytes, char *dst) {
+ block l, r, tmp, one, rcon, buf[2], mask_10, mask_ff;
+ block I=ctx->I[0], L=ctx->L[0], J=ctx->J[0], t_orig = t;
+ block L2=ctx->L[1], L4=ctx->L[2], I2 = bswap16(ctx->I[1]);
+ unsigned rnds, i;
+
+ /* load src into buf, zero pad, update bytes for abytes */
+ if (bytes >= 16) {
+ buf[0] = load(src);
+ buf[1] = zero_pad(load_partial(src+16,bytes-16),32-bytes);
+ } else {
+ buf[0] = zero_pad(load_partial(src,bytes),16-bytes);
+ buf[1] = zero;
+ }
+ if (!d) bytes += abytes;
+
+ /* load l/r, create 10* padding masks, shift r 4 bits if odd length */
+ l = buf[0];
+ r = loadu((char*)buf+bytes/2);
+ mask_ff = loadu(pad+16-bytes/2);
+ mask_10 = loadu(pad+32-bytes/2);
+ if (bytes&1) { /* Odd length. Deal with nibbles. */
+ mask_10 = sll4(mask_10);
+ ((char*)&mask_ff)[bytes/2] = (char)0xf0;
+ r = bswap16(r);
+ r = srl4(r);
+ r = bswap16(r);
+ }
+ r = vor(vand(r, mask_ff), mask_10);
+
+ /* Add tweak offset into t, and determine the number of rounds */
+ if (bytes >= 16) {
+ t = vxor4(t, I2, L2, L4); /* (0,6) offset */
+ rnds = 8;
+ } else {
+ t = vxor(vxor4(t, I2, L2, L4), L); /* (0,7) offset */
+ if (bytes>=3) rnds = 10; else if (bytes==2) rnds = 16; else rnds = 24;
+ }
+
+ if (!d) {
+ one = zero_set_byte(1,15);
+ rcon = zero;
+ } else {
+ one = zero_set_byte(-1,15);
+ rcon = zero_set_byte((char)(rnds-1),15);
+ }
+
+ if ((d) && (bytes < 16)) {
+ block offset = vxor3(I2, L, L2);
+ tmp = vor(l, loadu(pad+32));
+ tmp = aes4pre(t_orig, vxor(tmp,offset), J, I, L);
+ tmp = vand(tmp, loadu(pad+32));
+ l = vxor(l, tmp);
+ }
+
+ /* Feistel */
+ for (i=0; i<rnds; i+=2) {
+ l = vor(vand(aes4(t,vxor(r,rcon), J, I, L, l), mask_ff), mask_10);
+ rcon = vadd(rcon,one);
+ r = vor(vand(aes4(t,vxor(l,rcon), J, I, L, r), mask_ff), mask_10);
+ rcon = vadd(rcon,one);
+ }
+ buf[0] = r;
+ if (bytes&1) {
+ l = bswap16(l);
+ l = sll4(l);
+ l = bswap16(l);
+ r = vand(loadu((char*)buf+bytes/2), zero_set_byte((char)0xf0,0));
+ l = vor(l, r);
+ }
+ storeu((char*)buf+bytes/2, l);
+ if (d) {
+ bytes -= abytes;
+ if (abytes==16) tmp = loadu((char*)buf+bytes);
+ else {
+ tmp = zero;
+ for (i=0; i<abytes; i++) ((char*)&tmp)[i] = ((char*)buf+bytes)[i];
+ }
+ if (!is_zero(tmp)) return -1;
+ } else if (bytes < 16) {
+ block offset = vxor3(I2, L, L2);
+ tmp = vor(zero_pad(buf[0], 16-bytes), loadu(pad+32));
+ tmp = aes4pre(t_orig,vxor(tmp,offset), J, I, L);
+ buf[0] = vxor(buf[0], vand(tmp, loadu(pad+32)));
+ }
+ for (i=0; i<bytes; i++) dst[i] = ((char*)buf)[i];
+ return 0;
+}
+
+/* ------------------------------------------------------------------------- */
+
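+/* Encrypt: writes bytes+abytes bytes of ciphertext to dst. A zero-length
+   plaintext yields an abytes-byte tag by enciphering the hash; otherwise
+   AEZ-tiny (total length under 32 bytes) or AEZ-core is used. */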
+void aez_encrypt(aez_ctx_t *ctx, char *n, unsigned nbytes,
+ char *ad, unsigned adbytes, unsigned abytes,
+ char *src, unsigned bytes, char *dst) {
+
+ block t = aez_hash(ctx, n, nbytes, ad, adbytes, abytes);
+ if (bytes==0) {
+ unsigned i;
+ t = aes((block*)ctx, t, vxor(ctx->L[0], ctx->L[1]));
+ for (i=0; i<abytes; i++) dst[i] = ((char*)&t)[i];
+ } else if (bytes+abytes < 32)
+ cipher_aez_tiny(ctx, t, 0, src, bytes, abytes, dst);
+ else
+ cipher_aez_core(ctx, t, 0, src, bytes, abytes, dst);
+}
+
+/* ------------------------------------------------------------------------- */
+
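+/* Decrypt: requires bytes >= abytes; writes bytes-abytes bytes of plaintext
+   to dst and returns 0 on success or -1 if authentication fails. */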
+int aez_decrypt(aez_ctx_t *ctx, char *n, unsigned nbytes,
+ char *ad, unsigned adbytes, unsigned abytes,
+ char *src, unsigned bytes, char *dst) {
+
+ block t;
+ if (bytes < abytes) return -1;
+ t = aez_hash(ctx, n, nbytes, ad, adbytes, abytes);
+ if (bytes==abytes) {
+ block claimed = zero_pad(load_partial(src,abytes), 16-abytes);
+ t = zero_pad(aes((block*)ctx, t, vxor(ctx->L[0], ctx->L[1])), 16-abytes);
+ return is_zero(vandnot(t, claimed)) - 1; /* is_zero returns 0 or 1 */
+ } else if (bytes < 32) {
+ return cipher_aez_tiny(ctx, t, 1, src, bytes, abytes, dst);
+ } else {
+ return cipher_aez_core(ctx, t, 1, src, bytes, abytes, dst);
+ }
+}
+
+/* ------------------------------------------------------------------------- */
+/* Reference BLAKE2b code, included here for convenience, not for speed. */
+/* Downloaded Sep 2015 from https://github.com/mjosaarinen/blake2_mjosref */
+
+#include <stdint.h>
+
+typedef struct {
+ uint8_t b[128];
+ uint64_t h[8];
+ uint64_t t[2];
+ size_t c;
+ size_t outlen;
+} blake2b_ctx;
+
+#ifndef ROTR64
+#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
+#endif
+
+#define B2B_GET64(p) \
+(((uint64_t) ((uint8_t *) (p))[0]) ^ \
+(((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \
+(((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
+(((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
+(((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
+(((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
+(((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
+(((uint64_t) ((uint8_t *) (p))[7]) << 56))
+
+#define B2B_G(a, b, c, d, x, y) { \
+v[a] = v[a] + v[b] + x; \
+v[d] = ROTR64(v[d] ^ v[a], 32); \
+v[c] = v[c] + v[d]; \
+v[b] = ROTR64(v[b] ^ v[c], 24); \
+v[a] = v[a] + v[b] + y; \
+v[d] = ROTR64(v[d] ^ v[a], 16); \
+v[c] = v[c] + v[d]; \
+v[b] = ROTR64(v[b] ^ v[c], 63); }
+
+static const uint64_t blake2b_iv[8] = {
+ 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
+ 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
+ 0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
+ 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
+};
+
+static void blake2b_compress(blake2b_ctx *ctx, int last)
+{
+ const uint8_t sigma[12][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
+ };
+ int i;
+ uint64_t v[16], m[16];
+
+ for (i = 0; i < 8; i++) {
+ v[i] = ctx->h[i];
+ v[i + 8] = blake2b_iv[i];
+ }
+
+ v[12] ^= ctx->t[0];
+ v[13] ^= ctx->t[1];
+ if (last)
+ v[14] = ~v[14];
+
+ for (i = 0; i < 16; i++)
+ m[i] = B2B_GET64(&ctx->b[8 * i]);
+
+ for (i = 0; i < 12; i++) {
+ B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
+ B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
+ B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
+ B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
+ B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
+ B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
+ B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
+ B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
+ }
+
+ for( i = 0; i < 8; ++i )
+ ctx->h[i] ^= v[i] ^ v[i + 8];
+}
+
+static void blake2b_update(blake2b_ctx *ctx,
+ const void *in, size_t inlen)
+{
+ size_t i;
+
+ for (i = 0; i < inlen; i++) {
+ if (ctx->c == 128) {
+ ctx->t[0] += ctx->c;
+ if (ctx->t[0] < ctx->c)
+ ctx->t[1]++;
+ blake2b_compress(ctx, 0);
+ ctx->c = 0;
+ }
+ ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
+ }
+}
+
+static void blake2b_final(blake2b_ctx *ctx, void *out)
+{
+ size_t i;
+
+ ctx->t[0] += ctx->c;
+ if (ctx->t[0] < ctx->c)
+ ctx->t[1]++;
+
+ while (ctx->c < 128)
+ ctx->b[ctx->c++] = 0;
+ blake2b_compress(ctx, 1);
+
+ for (i = 0; i < ctx->outlen; i++) {
+ ((uint8_t *) out)[i] =
+ (ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
+ }
+}
+
+static int blake2b_init(blake2b_ctx *ctx, size_t outlen,
+ const void *key, size_t keylen)
+{
+ size_t i;
+
+ if (outlen == 0 || outlen > 64 || keylen > 64)
+ return -1;
+
+ for (i = 0; i < 8; i++)
+ ctx->h[i] = blake2b_iv[i];
+ ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;
+
+ ctx->t[0] = 0;
+ ctx->t[1] = 0;
+ ctx->c = 0;
+ ctx->outlen = outlen;
+
+ for (i = keylen; i < 128; i++)
+ ctx->b[i] = 0;
+ if (keylen > 0) {
+ blake2b_update(ctx, key, keylen);
+ ctx->c = 128;
+ }
+
+ return 0;
+}
+
+static int blake2b(void *out, size_t outlen,
+ const void *key, size_t keylen,
+ const void *in, size_t inlen)
+{
+ blake2b_ctx ctx;
+
+ if (blake2b_init(&ctx, outlen, key, keylen))
+ return -1;
+ blake2b_update(&ctx, in, inlen);
+ blake2b_final(&ctx, out);
+
+ return 0;
+}
+
+/* ------------------------------------------------------------------------- */
+/* aez mapping for CAESAR competition */
+
+int crypto_aead_encrypt(
+ unsigned char *c,unsigned long long *clen,
+ const unsigned char *m,unsigned long long mlen,
+ const unsigned char *ad,unsigned long long adlen,
+ const unsigned char *nsec,
+ const unsigned char *npub,
+ const unsigned char *k
+)
+{
+ aez_ctx_t ctx;
+ (void)nsec;
+ if (clen) *clen = mlen+16;
+ aez_setup((unsigned char *)k, 48, &ctx);
+ aez_encrypt(&ctx, (char *)npub, 12,
+ (char *)ad, (unsigned)adlen, 16,
+ (char *)m, (unsigned)mlen, (char *)c);
+ return 0;
+}
+
+int crypto_aead_decrypt(
+ unsigned char *m,unsigned long long *mlen,
+ unsigned char *nsec,
+ const unsigned char *c,unsigned long long clen,
+ const unsigned char *ad,unsigned long long adlen,
+ const unsigned char *npub,
+ const unsigned char *k
+)
+{
+ aez_ctx_t ctx;
+ (void)nsec;
+ if (mlen) *mlen = clen-16;
+ aez_setup((unsigned char *)k, 48, &ctx);
+ return aez_decrypt(&ctx, (char *)npub, 12,
+ (char *)ad, (unsigned)adlen, 16,
+ (char *)c, (unsigned)clen, (char *)m);
+}
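+
+/* ------------------------------------------------------------------------- */
+/* Illustrative usage sketch, not part of the reference implementation. It
+   assumes the CAESAR/SUPERCOP-style crypto_aead.h declares the two functions
+   defined above and that api.h provides the CRYPTO_* constants; key and
+   nonce generation and error handling are omitted.
+
+   #include <stdio.h>
+   #include "api.h"
+   #include "crypto_aead.h"
+
+   int main(void) {
+       unsigned char k[CRYPTO_KEYBYTES] = {0};      // 48-byte key
+       unsigned char npub[CRYPTO_NPUBBYTES] = {0};  // 12-byte public nonce
+       unsigned char ad[8] = "header";
+       unsigned char m[32] = "example plaintext", m2[32];
+       unsigned char c[sizeof(m) + CRYPTO_ABYTES];
+       unsigned long long clen, mlen;
+       crypto_aead_encrypt(c, &clen, m, sizeof(m), ad, sizeof(ad), 0, npub, k);
+       if (crypto_aead_decrypt(m2, &mlen, 0, c, clen, ad, sizeof(ad), npub, k))
+           printf("authentication failed\n");
+       else
+           printf("ok, recovered %llu plaintext bytes\n", mlen);
+       return 0;
+   }
+*/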