Commit a6f7f366 authored by Kevin Jacobs

Bug 1608493 - Use AES-NI intrinsics for CBC and ECB decrypt when no assembly...

Bug 1608493 - Use AES-NI intrinsics for CBC and ECB decrypt when no assembly implementation is available. r=mt

AES-NI is currently not used for *CBC* or *ECB decrypt* when an assembly implementation (`intel-aes.s` or `intel-aes-x86/64-masm.asm`) is not available. Concretely, this is the case on macOS, 32-bit Linux, and other non-Linux OSes such as the BSDs. This patch adds the plumbing to use AES-NI intrinsics in those cases when AES-NI is available.

Before:
```
       mode          in symmkey  opreps  cxreps     context          op   time(sec)     thrgput
  aes_ecb_d        78Mb     256     10T       0       0.000     395.000       0.395       197Mb
  aes_cbc_e        78Mb     256     10T       0       0.000     392.000       0.393       198Mb
  aes_cbc_d        78Mb     256     10T       0       0.000     425.000       0.425       183Mb

```

After:
```
      mode          in symmkey  opreps  cxreps     context          op   time(sec)     thrgput
 aes_ecb_d        78Mb     256     10T       0       0.000      39.000       0.039         1Gb
 aes_cbc_e        78Mb     256     10T       0       0.000      94.000       0.094       831Mb
 aes_cbc_d        78Mb     256     10T       0       0.000      74.000       0.075         1Gb

```

Differential Revision: https://phabricator.services.mozilla.com/D60195

--HG--
extra : moz-landing-system : lando
parent fdde600d
......@@ -155,3 +155,30 @@ rijndael_native_encryptBlock(AESContext *cx,
m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]);
_mm_storeu_si128((__m128i *)output, m);
}
/*
 * Decrypt a single 128-bit block with the AES-NI intrinsics.
 *
 * cx     - supplies the round count (cx->Nr) and the round keys
 *          cx->k.keySchedule[0..Nr].
 * output - receives the 128-bit result via an unaligned store.
 * input  - 128 bits of ciphertext, fetched with an unaligned load.
 *
 * Round keys are consumed from index Nr down to index 0.
 * NOTE(review): _mm_aesdec_si128 presumably needs the round keys in the
 * form the AESDEC instruction expects for decryption; the key setup is not
 * visible here - confirm that decrypt contexts are prepared accordingly.
 */
void
rijndael_native_decryptBlock(AESContext *cx,
                             unsigned char *output,
                             const unsigned char *input)
{
    int round = cx->Nr - 1;
    pre_align __m128i state post_align = _mm_loadu_si128((__m128i *)input);

    /* Initial AddRoundKey using the last entry of the schedule. */
    state = _mm_xor_si128(state, cx->k.keySchedule[cx->Nr]);

    /* Middle rounds, walking the schedule backwards: Nr-1 .. 1. */
    while (round > 0) {
        state = _mm_aesdec_si128(state, cx->k.keySchedule[round]);
        --round;
    }

    /* Final round uses the first entry of the schedule. */
    state = _mm_aesdeclast_si128(state, cx->k.keySchedule[0]);
    _mm_storeu_si128((__m128i *)output, state);
}
// out = a ^ b
void
native_xorBlock(unsigned char *out,
const unsigned char *a,
const unsigned char *b)
{
pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a));
pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b));
in1 = _mm_xor_si128(in1, in2);
_mm_storeu_si128((__m128i *)(out), in1);
}
......@@ -42,6 +42,12 @@ void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
/* Encrypt one block from `input` into `output` using the AES-NI intrinsics
 * and the round keys held in `cx`. */
void rijndael_native_encryptBlock(AESContext *cx,
unsigned char *output,
const unsigned char *input);
/* Decrypt one 128-bit block from `input` into `output` using the AES-NI
 * intrinsics; round keys are read from cx->k.keySchedule[Nr] down to [0]. */
void rijndael_native_decryptBlock(AESContext *cx,
unsigned char *output,
const unsigned char *input);
/* out = a ^ b over one 128-bit block, using unaligned SIMD loads/stores. */
void native_xorBlock(unsigned char *out,
const unsigned char *a,
const unsigned char *b);
/* Stub definitions for the above rijndael_native_* functions, which
* shouldn't be used unless NSS_X86_OR_X64 is defined */
......@@ -62,6 +68,23 @@ rijndael_native_encryptBlock(AESContext *cx,
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
PORT_Assert(0);
}
/*
 * Stub counterpart of the AES-NI block decryptor, part of the stub
 * definitions that are not meant to be called unless NSS_X86_OR_X64 is
 * defined. It never touches `output`: it only records
 * SEC_ERROR_LIBRARY_FAILURE and asserts.
 */
void
rijndael_native_decryptBlock(AESContext *cx,
                             unsigned char *output,
                             const unsigned char *input)
{
    /* Parameters are intentionally unused in the stub. */
    (void)cx;
    (void)output;
    (void)input;

    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
    PORT_Assert(0); /* reaching this stub is a programming error */
}
void
native_xorBlock(unsigned char *out, const unsigned char *a,
const unsigned char *b)
{
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
PORT_Assert(0);
}
#endif /* NSS_X86_OR_X64 */
/*
......@@ -509,6 +532,15 @@ typedef union {
#define STATE_BYTE(i) state.b[i]
/*
 * Portable byte-wise block XOR: out[i] = a[i] ^ b[i] for each of the
 * AES_BLOCK_SIZE bytes, in ascending order.
 *
 * Every output byte depends only on the input bytes at the same index, so
 * `out` may be the same buffer as `a` or `b`.
 */
inline static void
xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b)
{
    const unsigned char *const stop = a + AES_BLOCK_SIZE;

    while (a != stop) {
        *out = *a ^ *b;
        ++out;
        ++a;
        ++b;
    }
}
static void NO_SANITIZE_ALIGNMENT
rijndael_encryptBlock128(AESContext *cx,
unsigned char *output,
......@@ -604,7 +636,7 @@ rijndael_encryptBlock128(AESContext *cx,
#endif
}
static SECStatus NO_SANITIZE_ALIGNMENT
static void NO_SANITIZE_ALIGNMENT
rijndael_decryptBlock128(AESContext *cx,
unsigned char *output,
const unsigned char *input)
......@@ -693,7 +725,6 @@ rijndael_decryptBlock128(AESContext *cx,
memcpy(output, outBuf, sizeof outBuf);
}
#endif
return SECSuccess;
}
/**************************************************************************
......@@ -707,16 +738,13 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
AESBlockFunc *encryptor;
if (aesni_support()) {
/* Use hardware acceleration for normal AES parameters. */
encryptor = &rijndael_native_encryptBlock;
} else {
encryptor = &rijndael_encryptBlock128;
}
PRBool aesni = aesni_support();
while (inputLen > 0) {
(*encryptor)(cx, output, input);
if (aesni) {
rijndael_native_encryptBlock(cx, output, input);
} else {
rijndael_encryptBlock128(cx, output, input);
}
output += AES_BLOCK_SIZE;
input += AES_BLOCK_SIZE;
inputLen -= AES_BLOCK_SIZE;
......@@ -729,20 +757,23 @@ rijndael_encryptCBC(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
unsigned int j;
unsigned char *lastblock;
unsigned char *lastblock = cx->iv;
unsigned char inblock[AES_BLOCK_SIZE * 8];
PRBool aesni = aesni_support();
if (!inputLen)
return SECSuccess;
lastblock = cx->iv;
while (inputLen > 0) {
/* XOR with the last block (IV if first block) */
for (j = 0; j < AES_BLOCK_SIZE; ++j) {
inblock[j] = input[j] ^ lastblock[j];
if (aesni) {
/* XOR with the last block (IV if first block) */
native_xorBlock(inblock, input, lastblock);
/* encrypt */
rijndael_native_encryptBlock(cx, output, inblock);
} else {
xorBlock(inblock, input, lastblock);
rijndael_encryptBlock128(cx, output, inblock);
}
/* encrypt */
rijndael_encryptBlock128(cx, output, inblock);
/* move to the next block */
lastblock = output;
output += AES_BLOCK_SIZE;
......@@ -758,9 +789,12 @@ rijndael_decryptECB(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
PRBool aesni = aesni_support();
while (inputLen > 0) {
if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) {
return SECFailure;
if (aesni) {
rijndael_native_decryptBlock(cx, output, input);
} else {
rijndael_decryptBlock128(cx, output, input);
}
output += AES_BLOCK_SIZE;
input += AES_BLOCK_SIZE;
......@@ -776,8 +810,8 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output,
{
const unsigned char *in;
unsigned char *out;
unsigned int j;
unsigned char newIV[AES_BLOCK_SIZE];
PRBool aesni = aesni_support();
if (!inputLen)
return SECSuccess;
......@@ -786,21 +820,26 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output,
memcpy(newIV, in, AES_BLOCK_SIZE);
out = output + (inputLen - AES_BLOCK_SIZE);
while (inputLen > AES_BLOCK_SIZE) {
if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) {
return SECFailure;
if (aesni) {
// Use hardware acceleration for normal AES parameters.
rijndael_native_decryptBlock(cx, out, in);
native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
} else {
rijndael_decryptBlock128(cx, out, in);
xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
}
for (j = 0; j < AES_BLOCK_SIZE; ++j)
out[j] ^= in[(int)(j - AES_BLOCK_SIZE)];
out -= AES_BLOCK_SIZE;
in -= AES_BLOCK_SIZE;
inputLen -= AES_BLOCK_SIZE;
}
if (in == input) {
if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) {
return SECFailure;
if (aesni) {
rijndael_native_decryptBlock(cx, out, in);
native_xorBlock(out, out, cx->iv);
} else {
rijndael_decryptBlock128(cx, out, in);
xorBlock(out, out, cx->iv);
}
for (j = 0; j < AES_BLOCK_SIZE; ++j)
out[j] ^= cx->iv[j];
}
memcpy(cx->iv, newIV, AES_BLOCK_SIZE);
return SECSuccess;
......
......@@ -26,10 +26,6 @@
#endif /* NSS_DISABLE_SSE2 */
#endif
typedef void AESBlockFunc(AESContext *cx,
unsigned char *output,
const unsigned char *input);
/* RIJNDAEL_NUM_ROUNDS
*
* Number of rounds per execution
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment