diff --git a/lib/freebl/aes-x86.c b/lib/freebl/aes-x86.c index edd0b51251..508fcc6502 100644 --- a/lib/freebl/aes-x86.c +++ b/lib/freebl/aes-x86.c @@ -155,3 +155,30 @@ rijndael_native_encryptBlock(AESContext *cx, m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]); _mm_storeu_si128((__m128i *)output, m); } + +void +rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + int i; + pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); + m = _mm_xor_si128(m, cx->k.keySchedule[cx->Nr]); + for (i = cx->Nr - 1; i > 0; --i) { + m = _mm_aesdec_si128(m, cx->k.keySchedule[i]); + } + m = _mm_aesdeclast_si128(m, cx->k.keySchedule[0]); + _mm_storeu_si128((__m128i *)output, m); +} + +// out = a ^ b +void +native_xorBlock(unsigned char *out, + const unsigned char *a, + const unsigned char *b) +{ + pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a)); + pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b)); + in1 = _mm_xor_si128(in1, in2); + _mm_storeu_si128((__m128i *)(out), in1); +} diff --git a/lib/freebl/rijndael.c b/lib/freebl/rijndael.c index 2e8bab87ff..247a9419bd 100644 --- a/lib/freebl/rijndael.c +++ b/lib/freebl/rijndael.c @@ -42,6 +42,12 @@ void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, void rijndael_native_encryptBlock(AESContext *cx, unsigned char *output, const unsigned char *input); +void rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input); +void native_xorBlock(unsigned char *out, + const unsigned char *a, + const unsigned char *b); /* Stub definitions for the above rijndael_native_* functions, which * shouldn't be used unless NSS_X86_OR_X64 is defined */ @@ -62,6 +68,23 @@ rijndael_native_encryptBlock(AESContext *cx, PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); PORT_Assert(0); } + +void +rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} + +void +native_xorBlock(unsigned char *out, const unsigned char *a, + const unsigned char *b) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} #endif /* NSS_X86_OR_X64 */ /* @@ -509,6 +532,15 @@ typedef union { #define STATE_BYTE(i) state.b[i] +// out = a ^ b +inline static void +xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b) +{ + for (unsigned int j = 0; j < AES_BLOCK_SIZE; ++j) { + (out)[j] = (a)[j] ^ (b)[j]; + } +} + static void NO_SANITIZE_ALIGNMENT rijndael_encryptBlock128(AESContext *cx, unsigned char *output, @@ -604,7 +636,7 @@ rijndael_encryptBlock128(AESContext *cx, #endif } -static SECStatus NO_SANITIZE_ALIGNMENT +static void NO_SANITIZE_ALIGNMENT rijndael_decryptBlock128(AESContext *cx, unsigned char *output, const unsigned char *input) @@ -693,7 +725,6 @@ rijndael_decryptBlock128(AESContext *cx, memcpy(output, outBuf, sizeof outBuf); } #endif - return SECSuccess; } /************************************************************************** @@ -707,16 +738,13 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { - AESBlockFunc *encryptor; - - if (aesni_support()) { - /* Use hardware acceleration for normal AES parameters. */ - encryptor = &rijndael_native_encryptBlock; - } else { - encryptor = &rijndael_encryptBlock128; - } + PRBool aesni = aesni_support(); while (inputLen > 0) { - (*encryptor)(cx, output, input); + if (aesni) { + rijndael_native_encryptBlock(cx, output, input); + } else { + rijndael_encryptBlock128(cx, output, input); + } output += AES_BLOCK_SIZE; input += AES_BLOCK_SIZE; inputLen -= AES_BLOCK_SIZE; @@ -729,20 +757,23 @@ rijndael_encryptCBC(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { - unsigned int j; - unsigned char *lastblock; + unsigned char *lastblock = cx->iv; unsigned char inblock[AES_BLOCK_SIZE * 8]; + PRBool aesni = aesni_support(); if (!inputLen) return SECSuccess; - lastblock = cx->iv; while (inputLen > 0) { - /* XOR with the last block (IV if first block) */ - for (j = 0; j < AES_BLOCK_SIZE; ++j) { - inblock[j] = input[j] ^ lastblock[j]; + if (aesni) { + /* XOR with the last block (IV if first block) */ + native_xorBlock(inblock, input, lastblock); + /* encrypt */ + rijndael_native_encryptBlock(cx, output, inblock); + } else { + xorBlock(inblock, input, lastblock); + rijndael_encryptBlock128(cx, output, inblock); } - /* encrypt */ - rijndael_encryptBlock128(cx, output, inblock); + /* move to the next block */ lastblock = output; output += AES_BLOCK_SIZE; @@ -758,9 +789,12 @@ rijndael_decryptECB(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { + PRBool aesni = aesni_support(); while (inputLen > 0) { - if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) { - return SECFailure; + if (aesni) { + rijndael_native_decryptBlock(cx, output, input); + } else { + rijndael_decryptBlock128(cx, output, input); } output += AES_BLOCK_SIZE; input += AES_BLOCK_SIZE; @@ -776,8 +810,8 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output, { const unsigned char *in; unsigned char *out; - unsigned int j; unsigned char newIV[AES_BLOCK_SIZE]; + PRBool aesni = aesni_support(); if (!inputLen) return SECSuccess; @@ -786,21 +820,26 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output, memcpy(newIV, in, AES_BLOCK_SIZE); out = output + (inputLen - AES_BLOCK_SIZE); while (inputLen > AES_BLOCK_SIZE) { - if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { - return SECFailure; + if (aesni) { + // Use hardware acceleration for normal AES parameters. + rijndael_native_decryptBlock(cx, out, in); + native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]); + } else { + rijndael_decryptBlock128(cx, out, in); + xorBlock(out, out, &in[-AES_BLOCK_SIZE]); } - for (j = 0; j < AES_BLOCK_SIZE; ++j) - out[j] ^= in[(int)(j - AES_BLOCK_SIZE)]; out -= AES_BLOCK_SIZE; in -= AES_BLOCK_SIZE; inputLen -= AES_BLOCK_SIZE; } if (in == input) { - if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { - return SECFailure; + if (aesni) { + rijndael_native_decryptBlock(cx, out, in); + native_xorBlock(out, out, cx->iv); + } else { + rijndael_decryptBlock128(cx, out, in); + xorBlock(out, out, cx->iv); } - for (j = 0; j < AES_BLOCK_SIZE; ++j) - out[j] ^= cx->iv[j]; } memcpy(cx->iv, newIV, AES_BLOCK_SIZE); return SECSuccess; diff --git a/lib/freebl/rijndael.h b/lib/freebl/rijndael.h index 61060e016e..16357c8c66 100644 --- a/lib/freebl/rijndael.h +++ b/lib/freebl/rijndael.h @@ -26,10 +26,6 @@ #endif /* NSS_DISABLE_SSE2 */ #endif -typedef void AESBlockFunc(AESContext *cx, - unsigned char *output, - const unsigned char *input); - /* RIJNDAEL_NUM_ROUNDS * * Number of rounds per execution