From 0d75de7e709304b382fd5c6ea947407c66a063fd Mon Sep 17 00:00:00 2001 From: Franziskus Kiefer Date: Tue, 25 Apr 2017 10:41:03 +0200 Subject: [PATCH] Bug 1357670 - AES-NI for all platforms that support it, r=ttaubert,mt With this patch we use AES-NI whenever possible. The compile time flag USE_HW_AES does NOT disable this new code. NSS_DISABLE_HW_AES can be used as runtime flag to disable AES-NI and fall back to the software implementation. Differential Revision: https://nss-review.dev.mozaws.net/D323 --HG-- extra : rebase_source : 40035bcd45711652feeef0b75b7326b8a371da80 extra : histedit_source : 552b54f8bf0bc5eb94092294313422a484490b52%2Cb18d7d5fd2c1b22e15e4ed5b57387a9d7780ccf0 --- gtests/freebl_gtest/ghash_unittest.cc | 7 +- lib/freebl/aeskeywrap.c | 20 +- lib/freebl/blapi.h | 3 +- lib/freebl/blapii.h | 12 + lib/freebl/ctr.c | 20 +- lib/freebl/ctr.h | 5 +- lib/freebl/cts.c | 8 +- lib/freebl/cts.h | 2 +- lib/freebl/gcm.c | 114 +++--- lib/freebl/gcm.h | 22 +- lib/freebl/intel-aes-x64-masm.asm | 19 +- lib/freebl/intel-aes-x86-masm.asm | 19 +- lib/freebl/intel-aes.s | 45 +-- lib/freebl/intel-gcm-wrap.c | 7 +- lib/freebl/intel-gcm-x64-masm.asm | 7 +- lib/freebl/intel-gcm-x86-masm.asm | 4 +- lib/freebl/intel-gcm.h | 2 +- lib/freebl/intel-gcm.s | 8 +- lib/freebl/rijndael.c | 512 ++++++++++++++------------ lib/freebl/rijndael.h | 36 +- 20 files changed, 431 insertions(+), 441 deletions(-) diff --git a/gtests/freebl_gtest/ghash_unittest.cc b/gtests/freebl_gtest/ghash_unittest.cc index eeca2daaf2..1866bda029 100644 --- a/gtests/freebl_gtest/ghash_unittest.cc +++ b/gtests/freebl_gtest/ghash_unittest.cc @@ -137,16 +137,15 @@ class GHashTest : public ::testing::TestWithParam { // Hash additional_data, cipher_text. gcmHash_Reset(&ghashCtx, const_cast(additional_data.data()), - additional_data.size(), 16); + additional_data.size()); gcmHash_Update(&ghashCtx, const_cast(cipher_text.data()), - cipher_text.size(), 16); + cipher_text.size()); // Finalise (hash in the length). uint8_t result_bytes[16]; unsigned int out_len; - ASSERT_EQ(SECSuccess, - gcmHash_Final(&ghashCtx, result_bytes, &out_len, 16, 16)); + ASSERT_EQ(SECSuccess, gcmHash_Final(&ghashCtx, result_bytes, &out_len, 16)); ASSERT_EQ(16U, out_len); EXPECT_EQ(expected, std::vector(result_bytes, result_bytes + 16)); } diff --git a/lib/freebl/aeskeywrap.c b/lib/freebl/aeskeywrap.c index 79ff8a852a..ee909dbd05 100644 --- a/lib/freebl/aeskeywrap.c +++ b/lib/freebl/aeskeywrap.c @@ -22,8 +22,9 @@ #include "rijndael.h" struct AESKeyWrapContextStr { - unsigned char iv[AES_KEY_WRAP_IV_BYTES]; AESContext aescx; + unsigned char iv[AES_KEY_WRAP_IV_BYTES]; + void *mem; /* Pointer to beginning of allocated memory. */ }; /******************************************/ @@ -34,8 +35,14 @@ struct AESKeyWrapContextStr { AESKeyWrapContext * AESKeyWrap_AllocateContext(void) { - AESKeyWrapContext *cx = PORT_New(AESKeyWrapContext); - return cx; + /* aligned_alloc is C11 so we have to do it the old way. 
*/ + AESKeyWrapContext *ctx = PORT_ZAlloc(sizeof(AESKeyWrapContext) + 15); + if (ctx == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return NULL; + } + ctx->mem = ctx; + return (AESKeyWrapContext *)(((uintptr_t)ctx + 15) & ~(uintptr_t)0x0F); } SECStatus @@ -77,7 +84,7 @@ AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv, return NULL; /* error is already set */ rv = AESKeyWrap_InitContext(cx, key, keylen, iv, 0, encrypt, 0); if (rv != SECSuccess) { - PORT_Free(cx); + PORT_Free(cx->mem); cx = NULL; /* error should already be set */ } return cx; @@ -94,8 +101,9 @@ AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit) if (cx) { AES_DestroyContext(&cx->aescx, PR_FALSE); /* memset(cx, 0, sizeof *cx); */ - if (freeit) - PORT_Free(cx); + if (freeit) { + PORT_Free(cx->mem); + } } } diff --git a/lib/freebl/blapi.h b/lib/freebl/blapi.h index 592624bdf7..31e471ac43 100644 --- a/lib/freebl/blapi.h +++ b/lib/freebl/blapi.h @@ -801,8 +801,7 @@ SEED_Decrypt(SEEDContext *cx, unsigned char *output, ** Create a new AES context suitable for AES encryption/decryption. ** "key" raw key data ** "keylen" the number of bytes of key data (16, 24, or 32) -** "blocklen" is the blocksize to use (16, 24, or 32) -** XXX currently only blocksize==16 has been tested! +** "blocklen" is the blocksize to use. NOTE: only 16 is supported! */ extern AESContext * AES_CreateContext(const unsigned char *key, const unsigned char *iv, diff --git a/lib/freebl/blapii.h b/lib/freebl/blapii.h index 0087c78997..b1be7bedf7 100644 --- a/lib/freebl/blapii.h +++ b/lib/freebl/blapii.h @@ -51,6 +51,18 @@ SEC_END_PROTOS #define HAVE_NO_SANITIZE_ATTR 0 #endif +/* Alignment helpers. */ +#if defined(_WINDOWS) && defined(NSS_X86_OR_X64) +#define pre_align __declspec(align(16)) +#define post_align +#elif defined(NSS_X86_OR_X64) +#define pre_align +#define post_align __attribute__((aligned(16))) +#else +#define pre_align +#define post_align +#endif + #if defined(HAVE_UNALIGNED_ACCESS) && HAVE_NO_SANITIZE_ATTR #define NO_SANITIZE_ALIGNMENT __attribute__((no_sanitize("alignment"))) #else diff --git a/lib/freebl/ctr.c b/lib/freebl/ctr.c index d5715a505f..b7167d4c4a 100644 --- a/lib/freebl/ctr.c +++ b/lib/freebl/ctr.c @@ -19,30 +19,30 @@ SECStatus CTR_InitContext(CTRContext *ctr, void *context, freeblCipherFunc cipher, - const unsigned char *param, unsigned int blocksize) + const unsigned char *param) { const CK_AES_CTR_PARAMS *ctrParams = (const CK_AES_CTR_PARAMS *)param; if (ctrParams->ulCounterBits == 0 || - ctrParams->ulCounterBits > blocksize * PR_BITS_PER_BYTE) { + ctrParams->ulCounterBits > AES_BLOCK_SIZE * PR_BITS_PER_BYTE) { PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } - /* Invariant: 0 < ctr->bufPtr <= blocksize */ + /* Invariant: 0 < ctr->bufPtr <= AES_BLOCK_SIZE */ ctr->checkWrap = PR_FALSE; - ctr->bufPtr = blocksize; /* no unused data in the buffer */ + ctr->bufPtr = AES_BLOCK_SIZE; /* no unused data in the buffer */ ctr->cipher = cipher; ctr->context = context; ctr->counterBits = ctrParams->ulCounterBits; - if (blocksize > sizeof(ctr->counter) || - blocksize > sizeof(ctrParams->cb)) { + if (AES_BLOCK_SIZE > sizeof(ctr->counter) || + AES_BLOCK_SIZE > sizeof(ctrParams->cb)) { PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); return SECFailure; } - PORT_Memcpy(ctr->counter, ctrParams->cb, blocksize); + PORT_Memcpy(ctr->counter, ctrParams->cb, AES_BLOCK_SIZE); if (ctr->counterBits < 64) { - PORT_Memcpy(ctr->counterFirst, ctr->counter, blocksize); + PORT_Memcpy(ctr->counterFirst, ctr->counter, 
AES_BLOCK_SIZE); ctr->checkWrap = PR_TRUE; } return SECSuccess; @@ -50,7 +50,7 @@ CTR_InitContext(CTRContext *ctr, void *context, freeblCipherFunc cipher, CTRContext * CTR_CreateContext(void *context, freeblCipherFunc cipher, - const unsigned char *param, unsigned int blocksize) + const unsigned char *param) { CTRContext *ctr; SECStatus rv; @@ -60,7 +60,7 @@ CTR_CreateContext(void *context, freeblCipherFunc cipher, if (ctr == NULL) { return NULL; } - rv = CTR_InitContext(ctr, context, cipher, param, blocksize); + rv = CTR_InitContext(ctr, context, cipher, param); if (rv != SECSuccess) { CTR_DestroyContext(ctr, PR_TRUE); ctr = NULL; diff --git a/lib/freebl/ctr.h b/lib/freebl/ctr.h index a97da144e5..a397e690e6 100644 --- a/lib/freebl/ctr.h +++ b/lib/freebl/ctr.h @@ -23,8 +23,7 @@ struct CTRContextStr { typedef struct CTRContextStr CTRContext; SECStatus CTR_InitContext(CTRContext *ctr, void *context, - freeblCipherFunc cipher, const unsigned char *param, - unsigned int blocksize); + freeblCipherFunc cipher, const unsigned char *param); /* * The context argument is the inner cipher context to use with cipher. The @@ -34,7 +33,7 @@ SECStatus CTR_InitContext(CTRContext *ctr, void *context, * The cipher argument is a block cipher in the ECB encrypt mode. */ CTRContext *CTR_CreateContext(void *context, freeblCipherFunc cipher, - const unsigned char *param, unsigned int blocksize); + const unsigned char *param); void CTR_DestroyContext(CTRContext *ctr, PRBool freeit); diff --git a/lib/freebl/cts.c b/lib/freebl/cts.c index 99ccebb603..774294b7a1 100644 --- a/lib/freebl/cts.c +++ b/lib/freebl/cts.c @@ -20,19 +20,15 @@ struct CTSContextStr { CTSContext * CTS_CreateContext(void *context, freeblCipherFunc cipher, - const unsigned char *iv, unsigned int blocksize) + const unsigned char *iv) { CTSContext *cts; - if (blocksize > MAX_BLOCK_SIZE) { - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return NULL; - } cts = PORT_ZNew(CTSContext); if (cts == NULL) { return NULL; } - PORT_Memcpy(cts->iv, iv, blocksize); + PORT_Memcpy(cts->iv, iv, MAX_BLOCK_SIZE); cts->cipher = cipher; cts->context = context; return cts; diff --git a/lib/freebl/cts.h b/lib/freebl/cts.h index a3ec180af8..ddd56197f6 100644 --- a/lib/freebl/cts.h +++ b/lib/freebl/cts.h @@ -17,7 +17,7 @@ typedef struct CTSContextStr CTSContext; * The cipher argument is a block cipher in the CBC mode. 
*/ CTSContext *CTS_CreateContext(void *context, freeblCipherFunc cipher, - const unsigned char *iv, unsigned int blocksize); + const unsigned char *iv); void CTS_DestroyContext(CTSContext *cts, PRBool freeit); diff --git a/lib/freebl/gcm.c b/lib/freebl/gcm.c index 11ef19ecf2..4ac1e18ef1 100644 --- a/lib/freebl/gcm.c +++ b/lib/freebl/gcm.c @@ -23,11 +23,11 @@ /* Forward declarations */ SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count, unsigned int blocksize); + unsigned int count); SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count, unsigned int blocksize); + unsigned int count); SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count, unsigned int blocksize); + unsigned int count); uint64_t get64(const unsigned char *bytes) @@ -121,7 +121,7 @@ bmul(uint64_t x, uint64_t y, uint64_t *r_high, uint64_t *r_low) SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count, unsigned int blocksize) + unsigned int count) { uint64_t ci_low, ci_high; size_t i; @@ -198,7 +198,7 @@ bmul32(uint32_t x, uint32_t y, uint32_t *r_high, uint32_t *r_low) SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count, unsigned int blocksize) + unsigned int count) { size_t i; uint64_t ci_low, ci_high; @@ -285,7 +285,7 @@ gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count, unsigned int blocksize) + unsigned int count) { #ifdef NSS_X86_OR_X64 size_t i; @@ -383,12 +383,12 @@ gcm_zeroX(gcmHashContext *ghash) /* * implement GCM GHASH using the freebl GHASH function. The gcm_HashMult - * function always takes blocksize lengths of data. gcmHash_Update will + * function always takes AES_BLOCK_SIZE lengths of data. gcmHash_Update will * format the data properly. */ SECStatus gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, - unsigned int len, unsigned int blocksize) + unsigned int len) { unsigned int blocks; SECStatus rv; @@ -398,7 +398,7 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, /* first deal with the current buffer of data. 
Try to fill it out so * we can hash it */ if (ghash->bufLen) { - unsigned int needed = PR_MIN(len, blocksize - ghash->bufLen); + unsigned int needed = PR_MIN(len, AES_BLOCK_SIZE - ghash->bufLen); if (needed != 0) { PORT_Memcpy(ghash->buffer + ghash->bufLen, buf, needed); } @@ -409,24 +409,24 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, /* didn't add enough to hash the data, nothing more do do */ return SECSuccess; } - PORT_Assert(ghash->bufLen == blocksize); + PORT_Assert(ghash->bufLen == AES_BLOCK_SIZE); /* hash the buffer and clear it */ - rv = ghash->ghash_mul(ghash, ghash->buffer, 1, blocksize); - PORT_Memset(ghash->buffer, 0, blocksize); + rv = ghash->ghash_mul(ghash, ghash->buffer, 1); + PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE); ghash->bufLen = 0; if (rv != SECSuccess) { return SECFailure; } } /* now hash any full blocks remaining in the data stream */ - blocks = len / blocksize; + blocks = len / AES_BLOCK_SIZE; if (blocks) { - rv = ghash->ghash_mul(ghash, buf, blocks, blocksize); + rv = ghash->ghash_mul(ghash, buf, blocks); if (rv != SECSuccess) { return SECFailure; } - buf += blocks * blocksize; - len -= blocks * blocksize; + buf += blocks * AES_BLOCK_SIZE; + len -= blocks * AES_BLOCK_SIZE; } /* save any remainder in the buffer to be hashed with the next call */ @@ -442,7 +442,7 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, * save the lengths for the final completion of the hash */ static SECStatus -gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize) +gcmHash_Sync(gcmHashContext *ghash) { int i; SECStatus rv; @@ -459,9 +459,9 @@ gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize) /* now zero fill the buffer and hash the last block */ if (ghash->bufLen) { - PORT_Memset(ghash->buffer + ghash->bufLen, 0, blocksize - ghash->bufLen); - rv = ghash->ghash_mul(ghash, ghash->buffer, 1, blocksize); - PORT_Memset(ghash->buffer, 0, blocksize); + PORT_Memset(ghash->buffer + ghash->bufLen, 0, AES_BLOCK_SIZE - ghash->bufLen); + rv = ghash->ghash_mul(ghash, ghash->buffer, 1); + PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE); ghash->bufLen = 0; if (rv != SECSuccess) { return SECFailure; @@ -486,20 +486,18 @@ gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize) */ SECStatus gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, - unsigned int *outlen, unsigned int maxout, - unsigned int blocksize) + unsigned int *outlen, unsigned int maxout) { unsigned char T[MAX_BLOCK_SIZE]; SECStatus rv; - rv = gcmHash_Sync(ghash, blocksize); + rv = gcmHash_Sync(ghash); if (rv != SECSuccess) { goto cleanup; } rv = ghash->ghash_mul(ghash, ghash->counterBuf, - (GCM_HASH_LEN_LEN * 2) / blocksize, - blocksize); + (GCM_HASH_LEN_LEN * 2) / AES_BLOCK_SIZE); if (rv != SECSuccess) { goto cleanup; } @@ -519,8 +517,8 @@ gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, WRITE64(ghash->x_high, T); } - if (maxout > blocksize) { - maxout = blocksize; + if (maxout > AES_BLOCK_SIZE) { + maxout = AES_BLOCK_SIZE; } PORT_Memcpy(outbuf, T, maxout); *outlen = maxout; @@ -533,7 +531,7 @@ gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, SECStatus gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD, - unsigned int AADLen, unsigned int blocksize) + unsigned int AADLen) { SECStatus rv; @@ -547,11 +545,11 @@ gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD, /* now kick things off by hashing the Additional Authenticated Data */ if (AADLen != 0) { - rv = gcmHash_Update(ghash, AAD, AADLen, blocksize); + rv = 
gcmHash_Update(ghash, AAD, AADLen); if (rv != SECSuccess) { return SECFailure; } - rv = gcmHash_Sync(ghash, blocksize); + rv = gcmHash_Sync(ghash); if (rv != SECSuccess) { return SECFailure; } @@ -573,7 +571,7 @@ struct GCMContextStr { GCMContext * GCM_CreateContext(void *context, freeblCipherFunc cipher, - const unsigned char *params, unsigned int blocksize) + const unsigned char *params) { GCMContext *gcm = NULL; gcmHashContext *ghash = NULL; @@ -589,10 +587,6 @@ GCM_CreateContext(void *context, freeblCipherFunc cipher, const PRBool sw = PR_FALSE; #endif - if (blocksize > MAX_BLOCK_SIZE || blocksize > sizeof(ctrParams.cb)) { - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return NULL; - } gcm = PORT_ZNew(GCMContext); if (gcm == NULL) { PORT_SetError(SEC_ERROR_NO_MEMORY); @@ -609,8 +603,8 @@ GCM_CreateContext(void *context, freeblCipherFunc cipher, /* first plug in the ghash context */ gcm->ghash_context = ghash; - PORT_Memset(H, 0, blocksize); - rv = (*cipher)(context, H, &tmp, blocksize, H, blocksize, blocksize); + PORT_Memset(H, 0, AES_BLOCK_SIZE); + rv = (*cipher)(context, H, &tmp, AES_BLOCK_SIZE, H, AES_BLOCK_SIZE, AES_BLOCK_SIZE); if (rv != SECSuccess) { goto loser; } @@ -622,22 +616,21 @@ GCM_CreateContext(void *context, freeblCipherFunc cipher, /* fill in the Counter context */ ctrParams.ulCounterBits = 32; PORT_Memset(ctrParams.cb, 0, sizeof(ctrParams.cb)); - if ((blocksize == 16) && (gcmParams->ulIvLen == 12)) { + if (gcmParams->ulIvLen == 12) { PORT_Memcpy(ctrParams.cb, gcmParams->pIv, gcmParams->ulIvLen); - ctrParams.cb[blocksize - 1] = 1; + ctrParams.cb[AES_BLOCK_SIZE - 1] = 1; } else { - rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen, - blocksize); + rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen); if (rv != SECSuccess) { goto loser; } - rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, blocksize, blocksize); + rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, AES_BLOCK_SIZE); if (rv != SECSuccess) { goto loser; } } rv = CTR_InitContext(&gcm->ctr_context, context, cipher, - (unsigned char *)&ctrParams, blocksize); + (unsigned char *)&ctrParams); if (rv != SECSuccess) { goto loser; } @@ -647,14 +640,14 @@ GCM_CreateContext(void *context, freeblCipherFunc cipher, gcm->tagBits = gcmParams->ulTagBits; /* save for final step */ /* calculate the final tag key. NOTE: gcm->tagKey is zero to start with. 
* if this assumption changes, we would need to explicitly clear it here */ - rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, blocksize, - gcm->tagKey, blocksize, blocksize); + rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, AES_BLOCK_SIZE, + gcm->tagKey, AES_BLOCK_SIZE, AES_BLOCK_SIZE); if (rv != SECSuccess) { goto loser; } /* finally mix in the AAD data */ - rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen, blocksize); + rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen); if (rv != SECSuccess) { goto loser; } @@ -691,8 +684,7 @@ GCM_DestroyContext(GCMContext *gcm, PRBool freeit) static SECStatus gcm_GetTag(GCMContext *gcm, unsigned char *outbuf, - unsigned int *outlen, unsigned int maxout, - unsigned int blocksize) + unsigned int *outlen, unsigned int maxout) { unsigned int tagBytes; unsigned int extra; @@ -714,7 +706,7 @@ gcm_GetTag(GCMContext *gcm, unsigned char *outbuf, return SECFailure; } maxout = tagBytes; - rv = gcmHash_Final(gcm->ghash_context, outbuf, outlen, maxout, blocksize); + rv = gcmHash_Final(gcm->ghash_context, outbuf, outlen, maxout); if (rv != SECSuccess) { return SECFailure; } @@ -744,6 +736,12 @@ GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf, unsigned int tagBytes; unsigned int len; + PORT_Assert(blocksize == AES_BLOCK_SIZE); + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; if (UINT_MAX - inlen < tagBytes) { PORT_SetError(SEC_ERROR_INPUT_LEN); @@ -756,17 +754,17 @@ GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf, } rv = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, - inbuf, inlen, blocksize); + inbuf, inlen, AES_BLOCK_SIZE); if (rv != SECSuccess) { return SECFailure; } - rv = gcmHash_Update(gcm->ghash_context, outbuf, *outlen, blocksize); + rv = gcmHash_Update(gcm->ghash_context, outbuf, *outlen); if (rv != SECSuccess) { PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ *outlen = 0; return SECFailure; } - rv = gcm_GetTag(gcm, outbuf + *outlen, &len, maxout - *outlen, blocksize); + rv = gcm_GetTag(gcm, outbuf + *outlen, &len, maxout - *outlen); if (rv != SECSuccess) { PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ *outlen = 0; @@ -796,6 +794,12 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, const unsigned char *intag; unsigned int len; + PORT_Assert(blocksize == AES_BLOCK_SIZE); + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; /* get the authentication block */ @@ -808,11 +812,11 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, intag = inbuf + inlen; /* verify the block */ - rv = gcmHash_Update(gcm->ghash_context, inbuf, inlen, blocksize); + rv = gcmHash_Update(gcm->ghash_context, inbuf, inlen); if (rv != SECSuccess) { return SECFailure; } - rv = gcm_GetTag(gcm, tag, &len, blocksize, blocksize); + rv = gcm_GetTag(gcm, tag, &len, AES_BLOCK_SIZE); if (rv != SECSuccess) { return SECFailure; } @@ -828,5 +832,5 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, PORT_Memset(tag, 0, sizeof(tag)); /* finish the decryption */ return CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, - inbuf, inlen, blocksize); + inbuf, inlen, AES_BLOCK_SIZE); } diff --git a/lib/freebl/gcm.h b/lib/freebl/gcm.h index 0185d412b3..0c707a0811 100644 --- a/lib/freebl/gcm.h +++ b/lib/freebl/gcm.h @@ -28,7 +28,7 @@ 
typedef struct GCMContextStr GCMContext; * The cipher argument is a block cipher in the ECB encrypt mode. */ GCMContext *GCM_CreateContext(void *context, freeblCipherFunc cipher, - const unsigned char *params, unsigned int blocksize); + const unsigned char *params); void GCM_DestroyContext(GCMContext *gcm, PRBool freeit); SECStatus GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf, unsigned int *outlen, unsigned int maxout, @@ -40,21 +40,10 @@ SECStatus GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, unsigned int blocksize); /* These functions are here only so we can test them */ -#if defined(_WINDOWS) && defined(NSS_X86_OR_X64) -#define pre_align __declspec(align(16)) -#define post_align -#elif defined(NSS_X86_OR_X64) -#define pre_align -#define post_align __attribute__((aligned(16))) -#else -#define pre_align -#define post_align -#endif - #define GCM_HASH_LEN_LEN 8 /* gcm hash defines lengths to be 64 bits */ typedef struct gcmHashContextStr gcmHashContext; typedef SECStatus (*ghash_t)(gcmHashContext *, const unsigned char *, - unsigned int, unsigned int); + unsigned int); pre_align struct gcmHashContextStr { #ifdef NSS_X86_OR_X64 __m128i x, h; @@ -70,14 +59,13 @@ pre_align struct gcmHashContextStr { } post_align; SECStatus gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, - unsigned int len, unsigned int blocksize); + unsigned int len); SECStatus gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw); SECStatus gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD, - unsigned int AADLen, unsigned int blocksize); + unsigned int AADLen); SECStatus gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, - unsigned int *outlen, unsigned int maxout, - unsigned int blocksize); + unsigned int *outlen, unsigned int maxout); SEC_END_PROTOS diff --git a/lib/freebl/intel-aes-x64-masm.asm b/lib/freebl/intel-aes-x64-masm.asm index ef5c76ba28..fe183bca03 100644 --- a/lib/freebl/intel-aes-x64-masm.asm +++ b/lib/freebl/intel-aes-x64-masm.asm @@ -91,8 +91,6 @@ LOCAL bail movdqu [rsp + 1*16], xmm7 movdqu [rsp + 2*16], xmm8 - lea ctx, [48+ctx] - loop8: cmp inputLen, 8*16 jb loop1 @@ -555,9 +553,7 @@ LOCAL bail movdqu [rsp + 1*16], xmm7 movdqu [rsp + 2*16], xmm8 - lea ctx, [48+ctx] - - movdqu xmm0, [-32+ctx] + movdqu xmm0, [256+ctx] movdqu xmm2, [0*16 + ctx] movdqu xmm3, [1*16 + ctx] @@ -597,7 +593,7 @@ loop1: jmp loop1 bail: - movdqu [-32+ctx], xmm0 + movdqu [256+ctx], xmm0 xor rax, rax @@ -625,8 +621,6 @@ LOCAL bail movdqu [rsp + 1*16], xmm7 movdqu [rsp + 2*16], xmm8 - lea ctx, [48+ctx] - loop8: cmp inputLen, 8*16 jb dec1 @@ -657,7 +651,7 @@ loop8: ENDM aes_dec_last_rnd rnds - movdqu xmm8, [-32 + ctx] + movdqu xmm8, [256 + ctx] pxor xmm0, xmm8 movdqu xmm8, [0*16 + input] pxor xmm1, xmm8 @@ -683,7 +677,7 @@ loop8: movdqu [5*16 + output], xmm5 movdqu [6*16 + output], xmm6 movdqu [7*16 + output], xmm7 - movdqu [-32 + ctx], xmm8 + movdqu [256 + ctx], xmm8 lea input, [8*16 + input] lea output, [8*16 + output] @@ -691,7 +685,7 @@ loop8: jmp loop8 dec1: - movdqu xmm3, [-32 + ctx] + movdqu xmm3, [256 + ctx] loop1: cmp inputLen, 1*16 @@ -721,7 +715,7 @@ loop1: jmp loop1 bail: - movdqu [-32 + ctx], xmm3 + movdqu [256 + ctx], xmm3 xor rax, rax movdqu xmm6, [rsp + 0*16] @@ -773,7 +767,6 @@ LOCAL bail mov ctrCtx, ctx mov ctx, [8+ctrCtx] - lea ctx, [48+ctx] sub rsp, 3*16 movdqu [rsp + 0*16], xmm6 diff --git a/lib/freebl/intel-aes-x86-masm.asm b/lib/freebl/intel-aes-x86-masm.asm index 7d805e7660..790c951e7c 100644 --- a/lib/freebl/intel-aes-x86-masm.asm 
+++ b/lib/freebl/intel-aes-x86-masm.asm @@ -87,8 +87,6 @@ LOCAL bail mov input, [esp + 2*4 + 4*4] mov inputLen, [esp + 2*4 + 5*4] - lea ctx, [44+ctx] - loop7: cmp inputLen, 7*16 jb loop1 @@ -557,9 +555,7 @@ LOCAL bail mov input, [esp + 2*4 + 4*4] mov inputLen, [esp + 2*4 + 5*4] - lea ctx, [44+ctx] - - movdqu xmm0, [-32+ctx] + movdqu xmm0, [252+ctx] movdqu xmm2, [0*16 + ctx] movdqu xmm3, [1*16 + ctx] @@ -597,7 +593,7 @@ loop1: jmp loop1 bail: - movdqu [-32+ctx], xmm0 + movdqu [252+ctx], xmm0 xor eax, eax pop inputLen @@ -619,8 +615,6 @@ LOCAL bail mov input, [esp + 2*4 + 4*4] mov inputLen, [esp + 2*4 + 5*4] - lea ctx, [44+ctx] - loop7: cmp inputLen, 7*16 jb dec1 @@ -649,7 +643,7 @@ loop7: ENDM aes_dec_last_rnd rnds - movdqu xmm7, [-32 + ctx] + movdqu xmm7, [252 + ctx] pxor xmm0, xmm7 movdqu xmm7, [0*16 + input] pxor xmm1, xmm7 @@ -672,7 +666,7 @@ loop7: movdqu [4*16 + output], xmm4 movdqu [5*16 + output], xmm5 movdqu [6*16 + output], xmm6 - movdqu [-32 + ctx], xmm7 + movdqu [252 + ctx], xmm7 lea input, [7*16 + input] lea output, [7*16 + output] @@ -680,7 +674,7 @@ loop7: jmp loop7 dec1: - movdqu xmm3, [-32 + ctx] + movdqu xmm3, [252 + ctx] loop1: cmp inputLen, 1*16 @@ -710,7 +704,7 @@ loop1: jmp loop1 bail: - movdqu [-32 + ctx], xmm3 + movdqu [252 + ctx], xmm3 xor eax, eax pop inputLen ret @@ -769,7 +763,6 @@ LOCAL bail mov inputLen, [esp + 4*5 + 5*4] mov ctx, [4+ctrCtx] - lea ctx, [44+ctx] mov ebp, esp sub esp, 7*16 diff --git a/lib/freebl/intel-aes.s b/lib/freebl/intel-aes.s index 2dfcfa15b4..b242d233fe 100644 --- a/lib/freebl/intel-aes.s +++ b/lib/freebl/intel-aes.s @@ -4,8 +4,7 @@ .text -#define IV_OFFSET 16 -#define EXPANDED_KEY_OFFSET 48 +#define IV_OFFSET 256 /* * Warning: the length values used in this module are "unsigned int" @@ -144,9 +143,6 @@ key_expansion128: .globl intel_aes_encrypt_ecb_128 .align 16 intel_aes_encrypt_ecb_128: -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 48(%rdi), %rdi - movdqu (%rdi), %xmm2 movdqu 160(%rdi), %xmm12 xor %eax, %eax @@ -328,9 +324,6 @@ intel_aes_encrypt_ecb_128: .globl intel_aes_decrypt_ecb_128 .align 16 intel_aes_decrypt_ecb_128: -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 48(%rdi), %rdi - movdqu (%rdi), %xmm2 movdqu 160(%rdi), %xmm12 xorl %eax, %eax @@ -516,9 +509,7 @@ intel_aes_encrypt_cbc_128: je 2f // leaq IV_OFFSET(%rdi), %rdx -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 16(%rdi), %rdx - leaq 48(%rdi), %rdi + leaq 256(%rdi), %rdx movdqu (%rdx), %xmm0 movdqu (%rdi), %xmm2 @@ -575,9 +566,7 @@ intel_aes_encrypt_cbc_128: .align 16 intel_aes_decrypt_cbc_128: // leaq IV_OFFSET(%rdi), %rdx -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 16(%rdi), %rdx - leaq 48(%rdi), %rdi + leaq 256(%rdi), %rdx movdqu (%rdx), %xmm0 /* iv */ movdqu (%rdi), %xmm2 /* first key block */ @@ -902,9 +891,6 @@ key_expansion192: .globl intel_aes_encrypt_ecb_192 .align 16 intel_aes_encrypt_ecb_192: -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 48(%rdi), %rdi - movdqu (%rdi), %xmm2 movdqu 192(%rdi), %xmm14 xorl %eax, %eax @@ -1109,9 +1095,6 @@ intel_aes_encrypt_ecb_192: .globl intel_aes_decrypt_ecb_192 .align 16 intel_aes_decrypt_ecb_192: -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 48(%rdi), %rdi - movdqu (%rdi), %xmm2 movdqu 192(%rdi), %xmm14 xorl %eax, %eax @@ -1320,9 +1303,7 @@ intel_aes_encrypt_cbc_192: je 2f // leaq IV_OFFSET(%rdi), %rdx -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 16(%rdi), %rdx - leaq 48(%rdi), %rdi + leaq 256(%rdi), %rdx movdqu (%rdx), %xmm0 movdqu (%rdi), %xmm2 @@ -1382,8 +1363,8 @@ intel_aes_encrypt_cbc_192: .globl 
intel_aes_decrypt_cbc_192 .align 16 intel_aes_decrypt_cbc_192: - leaq 16(%rdi), %rdx - leaq 48(%rdi), %rdi +// leaq IV_OFFSET(%rdi), %rdx + leaq 256(%rdi), %rdx movdqu (%rdx), %xmm0 movdqu (%rdi), %xmm2 @@ -1738,9 +1719,6 @@ key_expansion256: .globl intel_aes_encrypt_ecb_256 .align 16 intel_aes_encrypt_ecb_256: -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 48(%rdi), %rdi - movdqu (%rdi), %xmm2 movdqu 224(%rdi), %xmm15 xorl %eax, %eax @@ -1970,9 +1948,6 @@ intel_aes_encrypt_ecb_256: .globl intel_aes_decrypt_ecb_256 .align 16 intel_aes_decrypt_ecb_256: -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 48(%rdi), %rdi - movdqu (%rdi), %xmm2 movdqu 224(%rdi), %xmm15 xorl %eax, %eax @@ -2206,9 +2181,7 @@ intel_aes_encrypt_cbc_256: je 2f // leaq IV_OFFSET(%rdi), %rdx -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 16(%rdi), %rdx - leaq 48(%rdi), %rdi + leaq 256(%rdi), %rdx movdqu (%rdx), %xmm0 movdqu (%rdi), %xmm8 @@ -2274,9 +2247,7 @@ intel_aes_encrypt_cbc_256: .align 16 intel_aes_decrypt_cbc_256: // leaq IV_OFFSET(%rdi), %rdx -// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi - leaq 16(%rdi), %rdx - leaq 48(%rdi), %rdi + leaq 256(%rdi), %rdx movdqu (%rdx), %xmm0 movdqu (%rdi), %xmm2 diff --git a/lib/freebl/intel-gcm-wrap.c b/lib/freebl/intel-gcm-wrap.c index 8c5eaf0214..a9dc2e3edc 100644 --- a/lib/freebl/intel-gcm-wrap.c +++ b/lib/freebl/intel-gcm-wrap.c @@ -41,8 +41,7 @@ struct intel_AES_GCMContextStr { intel_AES_GCMContext * intel_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher, - const unsigned char *params, - unsigned int blocksize) + const unsigned char *params) { intel_AES_GCMContext *gcm = NULL; AESContext *aes = (AESContext *)context; @@ -59,10 +58,6 @@ intel_AES_GCM_CreateContext(void *context, unsigned int j; SECStatus rv; - if (blocksize != AES_BLOCK_SIZE) { - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return NULL; - } gcm = PORT_ZNew(intel_AES_GCMContext); if (gcm == NULL) { diff --git a/lib/freebl/intel-gcm-x64-masm.asm b/lib/freebl/intel-gcm-x64-masm.asm index 8b68b76e58..07ddefbc1e 100644 --- a/lib/freebl/intel-gcm-x64-masm.asm +++ b/lib/freebl/intel-gcm-x64-masm.asm @@ -496,8 +496,8 @@ LbeginENC: vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask] mov KS, [16*16 + 3*16 + Gctx] - mov NR, [4 + KS] - lea KS, [48 + KS] + mov NR, [244 + KS] + lea KS, [KS] vpshufb CTR0, CTR0, BSWAPMASK @@ -994,8 +994,7 @@ LbeginDEC: vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask] mov KS, [16*16 + 3*16 + Gctx] - mov NR, [4 + KS] - lea KS, [48 + KS] + mov NR, [244 + KS] vpshufb CTR0, CTR0, BSWAPMASK diff --git a/lib/freebl/intel-gcm-x86-masm.asm b/lib/freebl/intel-gcm-x86-masm.asm index 6362ad8595..32f4257884 100644 --- a/lib/freebl/intel-gcm-x86-masm.asm +++ b/lib/freebl/intel-gcm-x86-masm.asm @@ -390,7 +390,7 @@ Htbl textequ Gctx textequ len textequ KS textequ -NR textequ +NR textequ aluCTR textequ aluTMP textequ @@ -463,7 +463,6 @@ LbeginENC: mov Gctx, [ebp + 5*4 + 2*4] mov KS, [16*16 + 3*16 + Gctx] - lea KS, [44 + KS] mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] bswap aluCTR @@ -931,7 +930,6 @@ LbeginDEC: mov Gctx, [ebp + 5*4 + 2*4] mov KS, [16*16 + 3*16 + Gctx] - lea KS, [44 + KS] mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] bswap aluCTR diff --git a/lib/freebl/intel-gcm.h b/lib/freebl/intel-gcm.h index 566e544d87..05f52f297d 100644 --- a/lib/freebl/intel-gcm.h +++ b/lib/freebl/intel-gcm.h @@ -27,7 +27,7 @@ typedef struct intel_AES_GCMContextStr intel_AES_GCMContext; intel_AES_GCMContext *intel_AES_GCM_CreateContext(void 
*context, freeblCipherFunc cipher, - const unsigned char *params, unsigned int blocksize); + const unsigned char *params); void intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit); diff --git a/lib/freebl/intel-gcm.s b/lib/freebl/intel-gcm.s index 1a31060914..5b5cf5d4bb 100644 --- a/lib/freebl/intel-gcm.s +++ b/lib/freebl/intel-gcm.s @@ -467,8 +467,8 @@ intel_aes_gcmENC: vmovdqu 288(Gctx), CTR vmovdqu 272(Gctx), T mov 304(Gctx), KS - mov 4(KS), NR - lea 48(KS), KS +# AESContext->Nr + mov 244(KS), NR vpshufb .Lbswap_mask(%rip), CTR, CTR vpshufb .Lbswap_mask(%rip), T, T @@ -1001,8 +1001,8 @@ intel_aes_gcmDEC: vmovdqu 288(Gctx), CTR vmovdqu 272(Gctx), T mov 304(Gctx), KS - mov 4(KS), NR - lea 48(KS), KS +# AESContext->Nr + mov 244(KS), NR vpshufb .Lbswap_mask(%rip), CTR, CTR vpshufb .Lbswap_mask(%rip), T, T diff --git a/lib/freebl/rijndael.c b/lib/freebl/rijndael.c index 0c3ab58dea..e4ad60388f 100644 --- a/lib/freebl/rijndael.c +++ b/lib/freebl/rijndael.c @@ -18,21 +18,14 @@ #include "cts.h" #include "ctr.h" #include "gcm.h" +#include "mpi.h" #ifdef USE_HW_AES #include "intel-aes.h" #endif - -#include "mpi.h" - -#ifdef USE_HW_AES -static PRBool use_hw_aes = PR_FALSE; - #ifdef INTEL_GCM #include "intel-gcm.h" -static PRBool use_hw_gcm = PR_FALSE; -#endif -#endif /* USE_HW_AES */ +#endif /* INTEL_GCM */ /* * There are currently five ways to build this code, varying in performance @@ -373,7 +366,7 @@ init_rijndael_tables(void) * Nk == 8 where it happens twice in every key word, in the same positions). * For now, I'm implementing this case "dumbly", w/o any unrolling. */ -static SECStatus +static void rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) { unsigned int i; @@ -394,14 +387,169 @@ rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int N tmp = SUBBYTE(tmp); *pW = W[i - Nk] ^ tmp; } - return SECSuccess; +} + +#if defined(NSS_X86_OR_X64) +#define EXPAND_KEY128(k, rcon, res) \ + tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ + tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ + tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ + tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ + tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ + res = _mm_xor_si128(tmp, tmp_key) + +static void +native_key_expansion128(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp_key post_align; + pre_align __m128i tmp post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); + EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); + EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); + EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); + EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); + EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); + EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); + EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); + EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); + EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); +} + +#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ + tmp2 = _mm_slli_si128(k0, 4); \ + tmp1 = _mm_xor_si128(k0, tmp2); \ + tmp2 = _mm_slli_si128(tmp2, 4); \ + tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ + tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ + res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) + +#define EXPAND_KEY192_PART2(res, k1, k2) \ + tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ + res = 
_mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) + +#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ + EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ + EXPAND_KEY192_PART2(carry, res1, tmp3); \ + res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ + _mm_castsi128_pd(tmp3), 0)); \ + res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ + _mm_castsi128_pd(carry), 1)); \ + EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) + +static void +native_key_expansion192(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp1 post_align; + pre_align __m128i tmp2 post_align; + pre_align __m128i tmp3 post_align; + pre_align __m128i carry post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); + EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], + keySchedule[3], carry, 0x1, 0x2); + EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); + EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], + keySchedule[6], carry, 0x4, 0x8); + EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); + EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], + keySchedule[9], carry, 0x10, 0x20); + EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); + EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], + keySchedule[12], carry, 0x40, 0x80); +} + +#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ + tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ + tmp2 = _mm_slli_si128(k1x, 4); \ + tmp1 = _mm_xor_si128(k1x, tmp2); \ + tmp2 = _mm_slli_si128(tmp2, 4); \ + tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ + res = _mm_xor_si128(tmp1, tmp_key); + +#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ + EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ + EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) + +static void +native_key_expansion256(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp_key post_align; + pre_align __m128i tmp1 post_align; + pre_align __m128i tmp2 post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); + EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], + keySchedule[1], 0x01); + EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], + keySchedule[3], 0x02); + EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], + keySchedule[5], 0x04); + EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], + keySchedule[7], 0x08); + EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], + keySchedule[9], 0x10); + EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], + keySchedule[11], 0x20); + EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], + keySchedule[13], 0xFF); +} + +#endif /* NSS_X86_OR_X64 */ + +/* + * AES key expansion using aes-ni instructions. + */ +static void +native_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) +{ +#ifdef NSS_X86_OR_X64 + switch (Nk) { + case 4: + native_key_expansion128(cx, key); + return; + case 6: + native_key_expansion192(cx, key); + return; + case 8: + native_key_expansion256(cx, key); + return; + default: + /* This shouldn't happen. 
*/ + PORT_Assert(0); + } +#else + PORT_Assert(0); +#endif /* NSS_X86_OR_X64 */ +} + +static void +native_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ +#ifdef NSS_X86_OR_X64 + int i; + pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); + m = _mm_xor_si128(m, cx->keySchedule[0]); + for (i = 1; i < cx->Nr; ++i) { + m = _mm_aesenc_si128(m, cx->keySchedule[i]); + } + m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]); + _mm_storeu_si128((__m128i *)output, m); +#else + PORT_Assert(0); +#endif /* NSS_X86_OR_X64 */ } /* rijndael_key_expansion * * Generate the expanded key from the key input by the user. */ -static SECStatus +static void rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) { unsigned int i; @@ -409,8 +557,10 @@ rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk PRUint32 *pW; PRUint32 tmp; unsigned int round_key_words = cx->Nb * (cx->Nr + 1); - if (Nk == 7) - return rijndael_key_expansion7(cx, key, Nk); + if (Nk == 7) { + rijndael_key_expansion7(cx, key, Nk); + return; + } W = cx->expandedKey; /* The first Nk words contain the input cipher key */ memcpy(W, key, Nk * 4); @@ -469,7 +619,6 @@ rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk *pW = W[i - Nk] ^ tmp; } } - return SECSuccess; } /* rijndael_invkey_expansion @@ -477,7 +626,7 @@ rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk * Generate the expanded key for the inverse cipher from the key input by * the user. */ -static SECStatus +static void rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) { unsigned int r; @@ -485,8 +634,7 @@ rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int PRUint8 *b; int Nb = cx->Nb; /* begins like usual key expansion ... */ - if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) - return SECFailure; + rijndael_key_expansion(cx, key, Nk); /* ... but has the additional step of InvMixColumn, * excepting the first and last round keys. */ @@ -528,12 +676,11 @@ rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int IMXC2(b[2]) ^ IMXC3(b[3]); } } - return SECSuccess; } + /************************************************************************** * - * Stuff related to Rijndael encryption/decryption, optimized for - * a 128-bit blocksize. + * Stuff related to Rijndael encryption/decryption. * *************************************************************************/ @@ -561,7 +708,7 @@ typedef union { #define STATE_BYTE(i) state.b[i] -static SECStatus NO_SANITIZE_ALIGNMENT +static void NO_SANITIZE_ALIGNMENT rijndael_encryptBlock128(AESContext *cx, unsigned char *output, const unsigned char *input) @@ -654,7 +801,6 @@ rijndael_encryptBlock128(AESContext *cx, memcpy(output, outBuf, sizeof outBuf); } #endif - return SECSuccess; } static SECStatus NO_SANITIZE_ALIGNMENT @@ -749,104 +895,6 @@ rijndael_decryptBlock128(AESContext *cx, return SECSuccess; } -/************************************************************************** - * - * Stuff related to general Rijndael encryption/decryption, for blocksizes - * greater than 128 bits. - * - * XXX This code is currently untested! So far, AES specs have only been - * released for 128 bit blocksizes. This will be tested, but for now - * only the code above has been tested using known values. 
- * - *************************************************************************/ - -#define COLUMN(array, j) *((PRUint32 *)(array + j)) - -SECStatus -rijndael_encryptBlock(AESContext *cx, - unsigned char *output, - const unsigned char *input) -{ - return SECFailure; -#ifdef rijndael_large_blocks_fixed - unsigned int j, r, Nb; - unsigned int c2 = 0, c3 = 0; - PRUint32 *roundkeyw; - PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; - Nb = cx->Nb; - roundkeyw = cx->expandedKey; - /* Step 1: Add Round Key 0 to initial state */ - for (j = 0; j < 4 * Nb; j += 4) { - COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++; - } - /* Step 2: Loop over rounds [1..NR-1] */ - for (r = 1; r < cx->Nr; ++r) { - for (j = 0; j < Nb; ++j) { - COLUMN(output, j) = T0(STATE_BYTE(4 * j)) ^ - T1(STATE_BYTE(4 * ((j + 1) % Nb) + 1)) ^ - T2(STATE_BYTE(4 * ((j + c2) % Nb) + 2)) ^ - T3(STATE_BYTE(4 * ((j + c3) % Nb) + 3)); - } - for (j = 0; j < 4 * Nb; j += 4) { - COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++; - } - } - /* Step 3: Do the last round */ - /* Final round does not employ MixColumn */ - for (j = 0; j < Nb; ++j) { - COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4 * j)))) | - (BYTE1WORD(T3(STATE_BYTE(4 * (j + 1) % Nb) + 1))) | - (BYTE2WORD(T0(STATE_BYTE(4 * (j + c2) % Nb) + 2))) | - (BYTE3WORD(T1(STATE_BYTE(4 * (j + c3) % Nb) + 3)))) ^ - *roundkeyw++; - } - return SECSuccess; -#endif -} - -SECStatus -rijndael_decryptBlock(AESContext *cx, - unsigned char *output, - const unsigned char *input) -{ - return SECFailure; -#ifdef rijndael_large_blocks_fixed - int j, r, Nb; - int c2 = 0, c3 = 0; - PRUint32 *roundkeyw; - PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; - Nb = cx->Nb; - roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; - /* reverse key addition */ - for (j = 4 * Nb; j >= 0; j -= 4) { - COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--; - } - /* Loop over rounds in reverse [NR..1] */ - for (r = cx->Nr; r > 1; --r) { - /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ - for (j = 0; j < Nb; ++j) { - COLUMN(output, 4 * j) = TInv0(STATE_BYTE(4 * j)) ^ - TInv1(STATE_BYTE(4 * (j + Nb - 1) % Nb) + 1) ^ - TInv2(STATE_BYTE(4 * (j + Nb - c2) % Nb) + 2) ^ - TInv3(STATE_BYTE(4 * (j + Nb - c3) % Nb) + 3); - } - /* Invert the key addition step */ - for (j = 4 * Nb; j >= 0; j -= 4) { - COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--; - } - } - /* inverse sub */ - for (j = 0; j < 4 * Nb; ++j) { - output[j] = SINV(clone[j]); - } - /* final key addition */ - for (j = 4 * Nb; j >= 0; j -= 4) { - COLUMN(output, j) ^= *roundkeyw--; - } - return SECSuccess; -#endif -} - /************************************************************************** * * Rijndael modes of operation (ECB and CBC) @@ -856,22 +904,21 @@ rijndael_decryptBlock(AESContext *cx, static SECStatus rijndael_encryptECB(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, - const unsigned char *input, unsigned int inputLen, - unsigned int blocksize) + const unsigned char *input, unsigned int inputLen) { - SECStatus rv; AESBlockFunc *encryptor; - encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) - ? &rijndael_encryptBlock128 - : &rijndael_encryptBlock; + if (aesni_support()) { + /* Use hardware acceleration for normal AES parameters. 
*/ + encryptor = &native_encryptBlock; + } else { + encryptor = &rijndael_encryptBlock128; + } while (inputLen > 0) { - rv = (*encryptor)(cx, output, input); - if (rv != SECSuccess) - return rv; - output += blocksize; - input += blocksize; - inputLen -= blocksize; + (*encryptor)(cx, output, input); + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; } return SECSuccess; } @@ -879,58 +926,44 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output, static SECStatus rijndael_encryptCBC(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, - const unsigned char *input, unsigned int inputLen, - unsigned int blocksize) + const unsigned char *input, unsigned int inputLen) { unsigned int j; - SECStatus rv; - AESBlockFunc *encryptor; unsigned char *lastblock; - unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8]; + unsigned char inblock[AES_BLOCK_SIZE * 8]; if (!inputLen) return SECSuccess; lastblock = cx->iv; - encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) - ? &rijndael_encryptBlock128 - : &rijndael_encryptBlock; while (inputLen > 0) { /* XOR with the last block (IV if first block) */ - for (j = 0; j < blocksize; ++j) + for (j = 0; j < AES_BLOCK_SIZE; ++j) { inblock[j] = input[j] ^ lastblock[j]; + } /* encrypt */ - rv = (*encryptor)(cx, output, inblock); - if (rv != SECSuccess) - return rv; + rijndael_encryptBlock128(cx, output, inblock); /* move to the next block */ lastblock = output; - output += blocksize; - input += blocksize; - inputLen -= blocksize; + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; } - memcpy(cx->iv, lastblock, blocksize); + memcpy(cx->iv, lastblock, AES_BLOCK_SIZE); return SECSuccess; } static SECStatus rijndael_decryptECB(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, - const unsigned char *input, unsigned int inputLen, - unsigned int blocksize) + const unsigned char *input, unsigned int inputLen) { - SECStatus rv; - AESBlockFunc *decryptor; - - decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) - ? &rijndael_decryptBlock128 - : &rijndael_decryptBlock; while (inputLen > 0) { - rv = (*decryptor)(cx, output, input); - if (rv != SECSuccess) - return rv; - output += blocksize; - input += blocksize; - inputLen -= blocksize; + if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) { + return SECFailure; + } + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; } return SECSuccess; } @@ -938,43 +971,37 @@ rijndael_decryptECB(AESContext *cx, unsigned char *output, static SECStatus rijndael_decryptCBC(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, - const unsigned char *input, unsigned int inputLen, - unsigned int blocksize) + const unsigned char *input, unsigned int inputLen) { - SECStatus rv; - AESBlockFunc *decryptor; const unsigned char *in; unsigned char *out; unsigned int j; - unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; + unsigned char newIV[AES_BLOCK_SIZE]; if (!inputLen) return SECSuccess; PORT_Assert(output - input >= 0 || input - output >= (int)inputLen); - decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) - ? 
&rijndael_decryptBlock128 - : &rijndael_decryptBlock; - in = input + (inputLen - blocksize); - memcpy(newIV, in, blocksize); - out = output + (inputLen - blocksize); - while (inputLen > blocksize) { - rv = (*decryptor)(cx, out, in); - if (rv != SECSuccess) - return rv; - for (j = 0; j < blocksize; ++j) - out[j] ^= in[(int)(j - blocksize)]; - out -= blocksize; - in -= blocksize; - inputLen -= blocksize; + in = input + (inputLen - AES_BLOCK_SIZE); + memcpy(newIV, in, AES_BLOCK_SIZE); + out = output + (inputLen - AES_BLOCK_SIZE); + while (inputLen > AES_BLOCK_SIZE) { + if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { + return SECFailure; + } + for (j = 0; j < AES_BLOCK_SIZE; ++j) + out[j] ^= in[(int)(j - AES_BLOCK_SIZE)]; + out -= AES_BLOCK_SIZE; + in -= AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; } if (in == input) { - rv = (*decryptor)(cx, out, in); - if (rv != SECSuccess) - return rv; - for (j = 0; j < blocksize; ++j) + if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { + return SECFailure; + } + for (j = 0; j < AES_BLOCK_SIZE; ++j) out[j] ^= cx->iv[j]; } - memcpy(cx->iv, newIV, blocksize); + memcpy(cx->iv, newIV, AES_BLOCK_SIZE); return SECSuccess; } @@ -990,7 +1017,14 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output, AESContext * AES_AllocateContext(void) { - return PORT_ZNew(AESContext); + /* aligned_alloc is C11 so we have to do it the old way. */ + AESContext *ctx = PORT_ZAlloc(sizeof(AESContext) + 15); + if (ctx == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return NULL; + } + ctx->mem = ctx; + return (AESContext *)(((uintptr_t)ctx + 15) & ~(uintptr_t)0x0F); } /* @@ -1000,21 +1034,19 @@ AES_AllocateContext(void) */ static SECStatus aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, - const unsigned char *iv, int mode, unsigned int encrypt, - unsigned int blocksize) + const unsigned char *iv, int mode, unsigned int encrypt) { unsigned int Nk; - /* According to Rijndael AES Proposal, section 12.1, block and key - * lengths between 128 and 256 bits are supported, as long as the + PRBool use_hw_aes; + /* According to AES, block lengths are 128 and key lengths are 128, 192, or + * 256 bits. We support other key sizes as well [128, 256] as long as the * length in bytes is divisible by 4. 
*/ + if (key == NULL || - keysize < RIJNDAEL_MIN_BLOCKSIZE || - keysize > RIJNDAEL_MAX_BLOCKSIZE || - keysize % 4 != 0 || - blocksize < RIJNDAEL_MIN_BLOCKSIZE || - blocksize > RIJNDAEL_MAX_BLOCKSIZE || - blocksize % 4 != 0) { + keysize < AES_BLOCK_SIZE || + keysize > 32 || + keysize % 4 != 0) { PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } @@ -1030,21 +1062,16 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } -#ifdef USE_HW_AES - use_hw_aes = aesni_support() && (keysize % 8) == 0 && blocksize == 16; -#ifdef INTEL_GCM - use_hw_gcm = use_hw_aes && avx_support() && clmul_support(); -#endif -#endif /* USE_HW_AES */ + use_hw_aes = aesni_support() && (keysize % 8) == 0; /* Nb = (block size in bits) / 32 */ - cx->Nb = blocksize / 4; + cx->Nb = AES_BLOCK_SIZE / 4; /* Nk = (key size in bits) / 32 */ Nk = keysize / 4; /* Obtain number of rounds from "table" */ cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); /* copy in the iv, if neccessary */ if (mode == NSS_AES_CBC) { - memcpy(cx->iv, iv, blocksize); + memcpy(cx->iv, iv, AES_BLOCK_SIZE); #ifdef USE_HW_AES if (use_hw_aes) { cx->worker = (freeblCipherFunc) @@ -1072,7 +1099,7 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - goto cleanup; + return SECFailure; } #ifdef USE_HW_AES if (use_hw_aes) { @@ -1085,25 +1112,28 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, defined(RIJNDAEL_GENERATE_TABLES_MACRO) if (rijndaelTables == NULL) { if (PR_CallOnce(&coRTInit, init_rijndael_tables) != PR_SUCCESS) { - return SecFailure; + return SECFailure; } } #endif /* Generate expanded key */ if (encrypt) { - if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) - goto cleanup; + if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES || + cx->mode == NSS_AES_CTR)) { + PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32); + /* Prepare hardware key for normal AES parameters. */ + native_key_expansion(cx, key, Nk); + } else { + rijndael_key_expansion(cx, key, Nk); + } } else { - if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess) - goto cleanup; + rijndael_invkey_expansion(cx, key, Nk); } } cx->worker_cx = cx; cx->destroy = NULL; cx->isBlock = PR_TRUE; return SECSuccess; -cleanup: - return SECFailure; } SECStatus @@ -1115,6 +1145,11 @@ AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, PRBool baseencrypt = encrypt; SECStatus rv; + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + switch (mode) { case NSS_AES_CTS: basemode = NSS_AES_CBC; @@ -1125,45 +1160,47 @@ AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, baseencrypt = PR_TRUE; break; } - /* make sure enough is initializes so we can safely call Destroy */ + /* Make sure enough is initialized so we can safely call Destroy. 
*/ cx->worker_cx = NULL; cx->destroy = NULL; - rv = aes_InitContext(cx, key, keysize, iv, basemode, - baseencrypt, blocksize); + cx->mode = mode; + rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt); if (rv != SECSuccess) { AES_DestroyContext(cx, PR_FALSE); return rv; } - cx->mode = mode; /* finally, set up any mode specific contexts */ switch (mode) { case NSS_AES_CTS: - cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize); + cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv); cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); cx->destroy = (freeblDestroyFunc)CTS_DestroyContext; cx->isBlock = PR_FALSE; break; case NSS_AES_GCM: -#ifdef INTEL_GCM - if (use_hw_gcm) { - cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize); - cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate); +#if defined(INTEL_GCM) && defined(USE_HW_AES) + if (aesni_support() && (keysize % 8) == 0 && avx_support() && + clmul_support()) { + cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate + : intel_AES_GCM_DecryptUpdate); cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext; cx->isBlock = PR_FALSE; } else #endif { - cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize); - cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate); + cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate + : GCM_DecryptUpdate); cx->destroy = (freeblDestroyFunc)GCM_DestroyContext; cx->isBlock = PR_FALSE; } break; case NSS_AES_CTR: - cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize); + cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv); #if defined(USE_HW_AES) && defined(_MSC_VER) - if (use_hw_aes) { + if (aesni_support() && (keysize % 8) == 0) { cx->worker = (freeblCipherFunc)CTR_Update_HW_AES; } else #endif @@ -1175,7 +1212,7 @@ AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, break; default: /* everything has already been set up by aes_InitContext, just - * return */ + * return */ return SECSuccess; } /* check to see if we succeeded in getting the worker context */ @@ -1224,8 +1261,9 @@ AES_DestroyContext(AESContext *cx, PRBool freeit) cx->worker_cx = NULL; cx->destroy = NULL; } - if (freeit) - PORT_Free(cx); + if (freeit) { + PORT_Free(cx->mem); + } } /* @@ -1239,14 +1277,12 @@ AES_Encrypt(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { - int blocksize; /* Check args */ if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } - blocksize = 4 * cx->Nb; - if (cx->isBlock && (inputLen % blocksize != 0)) { + if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) { PORT_SetError(SEC_ERROR_INPUT_LEN); return SECFailure; } @@ -1277,7 +1313,7 @@ AES_Encrypt(AESContext *cx, unsigned char *output, #endif return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, - input, inputLen, blocksize); + input, inputLen, AES_BLOCK_SIZE); } /* @@ -1291,14 +1327,12 @@ AES_Decrypt(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { - int blocksize; /* Check args */ if (cx == NULL || output == NULL || (input == NULL && inputLen != 
0)) { PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } - blocksize = 4 * cx->Nb; - if (cx->isBlock && (inputLen % blocksize != 0)) { + if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) { PORT_SetError(SEC_ERROR_INPUT_LEN); return SECFailure; } @@ -1308,5 +1342,5 @@ AES_Decrypt(AESContext *cx, unsigned char *output, } *outputLen = inputLen; return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, - input, inputLen, blocksize); + input, inputLen, AES_BLOCK_SIZE); } diff --git a/lib/freebl/rijndael.h b/lib/freebl/rijndael.h index 0e14ec2fc3..1f4a8a9f73 100644 --- a/lib/freebl/rijndael.h +++ b/lib/freebl/rijndael.h @@ -6,13 +6,15 @@ #define _RIJNDAEL_H_ 1 #include "blapii.h" +#include -#define RIJNDAEL_MIN_BLOCKSIZE 16 /* bytes */ -#define RIJNDAEL_MAX_BLOCKSIZE 32 /* bytes */ +#ifdef NSS_X86_OR_X64 +#include /* aes-ni */ +#endif -typedef SECStatus AESBlockFunc(AESContext *cx, - unsigned char *output, - const unsigned char *input); +typedef void AESBlockFunc(AESContext *cx, + unsigned char *output, + const unsigned char *input); /* RIJNDAEL_NUM_ROUNDS * @@ -23,24 +25,18 @@ typedef SECStatus AESBlockFunc(AESContext *cx, #define RIJNDAEL_NUM_ROUNDS(Nk, Nb) \ (PR_MAX(Nk, Nb) + 6) -/* RIJNDAEL_MAX_STATE_SIZE - * - * Maximum number of bytes in the state (spec includes up to 256-bit block - * size) - */ -#define RIJNDAEL_MAX_STATE_SIZE 32 - /* * This magic number is (Nb_max * (Nr_max + 1)) * where Nb_max is the maximum block size in 32-bit words, * Nr_max is the maximum number of rounds, which is Nb_max + 6 */ -#define RIJNDAEL_MAX_EXP_KEY_SIZE (8 * 15) +#define RIJNDAEL_MAX_EXP_KEY_SIZE (4 * 15) /* AESContextStr * * Values which maintain the state for Rijndael encryption/decryption. * + * keySchedule - 128-bit registers for the key-schedule * iv - initialization vector for CBC mode * Nb - the number of bytes in a block, specified by user * Nr - the number of rounds, specified by a table @@ -51,17 +47,23 @@ typedef SECStatus AESBlockFunc(AESContext *cx, * isBlock - is the mode of operation a block cipher or a stream cipher? */ struct AESContextStr { + /* NOTE: Offsets to members in this struct are hardcoded in assembly. + * Don't change the struct without updating intel-aes.s and intel-gcm.s. */ + union { +#if defined(NSS_X86_OR_X64) + __m128i keySchedule[15]; +#endif + PRUint32 expandedKey[RIJNDAEL_MAX_EXP_KEY_SIZE]; + }; unsigned int Nb; unsigned int Nr; freeblCipherFunc worker; - /* NOTE: The offsets of iv and expandedKey are hardcoded in intel-aes.s. - * Don't add new members before them without updating intel-aes.s. */ - unsigned char iv[RIJNDAEL_MAX_BLOCKSIZE]; - PRUint32 expandedKey[RIJNDAEL_MAX_EXP_KEY_SIZE]; + unsigned char iv[AES_BLOCK_SIZE]; freeblDestroyFunc destroy; void *worker_cx; PRBool isBlock; int mode; + void *mem; /* Start of the allocated memory to free. */ }; #endif /* _RIJNDAEL_H_ */
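
A note on the aligned-allocation pattern this patch introduces in AES_AllocateContext and AESKeyWrap_AllocateContext: because AESContextStr now embeds __m128i key-schedule registers, the context must start on a 16-byte boundary, and since aligned_alloc is C11 the code over-allocates by 15 bytes, rounds the pointer up, and keeps the original pointer in a `mem` field so it can be freed later. The sketch below shows that rounding trick in isolation; it uses plain calloc/free and a hypothetical `Ctx` type in place of PORT_ZAlloc/PORT_Free and the real NSS contexts, so it is an illustration of the technique rather than the NSS code itself.

```c
#include <stdint.h>
#include <stdlib.h>

/* Hypothetical context that must start on a 16-byte boundary. */
typedef struct {
    void *mem;              /* start of the raw allocation, passed to free() */
    unsigned char state[64];
} Ctx;

static Ctx *
ctx_alloc(void)
{
    /* Over-allocate by 15 bytes so the pointer can be rounded up. */
    void *raw = calloc(1, sizeof(Ctx) + 15);
    if (raw == NULL) {
        return NULL;
    }
    /* Round up to the next 16-byte boundary. */
    Ctx *ctx = (Ctx *)(((uintptr_t)raw + 15) & ~(uintptr_t)0x0F);
    /* Store the raw pointer through the aligned pointer, since that is
     * where it will be read back at free time. */
    ctx->mem = raw;
    return ctx;
}

static void
ctx_free(Ctx *ctx)
{
    if (ctx) {
        free(ctx->mem); /* free the original allocation, not the aligned view */
    }
}
```

The same reasoning explains why the struct carries the extra `mem` pointer at all: callers only ever see the aligned address, so without recording the raw address there would be nothing valid to hand back to the allocator when the context is destroyed.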