Commit 3fa042f1 authored by Franziskus Kiefer's avatar Franziskus Kiefer

Bug 1424663 - vectorized ChaCha20 from HACL* for SSSE3 and ARM NEON, r=ttaubert

Summary:
This adds the vectorized ChaCha20 implementation from HACL* to NSS and replaces the old vectorized code.
Note that this is not used on Android, as we currently have no way of testing it on Android or of using it there for Firefox.

Reviewers: ttaubert

Reviewed By: ttaubert

Bug #: 1424663

Differential Revision: https://phabricator.services.mozilla.com/D467

--HG--
extra : rebase_source : 715ec95c1f5377f86a0f183e7250f8495743b579
extra : histedit_source : 4d998bcb0c41fb703182455eaf5acf1b2e53a2e0%2C16cfd6e35aaab7cb0f1d9487d6ca0d392b04c1b3
parent c88eaefe
......@@ -9,7 +9,7 @@ ENV haclrepo https://github.com/mitls/hacl-star.git
# Define versions of dependencies
ENV opamv 4.04.2
ENV haclversion dcd48329d535727dbde93877b124c5ec4a7a2b20
ENV haclversion 104de0fbc83939a5e76012d64e3db2b3c0524bd1
# Install required packages and set versions
ADD setup.sh /tmp/setup.sh
......
......@@ -77,7 +77,8 @@ queue.filter(task => {
}
}
if (task.tests == "fips" && task.platform == "mac") {
if (task.tests == "fips" &&
(task.platform == "mac" || task.platform == "aarch64")) {
return false;
}
......@@ -93,7 +94,7 @@ queue.filter(task => {
}
}
// Don't run additional hardware tests on ARM (we don't have anything there).
// Don't run all additional hardware tests on ARM.
if (task.group == "Cipher" && task.platform == "aarch64" && task.env &&
(task.env.NSS_DISABLE_PCLMUL == "1" || task.env.NSS_DISABLE_HW_AES == "1"
|| task.env.NSS_DISABLE_AVX == "1")) {
......@@ -271,6 +272,18 @@ export default async function main() {
}, aarch64_base)
);
await scheduleLinux("Linux AArch64 (debug, make)",
merge({
env: {USE_64: "1"},
command: [
"/bin/bash",
"-c",
"bin/checkout.sh && nss/automation/taskcluster/scripts/build.sh"
],
collection: "make",
}, aarch64_base)
);
await scheduleMac("Mac (opt)", {collection: "opt"}, "--opt");
await scheduleMac("Mac (debug)", {collection: "debug"});
}
......@@ -899,6 +912,13 @@ function scheduleTests(task_build, task_cert, test_base) {
name: "Cipher tests", symbol: "NoAVX", tests: "cipher",
env: {NSS_DISABLE_AVX: "1"}, group: "Cipher"
}));
queue.scheduleTask(merge(no_cert_base, {
name: "Cipher tests", symbol: "NoSSSE3|NEON", tests: "cipher",
env: {
NSS_DISABLE_ARM_NEON: "1",
NSS_DISABLE_SSSE3: "1"
}, group: "Cipher"
}));
queue.scheduleTask(merge(no_cert_base, {
name: "EC tests", symbol: "EC", tests: "ec"
}));
......
......@@ -519,23 +519,16 @@ ifndef NSS_DISABLE_CHACHAPOLY
else
EXTRA_SRCS += poly1305.c
endif
ifneq (1,$(CC_IS_GCC))
EXTRA_SRCS += chacha20.c
VERIFIED_SRCS += Hacl_Chacha20.c
else
EXTRA_SRCS += chacha20_vec.c
endif
else
ifeq ($(CPU_ARCH),aarch64)
EXTRA_SRCS += Hacl_Poly1305_64.c
else
EXTRA_SRCS += poly1305.c
endif
EXTRA_SRCS += chacha20.c
VERIFIED_SRCS += Hacl_Chacha20.c
endif # x86_64
VERIFIED_SRCS += Hacl_Chacha20.c
VERIFIED_SRCS += Hacl_Chacha20_Vec128.c
endif # NSS_DISABLE_CHACHAPOLY
ifeq (,$(filter-out i386 x386 x86 x86_64 aarch64,$(CPU_ARCH)))
......
......@@ -80,5 +80,11 @@ SECStatus generate_prime(mp_int *prime, int primeLen);
PRBool aesni_support();
PRBool clmul_support();
PRBool avx_support();
PRBool ssse3_support();
PRBool arm_neon_support();
PRBool arm_aes_support();
PRBool arm_pmull_support();
PRBool arm_sha1_support();
PRBool arm_sha2_support();
#endif /* _BLAPII_H_ */
......@@ -23,6 +23,12 @@ static PRCallOnceType coFreeblInit;
static PRBool aesni_support_ = PR_FALSE;
static PRBool clmul_support_ = PR_FALSE;
static PRBool avx_support_ = PR_FALSE;
static PRBool ssse3_support_ = PR_FALSE;
static PRBool arm_neon_support_ = PR_FALSE;
static PRBool arm_aes_support_ = PR_FALSE;
static PRBool arm_sha1_support_ = PR_FALSE;
static PRBool arm_sha2_support_ = PR_FALSE;
static PRBool arm_pmull_support_ = PR_FALSE;
#ifdef NSS_X86_OR_X64
/*
......@@ -62,6 +68,7 @@ check_xcr0_ymm()
#define ECX_XSAVE (1 << 26)
#define ECX_OSXSAVE (1 << 27)
#define ECX_AVX (1 << 28)
#define ECX_SSSE3 (1 << 9)
#define AVX_BITS (ECX_XSAVE | ECX_OSXSAVE | ECX_AVX)
void
......@@ -71,6 +78,7 @@ CheckX86CPUSupport()
char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
char *disable_pclmul = PR_GetEnvSecure("NSS_DISABLE_PCLMUL");
char *disable_avx = PR_GetEnvSecure("NSS_DISABLE_AVX");
char *disable_ssse3 = PR_GetEnvSecure("NSS_DISABLE_SSSE3");
freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
aesni_support_ = (PRBool)((ecx & ECX_AESNI) != 0 && disable_hw_aes == NULL);
clmul_support_ = (PRBool)((ecx & ECX_CLMUL) != 0 && disable_pclmul == NULL);
......@@ -78,9 +86,107 @@ CheckX86CPUSupport()
* as well as XMM and YMM state. */
avx_support_ = (PRBool)((ecx & AVX_BITS) == AVX_BITS) && check_xcr0_ymm() &&
disable_avx == NULL;
ssse3_support_ = (PRBool)((ecx & ECX_SSSE3) != 0 &&
disable_ssse3 == NULL);
}
#endif /* NSS_X86_OR_X64 */
#if (defined(__aarch64__) || defined(__arm__)) && !defined(__ANDROID__)
#if defined(__GNUC__) && __GNUC__ >= 2 && defined(__ELF__)
#include <sys/auxv.h>
/* Weak declaration: the CheckARMSupport() implementations test
 * `if (getauxval)` before calling it, so a missing symbol is tolerated. */
extern unsigned long getauxval(unsigned long type) __attribute__((weak));
#else
/* No getauxval() with this toolchain/object format. A NULL function pointer
 * makes the `if (getauxval)` guards skip the call at run time. The AT_*
 * selectors still need a value: the guarded calls `getauxval(AT_HWCAP)` and
 * `getauxval(AT_HWCAP2)` are compiled regardless, and an empty macro would
 * expand them to `getauxval()`, which does not match the pointer's
 * one-argument type. The value itself is never used on this path. */
static unsigned long (*getauxval)(unsigned long) = NULL;
#define AT_HWCAP2 0
#define AT_HWCAP 0
#endif /* defined(__GNUC__) && __GNUC__ >= 2 && defined(__ELF__)*/
#endif /* (defined(__aarch64__) || defined(__arm__)) && !defined(__ANDROID__) */
#if defined(__aarch64__) && !defined(__ANDROID__)
// Defines from hwcap.h in Linux kernel - ARM64
#define HWCAP_AES (1 << 3)
#define HWCAP_PMULL (1 << 4)
#define HWCAP_SHA1 (1 << 5)
#define HWCAP_SHA2 (1 << 6)
void
CheckARMSupport()
{
char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON");
char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
if (getauxval) {
long hwcaps = getauxval(AT_HWCAP);
arm_aes_support_ = hwcaps & HWCAP_AES && disable_hw_aes == NULL;
arm_pmull_support_ = hwcaps & HWCAP_PMULL;
arm_sha1_support_ = hwcaps & HWCAP_SHA1;
arm_sha2_support_ = hwcaps & HWCAP_SHA2;
}
/* aarch64 must support NEON. */
arm_neon_support_ = disable_arm_neon == NULL;
}
#endif /* defined(__aarch64__) && !defined(__ANDROID__) */
#if defined(__arm__) && !defined(__ANDROID__)
// Defines from hwcap.h in Linux kernel - ARM
/*
* HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
*/
#define HWCAP_NEON (1 << 12)
/*
* HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2
*/
#define HWCAP2_AES (1 << 0)
#define HWCAP2_PMULL (1 << 1)
#define HWCAP2_SHA1 (1 << 2)
#define HWCAP2_SHA2 (1 << 3)
void
CheckARMSupport()
{
char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON");
char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
if (getauxval) {
long hwcaps = getauxval(AT_HWCAP2);
arm_aes_support_ = hwcaps & HWCAP2_AES && disable_hw_aes == NULL;
arm_pmull_support_ = hwcaps & HWCAP2_PMULL;
arm_sha1_support_ = hwcaps & HWCAP2_SHA1;
arm_sha2_support_ = hwcaps & HWCAP2_SHA2;
arm_neon_support_ = hwcaps & HWCAP_NEON && disable_arm_neon == NULL;
}
}
#endif /* defined(__arm__) && !defined(__ANDROID__) */
// Enable when Firefox can use it.
// #if defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__))
// #include <cpu-features.h>
// void
// CheckARMSupport()
// {
// char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON");
// char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
// AndroidCpuFamily family = android_getCpuFamily();
// uint64_t features = android_getCpuFeatures();
// if (family == ANDROID_CPU_FAMILY_ARM64) {
// arm_aes_support_ = features & ANDROID_CPU_ARM64_FEATURE_AES &&
// disable_hw_aes == NULL;
// arm_pmull_support_ = features & ANDROID_CPU_ARM64_FEATURE_PMULL;
// arm_sha1_support_ = features & ANDROID_CPU_ARM64_FEATURE_SHA1;
// arm_sha2_support_ = features & ANDROID_CPU_ARM64_FEATURE_SHA2;
// arm_neon_support_ = disable_arm_neon == NULL;
// }
// if (family == ANDROID_CPU_FAMILY_ARM) {
// arm_aes_support_ = features & ANDROID_CPU_ARM_FEATURE_AES &&
// disable_hw_aes == NULL;
// arm_pmull_support_ = features & ANDROID_CPU_ARM_FEATURE_PMULL;
// arm_sha1_support_ = features & ANDROID_CPU_ARM_FEATURE_SHA1;
// arm_sha2_support_ = features & ANDROID_CPU_ARM_FEATURE_SHA2;
// arm_neon_support_ = features & ANDROID_CPU_ARM_FEATURE_NEON &&
// disable_arm_neon == NULL;
// }
// }
// #endif /* defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__)) */
PRBool
aesni_support()
{
......@@ -96,12 +202,44 @@ avx_support()
{
return avx_support_;
}
/* Returns the cached SSSE3 flag. It is PR_FALSE until CheckX86CPUSupport()
 * has run, and stays PR_FALSE when NSS_DISABLE_SSSE3 is set. */
PRBool
ssse3_support()
{
return ssse3_support_;
}
/* Returns the cached ARM NEON flag. It is PR_FALSE until CheckARMSupport()
 * has set it; NSS_DISABLE_ARM_NEON keeps it off. */
PRBool
arm_neon_support()
{
return arm_neon_support_;
}
/* Returns the cached ARM AES-instruction flag. It is PR_FALSE until
 * CheckARMSupport() has set it; NSS_DISABLE_HW_AES keeps it off. */
PRBool
arm_aes_support()
{
return arm_aes_support_;
}
/* Returns the cached ARM PMULL flag. It is PR_FALSE until CheckARMSupport()
 * has set it from the hardware capability bits. */
PRBool
arm_pmull_support()
{
return arm_pmull_support_;
}
/* Returns the cached ARM SHA1 flag. It is PR_FALSE until CheckARMSupport()
 * has set it from the hardware capability bits. */
PRBool
arm_sha1_support()
{
return arm_sha1_support_;
}
/* Returns the cached ARM SHA2 flag. It is PR_FALSE until CheckARMSupport()
 * has set it from the hardware capability bits. */
PRBool
arm_sha2_support()
{
return arm_sha2_support_;
}
/* Runs the CPU feature detection that fills the *_support_ flags:
 * CheckX86CPUSupport() on x86/x64 builds, CheckARMSupport() on non-Android
 * ARM/AArch64 builds. On any other build (including Android) no detection
 * runs, so every flag keeps its PR_FALSE initializer. Always returns
 * PR_SUCCESS. Presumably invoked once through coFreeblInit - the call site
 * is not visible here. */
static PRStatus
FreeblInit(void)
{
#ifdef NSS_X86_OR_X64
CheckX86CPUSupport();
#elif (defined(__aarch64__) || defined(__arm__)) && !defined(__ANDROID__)
CheckARMSupport();
#endif
return PR_SUCCESS;
}
......
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* Adopted from the public domain code in NaCl by djb. */
#include <string.h>
#include <stdio.h>
#include "chacha20.h"
#include "verified/Hacl_Chacha20.h"
/* Thin adapter from the freebl ChaCha20XOR interface to the HACL* scalar
 * implementation: processes |inLen| bytes of |in| into |out| with the given
 * key, nonce and initial block counter. */
void
ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inLen,
            const unsigned char key[32], const unsigned char nonce[12],
            uint32_t counter)
{
    /* Hacl_Chacha20_chacha20 is declared with non-const uint8_t pointers,
     * so the const qualifiers have to be cast away for the call. */
    uint8_t *plain = (uint8_t *)in;
    uint8_t *k = (uint8_t *)key;
    uint8_t *n1 = (uint8_t *)nonce;

    Hacl_Chacha20_chacha20(out, plain, inLen, k, n1, counter);
}
/*
* chacha20.h - header file for ChaCha20 implementation.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef FREEBL_CHACHA20_H_
#define FREEBL_CHACHA20_H_
#if defined(_MSC_VER) && _MSC_VER < 1600
#include "prtypes.h"
typedef PRUint32 uint32_t;
typedef PRUint64 uint64_t;
#else
#include <stdint.h>
#endif
/* ChaCha20XOR encrypts |inLen| bytes from |in| with the given key and
* nonce and writes the result to |out|, which may be equal to |in|. The
* initial block counter is specified by |counter|. */
extern void ChaCha20XOR(unsigned char *out, const unsigned char *in,
unsigned int inLen, const unsigned char key[32],
const unsigned char nonce[12], uint32_t counter);
#endif /* FREEBL_CHACHA20_H_ */
This diff is collapsed.
......@@ -12,25 +12,28 @@
#include "seccomon.h"
#include "secerr.h"
#include "blapit.h"
#include "blapii.h"
#ifndef NSS_DISABLE_CHACHAPOLY
#if defined(HAVE_INT128_SUPPORT) && (defined(NSS_X86_OR_X64) || defined(__aarch64__))
#include "verified/Hacl_Poly1305_64.h"
#else
#include "poly1305.h"
#endif
#include "chacha20.h"
#include "chacha20poly1305.h"
#endif
// Forward declaration from "Hacl_Chacha20_Vec128.h".
extern void Hacl_Chacha20_Vec128_chacha20(uint8_t *output, uint8_t *plain,
uint32_t len, uint8_t *k, uint8_t *n1,
uint32_t ctr);
// Forward declaration from "Hacl_Chacha20.h".
extern void Hacl_Chacha20_chacha20(uint8_t *output, uint8_t *plain, uint32_t len,
uint8_t *k, uint8_t *n1, uint32_t ctr);
/* Poly1305Do writes the Poly1305 authenticator of the given additional data
* and ciphertext to |out|. */
#ifndef NSS_DISABLE_CHACHAPOLY
#if defined(HAVE_INT128_SUPPORT) && (defined(NSS_X86_OR_X64) || defined(__aarch64__))
/* Use HACL* Poly1305 on 64-bit Intel and ARM */
#include "verified/Hacl_Poly1305_64.h"
static void
Poly1305PadUpdate(Hacl_Impl_Poly1305_64_State_poly1305_state state, unsigned char *block, const unsigned char *p, const unsigned int pLen)
Poly1305PadUpdate(Hacl_Impl_Poly1305_64_State_poly1305_state state,
unsigned char *block, const unsigned char *p,
const unsigned int pLen)
{
unsigned int pRemLen = pLen % 16;
Hacl_Poly1305_64_update(state, (uint8_t *)p, (pLen / 16));
......@@ -46,7 +49,8 @@ Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen,
const unsigned char key[32])
{
uint64_t tmp1[6U] = { 0U };
Hacl_Impl_Poly1305_64_State_poly1305_state state = Hacl_Poly1305_64_mk_state(tmp1, tmp1 + 3);
Hacl_Impl_Poly1305_64_State_poly1305_state state =
Hacl_Poly1305_64_mk_state(tmp1, tmp1 + 3);
unsigned char block[16] = { 0 };
Hacl_Poly1305_64_init(state, (uint8_t *)key);
......@@ -68,6 +72,8 @@ Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen,
Hacl_Poly1305_64_finish(state, out, (uint8_t *)(key + 16));
}
#else
/* All other platforms get the 32-bit poly1305 reference implementation. */
#include "poly1305.h"
static void
Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen,
......@@ -165,6 +171,17 @@ ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit)
#endif
}
/* Runtime dispatcher for ChaCha20: forwards all arguments unchanged to the
 * 128-bit vectorized HACL* implementation when ssse3_support() or
 * arm_neon_support() reports PR_TRUE, and to the scalar HACL*
 * implementation otherwise. */
void
ChaCha20Xor(uint8_t *output, uint8_t *block, uint32_t len, uint8_t *k,
            uint8_t *nonce, uint32_t ctr)
{
    /* Neither vector extension is available: use the portable code. */
    if (!ssse3_support() && !arm_neon_support()) {
        Hacl_Chacha20_chacha20(output, block, len, k, nonce, ctr);
        return;
    }

    Hacl_Chacha20_Vec128_chacha20(output, block, len, k, nonce, ctr);
}
SECStatus
ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
......@@ -191,8 +208,10 @@ ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output,
PORT_Memset(block, 0, sizeof(block));
// Generate a block of keystream. The first 32 bytes will be the poly1305
// key. The remainder of the block is discarded.
ChaCha20XOR(block, block, sizeof(block), ctx->key, nonce, 0);
ChaCha20XOR(output, input, inputLen, ctx->key, nonce, 1);
ChaCha20Xor(block, (uint8_t *)block, sizeof(block), (uint8_t *)ctx->key,
(uint8_t *)nonce, 0);
ChaCha20Xor(output, (uint8_t *)input, inputLen, (uint8_t *)ctx->key,
(uint8_t *)nonce, 1);
Poly1305Do(tag, ad, adLen, output, inputLen, block);
PORT_Memcpy(output + inputLen, tag, ctx->tagLen);
......@@ -233,14 +252,16 @@ ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output,
PORT_Memset(block, 0, sizeof(block));
// Generate a block of keystream. The first 32 bytes will be the poly1305
// key. The remainder of the block is discarded.
ChaCha20XOR(block, block, sizeof(block), ctx->key, nonce, 0);
ChaCha20Xor(block, (uint8_t *)block, sizeof(block), (uint8_t *)ctx->key,
(uint8_t *)nonce, 0);
Poly1305Do(tag, ad, adLen, input, ciphertextLen, block);
if (NSS_SecureMemcmp(tag, &input[ciphertextLen], ctx->tagLen) != 0) {
PORT_SetError(SEC_ERROR_BAD_DATA);
return SECFailure;
}
ChaCha20XOR(output, input, ciphertextLen, ctx->key, nonce, 1);
ChaCha20Xor(output, (uint8_t *)input, ciphertextLen, (uint8_t *)ctx->key,
(uint8_t *)nonce, 1);
return SECSuccess;
#endif
......
......@@ -4,7 +4,7 @@
#include "blapi.h"
#include "blapit.h"
#include "chacha20.h"
#include "Hacl_Chacha20.h"
#include "nssilock.h"
#include "seccomon.h"
#include "secerr.h"
......@@ -99,7 +99,7 @@ RNG_GenerateGlobalRandomBytes(void *dest, size_t len)
memset(dest, 0, len);
memcpy(dest, globalBytes, PR_MIN(len, GLOBAL_BYTES_SIZE));
ChaCha20XOR(dest, dest, len, key, nonce, 0);
Hacl_Chacha20_chacha20(dest, (uint8_t *)dest, len, (uint8_t *)key, nonce, 0);
ChaCha20Poly1305_DestroyContext(cx, PR_TRUE);
PZ_Unlock(rng_lock);
......
......@@ -10,7 +10,7 @@
'target_name': 'intel-gcm-wrap_c_lib',
'type': 'static_library',
'sources': [
'intel-gcm-wrap.c'
'intel-gcm-wrap.c',
],
'dependencies': [
'<(DEPTH)/exports.gyp:nss_exports'
......@@ -22,6 +22,38 @@
'-mssse3'
]
},
{
# TODO: make this so that all hardware accelerated code is in here.
'target_name': 'hw-acc-crypto',
'type': 'static_library',
'sources': [
'verified/Hacl_Chacha20_Vec128.c',
],
'dependencies': [
'<(DEPTH)/exports.gyp:nss_exports'
],
'conditions': [
[ 'target_arch=="ia32" or target_arch=="x64"', {
'cflags': [
'-mssse3'
],
'cflags_mozilla': [
'-mssse3'
],
# GCC doesn't define this.
'defines': [
'__SSSE3__',
],
}],
[ 'OS=="android"', {
# On Android we can't use any of the hardware acceleration :(
'defines!': [
'__ARM_NEON__',
'__ARM_NEON',
],
}],
],
},
{
'target_name': 'gcm-aes-x86_c_lib',
'type': 'static_library',
......@@ -74,11 +106,12 @@
],
'dependencies': [
'<(DEPTH)/exports.gyp:nss_exports',
'hw-acc-crypto',
],
'conditions': [
[ 'target_arch=="ia32" or target_arch=="x64"', {
'dependencies': [
'gcm-aes-x86_c_lib'
'gcm-aes-x86_c_lib',
],
}],
[ 'OS=="linux"', {
......@@ -110,11 +143,12 @@
],
'dependencies': [
'<(DEPTH)/exports.gyp:nss_exports',
'hw-acc-crypto',
],
'conditions': [
[ 'target_arch=="ia32" or target_arch=="x64"', {
'dependencies': [
'gcm-aes-x86_c_lib'
'gcm-aes-x86_c_lib',
]
}],
[ 'OS!="linux" and OS!="android"', {
......
......@@ -144,12 +144,17 @@
],
}],
[ 'disable_chachapoly==0', {
# The vectorized ChaCha20 code is linked in through the static hw-acc-crypto
# lib. Runtime checks (SSSE3 / ARM NEON) choose between it and the portable
# ChaCha20 implementation listed below.
'sources': [
'verified/Hacl_Chacha20.c',
],
'conditions': [
[ 'OS!="win"', {
'conditions': [
[ 'target_arch=="x64"', {
'sources': [
'chacha20_vec.c',
'verified/Hacl_Poly1305_64.c',
],
}, {
......@@ -157,15 +162,11 @@
'conditions': [
[ 'target_arch=="arm64" or target_arch=="aarch64"', {
'sources': [
'chacha20.c',
'verified/Hacl_Chacha20.c',
'verified/Hacl_Poly1305_64.c',
],
}, {
# !Windows & !x64 & !arm64 & !aarch64
'sources': [
'chacha20.c',
'verified/Hacl_Chacha20.c',
'poly1305.c',
],
}],
......@@ -175,8 +176,6 @@
}, {
# Windows
'sources': [
'chacha20.c',
'verified/Hacl_Chacha20.c',
'poly1305.c',
],
}],
......
This diff is collapsed.
/* Copyright 2016-2017 INRIA and Microsoft Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kremlib.h"
#ifndef __Hacl_Chacha20_Vec128_H
#define __Hacl_Chacha20_Vec128_H
#include "vec128.h"
typedef uint32_t Hacl_Impl_Xor_Lemmas_u32;
typedef uint8_t Hacl_Impl_Xor_Lemmas_u8;
typedef uint32_t Hacl_Impl_Chacha20_Vec128_State_u32;
typedef uint32_t Hacl_Impl_Chacha20_Vec128_State_h32;
typedef uint8_t *Hacl_Impl_Chacha20_Vec128_State_uint8_p;
typedef vec *Hacl_Impl_Chacha20_Vec128_State_state;
typedef uint32_t Hacl_Impl_Chacha20_Vec128_u32;
typedef uint32_t Hacl_Impl_Chacha20_Vec128_h32;
typedef uint8_t *Hacl_Impl_Chacha20_Vec128_uint8_p;
typedef uint32_t Hacl_Impl_Chacha20_Vec128_idx;
typedef struct
{
void *k;
void *n;
uint32_t ctr;
} Hacl_Impl_Chacha20_Vec128_log_t_;
typedef void *Hacl_Impl_Chacha20_Vec128_log_t;
typedef uint8_t *Hacl_Chacha20_Vec128_uint8_p;
/* Vectorized (128-bit) ChaCha20 entry point.
 *
 * output - destination buffer
 * plain  - input buffer
 * len    - length argument; presumably the number of bytes to process,
 *          as with ChaCha20XOR - confirm against the implementation
 * k      - key (presumably 32 bytes, as in ChaCha20XOR's contract)
 * n1     - nonce (presumably 12 bytes, as in ChaCha20XOR's contract)
 * ctr    - initial block counter
 */
void
Hacl_Chacha20_Vec128_chacha20(
uint8_t *output,
uint8_t *plain,
uint32_t len,
uint8_t *k,
uint8_t *n1,
uint32_t ctr);
#endif
/* Copyright 2016-2017 INRIA and Microsoft Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __Vec_H
#define __Vec_H
/* forceinline: ask the compiler to always inline the small vector helpers.
 * MSVC is identified by _MSC_VER (it does not predefine __MSVC__, which is
 * still honoured here in case a build defines it by hand). __forceinline
 * already implies inline, so no extra `inline` keyword is added. */
#if defined(_MSC_VER) || defined(__MSVC__)
#define forceinline __forceinline
#elif (defined(__GNUC__) || defined(__clang__))
#define forceinline __attribute__((always_inline)) inline
#else
#define forceinline inline
#endif
#if defined(__SSSE3__) || defined(__AVX2__) || defined(__AVX__)
#include <emmintrin.h>
#include <tmmintrin.h>
#define VEC128
#define vec_size 4
typedef __m128i vec;
/* Rotates each 32-bit lane of v left by 8 bits using a single byte shuffle. */
static forceinline vec
vec_rotate_left_8(vec v)
{
    /* Per lane, result bytes (low to high) come from source bytes 3,0,1,2. */
    const __m128i rotl8_perm =
        _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);

    return _mm_shuffle_epi8(v, rotl8_perm);
}
/* Rotates each 32-bit lane of v left by 16 bits using a single byte shuffle. */
static forceinline vec
vec_rotate_left_16(vec v)
{
    /* Per lane, result bytes (low to high) come from source bytes 2,3,0,1. */
    const __m128i rotl16_perm =
        _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);

    return _mm_shuffle_epi8(v, rotl16_perm);
}
/* Rotates each 32-bit lane of v left by n bits. Rotations by 8 and 16 use
 * the byte-shuffle helpers; every other amount combines a left shift by n
 * with a right shift by 32 - n. */
static forceinline vec
vec_rotate_left(vec v, unsigned int n)
{
    switch (n) {
        case 8:
            return vec_rotate_left_8(v);
        case 16:
            return vec_rotate_left_16(v);
        default: {
            /* The two shifted parts occupy disjoint bit positions. */
            const vec upper = _mm_slli_epi32(v, n);
            const vec lower = _mm_srli_epi32(v, 32 - n);
            return _mm_xor_si128(upper, lower);
        }
    }
}
/* Rotates each 32-bit lane of v right by n bits, expressed as a left
 * rotation by 32 - n. */
static forceinline vec
vec_rotate_right(vec v, unsigned int n)
{
    const unsigned int left_amount = 32 - n;

    return vec_rotate_left(v, left_amount);
}
/* vec_shuffle_right(x, n): permutes the four 32-bit lanes of x so that
 * result lane i holds source lane (i + n) mod 4. n feeds the shuffle
 * immediate, so it should be a compile-time constant.
 * vec_shuffle_left(x, n): the opposite direction, implemented as a right
 * shuffle by 4 - n. */
#define vec_shuffle_right(x, n) \
_mm_shuffle_epi32(x, _MM_SHUFFLE((3 + (n)) % 4, (2 + (n)) % 4, (1 + (n)) % 4, (n) % 4))
#define vec_shuffle_left(x, n) vec_shuffle_right((x), 4 - (n))
/* Builds a vector from four 32-bit words; x1 ends up in the lowest lane and
 * x4 in the highest. */
static forceinline vec
vec_load_32x4(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4)
{
    /* _mm_set_epi32 takes its arguments from the highest lane down. */
    const vec packed = _mm_set_epi32(x4, x3, x2, x1);

    return packed;
}
/* Eight-word variant of vec_load_32x4. This 128-bit implementation only
 * has room for four lanes: x1..x4 are packed (x1 lowest) and x5..x8 are
 * ignored.
 * NOTE(review): presumably the wide signature exists for parity with a
 * wider vector backend - confirm. */
static forceinline vec
vec_load_32x8(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8)
{
    const vec low_four = _mm_set_epi32(x4, x3, x2, x1);

    return low_four;
}
/* Loads 16 bytes from |in| into a vector using an unaligned load, so |in|
 * needs no particular alignment. The cast keeps the const qualifier:
 * _mm_loadu_si128 takes a pointer to const, and the input is only read. */
static forceinline vec
vec_load_le(const unsigned char* in)
{
    return _mm_loadu_si128((const __m128i*)(in));
}
/* Loads one 128-bit vector from |in|; with 128-bit vectors this is exactly
 * vec_load_le. */
static forceinline vec
vec_load128_le(const unsigned char* in)
{
    const vec loaded = vec_load_le(in);

    return loaded;
}
/* Writes the 16 bytes of v to |out| using an unaligned store, so |out|
 * needs no particular alignment. */
static forceinline void
vec_store_le(unsigned char* out, vec v)
{
    __m128i* dst = (__m128i*)(out);

    _mm_storeu_si128(dst, v);
}
/* Lane-wise addition of the four 32-bit lanes of v1 and v2. */
static forceinline vec
vec_add(vec v1, vec v2)
{
    const vec sum = _mm_add_epi32(v1, v2);

    return sum;
}
static forceinline vec
vec_add_u32(vec v1, uint32_t x)
{