Skip to content

Commit

Permalink
Bug 1612493 - Support for HACL* AVX2 code for Chacha20, Poly1305 and …
Browse files Browse the repository at this point in the history
…Chacha20Poly1305. r=kjacobs

***
Bug 1612493 - Import AVX2 code from HACL*
***
Bug 1612493 - Add CPU detection for AVX2, BMI1, BMI2, FMA, MOVBE
***
Bug 1612493 - New flag NSS_DISABLE_AVX2 for freebl/Makefile and freebl.gyp
***
Bug 1612493 - Disable use of AVX2 on GCC 4.4 which doesn’t support -mavx2
***
Bug 1612493 - Disable tests when the platform doesn't have support for AVX2

Differential Revision: https://phabricator.services.mozilla.com/D64718

--HG--
extra : moz-landing-system : lando
  • Loading branch information
beurdouche committed Feb 28, 2020
1 parent 2bd3b6a commit 64ae3c8
Show file tree
Hide file tree
Showing 15 changed files with 4,593 additions and 44 deletions.
6 changes: 5 additions & 1 deletion automation/taskcluster/graph/src/extend.js
Expand Up @@ -101,7 +101,7 @@ queue.filter(task => {
// Don't run all additional hardware tests on ARM.
if (task.group == "Cipher" && task.platform == "aarch64" && task.env &&
(task.env.NSS_DISABLE_PCLMUL == "1" || task.env.NSS_DISABLE_HW_AES == "1"
|| task.env.NSS_DISABLE_AVX == "1")) {
|| task.env.NSS_DISABLE_AVX == "1" || task.env.NSS_DISABLE_AVX2 == "1")) {
return false;
}

Expand Down Expand Up @@ -1014,6 +1014,10 @@ function scheduleTests(task_build, task_cert, test_base) {
name: "Cipher tests", symbol: "NoAVX", tests: "cipher",
env: {NSS_DISABLE_AVX: "1"}, group: "Cipher"
}));
queue.scheduleTask(merge(cert_base_long, {
name: "Cipher tests", symbol: "NoAVX2", tests: "cipher",
env: {NSS_DISABLE_AVX2: "1"}, group: "Cipher"
}));
queue.scheduleTask(merge(cert_base_long, {
name: "Cipher tests", symbol: "NoSSSE3|NEON", tests: "cipher",
env: {
Expand Down
29 changes: 29 additions & 0 deletions coreconf/arch.mk
Expand Up @@ -139,6 +139,35 @@ ifeq ($(OS_ARCH),OS_2)
OS_RELEASE := $(shell uname -v)
endif

#######################################################################
# Master "Core Components" macros for Hardware features #
#######################################################################

ifndef NSS_DISABLE_AVX2
ifneq ($(CPU_ARCH),x86_64)
# Disable AVX2 entirely on non-Intel platforms
NSS_DISABLE_AVX2 = 1
$(warning CPU_ARCH is not x86_64, disabling -mavx2)
else
ifdef CC_IS_CLANG
# Clang reports its version as an older gcc, but it's OK
NSS_DISABLE_AVX2 = 0
else
ifneq (,$(filter 4.8 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
NSS_DISABLE_AVX2 = 0
endif
ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
NSS_DISABLE_AVX2 = 0
endif
endif
ifndef NSS_DISABLE_AVX2
$(warning Unable to find gcc 4.8 or greater, disabling -Werror)
NSS_DISABLE_AVX2 = 1
endif
endif
export NSS_DISABLE_AVX2
endif #ndef NSS_DISABLE_AVX2

#######################################################################
# Master "Core Components" macros for getting the OS target #
#######################################################################
Expand Down
4 changes: 4 additions & 0 deletions coreconf/config.mk
Expand Up @@ -162,6 +162,10 @@ ifdef NSS_DISABLE_DBM
DEFINES += -DNSS_DISABLE_DBM
endif

ifdef NSS_DISABLE_AVX2
DEFINES += -DNSS_DISABLE_AVX2
endif

ifdef NSS_DISABLE_CHACHAPOLY
DEFINES += -DNSS_DISABLE_CHACHAPOLY
endif
Expand Down
80 changes: 47 additions & 33 deletions lib/freebl/Makefile
Expand Up @@ -85,11 +85,11 @@ endif
# FREEBL_PRELINK_COMMAND
#
# This is an optional environment variable which can override the default
# prelink command. It could be used on systems that did something similiar to
# prelink but used a different command and syntax. The only requirement is the
# program must take the library as the last argument, the program must output
# the original library to standard out, and the program does not need to take
# any quoted or imbedded spaces in its arguments (except the path to the
# prelink command. It could be used on systems that did something similiar to
# prelink but used a different command and syntax. The only requirement is the
# program must take the library as the last argument, the program must output
# the original library to standard out, and the program does not need to take
# any quoted or imbedded spaces in its arguments (except the path to the
# library itself, which can have imbedded spaces or special characters).
#
ifdef FREEBL_USE_PRELINK
Expand Down Expand Up @@ -148,7 +148,7 @@ endif

ifeq (OS2,$(OS_TARGET))
ASFILES = mpi_x86_os2.s
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
DEFINES += -DMP_IS_LITTLE_ENDIAN
Expand All @@ -169,7 +169,7 @@ ifdef NS_USE_GCC
else
# MSVC
MPI_SRCS += mpi_x86_asm.c
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
ifdef BUILD_OPT
OPTIMIZER += -Ox # maximum optimization for freebl
Expand Down Expand Up @@ -220,7 +220,7 @@ ifeq ($(USE_N32),1)
ifeq ($(NS_USE_GCC),1)
ASFLAGS = -Wp,-P -Wp,-traditional -O -mips3
else
ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3
ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3
endif
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_USE_UINT_DIGIT
Expand Down Expand Up @@ -253,12 +253,12 @@ ifeq ($(CPU_ARCH),x86_64)
endif
ifeq ($(CPU_ARCH),x86)
ASFILES = mpi_x86.s
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT
DEFINES += -DMP_IS_LITTLE_ENDIAN
endif
ifeq ($(CPU_ARCH),arm)
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_USE_UINT_DIGIT
DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
MPI_SRCS += mpi_arm.c
Expand All @@ -283,7 +283,7 @@ ifneq ($(OS_TEST), ia64)
# PA-RISC
ASFILES += ret_cr16.s
ifndef USE_64
FREEBL_BUILD_SINGLE_SHLIB =
FREEBL_BUILD_SINGLE_SHLIB =
HAVE_ABI32_INT32 = 1
HAVE_ABI32_FPU = 1
endif
Expand All @@ -294,15 +294,15 @@ ifdef USE_ABI32_INT32
DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
else
ifdef USE_64
# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits
MPI_SRCS += mpi_hp.c
ASFILES += hpma512.s hppa20.s
# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits
MPI_SRCS += mpi_hp.c
ASFILES += hpma512.s hppa20.s
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
else
# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model
# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model
# (the 32-bit ABI with 64-bit registers) using 64-bit digits
MPI_SRCS += mpi_hp.c
ASFILES += hpma512.s hppa20.s
MPI_SRCS += mpi_hp.c
ASFILES += hpma512.s hppa20.s
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
ifndef NS_USE_GCC
ARCHFLAG = -Aa +e +DA2.0 +DS2.0
Expand Down Expand Up @@ -337,7 +337,7 @@ else
endif # NS_USE_GCC

# Sun's WorkShop defines v8, v8plus and v9 architectures.
# gcc on Solaris defines v8 and v9 "cpus".
# gcc on Solaris defines v8 and v9 "cpus".
# gcc's v9 is equivalent to Workshop's v8plus.
# gcc's -m64 is equivalent to Workshop's v9
# We always use Sun's assembler, which uses Sun's naming convention.
Expand Down Expand Up @@ -387,7 +387,7 @@ ifeq ($(CPU_ARCH),sparc)
FPU_TARGET_OPTIMIZER = -xchip=ultra2
endif
ifdef USE_ABI32_INT64
# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
# 32-bit ABI, it uses 64-bit words, integer arithmetic,
# no FPU (non-VIS cpus).
# These flags were suggested by the compiler group for building
Expand All @@ -400,7 +400,7 @@ ifeq ($(CPU_ARCH),sparc)
SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC
endif
ifdef USE_ABI32_FPU
# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
# 32-bit ABI, it uses FPU code, and 32-bit word size.
# these flags were determined by running cc -### -fast and copying
# the generated flag settings
Expand Down Expand Up @@ -442,12 +442,12 @@ ifeq ($(CPU_ARCH),sparc)

### set flags for both GCC and Sun cc
ifdef USE_ABI32_INT64
# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
# 32-bit ABI, it uses 64-bit words, integer arithmetic, no FPU
# best times are with no MP_ flags specified
endif
ifdef USE_ABI32_FPU
# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
# this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
# 32-bit ABI, it uses FPU code, and 32-bit word size
MPI_SRCS += mpi_sparc.c
ASFILES = mpv_sparcv8.s montmulfv8.s
Expand Down Expand Up @@ -503,7 +503,7 @@ else
else
# Solaris x86
DEFINES += -DMP_USE_UINT_DIGIT
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
ASFILES = mpi_i86pc.s
ifndef NS_USE_GCC
Expand All @@ -526,8 +526,12 @@ ifneq ($(shell $(CC) -? 2>&1 >/dev/null </dev/null | sed -e 's/:.*//;1q'),lcc)
HAVE_INT128_SUPPORT = 1
DEFINES += -DHAVE_INT128_SUPPORT
endif
ifneq (,$(filter 4.8 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
NSS_DISABLE_AVX2 = 1
endif
ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
HAVE_INT128_SUPPORT = 1
NSS_DISABLE_AVX2 = 0
DEFINES += -DHAVE_INT128_SUPPORT
endif
endif
Expand All @@ -540,7 +544,11 @@ endif

ifndef NSS_DISABLE_CHACHAPOLY
ifeq ($(CPU_ARCH),x86_64)
EXTRA_SRCS += Hacl_Poly1305_128.c Hacl_Chacha20_Vec128.c Hacl_Chacha20Poly1305_128.c
ifndef NSS_DISABLE_AVX2
EXTRA_SRCS += Hacl_Poly1305_256.c Hacl_Chacha20_Vec256.c Hacl_Chacha20Poly1305_256.c
else
EXTRA_SRCS += Hacl_Poly1305_128.c Hacl_Chacha20_Vec128.c Hacl_Chacha20Poly1305_128.c
endif # NSS_DISABLE_AVX2
endif # x86_64

VERIFIED_SRCS += Hacl_Poly1305_32.c Hacl_Chacha20.c Hacl_Chacha20Poly1305_32.c
Expand Down Expand Up @@ -630,7 +638,7 @@ ifdef FREEBL_BUILD_SINGLE_SHLIB

################### Single shared lib stuff #########################
SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB
ALL_TRASH += $(SINGLE_SHLIB_DIR)
ALL_TRASH += $(SINGLE_SHLIB_DIR)

$(SINGLE_SHLIB_DIR):
-mkdir -p $(SINGLE_SHLIB_DIR)
Expand All @@ -644,7 +652,7 @@ endif

ifdef NEED_STUB_BUILD
SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB
ALL_TRASH += $(SINGLE_SHLIB_DIR)
ALL_TRASH += $(SINGLE_SHLIB_DIR)
$(SINGLE_SHLIB_DIR):
-mkdir $(SINGLE_SHLIB_DIR)

Expand All @@ -658,7 +666,7 @@ endif
######################## ABI32_FPU stuff #########################
ifdef HAVE_ABI32_FPU
ABI32_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_FPU
ALL_TRASH += $(ABI32_FPU_DIR)
ALL_TRASH += $(ABI32_FPU_DIR)

$(ABI32_FPU_DIR):
-mkdir $(ABI32_FPU_DIR)
Expand All @@ -671,7 +679,7 @@ endif
######################## ABI32_INT32 stuff #########################
ifdef HAVE_ABI32_INT32
ABI32_INT32_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT32
ALL_TRASH += $(ABI32_INT32_DIR)
ALL_TRASH += $(ABI32_INT32_DIR)

$(ABI32_INT32_DIR):
-mkdir $(ABI32_INT32_DIR)
Expand All @@ -684,7 +692,7 @@ endif
######################## ABI32_INT64 stuff #########################
ifdef HAVE_ABI32_INT64
ABI32_INT64_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT64
ALL_TRASH += $(ABI32_INT64_DIR)
ALL_TRASH += $(ABI32_INT64_DIR)

$(ABI32_INT64_DIR):
-mkdir $(ABI32_INT64_DIR)
Expand All @@ -701,7 +709,7 @@ endif
######################## ABI64_FPU stuff #########################
ifdef HAVE_ABI64_FPU
ABI64_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_FPU
ALL_TRASH += $(ABI64_FPU_DIR)
ALL_TRASH += $(ABI64_FPU_DIR)

$(ABI64_FPU_DIR):
-mkdir $(ABI64_FPU_DIR)
Expand All @@ -714,7 +722,7 @@ endif
######################## ABI64_INT stuff #########################
ifdef HAVE_ABI64_INT
ABI64_INT_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_INT
ALL_TRASH += $(ABI64_INT_DIR)
ALL_TRASH += $(ABI64_INT_DIR)

$(ABI64_INT_DIR):
-mkdir $(ABI64_INT_DIR)
Expand Down Expand Up @@ -785,6 +793,12 @@ $(OBJDIR)/$(PROG_PREFIX)rijndael$(OBJ_SUFFIX): CFLAGS += -mcrypto -maltivec -mvs
endif
endif

$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes
$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes
$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes
$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes
$(OBJDIR)/$(PROG_PREFIX)Hacl_Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes -mpclmul

ifndef NSS_DISABLE_AVX2
$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx2 -maes
$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -mavx2 -maes
$(OBJDIR)/$(PROG_PREFIX)Hacl_Poly1305_256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -mavx2 -maes -mpclmul
endif
1 change: 1 addition & 0 deletions lib/freebl/blapii.h
Expand Up @@ -80,6 +80,7 @@ SECStatus generate_prime(mp_int *prime, int primeLen);
PRBool aesni_support();
PRBool clmul_support();
PRBool avx_support();
PRBool avx2_support();
PRBool ssse3_support();
PRBool sse4_1_support();
PRBool sse4_2_support();
Expand Down
21 changes: 21 additions & 0 deletions lib/freebl/blinit.c
Expand Up @@ -27,6 +27,7 @@ static PRCallOnceType coFreeblInit;
static PRBool aesni_support_ = PR_FALSE;
static PRBool clmul_support_ = PR_FALSE;
static PRBool avx_support_ = PR_FALSE;
static PRBool avx2_support_ = PR_FALSE;
static PRBool ssse3_support_ = PR_FALSE;
static PRBool sse4_1_support_ = PR_FALSE;
static PRBool sse4_2_support_ = PR_FALSE;
Expand Down Expand Up @@ -75,28 +76,43 @@ check_xcr0_ymm()
#define ECX_XSAVE (1 << 26)
#define ECX_OSXSAVE (1 << 27)
#define ECX_AVX (1 << 28)
#define EBX_AVX2 (1 << 5)
#define EBX_BMI1 (1 << 3)
#define EBX_BMI2 (1 << 8)
#define ECX_FMA (1 << 12)
#define ECX_MOVBE (1 << 22)
#define ECX_SSSE3 (1 << 9)
#define ECX_SSE4_1 (1 << 19)
#define ECX_SSE4_2 (1 << 20)
#define AVX_BITS (ECX_XSAVE | ECX_OSXSAVE | ECX_AVX)
#define AVX2_EBX_BITS (EBX_AVX2 | EBX_BMI1 | EBX_BMI2)
#define AVX2_ECX_BITS (ECX_FMA | ECX_MOVBE)

void
CheckX86CPUSupport()
{
unsigned long eax, ebx, ecx, edx;
unsigned long eax7, ebx7, ecx7, edx7;
char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
char *disable_pclmul = PR_GetEnvSecure("NSS_DISABLE_PCLMUL");
char *disable_avx = PR_GetEnvSecure("NSS_DISABLE_AVX");
char *disable_avx2 = PR_GetEnvSecure("NSS_DISABLE_AVX2");
char *disable_ssse3 = PR_GetEnvSecure("NSS_DISABLE_SSSE3");
char *disable_sse4_1 = PR_GetEnvSecure("NSS_DISABLE_SSE4_1");
char *disable_sse4_2 = PR_GetEnvSecure("NSS_DISABLE_SSE4_2");
freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
freebl_cpuid(7, &eax7, &ebx7, &ecx7, &edx7);
aesni_support_ = (PRBool)((ecx & ECX_AESNI) != 0 && disable_hw_aes == NULL);
clmul_support_ = (PRBool)((ecx & ECX_CLMUL) != 0 && disable_pclmul == NULL);
/* For AVX we check AVX, OSXSAVE, and XSAVE
* as well as XMM and YMM state. */
avx_support_ = (PRBool)((ecx & AVX_BITS) == AVX_BITS) && check_xcr0_ymm() &&
disable_avx == NULL;
/* For AVX2 we check AVX2, BMI1, BMI2, FMA, MOVBE.
* We do not check for AVX above. */
avx2_support_ = (PRBool)((ebx7 & AVX2_EBX_BITS) == AVX2_EBX_BITS &&
(ecx & AVX2_ECX_BITS) == AVX2_ECX_BITS &&
disable_avx2 == NULL);
ssse3_support_ = (PRBool)((ecx & ECX_SSSE3) != 0 &&
disable_ssse3 == NULL);
sse4_1_support_ = (PRBool)((ecx & ECX_SSE4_1) != 0 &&
Expand Down Expand Up @@ -384,6 +400,11 @@ avx_support()
return avx_support_;
}
PRBool
avx2_support()
{
return avx2_support_;
}
PRBool
ssse3_support()
{
return ssse3_support_;
Expand Down

0 comments on commit 64ae3c8

Please sign in to comment.