Skip to content

Commit

Permalink
Bug 1656981 - Use 64x64->128 multiply and MP_COMBA on x86_64 Mac. r=mt
Browse files Browse the repository at this point in the history
This patch makes two MPI changes for macOS:

1. Rename `mpi_amd64_gas.s` to `mpi_amd64_common.S` and add defines for macho64, allowing Intel Macs to take advantage of the 64x64->128 multiply code.
2. Define and use `NSS_USE_COMBA` on Intel Macs.

Performance results with `rsaperf -n none -p 10 -e -x 65537` (default 2048-bit key):
Before: `12629.12 operations/s. one operation every 79 microseconds`
With 64x64->128 assembly: `29431.65 operations/s. one operation every 33 microseconds`
With MP_COMBA and 64x64->128 assembly: `30332.99 operations/s. one operation every 32 microseconds`

Differential Revision: https://phabricator.services.mozilla.com/D85783

--HG--
rename : lib/freebl/mpi/mpi_amd64_gas.s => lib/freebl/mpi/mpi_amd64_common.S
extra : moz-landing-system : lando
  • Loading branch information
Kevin Jacobs committed Aug 7, 2020
1 parent 16812f7 commit a4fc67e
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 7 deletions.
12 changes: 9 additions & 3 deletions lib/freebl/Makefile
Expand Up @@ -234,7 +234,12 @@ endif
endif

ifeq ($(OS_TARGET),Darwin)
ifeq ($(CPU_ARCH),x86)
ifeq ($(CPU_ARCH),x86_64)
ASFILES = mpi_amd64_common.s
DEFINES += -DMPI_AMD64 -DMP_IS_LITTLE_ENDIAN
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DNSS_USE_COMBA
MPI_SRCS += mpi_amd64.c mp_comba.c
else ifeq ($(CPU_ARCH),x86)
ASFILES = mpi_sse2.s
DEFINES += -DMP_USE_UINT_DIGIT
DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
Expand All @@ -244,7 +249,8 @@ endif # Darwin

ifeq ($(OS_TARGET),Linux)
ifeq ($(CPU_ARCH),x86_64)
ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
# The file on disk is mpi_amd64_common.S; it is listed here with a lower-case .s suffix because of make's implicit rules.
ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s
ASFLAGS += -fPIC -Wa,--noexecstack
DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
DEFINES += -DNSS_USE_COMBA
Expand Down Expand Up @@ -484,7 +490,7 @@ else
ifeq ($(USE_64),1)
# Solaris for AMD64
ifdef NS_USE_GCC
ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s
ASFLAGS += -march=opteron -m64 -fPIC
MPI_SRCS += mp_comba.c
# comment the next four lines to turn off Intel HW acceleration
Expand Down
14 changes: 13 additions & 1 deletion lib/freebl/freebl_base.gypi
Expand Up @@ -68,7 +68,7 @@
'sources': [
'arcfour-amd64-gas.s',
'mpi/mpi_amd64.c',
'mpi/mpi_amd64_gas.s',
'mpi/mpi_amd64_common.S',
'mpi/mp_comba.c',
],
'conditions': [
Expand Down Expand Up @@ -202,6 +202,18 @@
'MP_ASSEMBLY_SQUARE',
'MP_ASSEMBLY_DIV_2DX1D',
],
}, 'target_arch=="x64"', {
'sources': [
'mpi/mpi_amd64.c',
'mpi/mpi_amd64_common.S',
'mpi/mp_comba.c',
],
'defines': [
'MP_IS_LITTLE_ENDIAN',
'MPI_AMD64',
'MP_ASSEMBLY_MULTIPLY',
'NSS_USE_COMBA',
],
}],
],
}],
Expand Down
Expand Up @@ -18,7 +18,15 @@
# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
#

.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
.text; .align 16; .globl s_mpv_mul_set_vec64;

#ifdef DARWIN
#define s_mpv_mul_set_vec64 _s_mpv_mul_set_vec64
.private_extern s_mpv_mul_set_vec64
s_mpv_mul_set_vec64:
#else
.type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
#endif

xorq %rax, %rax # if (len == 0) return (0)
testq %rdx, %rdx
Expand Down Expand Up @@ -169,7 +177,9 @@
movq %r9, %rax
ret

#ifndef DARWIN
.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
#endif

# ------------------------------------------------------------------------
#
Expand All @@ -186,7 +196,15 @@
# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
#

.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
.text; .align 16; .globl s_mpv_mul_add_vec64;

#ifdef DARWIN
#define s_mpv_mul_add_vec64 _s_mpv_mul_add_vec64
.private_extern s_mpv_mul_add_vec64
s_mpv_mul_add_vec64:
#else
.type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
#endif

xorq %rax, %rax # if (len == 0) return (0)
testq %rdx, %rdx
Expand Down Expand Up @@ -381,9 +399,11 @@
.L27:
movq %r9, %rax
ret


#ifndef DARWIN
.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64

# Magic indicating no need for an executable stack
.section .note.GNU-stack, "", @progbits
.previous
#endif

0 comments on commit a4fc67e

Please sign in to comment.