From 1ec2a888690e6806e8d18d8890d2e574aa6335d2 Mon Sep 17 00:00:00 2001 From: Benjamin Beurdouche Date: Mon, 8 Mar 2021 09:59:34 +0000 Subject: [PATCH] Bug 1696800 - HACL* update March 2021 - c95ab70fcb2bc21025d8845281bc4bc8987ca683 r=beurdouche Differential Revision: https://phabricator.services.mozilla.com/D107387 --HG-- rename : lib/freebl/verified/Hacl_Curve25519_51.c => lib/freebl/verified/Hacl_Bignum25519_51.h extra : moz-landing-system : lando --- automation/taskcluster/scripts/run_hacl.sh | 2 +- lib/freebl/verified/Hacl_Bignum25519_51.h | 676 ++++++++++++++++ lib/freebl/verified/Hacl_Chacha20.c | 6 +- lib/freebl/verified/Hacl_Chacha20.h | 14 +- .../verified/Hacl_Chacha20Poly1305_128.c | 12 +- .../verified/Hacl_Chacha20Poly1305_128.h | 14 +- .../verified/Hacl_Chacha20Poly1305_256.c | 12 +- .../verified/Hacl_Chacha20Poly1305_256.h | 14 +- .../verified/Hacl_Chacha20Poly1305_32.c | 8 +- .../verified/Hacl_Chacha20Poly1305_32.h | 14 +- lib/freebl/verified/Hacl_Chacha20_Vec128.c | 26 +- lib/freebl/verified/Hacl_Chacha20_Vec128.h | 14 +- lib/freebl/verified/Hacl_Chacha20_Vec256.c | 26 +- lib/freebl/verified/Hacl_Chacha20_Vec256.h | 14 +- lib/freebl/verified/Hacl_Curve25519_51.c | 746 ++---------------- lib/freebl/verified/Hacl_Curve25519_51.h | 15 +- lib/freebl/verified/Hacl_Kremlib.h | 16 +- lib/freebl/verified/Hacl_Poly1305_128.c | 10 +- lib/freebl/verified/Hacl_Poly1305_128.h | 14 +- lib/freebl/verified/Hacl_Poly1305_256.c | 10 +- lib/freebl/verified/Hacl_Poly1305_256.h | 14 +- lib/freebl/verified/Hacl_Poly1305_32.c | 2 +- lib/freebl/verified/Hacl_Poly1305_32.h | 14 +- .../kremlin/include/kremlin/internal/target.h | 2 + .../kremlin/include/kremlin/internal/types.h | 22 +- .../kremlib/dist/minimal/FStar_UInt128.h | 5 +- .../dist/minimal/FStar_UInt128_Verified.h | 5 +- .../dist/minimal/FStar_UInt_8_16_32_64.h | 37 +- .../kremlib/dist/minimal/LowStar_Endianness.h | 7 +- .../dist/minimal/fstar_uint128_gcc64.h | 5 + .../kremlib/dist/minimal/fstar_uint128_msvc.h | 9 + lib/freebl/verified/libintvector.h | 201 ++++- 32 files changed, 1172 insertions(+), 814 deletions(-) create mode 100644 lib/freebl/verified/Hacl_Bignum25519_51.h diff --git a/automation/taskcluster/scripts/run_hacl.sh b/automation/taskcluster/scripts/run_hacl.sh index 84dc9dbc35..7b82c911bf 100755 --- a/automation/taskcluster/scripts/run_hacl.sh +++ b/automation/taskcluster/scripts/run_hacl.sh @@ -13,7 +13,7 @@ set -e -x -v # HACL CI. # When bug 1593647 is resolved, extract the code on CI again. git clone -q "https://github.com/project-everest/hacl-star" ~/hacl-star -git -C ~/hacl-star checkout -q e4311991b1526734f99f4e3a0058895a46c63e5c +git -C ~/hacl-star checkout -q c95ab70fcb2bc21025d8845281bc4bc8987ca683 # Format the C snapshot. 
cd ~/hacl-star/dist/mozilla diff --git a/lib/freebl/verified/Hacl_Bignum25519_51.h b/lib/freebl/verified/Hacl_Bignum25519_51.h new file mode 100644 index 0000000000..173f11188b --- /dev/null +++ b/lib/freebl/verified/Hacl_Bignum25519_51.h @@ -0,0 +1,676 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __Hacl_Bignum25519_51_H +#define __Hacl_Bignum25519_51_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include + +#include "Hacl_Kremlib.h" + +static inline void +Hacl_Impl_Curve25519_Field51_fadd(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f20 = f2[0U]; + uint64_t f11 = f1[1U]; + uint64_t f21 = f2[1U]; + uint64_t f12 = f1[2U]; + uint64_t f22 = f2[2U]; + uint64_t f13 = f1[3U]; + uint64_t f23 = f2[3U]; + uint64_t f14 = f1[4U]; + uint64_t f24 = f2[4U]; + out[0U] = f10 + f20; + out[1U] = f11 + f21; + out[2U] = f12 + f22; + out[3U] = f13 + f23; + out[4U] = f14 + f24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fsub(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f20 = f2[0U]; + uint64_t f11 = f1[1U]; + uint64_t f21 = f2[1U]; + uint64_t f12 = f1[2U]; + uint64_t f22 = f2[2U]; + uint64_t f13 = f1[3U]; + uint64_t f23 = f2[3U]; + uint64_t f14 = f1[4U]; + uint64_t f24 = f2[4U]; + out[0U] = f10 + (uint64_t)0x3fffffffffff68U - f20; + out[1U] = f11 + (uint64_t)0x3ffffffffffff8U - f21; + out[2U] = f12 + (uint64_t)0x3ffffffffffff8U - f22; + out[3U] = f13 + (uint64_t)0x3ffffffffffff8U - f23; + out[4U] = f14 + (uint64_t)0x3ffffffffffff8U - f24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fmul( + uint64_t *out, + uint64_t *f1, + uint64_t *f2, + FStar_UInt128_uint128 *uu___) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + uint64_t f20 = f2[0U]; + uint64_t f21 = f2[1U]; + uint64_t f22 = f2[2U]; + uint64_t f23 = f2[3U]; + uint64_t f24 = f2[4U]; + uint64_t tmp1 = f21 * (uint64_t)19U; + uint64_t tmp2 = f22 * (uint64_t)19U; + uint64_t tmp3 = f23 * (uint64_t)19U; + uint64_t tmp4 = f24 * (uint64_t)19U; + FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); + FStar_UInt128_uint128 o10 = FStar_UInt128_mul_wide(f10, f21); + FStar_UInt128_uint128 o20 = FStar_UInt128_mul_wide(f10, f22); + FStar_UInt128_uint128 o30 = 
FStar_UInt128_mul_wide(f10, f23); + FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24); + FStar_UInt128_uint128 o01 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp4)); + FStar_UInt128_uint128 o11 = FStar_UInt128_add(o10, FStar_UInt128_mul_wide(f11, f20)); + FStar_UInt128_uint128 o21 = FStar_UInt128_add(o20, FStar_UInt128_mul_wide(f11, f21)); + FStar_UInt128_uint128 o31 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22)); + FStar_UInt128_uint128 o41 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23)); + FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f12, tmp3)); + FStar_UInt128_uint128 o12 = FStar_UInt128_add(o11, FStar_UInt128_mul_wide(f12, tmp4)); + FStar_UInt128_uint128 o22 = FStar_UInt128_add(o21, FStar_UInt128_mul_wide(f12, f20)); + FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f12, f21)); + FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f12, f22)); + FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f13, tmp2)); + FStar_UInt128_uint128 o13 = FStar_UInt128_add(o12, FStar_UInt128_mul_wide(f13, tmp3)); + FStar_UInt128_uint128 o23 = FStar_UInt128_add(o22, FStar_UInt128_mul_wide(f13, tmp4)); + FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f13, f20)); + FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f13, f21)); + FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f14, tmp1)); + FStar_UInt128_uint128 o14 = FStar_UInt128_add(o13, FStar_UInt128_mul_wide(f14, tmp2)); + FStar_UInt128_uint128 o24 = FStar_UInt128_add(o23, FStar_UInt128_mul_wide(f14, tmp3)); + FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f14, tmp4)); + FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f14, f20)); + FStar_UInt128_uint128 tmp_w0 = o04; + FStar_UInt128_uint128 tmp_w1 = o14; + FStar_UInt128_uint128 tmp_w2 = o24; + FStar_UInt128_uint128 tmp_w3 = o34; + FStar_UInt128_uint128 tmp_w4 = o44; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp01 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp11 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp21 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp31 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp41 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp01 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + 
uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp11 + c5; + uint64_t o2 = tmp21; + uint64_t o3 = tmp31; + uint64_t o4 = tmp41; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fmul2( + uint64_t *out, + uint64_t *f1, + uint64_t *f2, + FStar_UInt128_uint128 *uu___) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + uint64_t f20 = f2[0U]; + uint64_t f21 = f2[1U]; + uint64_t f22 = f2[2U]; + uint64_t f23 = f2[3U]; + uint64_t f24 = f2[4U]; + uint64_t f30 = f1[5U]; + uint64_t f31 = f1[6U]; + uint64_t f32 = f1[7U]; + uint64_t f33 = f1[8U]; + uint64_t f34 = f1[9U]; + uint64_t f40 = f2[5U]; + uint64_t f41 = f2[6U]; + uint64_t f42 = f2[7U]; + uint64_t f43 = f2[8U]; + uint64_t f44 = f2[9U]; + uint64_t tmp11 = f21 * (uint64_t)19U; + uint64_t tmp12 = f22 * (uint64_t)19U; + uint64_t tmp13 = f23 * (uint64_t)19U; + uint64_t tmp14 = f24 * (uint64_t)19U; + uint64_t tmp21 = f41 * (uint64_t)19U; + uint64_t tmp22 = f42 * (uint64_t)19U; + uint64_t tmp23 = f43 * (uint64_t)19U; + uint64_t tmp24 = f44 * (uint64_t)19U; + FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); + FStar_UInt128_uint128 o15 = FStar_UInt128_mul_wide(f10, f21); + FStar_UInt128_uint128 o25 = FStar_UInt128_mul_wide(f10, f22); + FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23); + FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24); + FStar_UInt128_uint128 o010 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp14)); + FStar_UInt128_uint128 o110 = FStar_UInt128_add(o15, FStar_UInt128_mul_wide(f11, f20)); + FStar_UInt128_uint128 o210 = FStar_UInt128_add(o25, FStar_UInt128_mul_wide(f11, f21)); + FStar_UInt128_uint128 o310 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22)); + FStar_UInt128_uint128 o410 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23)); + FStar_UInt128_uint128 o020 = FStar_UInt128_add(o010, FStar_UInt128_mul_wide(f12, tmp13)); + FStar_UInt128_uint128 o120 = FStar_UInt128_add(o110, FStar_UInt128_mul_wide(f12, tmp14)); + FStar_UInt128_uint128 o220 = FStar_UInt128_add(o210, FStar_UInt128_mul_wide(f12, f20)); + FStar_UInt128_uint128 o320 = FStar_UInt128_add(o310, FStar_UInt128_mul_wide(f12, f21)); + FStar_UInt128_uint128 o420 = FStar_UInt128_add(o410, FStar_UInt128_mul_wide(f12, f22)); + FStar_UInt128_uint128 o030 = FStar_UInt128_add(o020, FStar_UInt128_mul_wide(f13, tmp12)); + FStar_UInt128_uint128 o130 = FStar_UInt128_add(o120, FStar_UInt128_mul_wide(f13, tmp13)); + FStar_UInt128_uint128 o230 = FStar_UInt128_add(o220, FStar_UInt128_mul_wide(f13, tmp14)); + FStar_UInt128_uint128 o330 = FStar_UInt128_add(o320, FStar_UInt128_mul_wide(f13, f20)); + FStar_UInt128_uint128 o430 = FStar_UInt128_add(o420, FStar_UInt128_mul_wide(f13, f21)); + FStar_UInt128_uint128 o040 = FStar_UInt128_add(o030, FStar_UInt128_mul_wide(f14, tmp11)); + FStar_UInt128_uint128 o140 = FStar_UInt128_add(o130, FStar_UInt128_mul_wide(f14, tmp12)); + FStar_UInt128_uint128 o240 = FStar_UInt128_add(o230, FStar_UInt128_mul_wide(f14, tmp13)); + FStar_UInt128_uint128 o340 = FStar_UInt128_add(o330, FStar_UInt128_mul_wide(f14, tmp14)); + FStar_UInt128_uint128 o440 = FStar_UInt128_add(o430, FStar_UInt128_mul_wide(f14, f20)); + FStar_UInt128_uint128 tmp_w10 = o040; + FStar_UInt128_uint128 tmp_w11 = o140; + FStar_UInt128_uint128 tmp_w12 = o240; + FStar_UInt128_uint128 tmp_w13 = o340; + FStar_UInt128_uint128 tmp_w14 = o440; + 
FStar_UInt128_uint128 o0 = FStar_UInt128_mul_wide(f30, f40); + FStar_UInt128_uint128 o1 = FStar_UInt128_mul_wide(f30, f41); + FStar_UInt128_uint128 o2 = FStar_UInt128_mul_wide(f30, f42); + FStar_UInt128_uint128 o3 = FStar_UInt128_mul_wide(f30, f43); + FStar_UInt128_uint128 o4 = FStar_UInt128_mul_wide(f30, f44); + FStar_UInt128_uint128 o01 = FStar_UInt128_add(o0, FStar_UInt128_mul_wide(f31, tmp24)); + FStar_UInt128_uint128 o111 = FStar_UInt128_add(o1, FStar_UInt128_mul_wide(f31, f40)); + FStar_UInt128_uint128 o211 = FStar_UInt128_add(o2, FStar_UInt128_mul_wide(f31, f41)); + FStar_UInt128_uint128 o31 = FStar_UInt128_add(o3, FStar_UInt128_mul_wide(f31, f42)); + FStar_UInt128_uint128 o41 = FStar_UInt128_add(o4, FStar_UInt128_mul_wide(f31, f43)); + FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f32, tmp23)); + FStar_UInt128_uint128 o121 = FStar_UInt128_add(o111, FStar_UInt128_mul_wide(f32, tmp24)); + FStar_UInt128_uint128 o221 = FStar_UInt128_add(o211, FStar_UInt128_mul_wide(f32, f40)); + FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f32, f41)); + FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f32, f42)); + FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f33, tmp22)); + FStar_UInt128_uint128 o131 = FStar_UInt128_add(o121, FStar_UInt128_mul_wide(f33, tmp23)); + FStar_UInt128_uint128 o231 = FStar_UInt128_add(o221, FStar_UInt128_mul_wide(f33, tmp24)); + FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f33, f40)); + FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f33, f41)); + FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f34, tmp21)); + FStar_UInt128_uint128 o141 = FStar_UInt128_add(o131, FStar_UInt128_mul_wide(f34, tmp22)); + FStar_UInt128_uint128 o241 = FStar_UInt128_add(o231, FStar_UInt128_mul_wide(f34, tmp23)); + FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f34, tmp24)); + FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f34, f40)); + FStar_UInt128_uint128 tmp_w20 = o04; + FStar_UInt128_uint128 tmp_w21 = o141; + FStar_UInt128_uint128 tmp_w22 = o241; + FStar_UInt128_uint128 tmp_w23 = o34; + FStar_UInt128_uint128 tmp_w24 = o44; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w10, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w11, FStar_UInt128_uint64_to_uint128(c00)); + uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w12, FStar_UInt128_uint64_to_uint128(c10)); + uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w13, FStar_UInt128_uint64_to_uint128(c20)); + uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w14, FStar_UInt128_uint64_to_uint128(c30)); + uint64_t tmp40 = 
FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c50 = l_4 >> (uint32_t)51U; + uint64_t o100 = tmp0_; + uint64_t o112 = tmp10 + c50; + uint64_t o122 = tmp20; + uint64_t o132 = tmp30; + uint64_t o142 = tmp40; + FStar_UInt128_uint128 + l_5 = FStar_UInt128_add(tmp_w20, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); + FStar_UInt128_uint128 l_6 = FStar_UInt128_add(tmp_w21, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U)); + FStar_UInt128_uint128 l_7 = FStar_UInt128_add(tmp_w22, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); + FStar_UInt128_uint128 l_8 = FStar_UInt128_add(tmp_w23, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); + FStar_UInt128_uint128 l_9 = FStar_UInt128_add(tmp_w24, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); + uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_10 >> (uint32_t)51U; + uint64_t o200 = tmp0_0; + uint64_t o212 = tmp1 + c5; + uint64_t o222 = tmp2; + uint64_t o232 = tmp3; + uint64_t o242 = tmp4; + uint64_t o10 = o100; + uint64_t o11 = o112; + uint64_t o12 = o122; + uint64_t o13 = o132; + uint64_t o14 = o142; + uint64_t o20 = o200; + uint64_t o21 = o212; + uint64_t o22 = o222; + uint64_t o23 = o232; + uint64_t o24 = o242; + out[0U] = o10; + out[1U] = o11; + out[2U] = o12; + out[3U] = o13; + out[4U] = o14; + out[5U] = o20; + out[6U] = o21; + out[7U] = o22; + out[8U] = o23; + out[9U] = o24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fmul1(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + FStar_UInt128_uint128 tmp_w0 = FStar_UInt128_mul_wide(f2, f10); + FStar_UInt128_uint128 tmp_w1 = FStar_UInt128_mul_wide(f2, f11); + FStar_UInt128_uint128 tmp_w2 = FStar_UInt128_mul_wide(f2, f12); + FStar_UInt128_uint128 tmp_w3 = FStar_UInt128_mul_wide(f2, f13); + FStar_UInt128_uint128 tmp_w4 = FStar_UInt128_mul_wide(f2, f14); + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = 
FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp1 + c5; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fsqr(uint64_t *out, uint64_t *f, FStar_UInt128_uint128 *uu___) +{ + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t f4 = f[4U]; + uint64_t d0 = (uint64_t)2U * f0; + uint64_t d1 = (uint64_t)2U * f1; + uint64_t d2 = (uint64_t)38U * f2; + uint64_t d3 = (uint64_t)19U * f3; + uint64_t d419 = (uint64_t)19U * f4; + uint64_t d4 = (uint64_t)2U * d419; + FStar_UInt128_uint128 + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f0, f0), + FStar_UInt128_mul_wide(d4, f1)), + FStar_UInt128_mul_wide(d2, f3)); + FStar_UInt128_uint128 + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f1), + FStar_UInt128_mul_wide(d4, f2)), + FStar_UInt128_mul_wide(d3, f3)); + FStar_UInt128_uint128 + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f2), + FStar_UInt128_mul_wide(f1, f1)), + FStar_UInt128_mul_wide(d4, f3)); + FStar_UInt128_uint128 + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f3), + FStar_UInt128_mul_wide(d1, f2)), + FStar_UInt128_mul_wide(f4, d419)); + FStar_UInt128_uint128 + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f4), + FStar_UInt128_mul_wide(d1, f3)), + FStar_UInt128_mul_wide(f2, f2)); + FStar_UInt128_uint128 o00 = s0; + FStar_UInt128_uint128 o10 = s1; + FStar_UInt128_uint128 o20 = s2; + FStar_UInt128_uint128 o30 = s3; + FStar_UInt128_uint128 o40 = s4; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(o00, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o10, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o20, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + 
FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o30, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o40, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp1 + c5; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fsqr2(uint64_t *out, uint64_t *f, FStar_UInt128_uint128 *uu___) +{ + uint64_t f10 = f[0U]; + uint64_t f11 = f[1U]; + uint64_t f12 = f[2U]; + uint64_t f13 = f[3U]; + uint64_t f14 = f[4U]; + uint64_t f20 = f[5U]; + uint64_t f21 = f[6U]; + uint64_t f22 = f[7U]; + uint64_t f23 = f[8U]; + uint64_t f24 = f[9U]; + uint64_t d00 = (uint64_t)2U * f10; + uint64_t d10 = (uint64_t)2U * f11; + uint64_t d20 = (uint64_t)38U * f12; + uint64_t d30 = (uint64_t)19U * f13; + uint64_t d4190 = (uint64_t)19U * f14; + uint64_t d40 = (uint64_t)2U * d4190; + FStar_UInt128_uint128 + s00 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f10, f10), + FStar_UInt128_mul_wide(d40, f11)), + FStar_UInt128_mul_wide(d20, f13)); + FStar_UInt128_uint128 + s10 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f11), + FStar_UInt128_mul_wide(d40, f12)), + FStar_UInt128_mul_wide(d30, f13)); + FStar_UInt128_uint128 + s20 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f12), + FStar_UInt128_mul_wide(f11, f11)), + FStar_UInt128_mul_wide(d40, f13)); + FStar_UInt128_uint128 + s30 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f13), + FStar_UInt128_mul_wide(d10, f12)), + FStar_UInt128_mul_wide(f14, d4190)); + FStar_UInt128_uint128 + s40 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f14), + FStar_UInt128_mul_wide(d10, f13)), + FStar_UInt128_mul_wide(f12, f12)); + FStar_UInt128_uint128 o100 = s00; + FStar_UInt128_uint128 o110 = s10; + FStar_UInt128_uint128 o120 = s20; + FStar_UInt128_uint128 o130 = s30; + FStar_UInt128_uint128 o140 = s40; + uint64_t d0 = (uint64_t)2U * f20; + uint64_t d1 = (uint64_t)2U * f21; + uint64_t d2 = (uint64_t)38U * f22; + uint64_t d3 = (uint64_t)19U * f23; + uint64_t d419 = (uint64_t)19U * f24; + uint64_t d4 = (uint64_t)2U * d419; + FStar_UInt128_uint128 + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f20, f20), + FStar_UInt128_mul_wide(d4, f21)), + FStar_UInt128_mul_wide(d2, f23)); + FStar_UInt128_uint128 + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f21), + FStar_UInt128_mul_wide(d4, f22)), + FStar_UInt128_mul_wide(d3, f23)); + FStar_UInt128_uint128 + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f22), + FStar_UInt128_mul_wide(f21, f21)), + FStar_UInt128_mul_wide(d4, f23)); + FStar_UInt128_uint128 + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f23), + FStar_UInt128_mul_wide(d1, f22)), + FStar_UInt128_mul_wide(f24, d419)); + FStar_UInt128_uint128 + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, 
f24), + FStar_UInt128_mul_wide(d1, f23)), + FStar_UInt128_mul_wide(f22, f22)); + FStar_UInt128_uint128 o200 = s0; + FStar_UInt128_uint128 o210 = s1; + FStar_UInt128_uint128 o220 = s2; + FStar_UInt128_uint128 o230 = s3; + FStar_UInt128_uint128 o240 = s4; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(o100, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o110, FStar_UInt128_uint64_to_uint128(c00)); + uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o120, FStar_UInt128_uint64_to_uint128(c10)); + uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o130, FStar_UInt128_uint64_to_uint128(c20)); + uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o140, FStar_UInt128_uint64_to_uint128(c30)); + uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c50 = l_4 >> (uint32_t)51U; + uint64_t o101 = tmp0_; + uint64_t o111 = tmp10 + c50; + uint64_t o121 = tmp20; + uint64_t o131 = tmp30; + uint64_t o141 = tmp40; + FStar_UInt128_uint128 + l_5 = FStar_UInt128_add(o200, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); + FStar_UInt128_uint128 l_6 = FStar_UInt128_add(o210, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U)); + FStar_UInt128_uint128 l_7 = FStar_UInt128_add(o220, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); + FStar_UInt128_uint128 l_8 = FStar_UInt128_add(o230, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); + FStar_UInt128_uint128 l_9 = FStar_UInt128_add(o240, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); + uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_10 >> (uint32_t)51U; + uint64_t o201 = tmp0_0; + uint64_t o211 = tmp1 + c5; + uint64_t o221 = tmp2; + uint64_t o231 = tmp3; + uint64_t o241 = tmp4; 
+ uint64_t o10 = o101; + uint64_t o11 = o111; + uint64_t o12 = o121; + uint64_t o13 = o131; + uint64_t o14 = o141; + uint64_t o20 = o201; + uint64_t o21 = o211; + uint64_t o22 = o221; + uint64_t o23 = o231; + uint64_t o24 = o241; + out[0U] = o10; + out[1U] = o11; + out[2U] = o12; + out[3U] = o13; + out[4U] = o14; + out[5U] = o20; + out[6U] = o21; + out[7U] = o22; + out[8U] = o23; + out[9U] = o24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_store_felem(uint64_t *u64s, uint64_t *f) +{ + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t f4 = f[4U]; + uint64_t l_ = f0 + (uint64_t)0U; + uint64_t tmp0 = l_ & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = l_ >> (uint32_t)51U; + uint64_t l_0 = f1 + c0; + uint64_t tmp1 = l_0 & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = l_0 >> (uint32_t)51U; + uint64_t l_1 = f2 + c1; + uint64_t tmp2 = l_1 & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = l_1 >> (uint32_t)51U; + uint64_t l_2 = f3 + c2; + uint64_t tmp3 = l_2 & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = l_2 >> (uint32_t)51U; + uint64_t l_3 = f4 + c3; + uint64_t tmp4 = l_3 & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = l_3 >> (uint32_t)51U; + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t f01 = tmp0_; + uint64_t f11 = tmp1 + c5; + uint64_t f21 = tmp2; + uint64_t f31 = tmp3; + uint64_t f41 = tmp4; + uint64_t m0 = FStar_UInt64_gte_mask(f01, (uint64_t)0x7ffffffffffedU); + uint64_t m1 = FStar_UInt64_eq_mask(f11, (uint64_t)0x7ffffffffffffU); + uint64_t m2 = FStar_UInt64_eq_mask(f21, (uint64_t)0x7ffffffffffffU); + uint64_t m3 = FStar_UInt64_eq_mask(f31, (uint64_t)0x7ffffffffffffU); + uint64_t m4 = FStar_UInt64_eq_mask(f41, (uint64_t)0x7ffffffffffffU); + uint64_t mask = (((m0 & m1) & m2) & m3) & m4; + uint64_t f0_ = f01 - (mask & (uint64_t)0x7ffffffffffedU); + uint64_t f1_ = f11 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f2_ = f21 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f3_ = f31 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f4_ = f41 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f02 = f0_; + uint64_t f12 = f1_; + uint64_t f22 = f2_; + uint64_t f32 = f3_; + uint64_t f42 = f4_; + uint64_t o00 = f02 | f12 << (uint32_t)51U; + uint64_t o10 = f12 >> (uint32_t)13U | f22 << (uint32_t)38U; + uint64_t o20 = f22 >> (uint32_t)26U | f32 << (uint32_t)25U; + uint64_t o30 = f32 >> (uint32_t)39U | f42 << (uint32_t)12U; + uint64_t o0 = o00; + uint64_t o1 = o10; + uint64_t o2 = o20; + uint64_t o3 = o30; + u64s[0U] = o0; + u64s[1U] = o1; + u64s[2U] = o2; + u64s[3U] = o3; +} + +static inline void +Hacl_Impl_Curve25519_Field51_cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ + uint64_t mask = (uint64_t)0U - bit; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i++) { + uint64_t dummy = mask & (p1[i] ^ p2[i]); + p1[i] = p1[i] ^ dummy; + p2[i] = p2[i] ^ dummy; + } +} + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum25519_51_H_DEFINED +#endif diff --git a/lib/freebl/verified/Hacl_Chacha20.c b/lib/freebl/verified/Hacl_Chacha20.c index 2e552472bd..9a74e9f209 100644 --- a/lib/freebl/verified/Hacl_Chacha20.c +++ b/lib/freebl/verified/Hacl_Chacha20.c @@ -95,7 +95,7 @@ rounds(uint32_t *st) static inline void chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) { - memcpy(k, ctx, (uint32_t)16U * sizeof(ctx[0U])); + memcpy(k, ctx, (uint32_t)16U * sizeof(uint32_t)); uint32_t ctr_u32 = ctr; k[12U] = k[12U] + ctr_u32; rounds(k); @@ -169,9 +169,9 
@@ static inline void chacha20_encrypt_last(uint32_t *ctx, uint32_t len, uint8_t *out, uint32_t incr, uint8_t *text) { uint8_t plain[64U] = { 0U }; - memcpy(plain, text, len * sizeof(text[0U])); + memcpy(plain, text, len * sizeof(uint8_t)); chacha20_encrypt_block(ctx, plain, incr, plain); - memcpy(out, plain, len * sizeof(plain[0U])); + memcpy(out, plain, len * sizeof(uint8_t)); } static inline void diff --git a/lib/freebl/verified/Hacl_Chacha20.h b/lib/freebl/verified/Hacl_Chacha20.h index bd54a315e4..850544234d 100644 --- a/lib/freebl/verified/Hacl_Chacha20.h +++ b/lib/freebl/verified/Hacl_Chacha20.h @@ -21,14 +21,18 @@ * SOFTWARE. */ +#ifndef __Hacl_Chacha20_H +#define __Hacl_Chacha20_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Chacha20_H -#define __Hacl_Chacha20_H - #include "Hacl_Kremlib.h" extern const uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U]; @@ -51,5 +55,9 @@ Hacl_Chacha20_chacha20_decrypt( uint8_t *n, uint32_t ctr); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Chacha20_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c index 1b98e18af6..e45fcd9dfb 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c @@ -47,9 +47,9 @@ poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t Lib_IntVector_Intrinsics_vec128 e[5U]; for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec128_zero; - Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(block); + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); Lib_IntVector_Intrinsics_vec128 - b2 = Lib_IntVector_Intrinsics_vec128_load_le(block + (uint32_t)16U); + b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); Lib_IntVector_Intrinsics_vec128 hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); @@ -480,7 +480,7 @@ poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec128_zero; uint8_t tmp[16U] = { 0U }; - memcpy(tmp, last, rem1 * sizeof(last[0U])); + memcpy(tmp, last, rem1 * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); uint64_t lo = u0; uint64_t u = load64_le(tmp + (uint32_t)8U); @@ -685,7 +685,7 @@ poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t acc0[4U] = o4; } uint8_t tmp[16U] = { 0U }; - memcpy(tmp, rem, r * sizeof(rem[0U])); + memcpy(tmp, rem, r * sizeof(uint8_t)); if (r > (uint32_t)0U) { Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; Lib_IntVector_Intrinsics_vec128 *acc = ctx; @@ -912,7 +912,9 @@ poly1305_do_128( ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero; uint8_t block[16U] = { 0U }; Hacl_Poly1305_128_poly1305_init(ctx, k); - poly1305_padded_128(ctx, aadlen, aad); + if (aadlen != (uint32_t)0U) { + poly1305_padded_128(ctx, aadlen, aad); + } poly1305_padded_128(ctx, mlen, m); store64_le(block, (uint64_t)aadlen); store64_le(block + (uint32_t)8U, (uint64_t)mlen); diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h index 2c4d46bce4..bf5f198a77 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h +++ 
b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h @@ -21,15 +21,19 @@ * SOFTWARE. */ +#ifndef __Hacl_Chacha20Poly1305_128_H +#define __Hacl_Chacha20Poly1305_128_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "libintvector.h" #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Chacha20Poly1305_128_H -#define __Hacl_Chacha20Poly1305_128_H - #include "Hacl_Kremlib.h" #include "Hacl_Chacha20_Vec128.h" #include "Hacl_Poly1305_128.h" @@ -56,5 +60,9 @@ Hacl_Chacha20Poly1305_128_aead_decrypt( uint8_t *cipher, uint8_t *mac); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Chacha20Poly1305_128_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c index efbccbfddb..efa598ae15 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c @@ -47,9 +47,9 @@ poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t Lib_IntVector_Intrinsics_vec256 e[5U]; for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(block); + Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); Lib_IntVector_Intrinsics_vec256 - hi = Lib_IntVector_Intrinsics_vec256_load_le(block + (uint32_t)32U); + hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); Lib_IntVector_Intrinsics_vec256 mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); Lib_IntVector_Intrinsics_vec256 @@ -482,7 +482,7 @@ poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec256_zero; uint8_t tmp[16U] = { 0U }; - memcpy(tmp, last, rem1 * sizeof(last[0U])); + memcpy(tmp, last, rem1 * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); uint64_t lo = u0; uint64_t u = load64_le(tmp + (uint32_t)8U); @@ -687,7 +687,7 @@ poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t acc0[4U] = o4; } uint8_t tmp[16U] = { 0U }; - memcpy(tmp, rem, r * sizeof(rem[0U])); + memcpy(tmp, rem, r * sizeof(uint8_t)); if (r > (uint32_t)0U) { Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; Lib_IntVector_Intrinsics_vec256 *acc = ctx; @@ -914,7 +914,9 @@ poly1305_do_256( ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; uint8_t block[16U] = { 0U }; Hacl_Poly1305_256_poly1305_init(ctx, k); - poly1305_padded_256(ctx, aadlen, aad); + if (aadlen != (uint32_t)0U) { + poly1305_padded_256(ctx, aadlen, aad); + } poly1305_padded_256(ctx, mlen, m); store64_le(block, (uint64_t)aadlen); store64_le(block + (uint32_t)8U, (uint64_t)mlen); diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h index 550cc0564c..09ebbbf3d2 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h @@ -21,15 +21,19 @@ * SOFTWARE. 
*/ +#ifndef __Hacl_Chacha20Poly1305_256_H +#define __Hacl_Chacha20Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "libintvector.h" #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Chacha20Poly1305_256_H -#define __Hacl_Chacha20Poly1305_256_H - #include "Hacl_Kremlib.h" #include "Hacl_Chacha20_Vec256.h" #include "Hacl_Poly1305_256.h" @@ -56,5 +60,9 @@ Hacl_Chacha20Poly1305_256_aead_decrypt( uint8_t *cipher, uint8_t *mac); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Chacha20Poly1305_256_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c index 760e3d548a..493a31695e 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c @@ -157,7 +157,7 @@ poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text) uint8_t *last = blocks + nb * (uint32_t)16U; uint64_t e[5U] = { 0U }; uint8_t tmp[16U] = { 0U }; - memcpy(tmp, last, rem1 * sizeof(last[0U])); + memcpy(tmp, last, rem1 * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); uint64_t lo = u0; uint64_t u = load64_le(tmp + (uint32_t)8U); @@ -275,7 +275,7 @@ poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text) acc0[4U] = o4; } uint8_t tmp[16U] = { 0U }; - memcpy(tmp, rem, r * sizeof(rem[0U])); + memcpy(tmp, rem, r * sizeof(uint8_t)); if (r > (uint32_t)0U) { uint64_t *pre = ctx + (uint32_t)5U; uint64_t *acc = ctx; @@ -411,7 +411,9 @@ poly1305_do_32( uint64_t ctx[25U] = { 0U }; uint8_t block[16U] = { 0U }; Hacl_Poly1305_32_poly1305_init(ctx, k); - poly1305_padded_32(ctx, aadlen, aad); + if (aadlen != (uint32_t)0U) { + poly1305_padded_32(ctx, aadlen, aad); + } poly1305_padded_32(ctx, mlen, m); store64_le(block, (uint64_t)aadlen); store64_le(block + (uint32_t)8U, (uint64_t)mlen); diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h index 615f3f96d7..f7854685c1 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h @@ -21,14 +21,18 @@ * SOFTWARE. 
*/ +#ifndef __Hacl_Chacha20Poly1305_32_H +#define __Hacl_Chacha20Poly1305_32_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Chacha20Poly1305_32_H -#define __Hacl_Chacha20Poly1305_32_H - #include "Hacl_Chacha20.h" #include "Hacl_Kremlib.h" #include "Hacl_Poly1305_32.h" @@ -55,5 +59,9 @@ Hacl_Chacha20Poly1305_32_aead_decrypt( uint8_t *cipher, uint8_t *mac); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Chacha20Poly1305_32_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec128.c b/lib/freebl/verified/Hacl_Chacha20_Vec128.c index cf80c431de..485c78d343 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec128.c +++ b/lib/freebl/verified/Hacl_Chacha20_Vec128.c @@ -130,7 +130,7 @@ chacha20_core_128( Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t ctr) { - memcpy(k, ctx, (uint32_t)16U * sizeof(ctx[0U])); + memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec128)); uint32_t ctr_u32 = (uint32_t)4U * ctr; Lib_IntVector_Intrinsics_vec128 cv = Lib_IntVector_Intrinsics_vec128_load32(ctr_u32); k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); @@ -334,16 +334,16 @@ Hacl_Chacha20_Vec128_chacha20_encrypt_128( k[15U] = v15; for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { Lib_IntVector_Intrinsics_vec128 - x = Lib_IntVector_Intrinsics_vec128_load_le(uu____1 + i0 * (uint32_t)16U); + x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec128_store_le(uu____0 + i0 * (uint32_t)16U, y); + Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y); } } if (rem1 > (uint32_t)0U) { uint8_t *uu____2 = out + nb * (uint32_t)256U; uint8_t *uu____3 = text + nb * (uint32_t)256U; uint8_t plain[256U] = { 0U }; - memcpy(plain, uu____3, rem * sizeof(uu____3[0U])); + memcpy(plain, uu____3, rem * sizeof(uint8_t)); Lib_IntVector_Intrinsics_vec128 k[16U]; for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) k[_i] = Lib_IntVector_Intrinsics_vec128_zero; @@ -462,11 +462,11 @@ Hacl_Chacha20_Vec128_chacha20_encrypt_128( k[15U] = v15; for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { Lib_IntVector_Intrinsics_vec128 - x = Lib_IntVector_Intrinsics_vec128_load_le(plain + i * (uint32_t)16U); + x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec128_store_le(plain + i * (uint32_t)16U, y); + Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y); } - memcpy(uu____2, plain, rem * sizeof(plain[0U])); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); } } @@ -607,16 +607,16 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( k[15U] = v15; for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { Lib_IntVector_Intrinsics_vec128 - x = Lib_IntVector_Intrinsics_vec128_load_le(uu____1 + i0 * (uint32_t)16U); + x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec128_store_le(uu____0 + i0 * (uint32_t)16U, y); + Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y); } } if (rem1 > (uint32_t)0U) { uint8_t *uu____2 = out + nb * (uint32_t)256U; uint8_t *uu____3 = cipher + nb * (uint32_t)256U; uint8_t plain[256U] = { 0U }; - 
memcpy(plain, uu____3, rem * sizeof(uu____3[0U])); + memcpy(plain, uu____3, rem * sizeof(uint8_t)); Lib_IntVector_Intrinsics_vec128 k[16U]; for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) k[_i] = Lib_IntVector_Intrinsics_vec128_zero; @@ -735,10 +735,10 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( k[15U] = v15; for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { Lib_IntVector_Intrinsics_vec128 - x = Lib_IntVector_Intrinsics_vec128_load_le(plain + i * (uint32_t)16U); + x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec128_store_le(plain + i * (uint32_t)16U, y); + Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y); } - memcpy(uu____2, plain, rem * sizeof(plain[0U])); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); } } diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec128.h b/lib/freebl/verified/Hacl_Chacha20_Vec128.h index dc59ba1c7a..6b3a8e08b4 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec128.h +++ b/lib/freebl/verified/Hacl_Chacha20_Vec128.h @@ -21,15 +21,19 @@ * SOFTWARE. */ +#ifndef __Hacl_Chacha20_Vec128_H +#define __Hacl_Chacha20_Vec128_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "libintvector.h" #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Chacha20_Vec128_H -#define __Hacl_Chacha20_Vec128_H - #include "Hacl_Chacha20.h" #include "Hacl_Kremlib.h" @@ -51,5 +55,9 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( uint8_t *n, uint32_t ctr); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Chacha20_Vec128_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec256.c b/lib/freebl/verified/Hacl_Chacha20_Vec256.c index 49902c1304..72b235123a 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec256.c +++ b/lib/freebl/verified/Hacl_Chacha20_Vec256.c @@ -130,7 +130,7 @@ chacha20_core_256( Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t ctr) { - memcpy(k, ctx, (uint32_t)16U * sizeof(ctx[0U])); + memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec256)); uint32_t ctr_u32 = (uint32_t)8U * ctr; Lib_IntVector_Intrinsics_vec256 cv = Lib_IntVector_Intrinsics_vec256_load32(ctr_u32); k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); @@ -370,16 +370,16 @@ Hacl_Chacha20_Vec256_chacha20_encrypt_256( k[15U] = v15; for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load_le(uu____1 + i0 * (uint32_t)32U); + x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec256_store_le(uu____0 + i0 * (uint32_t)32U, y); + Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y); } } if (rem1 > (uint32_t)0U) { uint8_t *uu____2 = out + nb * (uint32_t)512U; uint8_t *uu____3 = text + nb * (uint32_t)512U; uint8_t plain[512U] = { 0U }; - memcpy(plain, uu____3, rem * sizeof(uu____3[0U])); + memcpy(plain, uu____3, rem * sizeof(uint8_t)); Lib_IntVector_Intrinsics_vec256 k[16U]; for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) k[_i] = Lib_IntVector_Intrinsics_vec256_zero; @@ -530,11 +530,11 @@ Hacl_Chacha20_Vec256_chacha20_encrypt_256( k[15U] = v15; for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load_le(plain + i * (uint32_t)32U); + x = 
Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec256_store_le(plain + i * (uint32_t)32U, y); + Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y); } - memcpy(uu____2, plain, rem * sizeof(plain[0U])); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); } } @@ -707,16 +707,16 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( k[15U] = v15; for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load_le(uu____1 + i0 * (uint32_t)32U); + x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec256_store_le(uu____0 + i0 * (uint32_t)32U, y); + Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y); } } if (rem1 > (uint32_t)0U) { uint8_t *uu____2 = out + nb * (uint32_t)512U; uint8_t *uu____3 = cipher + nb * (uint32_t)512U; uint8_t plain[512U] = { 0U }; - memcpy(plain, uu____3, rem * sizeof(uu____3[0U])); + memcpy(plain, uu____3, rem * sizeof(uint8_t)); Lib_IntVector_Intrinsics_vec256 k[16U]; for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) k[_i] = Lib_IntVector_Intrinsics_vec256_zero; @@ -867,10 +867,10 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( k[15U] = v15; for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load_le(plain + i * (uint32_t)32U); + x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec256_store_le(plain + i * (uint32_t)32U, y); + Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y); } - memcpy(uu____2, plain, rem * sizeof(plain[0U])); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); } } diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec256.h b/lib/freebl/verified/Hacl_Chacha20_Vec256.h index 1ae9ea3798..478f2813f6 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec256.h +++ b/lib/freebl/verified/Hacl_Chacha20_Vec256.h @@ -21,15 +21,19 @@ * SOFTWARE. 
*/ +#ifndef __Hacl_Chacha20_Vec256_H +#define __Hacl_Chacha20_Vec256_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "libintvector.h" #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Chacha20_Vec256_H -#define __Hacl_Chacha20_Vec256_H - #include "Hacl_Chacha20.h" #include "Hacl_Kremlib.h" @@ -51,5 +55,9 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( uint8_t *n, uint32_t ctr); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Chacha20_Vec256_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Curve25519_51.c b/lib/freebl/verified/Hacl_Curve25519_51.c index 2b76eafe27..d366d5ced5 100644 --- a/lib/freebl/verified/Hacl_Curve25519_51.c +++ b/lib/freebl/verified/Hacl_Curve25519_51.c @@ -23,631 +23,6 @@ #include "Hacl_Curve25519_51.h" -static inline void -fadd0(uint64_t *out, uint64_t *f1, uint64_t *f2) -{ - uint64_t f10 = f1[0U]; - uint64_t f20 = f2[0U]; - uint64_t f11 = f1[1U]; - uint64_t f21 = f2[1U]; - uint64_t f12 = f1[2U]; - uint64_t f22 = f2[2U]; - uint64_t f13 = f1[3U]; - uint64_t f23 = f2[3U]; - uint64_t f14 = f1[4U]; - uint64_t f24 = f2[4U]; - out[0U] = f10 + f20; - out[1U] = f11 + f21; - out[2U] = f12 + f22; - out[3U] = f13 + f23; - out[4U] = f14 + f24; -} - -static inline void -fsub0(uint64_t *out, uint64_t *f1, uint64_t *f2) -{ - uint64_t f10 = f1[0U]; - uint64_t f20 = f2[0U]; - uint64_t f11 = f1[1U]; - uint64_t f21 = f2[1U]; - uint64_t f12 = f1[2U]; - uint64_t f22 = f2[2U]; - uint64_t f13 = f1[3U]; - uint64_t f23 = f2[3U]; - uint64_t f14 = f1[4U]; - uint64_t f24 = f2[4U]; - out[0U] = f10 + (uint64_t)0x3fffffffffff68U - f20; - out[1U] = f11 + (uint64_t)0x3ffffffffffff8U - f21; - out[2U] = f12 + (uint64_t)0x3ffffffffffff8U - f22; - out[3U] = f13 + (uint64_t)0x3ffffffffffff8U - f23; - out[4U] = f14 + (uint64_t)0x3ffffffffffff8U - f24; -} - -static inline void -fmul0(uint64_t *out, uint64_t *f1, uint64_t *f2) -{ - uint64_t f10 = f1[0U]; - uint64_t f11 = f1[1U]; - uint64_t f12 = f1[2U]; - uint64_t f13 = f1[3U]; - uint64_t f14 = f1[4U]; - uint64_t f20 = f2[0U]; - uint64_t f21 = f2[1U]; - uint64_t f22 = f2[2U]; - uint64_t f23 = f2[3U]; - uint64_t f24 = f2[4U]; - uint64_t tmp1 = f21 * (uint64_t)19U; - uint64_t tmp2 = f22 * (uint64_t)19U; - uint64_t tmp3 = f23 * (uint64_t)19U; - uint64_t tmp4 = f24 * (uint64_t)19U; - FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); - FStar_UInt128_uint128 o10 = FStar_UInt128_mul_wide(f10, f21); - FStar_UInt128_uint128 o20 = FStar_UInt128_mul_wide(f10, f22); - FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23); - FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24); - FStar_UInt128_uint128 o01 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp4)); - FStar_UInt128_uint128 o11 = FStar_UInt128_add(o10, FStar_UInt128_mul_wide(f11, f20)); - FStar_UInt128_uint128 o21 = FStar_UInt128_add(o20, FStar_UInt128_mul_wide(f11, f21)); - FStar_UInt128_uint128 o31 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22)); - FStar_UInt128_uint128 o41 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23)); - FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f12, tmp3)); - FStar_UInt128_uint128 o12 = FStar_UInt128_add(o11, FStar_UInt128_mul_wide(f12, tmp4)); - FStar_UInt128_uint128 o22 = FStar_UInt128_add(o21, FStar_UInt128_mul_wide(f12, f20)); - FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f12, f21)); - FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f12, f22)); - 
FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f13, tmp2)); - FStar_UInt128_uint128 o13 = FStar_UInt128_add(o12, FStar_UInt128_mul_wide(f13, tmp3)); - FStar_UInt128_uint128 o23 = FStar_UInt128_add(o22, FStar_UInt128_mul_wide(f13, tmp4)); - FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f13, f20)); - FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f13, f21)); - FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f14, tmp1)); - FStar_UInt128_uint128 o14 = FStar_UInt128_add(o13, FStar_UInt128_mul_wide(f14, tmp2)); - FStar_UInt128_uint128 o24 = FStar_UInt128_add(o23, FStar_UInt128_mul_wide(f14, tmp3)); - FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f14, tmp4)); - FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f14, f20)); - FStar_UInt128_uint128 tmp_w0 = o04; - FStar_UInt128_uint128 tmp_w1 = o14; - FStar_UInt128_uint128 tmp_w2 = o24; - FStar_UInt128_uint128 tmp_w3 = o34; - FStar_UInt128_uint128 tmp_w4 = o44; - FStar_UInt128_uint128 - l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); - uint64_t tmp01 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; - uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); - FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0)); - uint64_t tmp11 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; - uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); - FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1)); - uint64_t tmp21 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; - uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); - FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2)); - uint64_t tmp31 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; - uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); - FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3)); - uint64_t tmp41 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; - uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); - uint64_t l_4 = tmp01 + c4 * (uint64_t)19U; - uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; - uint64_t c5 = l_4 >> (uint32_t)51U; - uint64_t o0 = tmp0_; - uint64_t o1 = tmp11 + c5; - uint64_t o2 = tmp21; - uint64_t o3 = tmp31; - uint64_t o4 = tmp41; - out[0U] = o0; - out[1U] = o1; - out[2U] = o2; - out[3U] = o3; - out[4U] = o4; -} - -static inline void -fmul20(uint64_t *out, uint64_t *f1, uint64_t *f2) -{ - uint64_t f10 = f1[0U]; - uint64_t f11 = f1[1U]; - uint64_t f12 = f1[2U]; - uint64_t f13 = f1[3U]; - uint64_t f14 = f1[4U]; - uint64_t f20 = f2[0U]; - uint64_t f21 = f2[1U]; - uint64_t f22 = f2[2U]; - uint64_t f23 = f2[3U]; - uint64_t f24 = f2[4U]; - uint64_t f30 = f1[5U]; - uint64_t f31 = f1[6U]; - uint64_t f32 = f1[7U]; - uint64_t f33 = f1[8U]; - uint64_t f34 = f1[9U]; - uint64_t f40 = f2[5U]; - uint64_t f41 = f2[6U]; - uint64_t f42 = f2[7U]; - uint64_t f43 = f2[8U]; - uint64_t f44 = f2[9U]; - uint64_t tmp11 = f21 * (uint64_t)19U; - uint64_t tmp12 = f22 * (uint64_t)19U; - uint64_t tmp13 = f23 * (uint64_t)19U; - uint64_t tmp14 = f24 * (uint64_t)19U; - uint64_t 
tmp21 = f41 * (uint64_t)19U; - uint64_t tmp22 = f42 * (uint64_t)19U; - uint64_t tmp23 = f43 * (uint64_t)19U; - uint64_t tmp24 = f44 * (uint64_t)19U; - FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); - FStar_UInt128_uint128 o15 = FStar_UInt128_mul_wide(f10, f21); - FStar_UInt128_uint128 o25 = FStar_UInt128_mul_wide(f10, f22); - FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23); - FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24); - FStar_UInt128_uint128 o010 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp14)); - FStar_UInt128_uint128 o110 = FStar_UInt128_add(o15, FStar_UInt128_mul_wide(f11, f20)); - FStar_UInt128_uint128 o210 = FStar_UInt128_add(o25, FStar_UInt128_mul_wide(f11, f21)); - FStar_UInt128_uint128 o310 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22)); - FStar_UInt128_uint128 o410 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23)); - FStar_UInt128_uint128 o020 = FStar_UInt128_add(o010, FStar_UInt128_mul_wide(f12, tmp13)); - FStar_UInt128_uint128 o120 = FStar_UInt128_add(o110, FStar_UInt128_mul_wide(f12, tmp14)); - FStar_UInt128_uint128 o220 = FStar_UInt128_add(o210, FStar_UInt128_mul_wide(f12, f20)); - FStar_UInt128_uint128 o320 = FStar_UInt128_add(o310, FStar_UInt128_mul_wide(f12, f21)); - FStar_UInt128_uint128 o420 = FStar_UInt128_add(o410, FStar_UInt128_mul_wide(f12, f22)); - FStar_UInt128_uint128 o030 = FStar_UInt128_add(o020, FStar_UInt128_mul_wide(f13, tmp12)); - FStar_UInt128_uint128 o130 = FStar_UInt128_add(o120, FStar_UInt128_mul_wide(f13, tmp13)); - FStar_UInt128_uint128 o230 = FStar_UInt128_add(o220, FStar_UInt128_mul_wide(f13, tmp14)); - FStar_UInt128_uint128 o330 = FStar_UInt128_add(o320, FStar_UInt128_mul_wide(f13, f20)); - FStar_UInt128_uint128 o430 = FStar_UInt128_add(o420, FStar_UInt128_mul_wide(f13, f21)); - FStar_UInt128_uint128 o040 = FStar_UInt128_add(o030, FStar_UInt128_mul_wide(f14, tmp11)); - FStar_UInt128_uint128 o140 = FStar_UInt128_add(o130, FStar_UInt128_mul_wide(f14, tmp12)); - FStar_UInt128_uint128 o240 = FStar_UInt128_add(o230, FStar_UInt128_mul_wide(f14, tmp13)); - FStar_UInt128_uint128 o340 = FStar_UInt128_add(o330, FStar_UInt128_mul_wide(f14, tmp14)); - FStar_UInt128_uint128 o440 = FStar_UInt128_add(o430, FStar_UInt128_mul_wide(f14, f20)); - FStar_UInt128_uint128 tmp_w10 = o040; - FStar_UInt128_uint128 tmp_w11 = o140; - FStar_UInt128_uint128 tmp_w12 = o240; - FStar_UInt128_uint128 tmp_w13 = o340; - FStar_UInt128_uint128 tmp_w14 = o440; - FStar_UInt128_uint128 o0 = FStar_UInt128_mul_wide(f30, f40); - FStar_UInt128_uint128 o1 = FStar_UInt128_mul_wide(f30, f41); - FStar_UInt128_uint128 o2 = FStar_UInt128_mul_wide(f30, f42); - FStar_UInt128_uint128 o3 = FStar_UInt128_mul_wide(f30, f43); - FStar_UInt128_uint128 o4 = FStar_UInt128_mul_wide(f30, f44); - FStar_UInt128_uint128 o01 = FStar_UInt128_add(o0, FStar_UInt128_mul_wide(f31, tmp24)); - FStar_UInt128_uint128 o111 = FStar_UInt128_add(o1, FStar_UInt128_mul_wide(f31, f40)); - FStar_UInt128_uint128 o211 = FStar_UInt128_add(o2, FStar_UInt128_mul_wide(f31, f41)); - FStar_UInt128_uint128 o31 = FStar_UInt128_add(o3, FStar_UInt128_mul_wide(f31, f42)); - FStar_UInt128_uint128 o41 = FStar_UInt128_add(o4, FStar_UInt128_mul_wide(f31, f43)); - FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f32, tmp23)); - FStar_UInt128_uint128 o121 = FStar_UInt128_add(o111, FStar_UInt128_mul_wide(f32, tmp24)); - FStar_UInt128_uint128 o221 = FStar_UInt128_add(o211, FStar_UInt128_mul_wide(f32, f40)); - FStar_UInt128_uint128 o32 = 
FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f32, f41)); - FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f32, f42)); - FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f33, tmp22)); - FStar_UInt128_uint128 o131 = FStar_UInt128_add(o121, FStar_UInt128_mul_wide(f33, tmp23)); - FStar_UInt128_uint128 o231 = FStar_UInt128_add(o221, FStar_UInt128_mul_wide(f33, tmp24)); - FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f33, f40)); - FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f33, f41)); - FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f34, tmp21)); - FStar_UInt128_uint128 o141 = FStar_UInt128_add(o131, FStar_UInt128_mul_wide(f34, tmp22)); - FStar_UInt128_uint128 o241 = FStar_UInt128_add(o231, FStar_UInt128_mul_wide(f34, tmp23)); - FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f34, tmp24)); - FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f34, f40)); - FStar_UInt128_uint128 tmp_w20 = o04; - FStar_UInt128_uint128 tmp_w21 = o141; - FStar_UInt128_uint128 tmp_w22 = o241; - FStar_UInt128_uint128 tmp_w23 = o34; - FStar_UInt128_uint128 tmp_w24 = o44; - FStar_UInt128_uint128 - l_ = FStar_UInt128_add(tmp_w10, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); - uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; - uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); - FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w11, FStar_UInt128_uint64_to_uint128(c00)); - uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; - uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); - FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w12, FStar_UInt128_uint64_to_uint128(c10)); - uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; - uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); - FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w13, FStar_UInt128_uint64_to_uint128(c20)); - uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; - uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); - FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w14, FStar_UInt128_uint64_to_uint128(c30)); - uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; - uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); - uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; - uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; - uint64_t c50 = l_4 >> (uint32_t)51U; - uint64_t o100 = tmp0_; - uint64_t o112 = tmp10 + c50; - uint64_t o122 = tmp20; - uint64_t o132 = tmp30; - uint64_t o142 = tmp40; - FStar_UInt128_uint128 - l_5 = FStar_UInt128_add(tmp_w20, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); - uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; - uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); - FStar_UInt128_uint128 l_6 = FStar_UInt128_add(tmp_w21, FStar_UInt128_uint64_to_uint128(c0)); - uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; - uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U)); - FStar_UInt128_uint128 l_7 = FStar_UInt128_add(tmp_w22, 
FStar_UInt128_uint64_to_uint128(c1)); - uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU; - uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); - FStar_UInt128_uint128 l_8 = FStar_UInt128_add(tmp_w23, FStar_UInt128_uint64_to_uint128(c2)); - uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; - uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); - FStar_UInt128_uint128 l_9 = FStar_UInt128_add(tmp_w24, FStar_UInt128_uint64_to_uint128(c3)); - uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; - uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); - uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; - uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; - uint64_t c5 = l_10 >> (uint32_t)51U; - uint64_t o200 = tmp0_0; - uint64_t o212 = tmp1 + c5; - uint64_t o222 = tmp2; - uint64_t o232 = tmp3; - uint64_t o242 = tmp4; - uint64_t o10 = o100; - uint64_t o11 = o112; - uint64_t o12 = o122; - uint64_t o13 = o132; - uint64_t o14 = o142; - uint64_t o20 = o200; - uint64_t o21 = o212; - uint64_t o22 = o222; - uint64_t o23 = o232; - uint64_t o24 = o242; - out[0U] = o10; - out[1U] = o11; - out[2U] = o12; - out[3U] = o13; - out[4U] = o14; - out[5U] = o20; - out[6U] = o21; - out[7U] = o22; - out[8U] = o23; - out[9U] = o24; -} - -static inline void -fmul1(uint64_t *out, uint64_t *f1, uint64_t f2) -{ - uint64_t f10 = f1[0U]; - uint64_t f11 = f1[1U]; - uint64_t f12 = f1[2U]; - uint64_t f13 = f1[3U]; - uint64_t f14 = f1[4U]; - FStar_UInt128_uint128 tmp_w0 = FStar_UInt128_mul_wide(f2, f10); - FStar_UInt128_uint128 tmp_w1 = FStar_UInt128_mul_wide(f2, f11); - FStar_UInt128_uint128 tmp_w2 = FStar_UInt128_mul_wide(f2, f12); - FStar_UInt128_uint128 tmp_w3 = FStar_UInt128_mul_wide(f2, f13); - FStar_UInt128_uint128 tmp_w4 = FStar_UInt128_mul_wide(f2, f14); - FStar_UInt128_uint128 - l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); - uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; - uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); - FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0)); - uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; - uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); - FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1)); - uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; - uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); - FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2)); - uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; - uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); - FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3)); - uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; - uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); - uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; - uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; - uint64_t c5 = l_4 >> (uint32_t)51U; - uint64_t o0 = tmp0_; - uint64_t o1 = tmp1 + c5; - uint64_t o2 = 
tmp2; - uint64_t o3 = tmp3; - uint64_t o4 = tmp4; - out[0U] = o0; - out[1U] = o1; - out[2U] = o2; - out[3U] = o3; - out[4U] = o4; -} - -static inline void -fsqr0(uint64_t *out, uint64_t *f) -{ - uint64_t f0 = f[0U]; - uint64_t f1 = f[1U]; - uint64_t f2 = f[2U]; - uint64_t f3 = f[3U]; - uint64_t f4 = f[4U]; - uint64_t d0 = (uint64_t)2U * f0; - uint64_t d1 = (uint64_t)2U * f1; - uint64_t d2 = (uint64_t)38U * f2; - uint64_t d3 = (uint64_t)19U * f3; - uint64_t d419 = (uint64_t)19U * f4; - uint64_t d4 = (uint64_t)2U * d419; - FStar_UInt128_uint128 - s0 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f0, f0), - FStar_UInt128_mul_wide(d4, f1)), - FStar_UInt128_mul_wide(d2, f3)); - FStar_UInt128_uint128 - s1 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f1), - FStar_UInt128_mul_wide(d4, f2)), - FStar_UInt128_mul_wide(d3, f3)); - FStar_UInt128_uint128 - s2 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f2), - FStar_UInt128_mul_wide(f1, f1)), - FStar_UInt128_mul_wide(d4, f3)); - FStar_UInt128_uint128 - s3 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f3), - FStar_UInt128_mul_wide(d1, f2)), - FStar_UInt128_mul_wide(f4, d419)); - FStar_UInt128_uint128 - s4 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f4), - FStar_UInt128_mul_wide(d1, f3)), - FStar_UInt128_mul_wide(f2, f2)); - FStar_UInt128_uint128 o00 = s0; - FStar_UInt128_uint128 o10 = s1; - FStar_UInt128_uint128 o20 = s2; - FStar_UInt128_uint128 o30 = s3; - FStar_UInt128_uint128 o40 = s4; - FStar_UInt128_uint128 - l_ = FStar_UInt128_add(o00, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); - uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; - uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); - FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o10, FStar_UInt128_uint64_to_uint128(c0)); - uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; - uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); - FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o20, FStar_UInt128_uint64_to_uint128(c1)); - uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; - uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); - FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o30, FStar_UInt128_uint64_to_uint128(c2)); - uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; - uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); - FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o40, FStar_UInt128_uint64_to_uint128(c3)); - uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; - uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); - uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; - uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; - uint64_t c5 = l_4 >> (uint32_t)51U; - uint64_t o0 = tmp0_; - uint64_t o1 = tmp1 + c5; - uint64_t o2 = tmp2; - uint64_t o3 = tmp3; - uint64_t o4 = tmp4; - out[0U] = o0; - out[1U] = o1; - out[2U] = o2; - out[3U] = o3; - out[4U] = o4; -} - -static inline void -fsqr20(uint64_t *out, uint64_t *f) -{ - uint64_t f10 = f[0U]; - uint64_t f11 = f[1U]; - uint64_t f12 = f[2U]; - uint64_t f13 = f[3U]; - uint64_t f14 = f[4U]; - uint64_t f20 = f[5U]; - uint64_t f21 = f[6U]; - uint64_t f22 = f[7U]; - uint64_t f23 = f[8U]; - uint64_t f24 = 
f[9U]; - uint64_t d00 = (uint64_t)2U * f10; - uint64_t d10 = (uint64_t)2U * f11; - uint64_t d20 = (uint64_t)38U * f12; - uint64_t d30 = (uint64_t)19U * f13; - uint64_t d4190 = (uint64_t)19U * f14; - uint64_t d40 = (uint64_t)2U * d4190; - FStar_UInt128_uint128 - s00 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f10, f10), - FStar_UInt128_mul_wide(d40, f11)), - FStar_UInt128_mul_wide(d20, f13)); - FStar_UInt128_uint128 - s10 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f11), - FStar_UInt128_mul_wide(d40, f12)), - FStar_UInt128_mul_wide(d30, f13)); - FStar_UInt128_uint128 - s20 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f12), - FStar_UInt128_mul_wide(f11, f11)), - FStar_UInt128_mul_wide(d40, f13)); - FStar_UInt128_uint128 - s30 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f13), - FStar_UInt128_mul_wide(d10, f12)), - FStar_UInt128_mul_wide(f14, d4190)); - FStar_UInt128_uint128 - s40 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f14), - FStar_UInt128_mul_wide(d10, f13)), - FStar_UInt128_mul_wide(f12, f12)); - FStar_UInt128_uint128 o100 = s00; - FStar_UInt128_uint128 o110 = s10; - FStar_UInt128_uint128 o120 = s20; - FStar_UInt128_uint128 o130 = s30; - FStar_UInt128_uint128 o140 = s40; - uint64_t d0 = (uint64_t)2U * f20; - uint64_t d1 = (uint64_t)2U * f21; - uint64_t d2 = (uint64_t)38U * f22; - uint64_t d3 = (uint64_t)19U * f23; - uint64_t d419 = (uint64_t)19U * f24; - uint64_t d4 = (uint64_t)2U * d419; - FStar_UInt128_uint128 - s0 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f20, f20), - FStar_UInt128_mul_wide(d4, f21)), - FStar_UInt128_mul_wide(d2, f23)); - FStar_UInt128_uint128 - s1 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f21), - FStar_UInt128_mul_wide(d4, f22)), - FStar_UInt128_mul_wide(d3, f23)); - FStar_UInt128_uint128 - s2 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f22), - FStar_UInt128_mul_wide(f21, f21)), - FStar_UInt128_mul_wide(d4, f23)); - FStar_UInt128_uint128 - s3 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f23), - FStar_UInt128_mul_wide(d1, f22)), - FStar_UInt128_mul_wide(f24, d419)); - FStar_UInt128_uint128 - s4 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f24), - FStar_UInt128_mul_wide(d1, f23)), - FStar_UInt128_mul_wide(f22, f22)); - FStar_UInt128_uint128 o200 = s0; - FStar_UInt128_uint128 o210 = s1; - FStar_UInt128_uint128 o220 = s2; - FStar_UInt128_uint128 o230 = s3; - FStar_UInt128_uint128 o240 = s4; - FStar_UInt128_uint128 - l_ = FStar_UInt128_add(o100, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); - uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; - uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); - FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o110, FStar_UInt128_uint64_to_uint128(c00)); - uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; - uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); - FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o120, FStar_UInt128_uint64_to_uint128(c10)); - uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; - uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); - FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o130, FStar_UInt128_uint64_to_uint128(c20)); - uint64_t tmp30 = 
FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; - uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); - FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o140, FStar_UInt128_uint64_to_uint128(c30)); - uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; - uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); - uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; - uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; - uint64_t c50 = l_4 >> (uint32_t)51U; - uint64_t o101 = tmp0_; - uint64_t o111 = tmp10 + c50; - uint64_t o121 = tmp20; - uint64_t o131 = tmp30; - uint64_t o141 = tmp40; - FStar_UInt128_uint128 - l_5 = FStar_UInt128_add(o200, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); - uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; - uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); - FStar_UInt128_uint128 l_6 = FStar_UInt128_add(o210, FStar_UInt128_uint64_to_uint128(c0)); - uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; - uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U)); - FStar_UInt128_uint128 l_7 = FStar_UInt128_add(o220, FStar_UInt128_uint64_to_uint128(c1)); - uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU; - uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); - FStar_UInt128_uint128 l_8 = FStar_UInt128_add(o230, FStar_UInt128_uint64_to_uint128(c2)); - uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; - uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); - FStar_UInt128_uint128 l_9 = FStar_UInt128_add(o240, FStar_UInt128_uint64_to_uint128(c3)); - uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; - uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); - uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; - uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; - uint64_t c5 = l_10 >> (uint32_t)51U; - uint64_t o201 = tmp0_0; - uint64_t o211 = tmp1 + c5; - uint64_t o221 = tmp2; - uint64_t o231 = tmp3; - uint64_t o241 = tmp4; - uint64_t o10 = o101; - uint64_t o11 = o111; - uint64_t o12 = o121; - uint64_t o13 = o131; - uint64_t o14 = o141; - uint64_t o20 = o201; - uint64_t o21 = o211; - uint64_t o22 = o221; - uint64_t o23 = o231; - uint64_t o24 = o241; - out[0U] = o10; - out[1U] = o11; - out[2U] = o12; - out[3U] = o13; - out[4U] = o14; - out[5U] = o20; - out[6U] = o21; - out[7U] = o22; - out[8U] = o23; - out[9U] = o24; -} - -static void -store_felem(uint64_t *u64s, uint64_t *f) -{ - uint64_t f0 = f[0U]; - uint64_t f1 = f[1U]; - uint64_t f2 = f[2U]; - uint64_t f3 = f[3U]; - uint64_t f4 = f[4U]; - uint64_t l_ = f0 + (uint64_t)0U; - uint64_t tmp0 = l_ & (uint64_t)0x7ffffffffffffU; - uint64_t c0 = l_ >> (uint32_t)51U; - uint64_t l_0 = f1 + c0; - uint64_t tmp1 = l_0 & (uint64_t)0x7ffffffffffffU; - uint64_t c1 = l_0 >> (uint32_t)51U; - uint64_t l_1 = f2 + c1; - uint64_t tmp2 = l_1 & (uint64_t)0x7ffffffffffffU; - uint64_t c2 = l_1 >> (uint32_t)51U; - uint64_t l_2 = f3 + c2; - uint64_t tmp3 = l_2 & (uint64_t)0x7ffffffffffffU; - uint64_t c3 = l_2 >> (uint32_t)51U; - uint64_t l_3 = f4 + c3; - uint64_t tmp4 = l_3 & (uint64_t)0x7ffffffffffffU; - uint64_t c4 = l_3 >> (uint32_t)51U; - uint64_t l_4 = tmp0 + c4 * 
(uint64_t)19U; - uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; - uint64_t c5 = l_4 >> (uint32_t)51U; - uint64_t f01 = tmp0_; - uint64_t f11 = tmp1 + c5; - uint64_t f21 = tmp2; - uint64_t f31 = tmp3; - uint64_t f41 = tmp4; - uint64_t m0 = FStar_UInt64_gte_mask(f01, (uint64_t)0x7ffffffffffedU); - uint64_t m1 = FStar_UInt64_eq_mask(f11, (uint64_t)0x7ffffffffffffU); - uint64_t m2 = FStar_UInt64_eq_mask(f21, (uint64_t)0x7ffffffffffffU); - uint64_t m3 = FStar_UInt64_eq_mask(f31, (uint64_t)0x7ffffffffffffU); - uint64_t m4 = FStar_UInt64_eq_mask(f41, (uint64_t)0x7ffffffffffffU); - uint64_t mask = (((m0 & m1) & m2) & m3) & m4; - uint64_t f0_ = f01 - (mask & (uint64_t)0x7ffffffffffedU); - uint64_t f1_ = f11 - (mask & (uint64_t)0x7ffffffffffffU); - uint64_t f2_ = f21 - (mask & (uint64_t)0x7ffffffffffffU); - uint64_t f3_ = f31 - (mask & (uint64_t)0x7ffffffffffffU); - uint64_t f4_ = f41 - (mask & (uint64_t)0x7ffffffffffffU); - uint64_t f02 = f0_; - uint64_t f12 = f1_; - uint64_t f22 = f2_; - uint64_t f32 = f3_; - uint64_t f42 = f4_; - uint64_t o00 = f02 | f12 << (uint32_t)51U; - uint64_t o10 = f12 >> (uint32_t)13U | f22 << (uint32_t)38U; - uint64_t o20 = f22 >> (uint32_t)26U | f32 << (uint32_t)25U; - uint64_t o30 = f32 >> (uint32_t)39U | f42 << (uint32_t)12U; - uint64_t o0 = o00; - uint64_t o1 = o10; - uint64_t o2 = o20; - uint64_t o3 = o30; - u64s[0U] = o0; - u64s[1U] = o1; - u64s[2U] = o2; - u64s[3U] = o3; -} - -static inline void -cswap20(uint64_t bit, uint64_t *p1, uint64_t *p2) -{ - uint64_t mask = (uint64_t)0U - bit; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i++) { - uint64_t dummy = mask & (p1[i] ^ p2[i]); - p1[i] = p1[i] ^ dummy; - p2[i] = p2[i] ^ dummy; - } -} - static const uint8_t g25519[32U] = { (uint8_t)9U }; static void @@ -664,35 +39,35 @@ point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, FStar_UInt128_uint128 *tmp uint64_t *b = tmp1 + (uint32_t)5U; uint64_t *ab = tmp1; uint64_t *dc = tmp1 + (uint32_t)10U; - fadd0(a, x2, z2); - fsub0(b, x2, z2); + Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); + Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); uint64_t *x3 = nq_p1; uint64_t *z31 = nq_p1 + (uint32_t)5U; uint64_t *d0 = dc; uint64_t *c0 = dc + (uint32_t)5U; - fadd0(c0, x3, z31); - fsub0(d0, x3, z31); - fmul20(dc, dc, ab); - fadd0(x3, d0, c0); - fsub0(z31, d0, c0); + Hacl_Impl_Curve25519_Field51_fadd(c0, x3, z31); + Hacl_Impl_Curve25519_Field51_fsub(d0, x3, z31); + Hacl_Impl_Curve25519_Field51_fmul2(dc, dc, ab, tmp2); + Hacl_Impl_Curve25519_Field51_fadd(x3, d0, c0); + Hacl_Impl_Curve25519_Field51_fsub(z31, d0, c0); uint64_t *a1 = tmp1; uint64_t *b1 = tmp1 + (uint32_t)5U; uint64_t *d = tmp1 + (uint32_t)10U; uint64_t *c = tmp1 + (uint32_t)15U; uint64_t *ab1 = tmp1; uint64_t *dc1 = tmp1 + (uint32_t)10U; - fsqr20(dc1, ab1); - fsqr20(nq_p1, nq_p1); + Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab1, tmp2); + Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, nq_p1, tmp2); a1[0U] = c[0U]; a1[1U] = c[1U]; a1[2U] = c[2U]; a1[3U] = c[3U]; a1[4U] = c[4U]; - fsub0(c, d, c); - fmul1(b1, c, (uint64_t)121665U); - fadd0(b1, b1, d); - fmul20(nq, dc1, ab1); - fmul0(z3, z3, x1); + Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + Hacl_Impl_Curve25519_Field51_fmul1(b1, c, (uint64_t)121665U); + Hacl_Impl_Curve25519_Field51_fadd(b1, b1, d); + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); + Hacl_Impl_Curve25519_Field51_fmul(z3, z3, x1, tmp2); } static void @@ -706,18 +81,18 @@ point_double(uint64_t *nq, uint64_t *tmp1, FStar_UInt128_uint128 *tmp2) uint64_t *c = tmp1 + (uint32_t)15U; uint64_t *ab = tmp1; 
uint64_t *dc = tmp1 + (uint32_t)10U; - fadd0(a, x2, z2); - fsub0(b, x2, z2); - fsqr20(dc, ab); + Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); + Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + Hacl_Impl_Curve25519_Field51_fsqr2(dc, ab, tmp2); a[0U] = c[0U]; a[1U] = c[1U]; a[2U] = c[2U]; a[3U] = c[3U]; a[4U] = c[4U]; - fsub0(c, d, c); - fmul1(b, c, (uint64_t)121665U); - fadd0(b, b, d); - fmul20(nq, dc, ab); + Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + Hacl_Impl_Curve25519_Field51_fmul1(b, c, (uint64_t)121665U); + Hacl_Impl_Curve25519_Field51_fadd(b, b, d); + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc, ab, tmp2); } static void @@ -731,7 +106,7 @@ montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) uint64_t *p01 = p01_tmp1_swap; uint64_t *p03 = p01; uint64_t *p11 = p01 + (uint32_t)10U; - memcpy(p11, init, (uint32_t)10U * sizeof(init[0U])); + memcpy(p11, init, (uint32_t)10U * sizeof(uint64_t)); uint64_t *x0 = p03; uint64_t *z0 = p03 + (uint32_t)5U; x0[0U] = (uint64_t)1U; @@ -749,7 +124,7 @@ montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) uint64_t *nq1 = p01_tmp1_swap; uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)10U; uint64_t *swap = p01_tmp1_swap + (uint32_t)40U; - cswap20((uint64_t)1U, nq1, nq_p11); + Hacl_Impl_Curve25519_Field51_cswap2((uint64_t)1U, nq1, nq_p11); point_add_and_double(init, p01_tmp11, tmp2); swap[0U] = (uint64_t)1U; for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) { @@ -761,26 +136,26 @@ montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) bit = (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); uint64_t sw = swap1[0U] ^ bit; - cswap20(sw, nq2, nq_p12); + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq2, nq_p12); point_add_and_double(init, p01_tmp12, tmp2); swap1[0U] = bit; } uint64_t sw = swap[0U]; - cswap20(sw, nq1, nq_p11); + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); uint64_t *nq10 = p01_tmp1; uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; point_double(nq10, tmp1, tmp2); point_double(nq10, tmp1, tmp2); point_double(nq10, tmp1, tmp2); - memcpy(out, p0, (uint32_t)10U * sizeof(p0[0U])); + memcpy(out, p0, (uint32_t)10U * sizeof(uint64_t)); } static void fsquare_times(uint64_t *o, uint64_t *inp, FStar_UInt128_uint128 *tmp, uint32_t n) { - fsqr0(o, inp); + Hacl_Impl_Curve25519_Field51_fsqr(o, inp, tmp); for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { - fsqr0(o, o); + Hacl_Impl_Curve25519_Field51_fsqr(o, o, tmp); } } @@ -788,35 +163,42 @@ static void finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp) { uint64_t t1[20U] = { 0U }; - uint64_t *a = t1; - uint64_t *b = t1 + (uint32_t)5U; - uint64_t *c = t1 + (uint32_t)10U; - uint64_t *t00 = t1 + (uint32_t)15U; + uint64_t *a1 = t1; + uint64_t *b1 = t1 + (uint32_t)5U; + uint64_t *t010 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp10 = tmp; + fsquare_times(a1, i, tmp10, (uint32_t)1U); + fsquare_times(t010, a1, tmp10, (uint32_t)2U); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, i, tmp); + Hacl_Impl_Curve25519_Field51_fmul(a1, b1, a1, tmp); + fsquare_times(t010, a1, tmp10, (uint32_t)1U); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); + fsquare_times(t010, b1, tmp10, (uint32_t)5U); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); + uint64_t *b10 = t1 + (uint32_t)5U; + uint64_t *c10 = t1 + (uint32_t)10U; + uint64_t *t011 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp11 = tmp; + fsquare_times(t011, b10, tmp11, (uint32_t)10U); + Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); + 
fsquare_times(t011, c10, tmp11, (uint32_t)20U); + Hacl_Impl_Curve25519_Field51_fmul(t011, t011, c10, tmp); + fsquare_times(t011, t011, tmp11, (uint32_t)10U); + Hacl_Impl_Curve25519_Field51_fmul(b10, t011, b10, tmp); + fsquare_times(t011, b10, tmp11, (uint32_t)50U); + Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); + uint64_t *b11 = t1 + (uint32_t)5U; + uint64_t *c1 = t1 + (uint32_t)10U; + uint64_t *t01 = t1 + (uint32_t)15U; FStar_UInt128_uint128 *tmp1 = tmp; - fsquare_times(a, i, tmp1, (uint32_t)1U); - fsquare_times(t00, a, tmp1, (uint32_t)2U); - fmul0(b, t00, i); - fmul0(a, b, a); - fsquare_times(t00, a, tmp1, (uint32_t)1U); - fmul0(b, t00, b); - fsquare_times(t00, b, tmp1, (uint32_t)5U); - fmul0(b, t00, b); - fsquare_times(t00, b, tmp1, (uint32_t)10U); - fmul0(c, t00, b); - fsquare_times(t00, c, tmp1, (uint32_t)20U); - fmul0(t00, t00, c); - fsquare_times(t00, t00, tmp1, (uint32_t)10U); - fmul0(b, t00, b); - fsquare_times(t00, b, tmp1, (uint32_t)50U); - fmul0(c, t00, b); - fsquare_times(t00, c, tmp1, (uint32_t)100U); - fmul0(t00, t00, c); - fsquare_times(t00, t00, tmp1, (uint32_t)50U); - fmul0(t00, t00, b); - fsquare_times(t00, t00, tmp1, (uint32_t)5U); - uint64_t *a0 = t1; + fsquare_times(t01, c1, tmp1, (uint32_t)100U); + Hacl_Impl_Curve25519_Field51_fmul(t01, t01, c1, tmp); + fsquare_times(t01, t01, tmp1, (uint32_t)50U); + Hacl_Impl_Curve25519_Field51_fmul(t01, t01, b11, tmp); + fsquare_times(t01, t01, tmp1, (uint32_t)5U); + uint64_t *a = t1; uint64_t *t0 = t1 + (uint32_t)15U; - fmul0(o, t0, a0); + Hacl_Impl_Curve25519_Field51_fmul(o, t0, a, tmp); } static void @@ -830,8 +212,8 @@ encode_point(uint8_t *o, uint64_t *i) for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i) tmp_w[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); finv(tmp, z, tmp_w); - fmul0(tmp, tmp, x); - store_felem(u64s, tmp); + Hacl_Impl_Curve25519_Field51_fmul(tmp, tmp, x, tmp_w); + Hacl_Impl_Curve25519_Field51_store_felem(u64s, tmp); for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) { store64_le(o + i0 * (uint32_t)8U, u64s[i0]); } diff --git a/lib/freebl/verified/Hacl_Curve25519_51.h b/lib/freebl/verified/Hacl_Curve25519_51.h index 05050739cf..dade9637b2 100644 --- a/lib/freebl/verified/Hacl_Curve25519_51.h +++ b/lib/freebl/verified/Hacl_Curve25519_51.h @@ -21,15 +21,20 @@ * SOFTWARE. */ +#ifndef __Hacl_Curve25519_51_H +#define __Hacl_Curve25519_51_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Curve25519_51_H -#define __Hacl_Curve25519_51_H - #include "Hacl_Kremlib.h" +#include "Hacl_Bignum25519_51.h" void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub); @@ -37,5 +42,9 @@ void Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv); bool Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Curve25519_51_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Kremlib.h b/lib/freebl/verified/Hacl_Kremlib.h index a2116220ff..1b47ca3b17 100644 --- a/lib/freebl/verified/Hacl_Kremlib.h +++ b/lib/freebl/verified/Hacl_Kremlib.h @@ -21,13 +21,21 @@ * SOFTWARE. 
*/ +#ifndef __Hacl_Kremlib_H +#define __Hacl_Kremlib_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Kremlib_H -#define __Hacl_Kremlib_H +static inline uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b); + +static inline uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b); static inline uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b); @@ -47,5 +55,9 @@ static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Kremlib_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Poly1305_128.c b/lib/freebl/verified/Hacl_Poly1305_128.c index 7fbd7fc4b7..963068d428 100644 --- a/lib/freebl/verified/Hacl_Poly1305_128.c +++ b/lib/freebl/verified/Hacl_Poly1305_128.c @@ -29,9 +29,9 @@ Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, Lib_IntVector_Intrinsics_vec128 e[5U]; for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec128_zero; - Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(b); + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(b); Lib_IntVector_Intrinsics_vec128 - b2 = Lib_IntVector_Intrinsics_vec128_load_le(b + (uint32_t)16U); + b2 = Lib_IntVector_Intrinsics_vec128_load64_le(b + (uint32_t)16U); Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); Lib_IntVector_Intrinsics_vec128 hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); Lib_IntVector_Intrinsics_vec128 @@ -803,9 +803,9 @@ Hacl_Poly1305_128_poly1305_update( Lib_IntVector_Intrinsics_vec128 e[5U]; for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec128_zero; - Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(block); + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); Lib_IntVector_Intrinsics_vec128 - b2 = Lib_IntVector_Intrinsics_vec128_load_le(block + (uint32_t)16U); + b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); Lib_IntVector_Intrinsics_vec128 hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); @@ -1236,7 +1236,7 @@ Hacl_Poly1305_128_poly1305_update( for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec128_zero; uint8_t tmp[16U] = { 0U }; - memcpy(tmp, last, rem * sizeof(last[0U])); + memcpy(tmp, last, rem * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); uint64_t lo = u0; uint64_t u = load64_le(tmp + (uint32_t)8U); diff --git a/lib/freebl/verified/Hacl_Poly1305_128.h b/lib/freebl/verified/Hacl_Poly1305_128.h index 8e7cdc74dd..49171ddcc1 100644 --- a/lib/freebl/verified/Hacl_Poly1305_128.h +++ b/lib/freebl/verified/Hacl_Poly1305_128.h @@ -21,15 +21,19 @@ * SOFTWARE. 
*/ +#ifndef __Hacl_Poly1305_128_H +#define __Hacl_Poly1305_128_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "libintvector.h" #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Poly1305_128_H -#define __Hacl_Poly1305_128_H - #include "Hacl_Kremlib.h" void @@ -62,5 +66,9 @@ Hacl_Poly1305_128_poly1305_finish( void Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Poly1305_128_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Poly1305_256.c b/lib/freebl/verified/Hacl_Poly1305_256.c index 6fddf86af7..6f5bffd977 100644 --- a/lib/freebl/verified/Hacl_Poly1305_256.c +++ b/lib/freebl/verified/Hacl_Poly1305_256.c @@ -29,9 +29,9 @@ Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, Lib_IntVector_Intrinsics_vec256 e[5U]; for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(b); + Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(b); Lib_IntVector_Intrinsics_vec256 - hi = Lib_IntVector_Intrinsics_vec256_load_le(b + (uint32_t)32U); + hi = Lib_IntVector_Intrinsics_vec256_load64_le(b + (uint32_t)32U); Lib_IntVector_Intrinsics_vec256 mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); Lib_IntVector_Intrinsics_vec256 m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); @@ -1272,9 +1272,9 @@ Hacl_Poly1305_256_poly1305_update( Lib_IntVector_Intrinsics_vec256 e[5U]; for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(block); + Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); Lib_IntVector_Intrinsics_vec256 - hi = Lib_IntVector_Intrinsics_vec256_load_le(block + (uint32_t)32U); + hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); Lib_IntVector_Intrinsics_vec256 mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); Lib_IntVector_Intrinsics_vec256 @@ -1707,7 +1707,7 @@ Hacl_Poly1305_256_poly1305_update( for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) e[_i] = Lib_IntVector_Intrinsics_vec256_zero; uint8_t tmp[16U] = { 0U }; - memcpy(tmp, last, rem * sizeof(last[0U])); + memcpy(tmp, last, rem * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); uint64_t lo = u0; uint64_t u = load64_le(tmp + (uint32_t)8U); diff --git a/lib/freebl/verified/Hacl_Poly1305_256.h b/lib/freebl/verified/Hacl_Poly1305_256.h index 9d5ff5728b..62a2ca0027 100644 --- a/lib/freebl/verified/Hacl_Poly1305_256.h +++ b/lib/freebl/verified/Hacl_Poly1305_256.h @@ -21,15 +21,19 @@ * SOFTWARE. 
*/ +#ifndef __Hacl_Poly1305_256_H +#define __Hacl_Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "libintvector.h" #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Poly1305_256_H -#define __Hacl_Poly1305_256_H - #include "Hacl_Kremlib.h" void @@ -62,5 +66,9 @@ Hacl_Poly1305_256_poly1305_finish( void Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Poly1305_256_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Poly1305_32.c b/lib/freebl/verified/Hacl_Poly1305_32.c index b5b118333e..25ee87c1f7 100644 --- a/lib/freebl/verified/Hacl_Poly1305_32.c +++ b/lib/freebl/verified/Hacl_Poly1305_32.c @@ -340,7 +340,7 @@ Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text) uint8_t *last = text + nb * (uint32_t)16U; uint64_t e[5U] = { 0U }; uint8_t tmp[16U] = { 0U }; - memcpy(tmp, last, rem * sizeof(last[0U])); + memcpy(tmp, last, rem * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); uint64_t lo = u0; uint64_t u = load64_le(tmp + (uint32_t)8U); diff --git a/lib/freebl/verified/Hacl_Poly1305_32.h b/lib/freebl/verified/Hacl_Poly1305_32.h index 442b5db429..c552d6f42e 100644 --- a/lib/freebl/verified/Hacl_Poly1305_32.h +++ b/lib/freebl/verified/Hacl_Poly1305_32.h @@ -21,14 +21,18 @@ * SOFTWARE. */ +#ifndef __Hacl_Poly1305_32_H +#define __Hacl_Poly1305_32_H + +#if defined(__cplusplus) +extern "C" { +#endif + #include "kremlin/internal/types.h" #include "kremlin/lowstar_endianness.h" #include #include -#ifndef __Hacl_Poly1305_32_H -#define __Hacl_Poly1305_32_H - #include "Hacl_Kremlib.h" extern uint32_t Hacl_Poly1305_32_blocklen; @@ -45,5 +49,9 @@ void Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx) void Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); +#if defined(__cplusplus) +} +#endif + #define __Hacl_Poly1305_32_H_DEFINED #endif diff --git a/lib/freebl/verified/kremlin/include/kremlin/internal/target.h b/lib/freebl/verified/kremlin/include/kremlin/internal/target.h index 25f0fd0ac4..0affdaa809 100644 --- a/lib/freebl/verified/kremlin/include/kremlin/internal/target.h +++ b/lib/freebl/verified/kremlin/include/kremlin/internal/target.h @@ -26,6 +26,8 @@ (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ (!(defined KRML_HOST_EPRINTF))) #define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__) +#elif !(defined KRML_HOST_EPRINTF) && defined(_MSC_VER) +#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__) #endif #ifndef KRML_HOST_EXIT diff --git a/lib/freebl/verified/kremlin/include/kremlin/internal/types.h b/lib/freebl/verified/kremlin/include/kremlin/internal/types.h index 4654b8c017..2c966cb54b 100644 --- a/lib/freebl/verified/kremlin/include/kremlin/internal/types.h +++ b/lib/freebl/verified/kremlin/include/kremlin/internal/types.h @@ -54,14 +54,14 @@ typedef const char *Prims_string; /* This code makes a number of assumptions and should be refined. In particular, * it assumes that: any non-MSVC amd64 compiler supports int128. Maybe it would * be easier to just test for defined(__SIZEOF_INT128__) only? 
*/ -#if (defined(__x86_64__) || \ - defined(__x86_64) || \ - defined(__aarch64__) || \ - (defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)) || \ - defined(__s390x__) || \ - (defined(_MSC_VER) && !defined(_M_X64) && defined(__clang__)) || \ - (defined(__mips__) && defined(__LP64__)) || \ - (defined(__riscv) && __riscv_xlen == 64) || \ +#if (defined(__x86_64__) || \ + defined(__x86_64) || \ + defined(__aarch64__) || \ + (defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)) || \ + defined(__s390x__) || \ + (defined(_MSC_VER) && defined(_M_X64) && defined(__clang__)) || \ + (defined(__mips__) && defined(__LP64__)) || \ + (defined(__riscv) && __riscv_xlen == 64) || \ defined(__SIZEOF_INT128__)) #define HAS_INT128 1 #endif @@ -87,6 +87,12 @@ typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t; #include "kremlin/lowstar_endianness.h" +#endif + +/* Avoid a circular loop: if this header is included via FStar_UInt8_16_32_64, + * then don't bring the uint128 definitions into scope. */ +#ifndef __FStar_UInt_8_16_32_64_H + #if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) #include "fstar_uint128_msvc.h" #elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h index 6f254639f1..57b9b7156e 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h @@ -3,6 +3,8 @@ Licensed under the Apache 2.0 License. */ +#ifndef __FStar_UInt128_H +#define __FStar_UInt128_H #include #include #include "kremlin/internal/compat.h" @@ -10,9 +12,6 @@ #include "kremlin/internal/types.h" #include "kremlin/internal/target.h" -#ifndef __FStar_UInt128_H -#define __FStar_UInt128_H - static inline FStar_UInt128_uint128 FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h index a7da435c31..a5de037514 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h @@ -3,14 +3,13 @@ Licensed under the Apache 2.0 License. */ +#ifndef __FStar_UInt128_Verified_H +#define __FStar_UInt128_Verified_H #include #include #include "kremlin/internal/types.h" #include "kremlin/internal/target.h" -#ifndef __FStar_UInt128_Verified_H -#define __FStar_UInt128_Verified_H - #include "FStar_UInt_8_16_32_64.h" static inline uint64_t diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h index 809ea58ac2..08884599c9 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h @@ -3,6 +3,8 @@ Licensed under the Apache 2.0 License. 
*/ +#ifndef __FStar_UInt_8_16_32_64_H +#define __FStar_UInt_8_16_32_64_H #include #include #include "kremlin/internal/compat.h" @@ -10,9 +12,6 @@ #include "kremlin/internal/types.h" #include "kremlin/internal/target.h" -#ifndef __FStar_UInt_8_16_32_64_H -#define __FStar_UInt_8_16_32_64_H - extern Prims_int FStar_UInt64_n; extern bool FStar_UInt64_uu___is_Mk(uint64_t projectee); @@ -51,13 +50,13 @@ FStar_UInt64_gte_mask(uint64_t a, uint64_t b) return x_xor_q_ - (uint64_t)1U; } -extern Prims_string FStar_UInt64_to_string(uint64_t uu____888); +extern Prims_string FStar_UInt64_to_string(uint64_t uu___); -extern Prims_string FStar_UInt64_to_string_hex(uint64_t uu____899); +extern Prims_string FStar_UInt64_to_string_hex(uint64_t uu___); -extern Prims_string FStar_UInt64_to_string_hex_pad(uint64_t uu____910); +extern Prims_string FStar_UInt64_to_string_hex_pad(uint64_t uu___); -extern uint64_t FStar_UInt64_of_string(Prims_string uu____921); +extern uint64_t FStar_UInt64_of_string(Prims_string uu___); extern Prims_int FStar_UInt32_n; @@ -97,13 +96,13 @@ FStar_UInt32_gte_mask(uint32_t a, uint32_t b) return x_xor_q_ - (uint32_t)1U; } -extern Prims_string FStar_UInt32_to_string(uint32_t uu____888); +extern Prims_string FStar_UInt32_to_string(uint32_t uu___); -extern Prims_string FStar_UInt32_to_string_hex(uint32_t uu____899); +extern Prims_string FStar_UInt32_to_string_hex(uint32_t uu___); -extern Prims_string FStar_UInt32_to_string_hex_pad(uint32_t uu____910); +extern Prims_string FStar_UInt32_to_string_hex_pad(uint32_t uu___); -extern uint32_t FStar_UInt32_of_string(Prims_string uu____921); +extern uint32_t FStar_UInt32_of_string(Prims_string uu___); extern Prims_int FStar_UInt16_n; @@ -143,13 +142,13 @@ FStar_UInt16_gte_mask(uint16_t a, uint16_t b) return x_xor_q_ - (uint16_t)1U; } -extern Prims_string FStar_UInt16_to_string(uint16_t uu____888); +extern Prims_string FStar_UInt16_to_string(uint16_t uu___); -extern Prims_string FStar_UInt16_to_string_hex(uint16_t uu____899); +extern Prims_string FStar_UInt16_to_string_hex(uint16_t uu___); -extern Prims_string FStar_UInt16_to_string_hex_pad(uint16_t uu____910); +extern Prims_string FStar_UInt16_to_string_hex_pad(uint16_t uu___); -extern uint16_t FStar_UInt16_of_string(Prims_string uu____921); +extern uint16_t FStar_UInt16_of_string(Prims_string uu___); extern Prims_int FStar_UInt8_n; @@ -189,13 +188,13 @@ FStar_UInt8_gte_mask(uint8_t a, uint8_t b) return x_xor_q_ - (uint8_t)1U; } -extern Prims_string FStar_UInt8_to_string(uint8_t uu____888); +extern Prims_string FStar_UInt8_to_string(uint8_t uu___); -extern Prims_string FStar_UInt8_to_string_hex(uint8_t uu____899); +extern Prims_string FStar_UInt8_to_string_hex(uint8_t uu___); -extern Prims_string FStar_UInt8_to_string_hex_pad(uint8_t uu____910); +extern Prims_string FStar_UInt8_to_string_hex_pad(uint8_t uu___); -extern uint8_t FStar_UInt8_of_string(Prims_string uu____921); +extern uint8_t FStar_UInt8_of_string(Prims_string uu___); typedef uint8_t FStar_UInt8_byte; diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h index a6bff78d99..6d86cd5841 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h @@ -3,6 +3,8 @@ Licensed under the Apache 2.0 License. 
*/ +#ifndef __LowStar_Endianness_H +#define __LowStar_Endianness_H #include #include #include "kremlin/internal/compat.h" @@ -10,11 +12,6 @@ #include "kremlin/internal/types.h" #include "kremlin/internal/target.h" -#ifndef __LowStar_Endianness_H -#define __LowStar_Endianness_H - -#include "FStar_UInt128.h" - static inline void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1); static inline FStar_UInt128_uint128 load128_le(uint8_t *x0); diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h index 5372de42a7..441928def7 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h @@ -20,6 +20,9 @@ * such, it assumes that the machine integers have been bundled the exact same * way in both cases. */ +#ifndef FSTAR_UINT128_GCC64 +#define FSTAR_UINT128_GCC64 + #include "FStar_UInt128.h" #include "FStar_UInt_8_16_32_64.h" #include "LowStar_Endianness.h" @@ -218,3 +221,5 @@ FStar_UInt128_mul32(uint64_t x, uint32_t y) { return (uint128_t)x * (uint128_t)y; } + +#endif diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h index fca0e2d962..73addd3a2c 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h @@ -7,6 +7,10 @@ * F* version: 15104ff8 * KreMLin version: 318b7fa8 */ + +#ifndef FSTAR_UINT128_MSVC +#define FSTAR_UINT128_MSVC + #include "kremlin/internal/types.h" #include "FStar_UInt128.h" #include "FStar_UInt_8_16_32_64.h" @@ -526,3 +530,8 @@ FStar_UInt128_mul_wide(uint64_t x, uint64_t y) return FStar_UInt128_mul_wide_impl(x, y); #endif } + +#undef low +#undef high + +#endif diff --git a/lib/freebl/verified/libintvector.h b/lib/freebl/verified/libintvector.h index 24a2217860..e43be92b9e 100644 --- a/lib/freebl/verified/libintvector.h +++ b/lib/freebl/verified/libintvector.h @@ -3,6 +3,23 @@ #include +// # DEBUGGING FLAGS +// ================= +// It is possible to debug the trace of the primitives defined in +// this file by using the [DEBUG_VECTOR_TRACE] C flag. +// As we use the same vector types to manipulate blocks of uint32 and blocks +// of uint64, the log results will vary with the endianess, in particular for +// some generic operations like [and] or [xor]. By default, the printing is +// performed as if we were manipulating blocks of uint32. If you want to +// switch to blocks of uint64, use the flag: [DEBUG_VECTOR_TRACE_ELEMENTS_64]. +// Note that if those flags are activated, it may be necessary to tweak a bit +// the compilation options to build HACL. More specifically, you may need to +// always activate the compiler options to use vector support (even for files +// which actually don't make use of vectors, if they have libintvector.h as +// a dependency). When comparing traces, note that some instructions are not +// compiled in the same order on the different platforms, but it doesn't lead +// to a lot of discrepancies in practice. 
+ #define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1) #if defined(__x86_64__) || defined(_M_X64) @@ -96,10 +113,16 @@ typedef __m128i Lib_IntVector_Intrinsics_vec128; #define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2 * x1 + 3) % 4, (2 * x1 + 2) % 4, (2 * x1 + 1) % 4, (2 * x1) % 4))) -#define Lib_IntVector_Intrinsics_vec128_load_le(x0) \ +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ (_mm_loadu_si128((__m128i*)(x0))) -#define Lib_IntVector_Intrinsics_vec128_store_le(x0, x1) \ +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ (_mm_storeu_si128((__m128i*)(x0), x1)) #define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ @@ -295,7 +318,10 @@ typedef __m256i Lib_IntVector_Intrinsics_vec256; #define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \ (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4))) -#define Lib_IntVector_Intrinsics_vec256_load_le(x0) \ +#define Lib_IntVector_Intrinsics_vec256_load32_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load64_le(x0) \ (_mm256_loadu_si256((__m256i*)(x0))) #define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \ @@ -304,7 +330,10 @@ typedef __m256i Lib_IntVector_Intrinsics_vec256; #define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \ (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) -#define Lib_IntVector_Intrinsics_vec256_store_le(x0, x1) \ +#define Lib_IntVector_Intrinsics_vec256_store32_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store64_le(x0, x1) \ (_mm256_storeu_si256((__m256i*)(x0), x1)) #define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \ @@ -394,7 +423,7 @@ typedef __m256i Lib_IntVector_Intrinsics_vec256; #define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \ (_mm256_permute2x128_si256(x1, x2, 0x31)) -#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +#elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && !defined(__ARM_32BIT_STATE) #include typedef uint32x4_t Lib_IntVector_Intrinsics_vec128; @@ -473,10 +502,16 @@ typedef uint32x4_t Lib_IntVector_Intrinsics_vec128; (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2))) */ -#define Lib_IntVector_Intrinsics_vec128_load_le(x0) \ +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (vld1q_u32((const uint32_t*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ (vld1q_u32((const uint32_t*)(x0))) -#define Lib_IntVector_Intrinsics_vec128_store_le(x0, x1) \ +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (x1))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ (vst1q_u32((uint32_t*)(x0), (x1))) /* @@ -582,5 +617,155 @@ Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, u #define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) -#endif +// IBM z architecture +#elif defined(__s390x__) // this flag is for GCC only + +#include + +// The main 
vector 128 type +// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, +// unsigned int, unsigned long long: the compiler complains that the parameter +// combination is invalid. +typedef unsigned char vector128_8 __attribute__((vector_size(16))); +typedef unsigned int vector128_32 __attribute__((vector_size(16))); +typedef unsigned long long vector128_64 __attribute__((vector_size(16))); + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +// Small helper to change the endianess of the vector's elements, seen as uint32. +// Note that we can't use vec_revb. +#define Lib_IntVector_Intrinsics_vec128_load_store_switch_endian32(x0) \ + ((vector128)(vec_perm((vector128_8)(x0), (vector128_8){}, \ + (vector128_8){ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }))) + +// Small helper to change the endianess of the vector's elements, seen as uint64 +// Note that we can't use vec_revb. +#define Lib_IntVector_Intrinsics_vec128_load_store_switch_endian64(x0) \ + ((vector128)(vec_perm((vector128_8)(x0), (vector128_8){}, \ + (vector128_8){ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }))) + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + ((vector128)Lib_IntVector_Intrinsics_vec128_load_store_switch_endian32( \ + ((vector128_8)vec_load_len((const uint8_t*)(x), 16)))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + ((vector128)Lib_IntVector_Intrinsics_vec128_load_store_switch_endian64( \ + ((vector128_8)vec_load_len((const uint8_t*)(x), 16)))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vec_store_len(((vector128_8)Lib_IntVector_Intrinsics_vec128_load_store_switch_endian32(x1)), \ + ((uint8_t*)(x0)), (uint32_t)16)) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vec_store_len(((vector128_8)Lib_IntVector_Intrinsics_vec128_load_store_switch_endian64(x1)), \ + ((uint8_t*)(x0)), (uint32_t)16)) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), 
+#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
+    ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1)))))
+
+#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
+    ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1)))))
+
+#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
+    ((vector128)(vec_and((vector128)(x0), (vector128)(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
+    ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1)))))
+
+#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
+    ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1)))))
+
+#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
+    ((unsigned int)(vec_extract((vector128_32)(x0), x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
+    ((unsigned long long)(vec_extract((vector128_64)(x0), x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
+    ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1)))))
+
+#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
+    ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1)))))
+
+#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
+    ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2)))
+
+#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
+    ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2)))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \
+    ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \
+    ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \
+    ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \
+    ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_load32(x) \
+    ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \
+                                 (unsigned int)(x), (unsigned int)(x) }))
+
+#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \
+    ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) }))
+
+#define Lib_IntVector_Intrinsics_vec128_load64(x) \
+    ((vector128)((vector128_64)vec_load_pair((unsigned long long)(x), (unsigned long long)(x))))
+
+#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
+    ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1))))
+
+// We need to permute the low and high components of the uint64
+// before calling vec_mule. The following helper does that.
+#define Lib_IntVector_Intrinsics_vec128_mul64_perm_low_high_(x0) \
+    ((vector128)(vec_perm((vector128_8)(x0), (vector128_8){}, \
+                          (vector128_8){ 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 })))
+
+#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
+    ((vector128)(vec_mule((vector128_32)Lib_IntVector_Intrinsics_vec128_mul64_perm_low_high_(x0), \
+                          (vector128_32)Lib_IntVector_Intrinsics_vec128_mul64_perm_low_high_(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
+    ((vector128)(vec_or((vector128)(x0), (vector128)(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
+    ((vector128)(vec_rli((vector128_32)(x0), (unsigned long)(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
+    (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
+    ((vector128)(vec_perm((vector128)(x0), (vector128){}, (vector128_8){ \
+        (x1 % 4) * 4 + 0, (x1 % 4) * 4 + 1, (x1 % 4) * 4 + 2, (x1 % 4) * 4 + 3, \
+        ((x1 + 1) % 4) * 4 + 0, ((x1 + 1) % 4) * 4 + 1, ((x1 + 1) % 4) * 4 + 2, ((x1 + 1) % 4) * 4 + 3, \
+        ((x1 + 2) % 4) * 4 + 0, ((x1 + 2) % 4) * 4 + 1, ((x1 + 2) % 4) * 4 + 2, ((x1 + 2) % 4) * 4 + 3, \
+        ((x1 + 3) % 4) * 4 + 0, ((x1 + 3) % 4) * 4 + 1, ((x1 + 3) % 4) * 4 + 2, ((x1 + 3) % 4) * 4 + 3 })))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
+    (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \
+     ((vector128)((vector128_64){ 0xffffffffffffffff << (x1), 0xffffffffffffffff << (x1) })))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
+    (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64 - (x1))))) & \
+     ((vector128)((vector128_64){ 0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1) })))
+
+// Doesn't work with vec_splat_u64
+#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
+    ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) }))))
+
+#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
+    ((vector128)((vector128_64)(x0) - (vector128_64)(x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
+    ((vector128)(vec_xor((vector128)(x0), (vector128)(x1))))
+
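Aside (illustrative, not part of the patch): vec_mule multiplies the even-indexed 32-bit elements of its operands, and in a big-endian vector those are the high halves of each 64-bit lane, so both operands are first run through mul64_perm_low_high_ to move the low halves into the even positions. The resulting semantics match the other backends (for example _mm_mul_epu32 on SSE2); a scalar model:

#include <stdint.h>

/* For each 64-bit lane, multiply the low 32 bits of a and b into a full
 * 64-bit product (scalar model of Lib_IntVector_Intrinsics_vec128_mul64). */
static void
mul64_scalar(uint64_t out[2], const uint64_t a[2], const uint64_t b[2])
{
    for (int i = 0; i < 2; i++) {
        out[i] = (uint64_t)(uint32_t)a[i] * (uint64_t)(uint32_t)b[i];
    }
}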
+#define Lib_IntVector_Intrinsics_vec128_zero \
+    ((vector128){})
+
+#endif // IBM z architecture
+
 #endif
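Aside (illustrative, not part of the patch): the point of exposing the same macro surface on every target is that verified callers such as Hacl_Chacha20_Vec128.c and Hacl_Poly1305_128.c compile unchanged on s390x. A sketch of the kind of step they compose from these macros, assuming a vectorized build and the hypothetical include path "libintvector.h":

#include "libintvector.h"

/* One ChaCha-style mixing step on whole vectors of state words:
 * a += b; d ^= a; d <<<= 16. */
static void
mix_step(Lib_IntVector_Intrinsics_vec128 *a,
         Lib_IntVector_Intrinsics_vec128 b,
         Lib_IntVector_Intrinsics_vec128 *d)
{
    *a = Lib_IntVector_Intrinsics_vec128_add32(*a, b);
    *d = Lib_IntVector_Intrinsics_vec128_xor(*d, *a);
    *d = Lib_IntVector_Intrinsics_vec128_rotate_left32(*d, 16U);
}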