From b119c4193d8f374298795dd2bbf5af5d92b6952f Mon Sep 17 00:00:00 2001
From: Franziskus Kiefer
Date: Sat, 21 Dec 2019 07:49:58 +0000
Subject: [PATCH] Bug 1574643 - haclv2 code r=kjacobs

This updates the in-tree version of our existing HACL* code to v2,
replacing what we already have. Once this has landed, NSS can pick up
more (and faster) code from HACL*.

Differential Revision: https://phabricator.services.mozilla.com/D55413

--HG--
extra : moz-landing-system : lando
---
 lib/freebl/verified/FStar.c | 255 ---
 lib/freebl/verified/FStar.h | 69 -
 lib/freebl/verified/Hacl_Chacha20.c | 424 ++---
 lib/freebl/verified/Hacl_Chacha20.h | 104 +-
 .../verified/Hacl_Chacha20Poly1305_128.c | 1247 ++++++++++++
 .../verified/Hacl_Chacha20Poly1305_128.h | 60 +
 .../verified/Hacl_Chacha20Poly1305_32.c | 564 ++++++
 .../verified/Hacl_Chacha20Poly1305_32.h | 59 +
 lib/freebl/verified/Hacl_Chacha20_Vec128.c | 1116 +++++++----
 lib/freebl/verified/Hacl_Chacha20_Vec128.h | 86 +-
 lib/freebl/verified/Hacl_Curve25519.c | 845 ---------
 lib/freebl/verified/Hacl_Curve25519.h | 57 -
 lib/freebl/verified/Hacl_Curve25519_51.c | 933 +++++++++
 lib/freebl/verified/Hacl_Curve25519_51.h | 41 +
 lib/freebl/verified/Hacl_Kremlib.h | 51 +
 lib/freebl/verified/Hacl_Poly1305_128.c | 1678 +++++++++++++++++
 lib/freebl/verified/Hacl_Poly1305_128.h | 66 +
 lib/freebl/verified/Hacl_Poly1305_32.c | 1064 +++++------
 lib/freebl/verified/Hacl_Poly1305_32.h | 118 +-
 lib/freebl/verified/Hacl_Poly1305_64.c | 485 -----
 lib/freebl/verified/Hacl_Poly1305_64.h | 99 -
 lib/freebl/verified/kremlib.h | 672 -------
 lib/freebl/verified/kremlib_base.h | 192 --
 .../include/kremlin/internal/callconv.h | 46 +
 .../kremlin/include/kremlin/internal/compat.h | 32 +
 .../kremlin/include/kremlin/internal/target.h | 113 ++
 .../kremlin/include/kremlin/internal/types.h | 85 +
 .../include/kremlin/lowstar_endianness.h | 242 +++
 .../kremlib/dist/minimal/FStar_UInt128.h | 87 +
 .../dist/minimal/FStar_UInt128_Verified.h | 346 ++++
 .../dist/minimal/FStar_UInt_8_16_32_64.h | 176 ++
 .../kremlib/dist/minimal/LowStar_Endianness.h | 32 +
 .../dist/minimal/fstar_uint128_gcc64.h | 303 +++
 .../kremlib/dist/minimal/fstar_uint128_msvc.h | 528 ++++++
 lib/freebl/verified/libintvector.h | 335 ++++
 lib/freebl/verified/specs/Spec.CTR.fst | 98 -
 lib/freebl/verified/specs/Spec.Chacha20.fst | 169 --
 lib/freebl/verified/specs/Spec.Curve25519.fst | 168 --
 lib/freebl/verified/specs/Spec.Poly1305.fst | 107 --
 lib/freebl/verified/vec128.h | 345 ----
 40 files changed, 8620 insertions(+), 4877 deletions(-)
 delete mode 100644 lib/freebl/verified/FStar.c
 delete mode 100644 lib/freebl/verified/FStar.h
 create mode 100644 lib/freebl/verified/Hacl_Chacha20Poly1305_128.c
 create mode 100644 lib/freebl/verified/Hacl_Chacha20Poly1305_128.h
 create mode 100644 lib/freebl/verified/Hacl_Chacha20Poly1305_32.c
 create mode 100644 lib/freebl/verified/Hacl_Chacha20Poly1305_32.h
 delete mode 100644 lib/freebl/verified/Hacl_Curve25519.c
 delete mode 100644 lib/freebl/verified/Hacl_Curve25519.h
 create mode 100644 lib/freebl/verified/Hacl_Curve25519_51.c
 create mode 100644 lib/freebl/verified/Hacl_Curve25519_51.h
 create mode 100644 lib/freebl/verified/Hacl_Kremlib.h
 create mode 100644 lib/freebl/verified/Hacl_Poly1305_128.c
 create mode 100644 lib/freebl/verified/Hacl_Poly1305_128.h
 delete mode 100644 lib/freebl/verified/Hacl_Poly1305_64.c
 delete mode 100644 lib/freebl/verified/Hacl_Poly1305_64.h
 delete mode 100644 lib/freebl/verified/kremlib.h
 delete mode 100644 lib/freebl/verified/kremlib_base.h
 create mode 100644
lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h create mode 100644 lib/freebl/verified/kremlin/include/kremlin/internal/compat.h create mode 100644 lib/freebl/verified/kremlin/include/kremlin/internal/target.h create mode 100644 lib/freebl/verified/kremlin/include/kremlin/internal/types.h create mode 100644 lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h create mode 100644 lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h create mode 100644 lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h create mode 100644 lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h create mode 100644 lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h create mode 100644 lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h create mode 100644 lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h create mode 100644 lib/freebl/verified/libintvector.h delete mode 100644 lib/freebl/verified/specs/Spec.CTR.fst delete mode 100644 lib/freebl/verified/specs/Spec.Chacha20.fst delete mode 100644 lib/freebl/verified/specs/Spec.Curve25519.fst delete mode 100644 lib/freebl/verified/specs/Spec.Poly1305.fst delete mode 100644 lib/freebl/verified/vec128.h diff --git a/lib/freebl/verified/FStar.c b/lib/freebl/verified/FStar.c deleted file mode 100644 index 74c0318764..0000000000 --- a/lib/freebl/verified/FStar.c +++ /dev/null @@ -1,255 +0,0 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This file was auto-generated by KreMLin! 
*/ - -#include "FStar.h" - -static uint64_t -FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) -{ - return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U; -} - -static uint64_t -FStar_UInt128_carry(uint64_t a, uint64_t b) -{ - return FStar_UInt128_constant_time_carry(a, b); -} - -FStar_UInt128_uint128 -FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ( - (FStar_UInt128_uint128){ - .low = a.low + b.low, - .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); -} - -FStar_UInt128_uint128 -FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ( - (FStar_UInt128_uint128){ - .low = a.low + b.low, - .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); -} - -FStar_UInt128_uint128 -FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ( - (FStar_UInt128_uint128){ - .low = a.low - b.low, - .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); -} - -static FStar_UInt128_uint128 -FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ( - (FStar_UInt128_uint128){ - .low = a.low - b.low, - .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); -} - -FStar_UInt128_uint128 -FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return FStar_UInt128_sub_mod_impl(a, b); -} - -FStar_UInt128_uint128 -FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ((FStar_UInt128_uint128){.low = a.low & b.low, .high = a.high & b.high }); -} - -FStar_UInt128_uint128 -FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ((FStar_UInt128_uint128){.low = a.low ^ b.low, .high = a.high ^ b.high }); -} - -FStar_UInt128_uint128 -FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ((FStar_UInt128_uint128){.low = a.low | b.low, .high = a.high | b.high }); -} - -FStar_UInt128_uint128 -FStar_UInt128_lognot(FStar_UInt128_uint128 a) -{ - return ((FStar_UInt128_uint128){.low = ~a.low, .high = ~a.high }); -} - -static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; - -static uint64_t -FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) -{ - return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); -} - -static uint64_t -FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) -{ - return FStar_UInt128_add_u64_shift_left(hi, lo, s); -} - -static FStar_UInt128_uint128 -FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) -{ - if (s == (uint32_t)0U) - return a; - else - return ( - (FStar_UInt128_uint128){ - .low = a.low << s, - .high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s) }); -} - -static FStar_UInt128_uint128 -FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) -{ - return ((FStar_UInt128_uint128){.low = (uint64_t)0U, .high = a.low << (s - FStar_UInt128_u32_64) }); -} - -FStar_UInt128_uint128 -FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) -{ - if (s < FStar_UInt128_u32_64) - return FStar_UInt128_shift_left_small(a, s); - else - return FStar_UInt128_shift_left_large(a, s); -} - -static uint64_t -FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) -{ - return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); -} - -static uint64_t -FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) -{ - return FStar_UInt128_add_u64_shift_right(hi, lo, s); -} - -static FStar_UInt128_uint128 
-FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) -{ - if (s == (uint32_t)0U) - return a; - else - return ( - (FStar_UInt128_uint128){ - .low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s), - .high = a.high >> s }); -} - -static FStar_UInt128_uint128 -FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) -{ - return ((FStar_UInt128_uint128){.low = a.high >> (s - FStar_UInt128_u32_64), .high = (uint64_t)0U }); -} - -FStar_UInt128_uint128 -FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) -{ - if (s < FStar_UInt128_u32_64) - return FStar_UInt128_shift_right_small(a, s); - else - return FStar_UInt128_shift_right_large(a, s); -} - -FStar_UInt128_uint128 -FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ( - (FStar_UInt128_uint128){ - .low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high), - .high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high) }); -} - -FStar_UInt128_uint128 -FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) -{ - return ( - (FStar_UInt128_uint128){ - .low = (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)), - .high = (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)) }); -} - -FStar_UInt128_uint128 -FStar_UInt128_uint64_to_uint128(uint64_t a) -{ - return ((FStar_UInt128_uint128){.low = a, .high = (uint64_t)0U }); -} - -uint64_t -FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) -{ - return a.low; -} - -static uint64_t FStar_UInt128_u64_l32_mask = (uint64_t)0xffffffffU; - -static uint64_t -FStar_UInt128_u64_mod_32(uint64_t a) -{ - return a & FStar_UInt128_u64_l32_mask; -} - -static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; - -static K___uint64_t_uint64_t_uint64_t_uint64_t -FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y) -{ - return ( - (K___uint64_t_uint64_t_uint64_t_uint64_t){ - .fst = FStar_UInt128_u64_mod_32(x), - .snd = FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)), - .thd = x >> FStar_UInt128_u32_32, - .f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32) }); -} - -static uint64_t -FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) -{ - return lo + (hi << FStar_UInt128_u32_32); -} - -static FStar_UInt128_uint128 -FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) -{ - K___uint64_t_uint64_t_uint64_t_uint64_t scrut = FStar_UInt128_mul_wide_impl_t_(x, y); - uint64_t u1 = scrut.fst; - uint64_t w3 = scrut.snd; - uint64_t x_ = scrut.thd; - uint64_t t_ = scrut.f3; - return ( - (FStar_UInt128_uint128){ - .low = FStar_UInt128_u32_combine_(u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), - w3), - .high = x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + - ((u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_)) >> FStar_UInt128_u32_32) }); -} - -FStar_UInt128_uint128 -FStar_UInt128_mul_wide(uint64_t x, uint64_t y) -{ - return FStar_UInt128_mul_wide_impl(x, y); -} diff --git a/lib/freebl/verified/FStar.h b/lib/freebl/verified/FStar.h deleted file mode 100644 index 56b1c89f85..0000000000 --- a/lib/freebl/verified/FStar.h +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation - 
* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This file was auto-generated by KreMLin! */ -#ifndef __FStar_H -#define __FStar_H - -#include "kremlib_base.h" - -typedef struct -{ - uint64_t low; - uint64_t high; -} FStar_UInt128_uint128; - -typedef FStar_UInt128_uint128 FStar_UInt128_t; - -extern void FStar_UInt128_constant_time_carry_ok(uint64_t x0, uint64_t x1); - -FStar_UInt128_uint128 FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a); - -FStar_UInt128_uint128 FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); - -FStar_UInt128_uint128 FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); - -FStar_UInt128_uint128 FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); - -FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); - -uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); - -typedef struct -{ - uint64_t fst; - uint64_t snd; - uint64_t thd; - uint64_t f3; -} K___uint64_t_uint64_t_uint64_t_uint64_t; - -FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); -#endif diff --git a/lib/freebl/verified/Hacl_Chacha20.c b/lib/freebl/verified/Hacl_Chacha20.c index 38f25166b2..c18421aa01 100644 --- a/lib/freebl/verified/Hacl_Chacha20.c +++ b/lib/freebl/verified/Hacl_Chacha20.c @@ -1,270 +1,270 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation +/* MIT License * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #include "Hacl_Chacha20.h" -static void -Hacl_Lib_LoadStore32_uint32s_from_le_bytes(uint32_t *output, uint8_t *input, uint32_t len) -{ - for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) { - uint8_t *x0 = input + (uint32_t)4U * i; - uint32_t inputi = load32_le(x0); - output[i] = inputi; - } -} - -static void -Hacl_Lib_LoadStore32_uint32s_to_le_bytes(uint8_t *output, uint32_t *input, uint32_t len) -{ - for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) { - uint32_t hd1 = input[i]; - uint8_t *x0 = output + (uint32_t)4U * i; - store32_le(x0, hd1); - } -} - -inline static uint32_t -Hacl_Impl_Chacha20_rotate_left(uint32_t a, uint32_t s) -{ - return a << s | a >> ((uint32_t)32U - s); -} +uint32_t + Hacl_Impl_Chacha20_Vec_chacha20_constants[4U] = + { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U }; inline static void -Hacl_Impl_Chacha20_quarter_round(uint32_t *st, uint32_t a, uint32_t b, uint32_t c, uint32_t d) +Hacl_Impl_Chacha20_Core32_quarter_round( + uint32_t *st, + uint32_t a, + uint32_t b, + uint32_t c, + uint32_t d) { - uint32_t sa = st[a]; - uint32_t sb0 = st[b]; - st[a] = sa + sb0; - uint32_t sd = st[d]; - uint32_t sa10 = st[a]; - uint32_t sda = sd ^ sa10; - st[d] = Hacl_Impl_Chacha20_rotate_left(sda, (uint32_t)16U); - uint32_t sa0 = st[c]; - uint32_t sb1 = st[d]; - st[c] = sa0 + sb1; - uint32_t sd0 = st[b]; - uint32_t sa11 = st[c]; - uint32_t sda0 = sd0 ^ sa11; - st[b] = Hacl_Impl_Chacha20_rotate_left(sda0, (uint32_t)12U); - uint32_t sa2 = st[a]; - uint32_t sb2 = st[b]; - st[a] = sa2 + sb2; - uint32_t sd1 = st[d]; - uint32_t sa12 = st[a]; - uint32_t sda1 = sd1 ^ sa12; - st[d] = Hacl_Impl_Chacha20_rotate_left(sda1, (uint32_t)8U); - uint32_t sa3 = st[c]; - uint32_t sb = st[d]; - st[c] = sa3 + sb; - uint32_t sd2 = st[b]; - uint32_t sa1 = st[c]; - uint32_t sda2 = sd2 ^ sa1; - st[b] = Hacl_Impl_Chacha20_rotate_left(sda2, (uint32_t)7U); + uint32_t sta = st[a]; + uint32_t stb0 = 
st[b]; + uint32_t std0 = st[d]; + uint32_t sta10 = sta + stb0; + uint32_t std10 = std0 ^ sta10; + uint32_t std2 = std10 << (uint32_t)16U | std10 >> (uint32_t)16U; + st[a] = sta10; + st[d] = std2; + uint32_t sta0 = st[c]; + uint32_t stb1 = st[d]; + uint32_t std3 = st[b]; + uint32_t sta11 = sta0 + stb1; + uint32_t std11 = std3 ^ sta11; + uint32_t std20 = std11 << (uint32_t)12U | std11 >> (uint32_t)20U; + st[c] = sta11; + st[b] = std20; + uint32_t sta2 = st[a]; + uint32_t stb2 = st[b]; + uint32_t std4 = st[d]; + uint32_t sta12 = sta2 + stb2; + uint32_t std12 = std4 ^ sta12; + uint32_t std21 = std12 << (uint32_t)8U | std12 >> (uint32_t)24U; + st[a] = sta12; + st[d] = std21; + uint32_t sta3 = st[c]; + uint32_t stb = st[d]; + uint32_t std = st[b]; + uint32_t sta1 = sta3 + stb; + uint32_t std1 = std ^ sta1; + uint32_t std22 = std1 << (uint32_t)7U | std1 >> (uint32_t)25U; + st[c] = sta1; + st[b] = std22; } inline static void -Hacl_Impl_Chacha20_double_round(uint32_t *st) +Hacl_Impl_Chacha20_Core32_double_round(uint32_t *st) { - Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)0U, (uint32_t)4U, (uint32_t)8U, (uint32_t)12U); - Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)1U, (uint32_t)5U, (uint32_t)9U, (uint32_t)13U); - Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)2U, (uint32_t)6U, (uint32_t)10U, (uint32_t)14U); - Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)3U, (uint32_t)7U, (uint32_t)11U, (uint32_t)15U); - Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)0U, (uint32_t)5U, (uint32_t)10U, (uint32_t)15U); - Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)1U, (uint32_t)6U, (uint32_t)11U, (uint32_t)12U); - Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)2U, (uint32_t)7U, (uint32_t)8U, (uint32_t)13U); - Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)3U, (uint32_t)4U, (uint32_t)9U, (uint32_t)14U); + Hacl_Impl_Chacha20_Core32_quarter_round(st, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)8U, + (uint32_t)12U); + Hacl_Impl_Chacha20_Core32_quarter_round(st, + (uint32_t)1U, + (uint32_t)5U, + (uint32_t)9U, + (uint32_t)13U); + Hacl_Impl_Chacha20_Core32_quarter_round(st, + (uint32_t)2U, + (uint32_t)6U, + (uint32_t)10U, + (uint32_t)14U); + Hacl_Impl_Chacha20_Core32_quarter_round(st, + (uint32_t)3U, + (uint32_t)7U, + (uint32_t)11U, + (uint32_t)15U); + Hacl_Impl_Chacha20_Core32_quarter_round(st, + (uint32_t)0U, + (uint32_t)5U, + (uint32_t)10U, + (uint32_t)15U); + Hacl_Impl_Chacha20_Core32_quarter_round(st, + (uint32_t)1U, + (uint32_t)6U, + (uint32_t)11U, + (uint32_t)12U); + Hacl_Impl_Chacha20_Core32_quarter_round(st, + (uint32_t)2U, + (uint32_t)7U, + (uint32_t)8U, + (uint32_t)13U); + Hacl_Impl_Chacha20_Core32_quarter_round(st, + (uint32_t)3U, + (uint32_t)4U, + (uint32_t)9U, + (uint32_t)14U); } inline static void Hacl_Impl_Chacha20_rounds(uint32_t *st) { - for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i = i + (uint32_t)1U) - Hacl_Impl_Chacha20_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); + Hacl_Impl_Chacha20_Core32_double_round(st); } inline static void -Hacl_Impl_Chacha20_sum_states(uint32_t *st, uint32_t *st_) +Hacl_Impl_Chacha20_chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) { + memcpy(k, ctx, (uint32_t)16U * sizeof 
ctx[0U]); + uint32_t ctr_u32 = ctr; + k[12U] = k[12U] + ctr_u32; + Hacl_Impl_Chacha20_rounds(k); for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { - uint32_t xi = st[i]; - uint32_t yi = st_[i]; - st[i] = xi + yi; + uint32_t *os = k; + uint32_t x = k[i] + ctx[i]; + os[i] = x; } + k[12U] = k[12U] + ctr_u32; } -inline static void -Hacl_Impl_Chacha20_copy_state(uint32_t *st, uint32_t *st_) -{ - memcpy(st, st_, (uint32_t)16U * sizeof st_[0U]); -} +static uint32_t + Hacl_Impl_Chacha20_chacha20_constants[4U] = + { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U }; inline static void -Hacl_Impl_Chacha20_chacha20_core(uint32_t *k, uint32_t *st, uint32_t ctr) +Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n1, uint32_t ctr) { - st[12U] = ctr; - Hacl_Impl_Chacha20_copy_state(k, st); - Hacl_Impl_Chacha20_rounds(k); - Hacl_Impl_Chacha20_sum_states(k, st); -} - -inline static void -Hacl_Impl_Chacha20_chacha20_block(uint8_t *stream_block, uint32_t *st, uint32_t ctr) -{ - uint32_t st_[16U] = { 0U }; - Hacl_Impl_Chacha20_chacha20_core(st_, st, ctr); - Hacl_Lib_LoadStore32_uint32s_to_le_bytes(stream_block, st_, (uint32_t)16U); + uint32_t *uu____0 = ctx; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t *os = uu____0; + uint32_t x = Hacl_Impl_Chacha20_chacha20_constants[i]; + os[i] = x; + } + uint32_t *uu____1 = ctx + (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i = i + (uint32_t)1U) { + uint32_t *os = uu____1; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x; + } + ctx[12U] = ctr; + uint32_t *uu____2 = ctx + (uint32_t)13U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i = i + (uint32_t)1U) { + uint32_t *os = uu____2; + uint8_t *bj = n1 + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x; + } } inline static void -Hacl_Impl_Chacha20_init(uint32_t *st, uint8_t *k, uint8_t *n1) +Hacl_Impl_Chacha20_chacha20_encrypt_block( + uint32_t *ctx, + uint8_t *out, + uint32_t incr1, + uint8_t *text) { - uint32_t *stcst = st; - uint32_t *stk = st + (uint32_t)4U; - uint32_t *stc = st + (uint32_t)12U; - uint32_t *stn = st + (uint32_t)13U; - stcst[0U] = (uint32_t)0x61707865U; - stcst[1U] = (uint32_t)0x3320646eU; - stcst[2U] = (uint32_t)0x79622d32U; - stcst[3U] = (uint32_t)0x6b206574U; - Hacl_Lib_LoadStore32_uint32s_from_le_bytes(stk, k, (uint32_t)8U); - stc[0U] = (uint32_t)0U; - Hacl_Lib_LoadStore32_uint32s_from_le_bytes(stn, n1, (uint32_t)3U); -} - -static void -Hacl_Impl_Chacha20_update(uint8_t *output, uint8_t *plain, uint32_t *st, uint32_t ctr) -{ - uint32_t b[48U] = { 0U }; - uint32_t *k = b; - uint32_t *ib = b + (uint32_t)16U; - uint32_t *ob = b + (uint32_t)32U; - Hacl_Impl_Chacha20_chacha20_core(k, st, ctr); - Hacl_Lib_LoadStore32_uint32s_from_le_bytes(ib, plain, (uint32_t)16U); + uint32_t k[16U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_core(k, ctx, incr1); + uint32_t bl[16U] = { 0U }; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + uint32_t *os = bl; + uint8_t *bj = text + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x; + } for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { - uint32_t xi = ib[i]; - uint32_t yi = k[i]; - ob[i] = xi ^ yi; + uint32_t *os = bl; + uint32_t x = bl[i] ^ k[i]; + os[i] = x; + } + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; 
i = i + (uint32_t)1U) { + store32_le(out + i * (uint32_t)4U, bl[i]); } - Hacl_Lib_LoadStore32_uint32s_to_le_bytes(output, ob, (uint32_t)16U); } -static void -Hacl_Impl_Chacha20_update_last( - uint8_t *output, - uint8_t *plain, +inline static void +Hacl_Impl_Chacha20_chacha20_encrypt_last( + uint32_t *ctx, uint32_t len, - uint32_t *st, - uint32_t ctr) + uint8_t *out, + uint32_t incr1, + uint8_t *text) { - uint8_t block[64U] = { 0U }; - Hacl_Impl_Chacha20_chacha20_block(block, st, ctr); - uint8_t *mask = block; - for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) { - uint8_t xi = plain[i]; - uint8_t yi = mask[i]; - output[i] = xi ^ yi; - } + uint8_t plain[64U] = { 0U }; + memcpy(plain, text, len * sizeof text[0U]); + Hacl_Impl_Chacha20_chacha20_encrypt_block(ctx, plain, incr1, plain); + memcpy(out, plain, len * sizeof plain[0U]); } -static void -Hacl_Impl_Chacha20_chacha20_counter_mode_blocks( - uint8_t *output, - uint8_t *plain, - uint32_t num_blocks, - uint32_t *st, - uint32_t ctr) +inline static void +Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text) { - for (uint32_t i = (uint32_t)0U; i < num_blocks; i = i + (uint32_t)1U) { - uint8_t *b = plain + (uint32_t)64U * i; - uint8_t *o = output + (uint32_t)64U * i; - Hacl_Impl_Chacha20_update(o, b, st, ctr + i); + uint32_t rem1 = len % (uint32_t)64U; + uint32_t nb = len / (uint32_t)64U; + uint32_t rem2 = len % (uint32_t)64U; + for (uint32_t i = (uint32_t)0U; i < nb; i = i + (uint32_t)1U) { + Hacl_Impl_Chacha20_chacha20_encrypt_block(ctx, + out + i * (uint32_t)64U, + i, + text + i * (uint32_t)64U); + } + if (rem2 > (uint32_t)0U) { + Hacl_Impl_Chacha20_chacha20_encrypt_last(ctx, + rem1, + out + nb * (uint32_t)64U, + nb, + text + nb * (uint32_t)64U); } } -static void -Hacl_Impl_Chacha20_chacha20_counter_mode( - uint8_t *output, - uint8_t *plain, - uint32_t len, - uint32_t *st, - uint32_t ctr) -{ - uint32_t blocks_len = len >> (uint32_t)6U; - uint32_t part_len = len & (uint32_t)0x3fU; - uint8_t *output_ = output; - uint8_t *plain_ = plain; - uint8_t *output__ = output + (uint32_t)64U * blocks_len; - uint8_t *plain__ = plain + (uint32_t)64U * blocks_len; - Hacl_Impl_Chacha20_chacha20_counter_mode_blocks(output_, plain_, blocks_len, st, ctr); - if (part_len > (uint32_t)0U) - Hacl_Impl_Chacha20_update_last(output__, plain__, part_len, st, ctr + blocks_len); -} - -static void -Hacl_Impl_Chacha20_chacha20( - uint8_t *output, - uint8_t *plain, +void +Hacl_Chacha20_chacha20_encrypt( uint32_t len, - uint8_t *k, + uint8_t *out, + uint8_t *text, + uint8_t *key, uint8_t *n1, uint32_t ctr) { - uint32_t buf[16U] = { 0U }; - uint32_t *st = buf; - Hacl_Impl_Chacha20_init(st, k, n1); - Hacl_Impl_Chacha20_chacha20_counter_mode(output, plain, len, st, ctr); -} - -void -Hacl_Chacha20_chacha20_key_block(uint8_t *block, uint8_t *k, uint8_t *n1, uint32_t ctr) -{ - uint32_t buf[16U] = { 0U }; - uint32_t *st = buf; - Hacl_Impl_Chacha20_init(st, k, n1); - Hacl_Impl_Chacha20_chacha20_block(block, st, ctr); + uint32_t ctx[16U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_init(ctx, key, n1, ctr); + Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, text); } -/* - This function implements Chacha20 - - val chacha20 : - output:uint8_p -> - plain:uint8_p{ disjoint output plain } -> - len:uint32_t{ v len = length output /\ v len = length plain } -> - key:uint8_p{ length key = 32 } -> - nonce:uint8_p{ length nonce = 12 } -> - ctr:uint32_t{ v ctr + length plain / 64 < pow2 32 } -> - Stack unit - (requires - fun h -> live h output /\ 
live h plain /\ live h nonce /\ live h key) - (ensures - fun h0 _ h1 -> - live h1 output /\ live h0 plain /\ modifies_1 output h0 h1 /\ - live h0 nonce /\ - live h0 key /\ - h1.[ output ] == - chacha20_encrypt_bytes h0.[ key ] h0.[ nonce ] (v ctr) h0.[ plain ]) -*/ void -Hacl_Chacha20_chacha20( - uint8_t *output, - uint8_t *plain, +Hacl_Chacha20_chacha20_decrypt( uint32_t len, - uint8_t *k, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, uint8_t *n1, uint32_t ctr) { - Hacl_Impl_Chacha20_chacha20(output, plain, len, k, n1, ctr); + uint32_t ctx[16U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_init(ctx, key, n1, ctr); + Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, cipher); } diff --git a/lib/freebl/verified/Hacl_Chacha20.h b/lib/freebl/verified/Hacl_Chacha20.h index e7876aef79..6a5841804d 100644 --- a/lib/freebl/verified/Hacl_Chacha20.h +++ b/lib/freebl/verified/Hacl_Chacha20.h @@ -1,81 +1,55 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation +/* MIT License * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/ -#include "kremlib.h" +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include + #ifndef __Hacl_Chacha20_H #define __Hacl_Chacha20_H -typedef uint32_t Hacl_Impl_Xor_Lemmas_u32; - -typedef uint8_t Hacl_Impl_Xor_Lemmas_u8; - -typedef uint8_t *Hacl_Lib_LoadStore32_uint8_p; - -typedef uint32_t Hacl_Impl_Chacha20_u32; - -typedef uint32_t Hacl_Impl_Chacha20_h32; - -typedef uint8_t *Hacl_Impl_Chacha20_uint8_p; - -typedef uint32_t *Hacl_Impl_Chacha20_state; - -typedef uint32_t Hacl_Impl_Chacha20_idx; - -typedef struct -{ - void *k; - void *n; -} Hacl_Impl_Chacha20_log_t_; +#include "Hacl_Kremlib.h" -typedef void *Hacl_Impl_Chacha20_log_t; +extern uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U]; -typedef uint32_t Hacl_Lib_Create_h32; - -typedef uint8_t *Hacl_Chacha20_uint8_p; - -typedef uint32_t Hacl_Chacha20_uint32_t; - -void Hacl_Chacha20_chacha20_key_block(uint8_t *block, uint8_t *k, uint8_t *n1, uint32_t ctr); - -/* - This function implements Chacha20 +void +Hacl_Chacha20_chacha20_encrypt( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n1, + uint32_t ctr); - val chacha20 : - output:uint8_p -> - plain:uint8_p{ disjoint output plain } -> - len:uint32_t{ v len = length output /\ v len = length plain } -> - key:uint8_p{ length key = 32 } -> - nonce:uint8_p{ length nonce = 12 } -> - ctr:uint32_t{ v ctr + length plain / 64 < pow2 32 } -> - Stack unit - (requires - fun h -> live h output /\ live h plain /\ live h nonce /\ live h key) - (ensures - fun h0 _ h1 -> - live h1 output /\ live h0 plain /\ modifies_1 output h0 h1 /\ - live h0 nonce /\ - live h0 key /\ - h1.[ output ] == - chacha20_encrypt_bytes h0.[ key ] h0.[ nonce ] (v ctr) h0.[ plain ]) -*/ void -Hacl_Chacha20_chacha20( - uint8_t *output, - uint8_t *plain, +Hacl_Chacha20_chacha20_decrypt( uint32_t len, - uint8_t *k, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, uint8_t *n1, uint32_t ctr); + +#define __Hacl_Chacha20_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c new file mode 100644 index 0000000000..41334e4ce9 --- /dev/null +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c @@ -0,0 +1,1247 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "Hacl_Chacha20Poly1305_128.h" + +inline static void +Hacl_Chacha20Poly1305_128_poly1305_padded_128( + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t len, + uint8_t *text) +{ + uint32_t n1 = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem1 = text + n1 * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec128 *pre0 = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc0 = ctx; + uint32_t sz_block = (uint32_t)32U; + uint32_t len0 = n1 * (uint32_t)16U / sz_block * sz_block; + uint8_t *t00 = blocks; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)32U; + uint8_t *text0 = t00; + Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc0, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t00 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i = i + (uint32_t)1U) { + uint8_t *block = text1 + i * bs; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(block); + Lib_IntVector_Intrinsics_vec128 + b2 = Lib_IntVector_Intrinsics_vec128_load_le(block + (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + f00 = + Lib_IntVector_Intrinsics_vec128_and(lo, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f25 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f25; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f0; + e[1U] = f1; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *rn = pre0 + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec128 *rn5 = pre0 + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec128 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = acc0[0U]; + 
Lib_IntVector_Intrinsics_vec128 f110 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 f120 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 f130 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 f140 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec128 + a11 = + Lib_IntVector_Intrinsics_vec128_add64(a1, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec128 + a21 = + Lib_IntVector_Intrinsics_vec128_add64(a2, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec128 + a31 = + Lib_IntVector_Intrinsics_vec128_add64(a3, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec128 + a41 = + Lib_IntVector_Intrinsics_vec128_add64(a4, + Lib_IntVector_Intrinsics_vec128_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f120)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f140)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec128 t01 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; 
+ Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t01, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c01 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t1, c01); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c11 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c11); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c21 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c21); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c31 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c31); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o00 = tmp01; + Lib_IntVector_Intrinsics_vec128 o10 = tmp11; + Lib_IntVector_Intrinsics_vec128 o20 = tmp2; + Lib_IntVector_Intrinsics_vec128 o30 = tmp3; + Lib_IntVector_Intrinsics_vec128 o40 = tmp4; + acc0[0U] = o00; + acc0[1U] = o10; + acc0[2U] = o20; + acc0[3U] = o30; + acc0[4U] = o40; + Lib_IntVector_Intrinsics_vec128 f100 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20); + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21); + Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22); + Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23); + Lib_IntVector_Intrinsics_vec128 o4 = 
Lib_IntVector_Intrinsics_vec128_add64(f14, f24); + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc0, pre0); + } + uint32_t len1 = n1 * (uint32_t)16U - len0; + uint8_t *t10 = blocks + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem2 = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i = i + (uint32_t)1U) { + uint8_t *block = t10 + i * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre0; + Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + 
Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + 
Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t01, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c01 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t11, c01); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c11 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c11); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c21 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c21); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c31 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c31); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o0 = tmp01; + Lib_IntVector_Intrinsics_vec128 o1 = tmp11; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + if (rem2 > (uint32_t)0U) { + uint8_t *last1 = t10 + nb * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last1, rem2 * sizeof last1[0U]); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + 
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem2 * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 fi = e[rem2 * (uint32_t)8U / (uint32_t)26U]; + e[rem2 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre0; + Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + 
Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t01, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c01 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t11, c01); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + 
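+        /* Carry propagation across the five 26-bit limbs: each limb keeps its low
+         * 26 bits and passes its overflow into the next limb; the carry out of the
+         * top limb is later folded back into limb 0 times 5 (reduction mod 2^130 - 5). */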
Lib_IntVector_Intrinsics_vec128 + c11 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c11); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c21 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c21); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c31 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c31); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o0 = tmp01; + Lib_IntVector_Intrinsics_vec128 o1 = tmp11; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem1, r * sizeof rem1[0U]); + if (r > (uint32_t)0U) { + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + 
Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t0, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c01 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t1, c01); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c11 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c11); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c21 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c21); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c31 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c31); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + 
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o0 = tmp01; + Lib_IntVector_Intrinsics_vec128 o1 = tmp11; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +inline static void +Hacl_Chacha20Poly1305_128_poly1305_do_128( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + Lib_IntVector_Intrinsics_vec128 ctx[25U]; + for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i) + ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_128_poly1305_init(ctx, k); + Hacl_Chacha20Poly1305_128_poly1305_padded_128(ctx, aadlen, aad); + Hacl_Chacha20Poly1305_128_poly1305_padded_128(ctx, mlen, m); + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + 
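+    /* The 16-byte block encoding (aadlen, mlen) is absorbed like any full Poly1305
+     * block: 0x1000000 is bit 24 of limb 4, i.e. bit 4*26 + 24 = 128 of the 130-bit
+     * value, the 2^128 padding bit appended to a complete 16-byte block. */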
Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + 
Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t0, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c01 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t1, c01); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c11 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c11); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c21 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c21); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c31 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c31); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o0 = tmp01; + Lib_IntVector_Intrinsics_vec128 o1 = tmp11; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + Hacl_Poly1305_128_poly1305_finish(out, k, ctx); +} + +void +Hacl_Chacha20Poly1305_128_aead_encrypt( + uint8_t *k, + uint8_t *n1, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, cipher, m, k, n1, (uint32_t)1U); + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U, tmp, tmp, k, n1, (uint32_t)0U); + uint8_t *key = tmp; + Hacl_Chacha20Poly1305_128_poly1305_do_128(key, aadlen, aad, mlen, cipher, mac); +} + +uint32_t +Hacl_Chacha20Poly1305_128_aead_decrypt( + uint8_t *k, + uint8_t *n1, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U, tmp, tmp, k, n1, (uint32_t)0U); + uint8_t *key = tmp; + Hacl_Chacha20Poly1305_128_poly1305_do_128(key, aadlen, aad, mlen, cipher, computed_mac); + uint8_t res = (uint8_t)255U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res; + } + uint8_t z = res; + if (z == (uint8_t)255U) { + Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, m, cipher, k, n1, (uint32_t)1U); + return (uint32_t)0U; + } + return (uint32_t)1U; +} diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h new file mode 100644 index 0000000000..a88af0201c --- /dev/null +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h @@ -0,0 +1,60 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "libintvector.h" +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include + +#ifndef __Hacl_Chacha20Poly1305_128_H +#define __Hacl_Chacha20Poly1305_128_H + +#include "Hacl_Kremlib.h" +#include "Hacl_Chacha20_Vec128.h" +#include "Hacl_Poly1305_128.h" + +void +Hacl_Chacha20Poly1305_128_aead_encrypt( + uint8_t *k, + uint8_t *n1, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +uint32_t +Hacl_Chacha20Poly1305_128_aead_decrypt( + uint8_t *k, + uint8_t *n1, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +#define __Hacl_Chacha20Poly1305_128_H_DEFINED +#endif diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c new file mode 100644 index 0000000000..f6524fe956 --- /dev/null +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c @@ -0,0 +1,564 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "Hacl_Chacha20Poly1305_32.h" + +static void +Hacl_Chacha20Poly1305_32_poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text) +{ + uint32_t n1 = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem1 = text + n1 * (uint32_t)16U; + uint64_t *pre0 = ctx + (uint32_t)5U; + uint64_t *acc0 = ctx; + uint32_t nb = n1 * (uint32_t)16U / (uint32_t)16U; + uint32_t rem2 = n1 * (uint32_t)16U % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i = i + (uint32_t)1U) { + uint8_t *block = blocks + i * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r1 = pre0; + uint64_t *r5 = pre0 + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc0[0U]; + uint64_t a1 = acc0[1U]; + uint64_t a2 = acc0[2U]; + uint64_t a3 = acc0[3U]; + uint64_t a4 = acc0[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t l = t0 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c01 = l >> (uint32_t)26U; + uint64_t l0 = t1 + c01; + uint64_t tmp1 = l0 & (uint64_t)0x3ffffffU; + uint64_t c11 = l0 >> (uint32_t)26U; + uint64_t l1 = t2 + c11; + uint64_t tmp2 = l1 & (uint64_t)0x3ffffffU; + uint64_t c21 = l1 >> (uint32_t)26U; + uint64_t l2 = t3 + c21; + uint64_t tmp3 = l2 & (uint64_t)0x3ffffffU; + uint64_t c31 = l2 >> (uint32_t)26U; + uint64_t l3 = t4 + c31; + uint64_t tmp4 = l3 & (uint64_t)0x3ffffffU; + uint64_t c4 = l3 >> (uint32_t)26U; + uint64_t l4 = tmp0 + c4 * (uint64_t)5U; + uint64_t tmp01 = l4 & 
(uint64_t)0x3ffffffU; + uint64_t c5 = l4 >> (uint32_t)26U; + uint64_t tmp11 = tmp1 + c5; + uint64_t o0 = tmp01; + uint64_t o1 = tmp11; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + if (rem2 > (uint32_t)0U) { + uint8_t *last1 = blocks + nb * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last1, rem2 * sizeof last1[0U]); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem2 * (uint32_t)8U % (uint32_t)26U; + uint64_t mask = b; + uint64_t fi = e[rem2 * (uint32_t)8U / (uint32_t)26U]; + e[rem2 * (uint32_t)8U / (uint32_t)26U] = fi | mask; + uint64_t *r1 = pre0; + uint64_t *r5 = pre0 + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc0[0U]; + uint64_t a1 = acc0[1U]; + uint64_t a2 = acc0[2U]; + uint64_t a3 = acc0[3U]; + uint64_t a4 = acc0[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t l = t0 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c01 = l >> (uint32_t)26U; + uint64_t l0 = t1 + c01; + uint64_t tmp1 = l0 & (uint64_t)0x3ffffffU; + uint64_t c11 = l0 >> (uint32_t)26U; + uint64_t l1 = t2 + c11; + uint64_t tmp2 = l1 & (uint64_t)0x3ffffffU; + uint64_t c21 = l1 >> (uint32_t)26U; + uint64_t l2 = t3 + c21; + uint64_t tmp3 = l2 & (uint64_t)0x3ffffffU; + uint64_t c31 = l2 >> (uint32_t)26U; + uint64_t l3 = t4 + c31; + uint64_t tmp4 = l3 & (uint64_t)0x3ffffffU; + uint64_t c4 = l3 >> (uint32_t)26U; + uint64_t l4 = tmp0 + c4 * (uint64_t)5U; + uint64_t tmp01 = l4 & (uint64_t)0x3ffffffU; + uint64_t c5 = l4 >> (uint32_t)26U; + 
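+        /* Final carry step: c5, the overflow of limb 0 after folding in 5 * c4, is
+         * added into limb 1, leaving the accumulator in radix-2^26 representation. */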
uint64_t tmp11 = tmp1 + c5; + uint64_t o0 = tmp01; + uint64_t o1 = tmp11; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem1, r * sizeof rem1[0U]); + if (r > (uint32_t)0U) { + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r1 = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t l = t0 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c01 = l >> (uint32_t)26U; + uint64_t l0 = t1 + c01; + uint64_t tmp1 = l0 & (uint64_t)0x3ffffffU; + uint64_t c11 = l0 >> (uint32_t)26U; + uint64_t l1 = t2 + c11; + uint64_t tmp2 = l1 & (uint64_t)0x3ffffffU; + uint64_t c21 = l1 >> (uint32_t)26U; + uint64_t l2 = t3 + c21; + uint64_t tmp3 = l2 & (uint64_t)0x3ffffffU; + uint64_t c31 = l2 >> (uint32_t)26U; + uint64_t l3 = t4 + c31; + uint64_t tmp4 = l3 & (uint64_t)0x3ffffffU; + uint64_t c4 = l3 >> (uint32_t)26U; + uint64_t l4 = tmp0 + c4 * (uint64_t)5U; + uint64_t tmp01 = l4 & (uint64_t)0x3ffffffU; + uint64_t c5 = l4 >> (uint32_t)26U; + uint64_t tmp11 = tmp1 + c5; + uint64_t o0 = tmp01; + uint64_t o1 = tmp11; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + acc[0U] = o0; + 
acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +static void +Hacl_Chacha20Poly1305_32_poly1305_do_32( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + uint64_t ctx[25U] = { 0U }; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_32_poly1305_init(ctx, k); + Hacl_Chacha20Poly1305_32_poly1305_padded_32(ctx, aadlen, aad); + Hacl_Chacha20Poly1305_32_poly1305_padded_32(ctx, mlen, m); + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t l = t0 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c01 = l >> (uint32_t)26U; + uint64_t l0 = t1 + c01; + uint64_t tmp1 = l0 & (uint64_t)0x3ffffffU; + uint64_t c11 = l0 >> (uint32_t)26U; + uint64_t l1 = t2 + c11; + uint64_t tmp2 = l1 & (uint64_t)0x3ffffffU; + uint64_t c21 = l1 >> (uint32_t)26U; + uint64_t l2 = t3 + c21; + uint64_t tmp3 = l2 & (uint64_t)0x3ffffffU; + uint64_t c31 = l2 >> (uint32_t)26U; + uint64_t l3 = t4 + c31; + uint64_t tmp4 = l3 & (uint64_t)0x3ffffffU; + uint64_t c4 = l3 >> (uint32_t)26U; + uint64_t l4 = tmp0 + c4 * (uint64_t)5U; + uint64_t tmp01 = 
l4 & (uint64_t)0x3ffffffU; + uint64_t c5 = l4 >> (uint32_t)26U; + uint64_t tmp11 = tmp1 + c5; + uint64_t o0 = tmp01; + uint64_t o1 = tmp11; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + Hacl_Poly1305_32_poly1305_finish(out, k, ctx); +} + +void +Hacl_Chacha20Poly1305_32_aead_encrypt( + uint8_t *k, + uint8_t *n1, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + Hacl_Chacha20_chacha20_encrypt(mlen, cipher, m, k, n1, (uint32_t)1U); + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n1, (uint32_t)0U); + uint8_t *key = tmp; + Hacl_Chacha20Poly1305_32_poly1305_do_32(key, aadlen, aad, mlen, cipher, mac); +} + +uint32_t +Hacl_Chacha20Poly1305_32_aead_decrypt( + uint8_t *k, + uint8_t *n1, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n1, (uint32_t)0U); + uint8_t *key = tmp; + Hacl_Chacha20Poly1305_32_poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac); + uint8_t res = (uint8_t)255U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res; + } + uint8_t z = res; + if (z == (uint8_t)255U) { + Hacl_Chacha20_chacha20_encrypt(mlen, m, cipher, k, n1, (uint32_t)1U); + return (uint32_t)0U; + } + return (uint32_t)1U; +} diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h new file mode 100644 index 0000000000..05ee9540ca --- /dev/null +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h @@ -0,0 +1,59 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include + +#ifndef __Hacl_Chacha20Poly1305_32_H +#define __Hacl_Chacha20Poly1305_32_H + +#include "Hacl_Chacha20.h" +#include "Hacl_Kremlib.h" +#include "Hacl_Poly1305_32.h" + +void +Hacl_Chacha20Poly1305_32_aead_encrypt( + uint8_t *k, + uint8_t *n1, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +uint32_t +Hacl_Chacha20Poly1305_32_aead_decrypt( + uint8_t *k, + uint8_t *n1, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +#define __Hacl_Chacha20Poly1305_32_H_DEFINED +#endif diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec128.c b/lib/freebl/verified/Hacl_Chacha20_Vec128.c index b5568cc73d..718473be8b 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec128.c +++ b/lib/freebl/verified/Hacl_Chacha20_Vec128.c @@ -1,390 +1,792 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation +/* MIT License * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
*/ #include "Hacl_Chacha20_Vec128.h" -inline static void -Hacl_Impl_Chacha20_Vec128_State_state_incr(vec *k) -{ - vec k3 = k[3U]; - k[3U] = vec_increment(k3); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_State_state_to_key_block(uint8_t *stream_block, vec *k) -{ - vec k0 = k[0U]; - vec k1 = k[1U]; - vec k2 = k[2U]; - vec k3 = k[3U]; - uint8_t *a = stream_block; - uint8_t *b = stream_block + (uint32_t)16U; - uint8_t *c = stream_block + (uint32_t)32U; - uint8_t *d = stream_block + (uint32_t)48U; - vec_store_le(a, k0); - vec_store_le(b, k1); - vec_store_le(c, k2); - vec_store_le(d, k3); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_State_state_setup(vec *st, uint8_t *k, uint8_t *n1, uint32_t c) -{ - st[0U] = - vec_load_32x4((uint32_t)0x61707865U, - (uint32_t)0x3320646eU, - (uint32_t)0x79622d32U, - (uint32_t)0x6b206574U); - vec k0 = vec_load128_le(k); - vec k1 = vec_load128_le(k + (uint32_t)16U); - st[1U] = k0; - st[2U] = k1; - uint32_t n0 = load32_le(n1); - uint8_t *x00 = n1 + (uint32_t)4U; - uint32_t n10 = load32_le(x00); - uint8_t *x0 = n1 + (uint32_t)8U; - uint32_t n2 = load32_le(x0); - vec v1 = vec_load_32x4(c, n0, n10, n2); - st[3U] = v1; -} - -inline static void -Hacl_Impl_Chacha20_Vec128_round(vec *st) -{ - vec sa = st[0U]; - vec sb0 = st[1U]; - vec sd0 = st[3U]; - vec sa10 = vec_add(sa, sb0); - vec sd10 = vec_rotate_left(vec_xor(sd0, sa10), (uint32_t)16U); - st[0U] = sa10; - st[3U] = sd10; - vec sa0 = st[2U]; - vec sb1 = st[3U]; - vec sd2 = st[1U]; - vec sa11 = vec_add(sa0, sb1); - vec sd11 = vec_rotate_left(vec_xor(sd2, sa11), (uint32_t)12U); - st[2U] = sa11; - st[1U] = sd11; - vec sa2 = st[0U]; - vec sb2 = st[1U]; - vec sd3 = st[3U]; - vec sa12 = vec_add(sa2, sb2); - vec sd12 = vec_rotate_left(vec_xor(sd3, sa12), (uint32_t)8U); - st[0U] = sa12; - st[3U] = sd12; - vec sa3 = st[2U]; - vec sb = st[3U]; - vec sd = st[1U]; - vec sa1 = vec_add(sa3, sb); - vec sd1 = vec_rotate_left(vec_xor(sd, sa1), (uint32_t)7U); - st[2U] = sa1; - st[1U] = sd1; -} - -inline static void -Hacl_Impl_Chacha20_Vec128_double_round(vec *st) -{ - Hacl_Impl_Chacha20_Vec128_round(st); - vec r1 = st[1U]; - vec r20 = st[2U]; - vec r30 = st[3U]; - st[1U] = vec_shuffle_right(r1, (uint32_t)1U); - st[2U] = vec_shuffle_right(r20, (uint32_t)2U); - st[3U] = vec_shuffle_right(r30, (uint32_t)3U); - Hacl_Impl_Chacha20_Vec128_round(st); - vec r10 = st[1U]; - vec r2 = st[2U]; - vec r3 = st[3U]; - st[1U] = vec_shuffle_right(r10, (uint32_t)3U); - st[2U] = vec_shuffle_right(r2, (uint32_t)2U); - st[3U] = vec_shuffle_right(r3, (uint32_t)1U); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_double_round3(vec *st, vec *st_, vec *st__) -{ - Hacl_Impl_Chacha20_Vec128_double_round(st); - Hacl_Impl_Chacha20_Vec128_double_round(st_); - Hacl_Impl_Chacha20_Vec128_double_round(st__); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_sum_states(vec *st_, vec *st) -{ - vec s0 = st[0U]; - vec s1 = st[1U]; - vec s2 = st[2U]; - vec s3 = st[3U]; - vec s0_ = st_[0U]; - vec s1_ = st_[1U]; - vec s2_ = st_[2U]; - vec s3_ = st_[3U]; - st_[0U] = vec_add(s0_, s0); - st_[1U] = vec_add(s1_, s1); - st_[2U] = vec_add(s2_, s2); - st_[3U] = vec_add(s3_, s3); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_copy_state(vec *st_, vec *st) -{ - vec st0 = st[0U]; - vec st1 = st[1U]; - vec st2 = st[2U]; - vec st3 = st[3U]; - st_[0U] = st0; - st_[1U] = st1; - st_[2U] = st2; - st_[3U] = st3; -} - -inline static void -Hacl_Impl_Chacha20_Vec128_chacha20_core(vec *k, vec *st) -{ - Hacl_Impl_Chacha20_Vec128_copy_state(k, st); - for (uint32_t i = 
(uint32_t)0U; i < (uint32_t)10U; i = i + (uint32_t)1U) - Hacl_Impl_Chacha20_Vec128_double_round(k); - Hacl_Impl_Chacha20_Vec128_sum_states(k, st); -} - static void -Hacl_Impl_Chacha20_Vec128_state_incr(vec *st) -{ - Hacl_Impl_Chacha20_Vec128_State_state_incr(st); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_chacha20_incr3(vec *k0, vec *k1, vec *k2, vec *st) -{ - Hacl_Impl_Chacha20_Vec128_copy_state(k0, st); - Hacl_Impl_Chacha20_Vec128_copy_state(k1, st); - Hacl_Impl_Chacha20_Vec128_state_incr(k1); - Hacl_Impl_Chacha20_Vec128_copy_state(k2, k1); - Hacl_Impl_Chacha20_Vec128_state_incr(k2); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_chacha20_sum3(vec *k0, vec *k1, vec *k2, vec *st) -{ - Hacl_Impl_Chacha20_Vec128_sum_states(k0, st); - Hacl_Impl_Chacha20_Vec128_state_incr(st); - Hacl_Impl_Chacha20_Vec128_sum_states(k1, st); - Hacl_Impl_Chacha20_Vec128_state_incr(st); - Hacl_Impl_Chacha20_Vec128_sum_states(k2, st); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_chacha20_core3(vec *k0, vec *k1, vec *k2, vec *st) -{ - Hacl_Impl_Chacha20_Vec128_chacha20_incr3(k0, k1, k2, st); - for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i = i + (uint32_t)1U) - Hacl_Impl_Chacha20_Vec128_double_round3(k0, k1, k2); - Hacl_Impl_Chacha20_Vec128_chacha20_sum3(k0, k1, k2, st); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_chacha20_block(uint8_t *stream_block, vec *st) -{ - KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); - vec k[4U]; - for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) - k[_i] = vec_zero(); - Hacl_Impl_Chacha20_Vec128_chacha20_core(k, st); - Hacl_Impl_Chacha20_Vec128_State_state_to_key_block(stream_block, k); -} - -inline static void -Hacl_Impl_Chacha20_Vec128_init(vec *st, uint8_t *k, uint8_t *n1, uint32_t ctr) +Hacl_Chacha20_Vec128_double_round_128(Lib_IntVector_Intrinsics_vec128 *st) { - Hacl_Impl_Chacha20_Vec128_State_state_setup(st, k, n1, ctr); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std0 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std0, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std1 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std1, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std2 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std2, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std3 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std3, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std4 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std4, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std5 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]); + st[13U] = 
Lib_IntVector_Intrinsics_vec128_rotate_left32(std5, (uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std6 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std6, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std7 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std7, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std8 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std8, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std9 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std9, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std10 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std10, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std11 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std11, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std12 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std12, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std13 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std13, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std14 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std14, (uint32_t)7U); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std15 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std15, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std16 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std16, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std17 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std17, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std18 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std18, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std19 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std19, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]); + 
Lib_IntVector_Intrinsics_vec128 std20 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std20, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std21 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std21, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std22 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std22, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std23 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std23, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std24 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std24, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std25 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std25, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std26 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std26, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std27 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std27, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std28 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std28, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std29 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std29, (uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std30 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std30, (uint32_t)7U); } static void -Hacl_Impl_Chacha20_Vec128_update_last(uint8_t *output, uint8_t *plain, uint32_t len, vec *st) +Hacl_Chacha20_Vec128_chacha20_core_128( + Lib_IntVector_Intrinsics_vec128 *k, + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t ctr) { - uint8_t block[64U] = { 0U }; - Hacl_Impl_Chacha20_Vec128_chacha20_block(block, st); - uint8_t *mask = block; - for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) { - uint8_t xi = plain[i]; - uint8_t yi = mask[i]; - output[i] = xi ^ yi; + memcpy(k, ctx, (uint32_t)16U * sizeof ctx[0U]); + uint32_t ctr_u32 = (uint32_t)4U * ctr; + Lib_IntVector_Intrinsics_vec128 cv = Lib_IntVector_Intrinsics_vec128_load32(ctr_u32); + k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); + Hacl_Chacha20_Vec128_double_round_128(k); + Hacl_Chacha20_Vec128_double_round_128(k); + Hacl_Chacha20_Vec128_double_round_128(k); + Hacl_Chacha20_Vec128_double_round_128(k); + Hacl_Chacha20_Vec128_double_round_128(k); + 
Hacl_Chacha20_Vec128_double_round_128(k); + Hacl_Chacha20_Vec128_double_round_128(k); + Hacl_Chacha20_Vec128_double_round_128(k); + Hacl_Chacha20_Vec128_double_round_128(k); + Hacl_Chacha20_Vec128_double_round_128(k); + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = k; + Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]); + os[i] = x; } + k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); } static void -Hacl_Impl_Chacha20_Vec128_xor_block(uint8_t *output, uint8_t *plain, vec *st) -{ - vec p0 = vec_load_le(plain); - vec p1 = vec_load_le(plain + (uint32_t)16U); - vec p2 = vec_load_le(plain + (uint32_t)32U); - vec p3 = vec_load_le(plain + (uint32_t)48U); - vec k0 = st[0U]; - vec k1 = st[1U]; - vec k2 = st[2U]; - vec k3 = st[3U]; - vec o00 = vec_xor(p0, k0); - vec o10 = vec_xor(p1, k1); - vec o20 = vec_xor(p2, k2); - vec o30 = vec_xor(p3, k3); - uint8_t *o0 = output; - uint8_t *o1 = output + (uint32_t)16U; - uint8_t *o2 = output + (uint32_t)32U; - uint8_t *o3 = output + (uint32_t)48U; - vec_store_le(o0, o00); - vec_store_le(o1, o10); - vec_store_le(o2, o20); - vec_store_le(o3, o30); -} - -static void -Hacl_Impl_Chacha20_Vec128_update(uint8_t *output, uint8_t *plain, vec *st) -{ - KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); - vec k[4U]; - for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) - k[_i] = vec_zero(); - Hacl_Impl_Chacha20_Vec128_chacha20_core(k, st); - Hacl_Impl_Chacha20_Vec128_xor_block(output, plain, k); -} - -static void -Hacl_Impl_Chacha20_Vec128_update3(uint8_t *output, uint8_t *plain, vec *st) -{ - KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); - vec k0[4U]; - for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) - k0[_i] = vec_zero(); - KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); - vec k1[4U]; - for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) - k1[_i] = vec_zero(); - KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); - vec k2[4U]; - for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) - k2[_i] = vec_zero(); - Hacl_Impl_Chacha20_Vec128_chacha20_core3(k0, k1, k2, st); - uint8_t *p0 = plain; - uint8_t *p1 = plain + (uint32_t)64U; - uint8_t *p2 = plain + (uint32_t)128U; - uint8_t *o0 = output; - uint8_t *o1 = output + (uint32_t)64U; - uint8_t *o2 = output + (uint32_t)128U; - Hacl_Impl_Chacha20_Vec128_xor_block(o0, p0, k0); - Hacl_Impl_Chacha20_Vec128_xor_block(o1, p1, k1); - Hacl_Impl_Chacha20_Vec128_xor_block(o2, p2, k2); -} - -static void -Hacl_Impl_Chacha20_Vec128_update3_( - uint8_t *output, - uint8_t *plain, - uint32_t len, - vec *st, - uint32_t i) -{ - uint8_t *out_block = output + (uint32_t)192U * i; - uint8_t *plain_block = plain + (uint32_t)192U * i; - Hacl_Impl_Chacha20_Vec128_update3(out_block, plain_block, st); - Hacl_Impl_Chacha20_Vec128_state_incr(st); -} - -static void -Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode_blocks3( - uint8_t *output, - uint8_t *plain, - uint32_t len, - vec *st) -{ - for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) - Hacl_Impl_Chacha20_Vec128_update3_(output, plain, len, st, i); -} - -static void -Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode_blocks( - uint8_t *output, - uint8_t *plain, - uint32_t len, - vec *st) +Hacl_Chacha20_Vec128_chacha20_init_128( + Lib_IntVector_Intrinsics_vec128 *ctx, + uint8_t *k, + uint8_t *n1, + uint32_t ctr) { - uint32_t len3 = len / (uint32_t)3U; - uint32_t rest3 = len % (uint32_t)3U; - uint8_t *plain_ = plain; - uint8_t *blocks1 = plain + (uint32_t)192U * len3; - uint8_t *output_ = output; - uint8_t *outs = 
output + (uint32_t)192U * len3; - Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode_blocks3(output_, plain_, len3, st); - if (rest3 == (uint32_t)2U) { - uint8_t *block0 = blocks1; - uint8_t *block1 = blocks1 + (uint32_t)64U; - uint8_t *out0 = outs; - uint8_t *out1 = outs + (uint32_t)64U; - Hacl_Impl_Chacha20_Vec128_update(out0, block0, st); - Hacl_Impl_Chacha20_Vec128_state_incr(st); - Hacl_Impl_Chacha20_Vec128_update(out1, block1, st); - Hacl_Impl_Chacha20_Vec128_state_incr(st); - } else if (rest3 == (uint32_t)1U) { - Hacl_Impl_Chacha20_Vec128_update(outs, blocks1, st); - Hacl_Impl_Chacha20_Vec128_state_incr(st); + uint32_t ctx1[16U] = { 0U }; + uint32_t *uu____0 = ctx1; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t *os = uu____0; + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + os[i] = x; } + uint32_t *uu____1 = ctx1 + (uint32_t)4U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i = i + (uint32_t)1U) { + uint32_t *os = uu____1; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x; + } + ctx1[12U] = ctr; + uint32_t *uu____2 = ctx1 + (uint32_t)13U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i = i + (uint32_t)1U) { + uint32_t *os = uu____2; + uint8_t *bj = n1 + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x; + } + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = ctx; + uint32_t x = ctx1[i]; + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_load32(x); + os[i] = x0; + } + Lib_IntVector_Intrinsics_vec128 + ctr1 = + Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)3U, + (uint32_t)2U, + (uint32_t)1U, + (uint32_t)0U); + Lib_IntVector_Intrinsics_vec128 c12 = ctx[12U]; + ctx[12U] = Lib_IntVector_Intrinsics_vec128_add32(c12, ctr1); } -static void -Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode( - uint8_t *output, - uint8_t *plain, - uint32_t len, - vec *st) -{ - uint32_t blocks_len = len >> (uint32_t)6U; - uint32_t part_len = len & (uint32_t)0x3fU; - uint8_t *output_ = output; - uint8_t *plain_ = plain; - uint8_t *output__ = output + (uint32_t)64U * blocks_len; - uint8_t *plain__ = plain + (uint32_t)64U * blocks_len; - Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode_blocks(output_, plain_, blocks_len, st); - if (part_len > (uint32_t)0U) - Hacl_Impl_Chacha20_Vec128_update_last(output__, plain__, part_len, st); -} - -static void -Hacl_Impl_Chacha20_Vec128_chacha20( - uint8_t *output, - uint8_t *plain, +void +Hacl_Chacha20_Vec128_chacha20_encrypt_128( uint32_t len, - uint8_t *k, + uint8_t *out, + uint8_t *text, + uint8_t *key, uint8_t *n1, uint32_t ctr) { - KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); - vec buf[4U]; - for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) - buf[_i] = vec_zero(); - vec *st = buf; - Hacl_Impl_Chacha20_Vec128_init(st, k, n1, ctr); - Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode(output, plain, len, st); + Lib_IntVector_Intrinsics_vec128 ctx[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Hacl_Chacha20_Vec128_chacha20_init_128(ctx, key, n1, ctr); + uint32_t rem1 = len % ((uint32_t)4U * (uint32_t)64U); + uint32_t nb = len / ((uint32_t)4U * (uint32_t)64U); + uint32_t rem2 = len % ((uint32_t)4U * (uint32_t)64U); + for (uint32_t i0 = (uint32_t)0U; i0 < nb; i0 = i0 + (uint32_t)1U) { + uint8_t *uu____0 = out + i0 * (uint32_t)4U * (uint32_t)64U; + uint8_t *uu____1 
= text + i0 * (uint32_t)256U; + Lib_IntVector_Intrinsics_vec128 k[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + k[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Hacl_Chacha20_Vec128_chacha20_core_128(k, ctx, i0); + Lib_IntVector_Intrinsics_vec128 bl[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + bl[_i] = Lib_IntVector_Intrinsics_vec128_zero; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = bl; + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load_le(uu____1 + i * (uint32_t)4U * (uint32_t)4U); + os[i] = x; + } + Lib_IntVector_Intrinsics_vec128 v00 = k[0U]; + Lib_IntVector_Intrinsics_vec128 v16 = k[1U]; + Lib_IntVector_Intrinsics_vec128 v20 = k[2U]; + Lib_IntVector_Intrinsics_vec128 v30 = k[3U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0 = v0__; + Lib_IntVector_Intrinsics_vec128 v1 = v1__; + Lib_IntVector_Intrinsics_vec128 v2 = v2__; + Lib_IntVector_Intrinsics_vec128 v3 = v3__; + Lib_IntVector_Intrinsics_vec128 v010 = k[4U]; + Lib_IntVector_Intrinsics_vec128 v110 = k[5U]; + Lib_IntVector_Intrinsics_vec128 v210 = k[6U]; + Lib_IntVector_Intrinsics_vec128 v310 = k[7U]; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310); + Lib_IntVector_Intrinsics_vec128 + v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v4 = v0__0; + Lib_IntVector_Intrinsics_vec128 v5 = v1__0; + Lib_IntVector_Intrinsics_vec128 v6 = v2__0; + Lib_IntVector_Intrinsics_vec128 v7 = v3__0; + Lib_IntVector_Intrinsics_vec128 v011 = k[8U]; + Lib_IntVector_Intrinsics_vec128 v111 = k[9U]; + Lib_IntVector_Intrinsics_vec128 v211 = k[10U]; + Lib_IntVector_Intrinsics_vec128 v311 = k[11U]; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, 
v311); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v8 = v0__1; + Lib_IntVector_Intrinsics_vec128 v9 = v1__1; + Lib_IntVector_Intrinsics_vec128 v10 = v2__1; + Lib_IntVector_Intrinsics_vec128 v11 = v3__1; + Lib_IntVector_Intrinsics_vec128 v01 = k[12U]; + Lib_IntVector_Intrinsics_vec128 v120 = k[13U]; + Lib_IntVector_Intrinsics_vec128 v21 = k[14U]; + Lib_IntVector_Intrinsics_vec128 v31 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec128 + v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v12 = v0__2; + Lib_IntVector_Intrinsics_vec128 v13 = v1__2; + Lib_IntVector_Intrinsics_vec128 v14 = v2__2; + Lib_IntVector_Intrinsics_vec128 v15 = v3__2; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = bl; + Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_xor(bl[i], k[i]); + os[i] = x; + } + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128_store_le(uu____0 + i * (uint32_t)16U, bl[i]); + } + } + if (rem2 > (uint32_t)0U) { + uint8_t *uu____2 = out + nb * (uint32_t)4U * (uint32_t)64U; + uint8_t *uu____3 = text + nb * (uint32_t)4U * (uint32_t)64U; + uint8_t plain[256U] = { 0U }; + memcpy(plain, uu____3, rem1 * sizeof uu____3[0U]); + Lib_IntVector_Intrinsics_vec128 k[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + k[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Hacl_Chacha20_Vec128_chacha20_core_128(k, ctx, nb); + Lib_IntVector_Intrinsics_vec128 bl[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + bl[_i] = Lib_IntVector_Intrinsics_vec128_zero; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = bl; + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load_le(plain + i * (uint32_t)4U * (uint32_t)4U); + os[i] = x; + } + Lib_IntVector_Intrinsics_vec128 v00 = k[0U]; + Lib_IntVector_Intrinsics_vec128 v16 = k[1U]; + Lib_IntVector_Intrinsics_vec128 v20 = k[2U]; + 
Lib_IntVector_Intrinsics_vec128 v30 = k[3U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0 = v0__; + Lib_IntVector_Intrinsics_vec128 v1 = v1__; + Lib_IntVector_Intrinsics_vec128 v2 = v2__; + Lib_IntVector_Intrinsics_vec128 v3 = v3__; + Lib_IntVector_Intrinsics_vec128 v010 = k[4U]; + Lib_IntVector_Intrinsics_vec128 v110 = k[5U]; + Lib_IntVector_Intrinsics_vec128 v210 = k[6U]; + Lib_IntVector_Intrinsics_vec128 v310 = k[7U]; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310); + Lib_IntVector_Intrinsics_vec128 + v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v4 = v0__0; + Lib_IntVector_Intrinsics_vec128 v5 = v1__0; + Lib_IntVector_Intrinsics_vec128 v6 = v2__0; + Lib_IntVector_Intrinsics_vec128 v7 = v3__0; + Lib_IntVector_Intrinsics_vec128 v011 = k[8U]; + Lib_IntVector_Intrinsics_vec128 v111 = k[9U]; + Lib_IntVector_Intrinsics_vec128 v211 = k[10U]; + Lib_IntVector_Intrinsics_vec128 v311 = k[11U]; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v8 = v0__1; + Lib_IntVector_Intrinsics_vec128 v9 = v1__1; + Lib_IntVector_Intrinsics_vec128 v10 = v2__1; + Lib_IntVector_Intrinsics_vec128 v11 = v3__1; + Lib_IntVector_Intrinsics_vec128 v01 = k[12U]; + 
Lib_IntVector_Intrinsics_vec128 v120 = k[13U]; + Lib_IntVector_Intrinsics_vec128 v21 = k[14U]; + Lib_IntVector_Intrinsics_vec128 v31 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec128 + v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v12 = v0__2; + Lib_IntVector_Intrinsics_vec128 v13 = v1__2; + Lib_IntVector_Intrinsics_vec128 v14 = v2__2; + Lib_IntVector_Intrinsics_vec128 v15 = v3__2; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = bl; + Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_xor(bl[i], k[i]); + os[i] = x; + } + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128_store_le(plain + i * (uint32_t)16U, bl[i]); + } + memcpy(uu____2, plain, rem1 * sizeof plain[0U]); + } } void -Hacl_Chacha20_Vec128_chacha20( - uint8_t *output, - uint8_t *plain, +Hacl_Chacha20_Vec128_chacha20_decrypt_128( uint32_t len, - uint8_t *k, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, uint8_t *n1, uint32_t ctr) { - Hacl_Impl_Chacha20_Vec128_chacha20(output, plain, len, k, n1, ctr); + Lib_IntVector_Intrinsics_vec128 ctx[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Hacl_Chacha20_Vec128_chacha20_init_128(ctx, key, n1, ctr); + uint32_t rem1 = len % ((uint32_t)4U * (uint32_t)64U); + uint32_t nb = len / ((uint32_t)4U * (uint32_t)64U); + uint32_t rem2 = len % ((uint32_t)4U * (uint32_t)64U); + for (uint32_t i0 = (uint32_t)0U; i0 < nb; i0 = i0 + (uint32_t)1U) { + uint8_t *uu____0 = out + i0 * (uint32_t)4U * (uint32_t)64U; + uint8_t *uu____1 = cipher + i0 * (uint32_t)256U; + Lib_IntVector_Intrinsics_vec128 k[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + k[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Hacl_Chacha20_Vec128_chacha20_core_128(k, ctx, i0); + Lib_IntVector_Intrinsics_vec128 bl[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + bl[_i] = Lib_IntVector_Intrinsics_vec128_zero; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = bl; + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load_le(uu____1 + i * (uint32_t)4U * (uint32_t)4U); + os[i] = x; + } + Lib_IntVector_Intrinsics_vec128 v00 = k[0U]; + Lib_IntVector_Intrinsics_vec128 v16 = k[1U]; + Lib_IntVector_Intrinsics_vec128 v20 = k[2U]; + Lib_IntVector_Intrinsics_vec128 v30 = k[3U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = 
Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0 = v0__; + Lib_IntVector_Intrinsics_vec128 v1 = v1__; + Lib_IntVector_Intrinsics_vec128 v2 = v2__; + Lib_IntVector_Intrinsics_vec128 v3 = v3__; + Lib_IntVector_Intrinsics_vec128 v010 = k[4U]; + Lib_IntVector_Intrinsics_vec128 v110 = k[5U]; + Lib_IntVector_Intrinsics_vec128 v210 = k[6U]; + Lib_IntVector_Intrinsics_vec128 v310 = k[7U]; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310); + Lib_IntVector_Intrinsics_vec128 + v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v4 = v0__0; + Lib_IntVector_Intrinsics_vec128 v5 = v1__0; + Lib_IntVector_Intrinsics_vec128 v6 = v2__0; + Lib_IntVector_Intrinsics_vec128 v7 = v3__0; + Lib_IntVector_Intrinsics_vec128 v011 = k[8U]; + Lib_IntVector_Intrinsics_vec128 v111 = k[9U]; + Lib_IntVector_Intrinsics_vec128 v211 = k[10U]; + Lib_IntVector_Intrinsics_vec128 v311 = k[11U]; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v8 = v0__1; + Lib_IntVector_Intrinsics_vec128 v9 = v1__1; + Lib_IntVector_Intrinsics_vec128 v10 = v2__1; + Lib_IntVector_Intrinsics_vec128 v11 = v3__1; + Lib_IntVector_Intrinsics_vec128 v01 = k[12U]; + Lib_IntVector_Intrinsics_vec128 v120 = k[13U]; + Lib_IntVector_Intrinsics_vec128 v21 = 
k[14U]; + Lib_IntVector_Intrinsics_vec128 v31 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec128 + v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v12 = v0__2; + Lib_IntVector_Intrinsics_vec128 v13 = v1__2; + Lib_IntVector_Intrinsics_vec128 v14 = v2__2; + Lib_IntVector_Intrinsics_vec128 v15 = v3__2; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = bl; + Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_xor(bl[i], k[i]); + os[i] = x; + } + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128_store_le(uu____0 + i * (uint32_t)16U, bl[i]); + } + } + if (rem2 > (uint32_t)0U) { + uint8_t *uu____2 = out + nb * (uint32_t)4U * (uint32_t)64U; + uint8_t *uu____3 = cipher + nb * (uint32_t)4U * (uint32_t)64U; + uint8_t plain[256U] = { 0U }; + memcpy(plain, uu____3, rem1 * sizeof uu____3[0U]); + Lib_IntVector_Intrinsics_vec128 k[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + k[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Hacl_Chacha20_Vec128_chacha20_core_128(k, ctx, nb); + Lib_IntVector_Intrinsics_vec128 bl[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + bl[_i] = Lib_IntVector_Intrinsics_vec128_zero; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = bl; + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load_le(plain + i * (uint32_t)4U * (uint32_t)4U); + os[i] = x; + } + Lib_IntVector_Intrinsics_vec128 v00 = k[0U]; + Lib_IntVector_Intrinsics_vec128 v16 = k[1U]; + Lib_IntVector_Intrinsics_vec128 v20 = k[2U]; + Lib_IntVector_Intrinsics_vec128 v30 = k[3U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = 
Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0 = v0__; + Lib_IntVector_Intrinsics_vec128 v1 = v1__; + Lib_IntVector_Intrinsics_vec128 v2 = v2__; + Lib_IntVector_Intrinsics_vec128 v3 = v3__; + Lib_IntVector_Intrinsics_vec128 v010 = k[4U]; + Lib_IntVector_Intrinsics_vec128 v110 = k[5U]; + Lib_IntVector_Intrinsics_vec128 v210 = k[6U]; + Lib_IntVector_Intrinsics_vec128 v310 = k[7U]; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310); + Lib_IntVector_Intrinsics_vec128 + v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v4 = v0__0; + Lib_IntVector_Intrinsics_vec128 v5 = v1__0; + Lib_IntVector_Intrinsics_vec128 v6 = v2__0; + Lib_IntVector_Intrinsics_vec128 v7 = v3__0; + Lib_IntVector_Intrinsics_vec128 v011 = k[8U]; + Lib_IntVector_Intrinsics_vec128 v111 = k[9U]; + Lib_IntVector_Intrinsics_vec128 v211 = k[10U]; + Lib_IntVector_Intrinsics_vec128 v311 = k[11U]; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v8 = v0__1; + Lib_IntVector_Intrinsics_vec128 v9 = v1__1; + Lib_IntVector_Intrinsics_vec128 v10 = v2__1; + Lib_IntVector_Intrinsics_vec128 v11 = v3__1; + Lib_IntVector_Intrinsics_vec128 v01 = k[12U]; + Lib_IntVector_Intrinsics_vec128 v120 = k[13U]; + Lib_IntVector_Intrinsics_vec128 v21 = k[14U]; + Lib_IntVector_Intrinsics_vec128 v31 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec128 + v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + 
v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v12 = v0__2; + Lib_IntVector_Intrinsics_vec128 v13 = v1__2; + Lib_IntVector_Intrinsics_vec128 v14 = v2__2; + Lib_IntVector_Intrinsics_vec128 v15 = v3__2; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128 *os = bl; + Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_xor(bl[i], k[i]); + os[i] = x; + } + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + Lib_IntVector_Intrinsics_vec128_store_le(plain + i * (uint32_t)16U, bl[i]); + } + memcpy(uu____2, plain, rem1 * sizeof plain[0U]); + } } diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec128.h b/lib/freebl/verified/Hacl_Chacha20_Vec128.h index a3970ead60..a04f30c7b2 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec128.h +++ b/lib/freebl/verified/Hacl_Chacha20_Vec128.h @@ -1,61 +1,55 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation +/* MIT License * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
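The encrypt/decrypt bodies above each process a 256-byte batch per iteration: chacha20_core_128 produces four 64-byte ChaCha20 blocks in row-vector form, and the long interleave_low/high 32/64 sequence transposes them back into the byte order the scalar code emits before the XOR and store. A cross-check sketch against the scalar entry point, assuming the updated Hacl_Chacha20.h exposes Hacl_Chacha20_chacha20_encrypt with the same argument order (an assumption, since that header is not shown in this hunk):

#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "Hacl_Chacha20.h"
#include "Hacl_Chacha20_Vec128.h"

/* Cross-check sketch: the vectorized code computes the same ChaCha20 function
 * as the scalar one; it just handles four 64-byte blocks per iteration and
 * transposes the state vectors back into byte order before storing. */
static void
check_vec128_matches_scalar(uint8_t *key /* 32 bytes */, uint8_t *nonce /* 12 bytes */)
{
    uint8_t text[300] = { 0 }; /* not a multiple of 256: exercises the tail path */
    uint8_t out_scalar[300];
    uint8_t out_vec[300];

    /* Assumed scalar entry point from the updated Hacl_Chacha20.h. */
    Hacl_Chacha20_chacha20_encrypt((uint32_t)sizeof(text), out_scalar, text,
                                   key, nonce, (uint32_t)1U);
    Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)sizeof(text), out_vec, text,
                                              key, nonce, (uint32_t)1U);
    assert(memcmp(out_scalar, out_vec, sizeof(text)) == 0);
}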
*/ -#include "kremlib.h" +#include "libintvector.h" +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include + #ifndef __Hacl_Chacha20_Vec128_H #define __Hacl_Chacha20_Vec128_H -#include "vec128.h" - -typedef uint32_t Hacl_Impl_Xor_Lemmas_u32; - -typedef uint8_t Hacl_Impl_Xor_Lemmas_u8; - -typedef uint32_t Hacl_Impl_Chacha20_Vec128_State_u32; - -typedef uint32_t Hacl_Impl_Chacha20_Vec128_State_h32; - -typedef uint8_t *Hacl_Impl_Chacha20_Vec128_State_uint8_p; - -typedef vec *Hacl_Impl_Chacha20_Vec128_State_state; - -typedef uint32_t Hacl_Impl_Chacha20_Vec128_u32; - -typedef uint32_t Hacl_Impl_Chacha20_Vec128_h32; +#include "Hacl_Chacha20.h" +#include "Hacl_Kremlib.h" -typedef uint8_t *Hacl_Impl_Chacha20_Vec128_uint8_p; - -typedef uint32_t Hacl_Impl_Chacha20_Vec128_idx; - -typedef struct -{ - void *k; - void *n; - uint32_t ctr; -} Hacl_Impl_Chacha20_Vec128_log_t_; - -typedef void *Hacl_Impl_Chacha20_Vec128_log_t; - -typedef uint8_t *Hacl_Chacha20_Vec128_uint8_p; +void +Hacl_Chacha20_Vec128_chacha20_encrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n1, + uint32_t ctr); void -Hacl_Chacha20_Vec128_chacha20( - uint8_t *output, - uint8_t *plain, +Hacl_Chacha20_Vec128_chacha20_decrypt_128( uint32_t len, - uint8_t *k, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, uint8_t *n1, uint32_t ctr); + +#define __Hacl_Chacha20_Vec128_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Curve25519.c b/lib/freebl/verified/Hacl_Curve25519.c deleted file mode 100644 index f6f6f930bb..0000000000 --- a/lib/freebl/verified/Hacl_Curve25519.c +++ /dev/null @@ -1,845 +0,0 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
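The header above declares separate chacha20_encrypt_128 and chacha20_decrypt_128 entry points even though both apply the same keystream XOR, so decrypting with identical key, nonce and counter inverts encryption. A round-trip sketch, assuming a 32-byte key, a 12-byte nonce and an initial block counter of 0:

#include <stdint.h>
#include <string.h>
#include "Hacl_Chacha20_Vec128.h"

/* Round-trip sketch: the decrypt entry point with the same key/nonce/counter
 * regenerates the keystream and undoes the encryption XOR. */
static int
chacha20_vec128_roundtrip(uint8_t *key /* 32 bytes */, uint8_t *nonce /* 12 bytes */)
{
    uint8_t text[256];
    uint8_t cipher[256];
    uint8_t plain[256];

    memset(text, 0xab, sizeof(text));
    Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)sizeof(text), cipher, text,
                                              key, nonce, (uint32_t)0U);
    Hacl_Chacha20_Vec128_chacha20_decrypt_128((uint32_t)sizeof(cipher), plain, cipher,
                                              key, nonce, (uint32_t)0U);
    return memcmp(plain, text, sizeof(text)) == 0;
}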
- */ - -#include "Hacl_Curve25519.h" - -static void -Hacl_Bignum_Modulo_carry_top(uint64_t *b) -{ - uint64_t b4 = b[4U]; - uint64_t b0 = b[0U]; - uint64_t b4_ = b4 & (uint64_t)0x7ffffffffffffU; - uint64_t b0_ = b0 + (uint64_t)19U * (b4 >> (uint32_t)51U); - b[4U] = b4_; - b[0U] = b0_; -} - -inline static void -Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, FStar_UInt128_t *input) -{ - { - FStar_UInt128_t xi = input[0U]; - output[0U] = FStar_UInt128_uint128_to_uint64(xi); - } - { - FStar_UInt128_t xi = input[1U]; - output[1U] = FStar_UInt128_uint128_to_uint64(xi); - } - { - FStar_UInt128_t xi = input[2U]; - output[2U] = FStar_UInt128_uint128_to_uint64(xi); - } - { - FStar_UInt128_t xi = input[3U]; - output[3U] = FStar_UInt128_uint128_to_uint64(xi); - } - { - FStar_UInt128_t xi = input[4U]; - output[4U] = FStar_UInt128_uint128_to_uint64(xi); - } -} - -inline static void -Hacl_Bignum_Fproduct_sum_scalar_multiplication_( - FStar_UInt128_t *output, - uint64_t *input, - uint64_t s) -{ - { - FStar_UInt128_t xi = output[0U]; - uint64_t yi = input[0U]; - output[0U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); - } - { - FStar_UInt128_t xi = output[1U]; - uint64_t yi = input[1U]; - output[1U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); - } - { - FStar_UInt128_t xi = output[2U]; - uint64_t yi = input[2U]; - output[2U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); - } - { - FStar_UInt128_t xi = output[3U]; - uint64_t yi = input[3U]; - output[3U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); - } - { - FStar_UInt128_t xi = output[4U]; - uint64_t yi = input[4U]; - output[4U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); - } -} - -inline static void -Hacl_Bignum_Fproduct_carry_wide_(FStar_UInt128_t *tmp) -{ - { - uint32_t ctr = (uint32_t)0U; - FStar_UInt128_t tctr = tmp[ctr]; - FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; - uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU; - FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51U); - tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); - tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); - } - { - uint32_t ctr = (uint32_t)1U; - FStar_UInt128_t tctr = tmp[ctr]; - FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; - uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU; - FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51U); - tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); - tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); - } - { - uint32_t ctr = (uint32_t)2U; - FStar_UInt128_t tctr = tmp[ctr]; - FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; - uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU; - FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51U); - tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); - tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); - } - { - uint32_t ctr = (uint32_t)3U; - FStar_UInt128_t tctr = tmp[ctr]; - FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; - uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU; - FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51U); - tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); - tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); - } -} - -inline static void -Hacl_Bignum_Fmul_shift_reduce(uint64_t *output) -{ - uint64_t tmp = output[4U]; - { - uint32_t ctr = (uint32_t)5U - (uint32_t)0U - (uint32_t)1U; - uint64_t z = output[ctr - 
(uint32_t)1U]; - output[ctr] = z; - } - { - uint32_t ctr = (uint32_t)5U - (uint32_t)1U - (uint32_t)1U; - uint64_t z = output[ctr - (uint32_t)1U]; - output[ctr] = z; - } - { - uint32_t ctr = (uint32_t)5U - (uint32_t)2U - (uint32_t)1U; - uint64_t z = output[ctr - (uint32_t)1U]; - output[ctr] = z; - } - { - uint32_t ctr = (uint32_t)5U - (uint32_t)3U - (uint32_t)1U; - uint64_t z = output[ctr - (uint32_t)1U]; - output[ctr] = z; - } - output[0U] = tmp; - uint64_t b0 = output[0U]; - output[0U] = (uint64_t)19U * b0; -} - -static void -Hacl_Bignum_Fmul_mul_shift_reduce_(FStar_UInt128_t *output, uint64_t *input, uint64_t *input21) -{ - { - uint64_t input2i = input21[0U]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); - Hacl_Bignum_Fmul_shift_reduce(input); - } - { - uint64_t input2i = input21[1U]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); - Hacl_Bignum_Fmul_shift_reduce(input); - } - { - uint64_t input2i = input21[2U]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); - Hacl_Bignum_Fmul_shift_reduce(input); - } - { - uint64_t input2i = input21[3U]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); - Hacl_Bignum_Fmul_shift_reduce(input); - } - uint32_t i = (uint32_t)4U; - uint64_t input2i = input21[i]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); -} - -inline static void -Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input21) -{ - uint64_t tmp[5U] = { 0U }; - memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]); - KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)5U); - FStar_UInt128_t t[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); - Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input21); - Hacl_Bignum_Fproduct_carry_wide_(t); - FStar_UInt128_t b4 = t[4U]; - FStar_UInt128_t b0 = t[0U]; - FStar_UInt128_t - b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU)); - FStar_UInt128_t - b0_ = - FStar_UInt128_add(b0, - FStar_UInt128_mul_wide((uint64_t)19U, - FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U)))); - t[4U] = b4_; - t[0U] = b0_; - Hacl_Bignum_Fproduct_copy_from_wide_(output, t); - uint64_t i0 = output[0U]; - uint64_t i1 = output[1U]; - uint64_t i0_ = i0 & (uint64_t)0x7ffffffffffffU; - uint64_t i1_ = i1 + (i0 >> (uint32_t)51U); - output[0U] = i0_; - output[1U] = i1_; -} - -inline static void -Hacl_Bignum_Fsquare_fsquare__(FStar_UInt128_t *tmp, uint64_t *output) -{ - uint64_t r0 = output[0U]; - uint64_t r1 = output[1U]; - uint64_t r2 = output[2U]; - uint64_t r3 = output[3U]; - uint64_t r4 = output[4U]; - uint64_t d0 = r0 * (uint64_t)2U; - uint64_t d1 = r1 * (uint64_t)2U; - uint64_t d2 = r2 * (uint64_t)2U * (uint64_t)19U; - uint64_t d419 = r4 * (uint64_t)19U; - uint64_t d4 = d419 * (uint64_t)2U; - FStar_UInt128_t - s0 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(r0, r0), - FStar_UInt128_mul_wide(d4, r1)), - FStar_UInt128_mul_wide(d2, r3)); - FStar_UInt128_t - s1 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r1), - FStar_UInt128_mul_wide(d4, r2)), - FStar_UInt128_mul_wide(r3 * (uint64_t)19U, r3)); - FStar_UInt128_t - s2 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r2), - FStar_UInt128_mul_wide(r1, r1)), - FStar_UInt128_mul_wide(d4, r3)); - FStar_UInt128_t - s3 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r3), - 
FStar_UInt128_mul_wide(d1, r2)), - FStar_UInt128_mul_wide(r4, d419)); - FStar_UInt128_t - s4 = - FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r4), - FStar_UInt128_mul_wide(d1, r3)), - FStar_UInt128_mul_wide(r2, r2)); - tmp[0U] = s0; - tmp[1U] = s1; - tmp[2U] = s2; - tmp[3U] = s3; - tmp[4U] = s4; -} - -inline static void -Hacl_Bignum_Fsquare_fsquare_(FStar_UInt128_t *tmp, uint64_t *output) -{ - Hacl_Bignum_Fsquare_fsquare__(tmp, output); - Hacl_Bignum_Fproduct_carry_wide_(tmp); - FStar_UInt128_t b4 = tmp[4U]; - FStar_UInt128_t b0 = tmp[0U]; - FStar_UInt128_t - b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU)); - FStar_UInt128_t - b0_ = - FStar_UInt128_add(b0, - FStar_UInt128_mul_wide((uint64_t)19U, - FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U)))); - tmp[4U] = b4_; - tmp[0U] = b0_; - Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp); - uint64_t i0 = output[0U]; - uint64_t i1 = output[1U]; - uint64_t i0_ = i0 & (uint64_t)0x7ffffffffffffU; - uint64_t i1_ = i1 + (i0 >> (uint32_t)51U); - output[0U] = i0_; - output[1U] = i1_; -} - -static void -Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, FStar_UInt128_t *tmp, uint32_t count1) -{ - Hacl_Bignum_Fsquare_fsquare_(tmp, input); - for (uint32_t i = (uint32_t)1U; i < count1; i = i + (uint32_t)1U) - Hacl_Bignum_Fsquare_fsquare_(tmp, input); -} - -inline static void -Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1) -{ - KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)5U); - FStar_UInt128_t t[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); - memcpy(output, input, (uint32_t)5U * sizeof input[0U]); - Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1); -} - -inline static void -Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1) -{ - KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)5U); - FStar_UInt128_t t[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); - Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1); -} - -inline static void -Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z) -{ - uint64_t buf[20U] = { 0U }; - uint64_t *a = buf; - uint64_t *t00 = buf + (uint32_t)5U; - uint64_t *b0 = buf + (uint32_t)10U; - Hacl_Bignum_Fsquare_fsquare_times(a, z, (uint32_t)1U); - Hacl_Bignum_Fsquare_fsquare_times(t00, a, (uint32_t)2U); - Hacl_Bignum_Fmul_fmul(b0, t00, z); - Hacl_Bignum_Fmul_fmul(a, b0, a); - Hacl_Bignum_Fsquare_fsquare_times(t00, a, (uint32_t)1U); - Hacl_Bignum_Fmul_fmul(b0, t00, b0); - Hacl_Bignum_Fsquare_fsquare_times(t00, b0, (uint32_t)5U); - uint64_t *t01 = buf + (uint32_t)5U; - uint64_t *b1 = buf + (uint32_t)10U; - uint64_t *c0 = buf + (uint32_t)15U; - Hacl_Bignum_Fmul_fmul(b1, t01, b1); - Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)10U); - Hacl_Bignum_Fmul_fmul(c0, t01, b1); - Hacl_Bignum_Fsquare_fsquare_times(t01, c0, (uint32_t)20U); - Hacl_Bignum_Fmul_fmul(t01, t01, c0); - Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, (uint32_t)10U); - Hacl_Bignum_Fmul_fmul(b1, t01, b1); - Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)50U); - uint64_t *a0 = buf; - uint64_t *t0 = buf + (uint32_t)5U; - uint64_t *b = buf + (uint32_t)10U; - uint64_t *c = buf + (uint32_t)15U; - Hacl_Bignum_Fmul_fmul(c, t0, b); - Hacl_Bignum_Fsquare_fsquare_times(t0, c, (uint32_t)100U); - Hacl_Bignum_Fmul_fmul(t0, t0, c); - 
Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)50U); - Hacl_Bignum_Fmul_fmul(t0, t0, b); - Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)5U); - Hacl_Bignum_Fmul_fmul(out, t0, a0); -} - -inline static void -Hacl_Bignum_fsum(uint64_t *a, uint64_t *b) -{ - { - uint64_t xi = a[0U]; - uint64_t yi = b[0U]; - a[0U] = xi + yi; - } - { - uint64_t xi = a[1U]; - uint64_t yi = b[1U]; - a[1U] = xi + yi; - } - { - uint64_t xi = a[2U]; - uint64_t yi = b[2U]; - a[2U] = xi + yi; - } - { - uint64_t xi = a[3U]; - uint64_t yi = b[3U]; - a[3U] = xi + yi; - } - { - uint64_t xi = a[4U]; - uint64_t yi = b[4U]; - a[4U] = xi + yi; - } -} - -inline static void -Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b) -{ - uint64_t tmp[5U] = { 0U }; - memcpy(tmp, b, (uint32_t)5U * sizeof b[0U]); - uint64_t b0 = tmp[0U]; - uint64_t b1 = tmp[1U]; - uint64_t b2 = tmp[2U]; - uint64_t b3 = tmp[3U]; - uint64_t b4 = tmp[4U]; - tmp[0U] = b0 + (uint64_t)0x3fffffffffff68U; - tmp[1U] = b1 + (uint64_t)0x3ffffffffffff8U; - tmp[2U] = b2 + (uint64_t)0x3ffffffffffff8U; - tmp[3U] = b3 + (uint64_t)0x3ffffffffffff8U; - tmp[4U] = b4 + (uint64_t)0x3ffffffffffff8U; - { - uint64_t xi = a[0U]; - uint64_t yi = tmp[0U]; - a[0U] = yi - xi; - } - { - uint64_t xi = a[1U]; - uint64_t yi = tmp[1U]; - a[1U] = yi - xi; - } - { - uint64_t xi = a[2U]; - uint64_t yi = tmp[2U]; - a[2U] = yi - xi; - } - { - uint64_t xi = a[3U]; - uint64_t yi = tmp[3U]; - a[3U] = yi - xi; - } - { - uint64_t xi = a[4U]; - uint64_t yi = tmp[4U]; - a[4U] = yi - xi; - } -} - -inline static void -Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s) -{ - KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)5U); - FStar_UInt128_t tmp[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - tmp[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); - { - uint64_t xi = b[0U]; - tmp[0U] = FStar_UInt128_mul_wide(xi, s); - } - { - uint64_t xi = b[1U]; - tmp[1U] = FStar_UInt128_mul_wide(xi, s); - } - { - uint64_t xi = b[2U]; - tmp[2U] = FStar_UInt128_mul_wide(xi, s); - } - { - uint64_t xi = b[3U]; - tmp[3U] = FStar_UInt128_mul_wide(xi, s); - } - { - uint64_t xi = b[4U]; - tmp[4U] = FStar_UInt128_mul_wide(xi, s); - } - Hacl_Bignum_Fproduct_carry_wide_(tmp); - FStar_UInt128_t b4 = tmp[4U]; - FStar_UInt128_t b0 = tmp[0U]; - FStar_UInt128_t - b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU)); - FStar_UInt128_t - b0_ = - FStar_UInt128_add(b0, - FStar_UInt128_mul_wide((uint64_t)19U, - FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U)))); - tmp[4U] = b4_; - tmp[0U] = b0_; - Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp); -} - -inline static void -Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b) -{ - Hacl_Bignum_Fmul_fmul(output, a, b); -} - -inline static void -Hacl_Bignum_crecip(uint64_t *output, uint64_t *input) -{ - Hacl_Bignum_Crecip_crecip(output, input); -} - -static void -Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr) -{ - uint32_t i = ctr - (uint32_t)1U; - uint64_t ai = a[i]; - uint64_t bi = b[i]; - uint64_t x = swap1 & (ai ^ bi); - uint64_t ai1 = ai ^ x; - uint64_t bi1 = bi ^ x; - a[i] = ai1; - b[i] = bi1; -} - -static void -Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr) -{ - if (!(ctr == (uint32_t)0U)) { - Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr); - uint32_t i = ctr - (uint32_t)1U; - Hacl_EC_Point_swap_conditional_(a, b, swap1, i); - } -} - -static void 
-Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap) -{ - uint64_t swap1 = (uint64_t)0U - iswap; - Hacl_EC_Point_swap_conditional_(a, b, swap1, (uint32_t)5U); - Hacl_EC_Point_swap_conditional_(a + (uint32_t)5U, b + (uint32_t)5U, swap1, (uint32_t)5U); -} - -static void -Hacl_EC_Point_copy(uint64_t *output, uint64_t *input) -{ - memcpy(output, input, (uint32_t)5U * sizeof input[0U]); - memcpy(output + (uint32_t)5U, - input + (uint32_t)5U, - (uint32_t)5U * sizeof(input + (uint32_t)5U)[0U]); -} - -static void -Hacl_EC_AddAndDouble_fmonty( - uint64_t *pp, - uint64_t *ppq, - uint64_t *p, - uint64_t *pq, - uint64_t *qmqp) -{ - uint64_t *qx = qmqp; - uint64_t *x2 = pp; - uint64_t *z2 = pp + (uint32_t)5U; - uint64_t *x3 = ppq; - uint64_t *z3 = ppq + (uint32_t)5U; - uint64_t *x = p; - uint64_t *z = p + (uint32_t)5U; - uint64_t *xprime = pq; - uint64_t *zprime = pq + (uint32_t)5U; - uint64_t buf[40U] = { 0U }; - uint64_t *origx = buf; - uint64_t *origxprime = buf + (uint32_t)5U; - uint64_t *xxprime0 = buf + (uint32_t)25U; - uint64_t *zzprime0 = buf + (uint32_t)30U; - memcpy(origx, x, (uint32_t)5U * sizeof x[0U]); - Hacl_Bignum_fsum(x, z); - Hacl_Bignum_fdifference(z, origx); - memcpy(origxprime, xprime, (uint32_t)5U * sizeof xprime[0U]); - Hacl_Bignum_fsum(xprime, zprime); - Hacl_Bignum_fdifference(zprime, origxprime); - Hacl_Bignum_fmul(xxprime0, xprime, z); - Hacl_Bignum_fmul(zzprime0, x, zprime); - uint64_t *origxprime0 = buf + (uint32_t)5U; - uint64_t *xx0 = buf + (uint32_t)15U; - uint64_t *zz0 = buf + (uint32_t)20U; - uint64_t *xxprime = buf + (uint32_t)25U; - uint64_t *zzprime = buf + (uint32_t)30U; - uint64_t *zzzprime = buf + (uint32_t)35U; - memcpy(origxprime0, xxprime, (uint32_t)5U * sizeof xxprime[0U]); - Hacl_Bignum_fsum(xxprime, zzprime); - Hacl_Bignum_fdifference(zzprime, origxprime0); - Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, (uint32_t)1U); - Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, (uint32_t)1U); - Hacl_Bignum_fmul(z3, zzzprime, qx); - Hacl_Bignum_Fsquare_fsquare_times(xx0, x, (uint32_t)1U); - Hacl_Bignum_Fsquare_fsquare_times(zz0, z, (uint32_t)1U); - uint64_t *zzz = buf + (uint32_t)10U; - uint64_t *xx = buf + (uint32_t)15U; - uint64_t *zz = buf + (uint32_t)20U; - Hacl_Bignum_fmul(x2, xx, zz); - Hacl_Bignum_fdifference(zz, xx); - uint64_t scalar = (uint64_t)121665U; - Hacl_Bignum_fscalar(zzz, zz, scalar); - Hacl_Bignum_fsum(zzz, xx); - Hacl_Bignum_fmul(z2, zzz, zz); -} - -static void -Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step( - uint64_t *nq, - uint64_t *nqpq, - uint64_t *nq2, - uint64_t *nqpq2, - uint64_t *q, - uint8_t byt) -{ - uint64_t bit = (uint64_t)(byt >> (uint32_t)7U); - Hacl_EC_Point_swap_conditional(nq, nqpq, bit); - Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q); - uint64_t bit0 = (uint64_t)(byt >> (uint32_t)7U); - Hacl_EC_Point_swap_conditional(nq2, nqpq2, bit0); -} - -static void -Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step( - uint64_t *nq, - uint64_t *nqpq, - uint64_t *nq2, - uint64_t *nqpq2, - uint64_t *q, - uint8_t byt) -{ - Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); - uint8_t byt1 = byt << (uint32_t)1U; - Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); -} - -static void -Hacl_EC_Ladder_SmallLoop_cmult_small_loop( - uint64_t *nq, - uint64_t *nqpq, - uint64_t *nq2, - uint64_t *nqpq2, - uint64_t *q, - uint8_t byt, - uint32_t i) -{ - if (!(i == (uint32_t)0U)) { - uint32_t i_ = i - (uint32_t)1U; - 
Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt); - uint8_t byt_ = byt << (uint32_t)2U; - Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt_, i_); - } -} - -static void -Hacl_EC_Ladder_BigLoop_cmult_big_loop( - uint8_t *n1, - uint64_t *nq, - uint64_t *nqpq, - uint64_t *nq2, - uint64_t *nqpq2, - uint64_t *q, - uint32_t i) -{ - if (!(i == (uint32_t)0U)) { - uint32_t i1 = i - (uint32_t)1U; - uint8_t byte = n1[i1]; - Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, (uint32_t)4U); - Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1); - } -} - -static void -Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q) -{ - uint64_t point_buf[40U] = { 0U }; - uint64_t *nq = point_buf; - uint64_t *nqpq = point_buf + (uint32_t)10U; - uint64_t *nq2 = point_buf + (uint32_t)20U; - uint64_t *nqpq2 = point_buf + (uint32_t)30U; - Hacl_EC_Point_copy(nqpq, q); - nq[0U] = (uint64_t)1U; - Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, (uint32_t)32U); - Hacl_EC_Point_copy(result, nq); -} - -static void -Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input) -{ - uint64_t i0 = load64_le(input); - uint8_t *x00 = input + (uint32_t)6U; - uint64_t i1 = load64_le(x00); - uint8_t *x01 = input + (uint32_t)12U; - uint64_t i2 = load64_le(x01); - uint8_t *x02 = input + (uint32_t)19U; - uint64_t i3 = load64_le(x02); - uint8_t *x0 = input + (uint32_t)24U; - uint64_t i4 = load64_le(x0); - uint64_t output0 = i0 & (uint64_t)0x7ffffffffffffU; - uint64_t output1 = i1 >> (uint32_t)3U & (uint64_t)0x7ffffffffffffU; - uint64_t output2 = i2 >> (uint32_t)6U & (uint64_t)0x7ffffffffffffU; - uint64_t output3 = i3 >> (uint32_t)1U & (uint64_t)0x7ffffffffffffU; - uint64_t output4 = i4 >> (uint32_t)12U & (uint64_t)0x7ffffffffffffU; - output[0U] = output0; - output[1U] = output1; - output[2U] = output2; - output[3U] = output3; - output[4U] = output4; -} - -static void -Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input) -{ - uint64_t t0 = input[0U]; - uint64_t t1 = input[1U]; - uint64_t t2 = input[2U]; - uint64_t t3 = input[3U]; - uint64_t t4 = input[4U]; - uint64_t t1_ = t1 + (t0 >> (uint32_t)51U); - uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU; - uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U); - uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU; - uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U); - uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU; - uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U); - uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU; - input[0U] = t0_; - input[1U] = t1__; - input[2U] = t2__; - input[3U] = t3__; - input[4U] = t4_; -} - -static void -Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input) -{ - Hacl_EC_Format_fcontract_first_carry_pass(input); - Hacl_Bignum_Modulo_carry_top(input); -} - -static void -Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input) -{ - uint64_t t0 = input[0U]; - uint64_t t1 = input[1U]; - uint64_t t2 = input[2U]; - uint64_t t3 = input[3U]; - uint64_t t4 = input[4U]; - uint64_t t1_ = t1 + (t0 >> (uint32_t)51U); - uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU; - uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U); - uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU; - uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U); - uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU; - uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U); - uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU; - input[0U] = t0_; - input[1U] = t1__; - input[2U] = t2__; - input[3U] = t3__; - input[4U] = t4_; -} - -static void 
-Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input) -{ - Hacl_EC_Format_fcontract_second_carry_pass(input); - Hacl_Bignum_Modulo_carry_top(input); - uint64_t i0 = input[0U]; - uint64_t i1 = input[1U]; - uint64_t i0_ = i0 & (uint64_t)0x7ffffffffffffU; - uint64_t i1_ = i1 + (i0 >> (uint32_t)51U); - input[0U] = i0_; - input[1U] = i1_; -} - -static void -Hacl_EC_Format_fcontract_trim(uint64_t *input) -{ - uint64_t a0 = input[0U]; - uint64_t a1 = input[1U]; - uint64_t a2 = input[2U]; - uint64_t a3 = input[3U]; - uint64_t a4 = input[4U]; - uint64_t mask0 = FStar_UInt64_gte_mask(a0, (uint64_t)0x7ffffffffffedU); - uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0x7ffffffffffffU); - uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x7ffffffffffffU); - uint64_t mask3 = FStar_UInt64_eq_mask(a3, (uint64_t)0x7ffffffffffffU); - uint64_t mask4 = FStar_UInt64_eq_mask(a4, (uint64_t)0x7ffffffffffffU); - uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4; - uint64_t a0_ = a0 - ((uint64_t)0x7ffffffffffedU & mask); - uint64_t a1_ = a1 - ((uint64_t)0x7ffffffffffffU & mask); - uint64_t a2_ = a2 - ((uint64_t)0x7ffffffffffffU & mask); - uint64_t a3_ = a3 - ((uint64_t)0x7ffffffffffffU & mask); - uint64_t a4_ = a4 - ((uint64_t)0x7ffffffffffffU & mask); - input[0U] = a0_; - input[1U] = a1_; - input[2U] = a2_; - input[3U] = a3_; - input[4U] = a4_; -} - -static void -Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input) -{ - uint64_t t0 = input[0U]; - uint64_t t1 = input[1U]; - uint64_t t2 = input[2U]; - uint64_t t3 = input[3U]; - uint64_t t4 = input[4U]; - uint64_t o0 = t1 << (uint32_t)51U | t0; - uint64_t o1 = t2 << (uint32_t)38U | t1 >> (uint32_t)13U; - uint64_t o2 = t3 << (uint32_t)25U | t2 >> (uint32_t)26U; - uint64_t o3 = t4 << (uint32_t)12U | t3 >> (uint32_t)39U; - uint8_t *b0 = output; - uint8_t *b1 = output + (uint32_t)8U; - uint8_t *b2 = output + (uint32_t)16U; - uint8_t *b3 = output + (uint32_t)24U; - store64_le(b0, o0); - store64_le(b1, o1); - store64_le(b2, o2); - store64_le(b3, o3); -} - -static void -Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input) -{ - Hacl_EC_Format_fcontract_first_carry_full(input); - Hacl_EC_Format_fcontract_second_carry_full(input); - Hacl_EC_Format_fcontract_trim(input); - Hacl_EC_Format_fcontract_store(output, input); -} - -static void -Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point) -{ - uint64_t *x = point; - uint64_t *z = point + (uint32_t)5U; - uint64_t buf[10U] = { 0U }; - uint64_t *zmone = buf; - uint64_t *sc = buf + (uint32_t)5U; - Hacl_Bignum_crecip(zmone, z); - Hacl_Bignum_fmul(sc, x, zmone); - Hacl_EC_Format_fcontract(scalar, sc); -} - -void -Hacl_EC_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint) -{ - uint64_t buf0[10U] = { 0U }; - uint64_t *x0 = buf0; - uint64_t *z = buf0 + (uint32_t)5U; - Hacl_EC_Format_fexpand(x0, basepoint); - z[0U] = (uint64_t)1U; - uint64_t *q = buf0; - uint8_t e[32U] = { 0U }; - memcpy(e, secret, (uint32_t)32U * sizeof secret[0U]); - uint8_t e0 = e[0U]; - uint8_t e31 = e[31U]; - uint8_t e01 = e0 & (uint8_t)248U; - uint8_t e311 = e31 & (uint8_t)127U; - uint8_t e312 = e311 | (uint8_t)64U; - e[0U] = e01; - e[31U] = e312; - uint8_t *scalar = e; - uint64_t buf[15U] = { 0U }; - uint64_t *nq = buf; - uint64_t *x = nq; - x[0U] = (uint64_t)1U; - Hacl_EC_Ladder_cmult(nq, scalar, q); - Hacl_EC_Format_scalar_of_point(mypublic, nq); -} - -void -Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint) -{ - 
Hacl_EC_crypto_scalarmult(mypublic, secret, basepoint); -} diff --git a/lib/freebl/verified/Hacl_Curve25519.h b/lib/freebl/verified/Hacl_Curve25519.h deleted file mode 100644 index dd2615ac72..0000000000 --- a/lib/freebl/verified/Hacl_Curve25519.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kremlib.h" -#ifndef __Hacl_Curve25519_H -#define __Hacl_Curve25519_H - -typedef uint64_t Hacl_Bignum_Constants_limb; - -typedef FStar_UInt128_t Hacl_Bignum_Constants_wide; - -typedef uint64_t Hacl_Bignum_Parameters_limb; - -typedef FStar_UInt128_t Hacl_Bignum_Parameters_wide; - -typedef uint32_t Hacl_Bignum_Parameters_ctr; - -typedef uint64_t *Hacl_Bignum_Parameters_felem; - -typedef FStar_UInt128_t *Hacl_Bignum_Parameters_felem_wide; - -typedef void *Hacl_Bignum_Parameters_seqelem; - -typedef void *Hacl_Bignum_Parameters_seqelem_wide; - -typedef FStar_UInt128_t Hacl_Bignum_Wide_t; - -typedef uint64_t Hacl_Bignum_Limb_t; - -extern void Hacl_Bignum_lemma_diff(Prims_int x0, Prims_int x1, Prims_pos x2); - -typedef uint64_t *Hacl_EC_Point_point; - -typedef uint8_t *Hacl_EC_Ladder_SmallLoop_uint8_p; - -typedef uint8_t *Hacl_EC_Ladder_uint8_p; - -typedef uint8_t *Hacl_EC_Format_uint8_p; - -void Hacl_EC_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint); - -typedef uint8_t *Hacl_Curve25519_uint8_p; - -void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint); -#endif diff --git a/lib/freebl/verified/Hacl_Curve25519_51.c b/lib/freebl/verified/Hacl_Curve25519_51.c new file mode 100644 index 0000000000..6cd48027b9 --- /dev/null +++ b/lib/freebl/verified/Hacl_Curve25519_51.c @@ -0,0 +1,933 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "Hacl_Curve25519_51.h" + +inline static void +Hacl_Impl_Curve25519_Field51_fadd(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f20 = f2[0U]; + uint64_t f11 = f1[1U]; + uint64_t f21 = f2[1U]; + uint64_t f12 = f1[2U]; + uint64_t f22 = f2[2U]; + uint64_t f13 = f1[3U]; + uint64_t f23 = f2[3U]; + uint64_t f14 = f1[4U]; + uint64_t f24 = f2[4U]; + out[0U] = f10 + f20; + out[1U] = f11 + f21; + out[2U] = f12 + f22; + out[3U] = f13 + f23; + out[4U] = f14 + f24; +} + +inline static void +Hacl_Impl_Curve25519_Field51_fsub(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f20 = f2[0U]; + uint64_t f11 = f1[1U]; + uint64_t f21 = f2[1U]; + uint64_t f12 = f1[2U]; + uint64_t f22 = f2[2U]; + uint64_t f13 = f1[3U]; + uint64_t f23 = f2[3U]; + uint64_t f14 = f1[4U]; + uint64_t f24 = f2[4U]; + out[0U] = f10 + (uint64_t)0x3fffffffffff68U - f20; + out[1U] = f11 + (uint64_t)0x3ffffffffffff8U - f21; + out[2U] = f12 + (uint64_t)0x3ffffffffffff8U - f22; + out[3U] = f13 + (uint64_t)0x3ffffffffffff8U - f23; + out[4U] = f14 + (uint64_t)0x3ffffffffffff8U - f24; +} + +inline static void +Hacl_Impl_Curve25519_Field51_fmul( + uint64_t *out, + uint64_t *f1, + uint64_t *f2, + FStar_UInt128_uint128 *uu____2959) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + uint64_t f20 = f2[0U]; + uint64_t f21 = f2[1U]; + uint64_t f22 = f2[2U]; + uint64_t f23 = f2[3U]; + uint64_t f24 = f2[4U]; + uint64_t tmp1 = f21 * (uint64_t)19U; + uint64_t tmp2 = f22 * (uint64_t)19U; + uint64_t tmp3 = f23 * (uint64_t)19U; + uint64_t tmp4 = f24 * (uint64_t)19U; + FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); + FStar_UInt128_uint128 o10 = FStar_UInt128_mul_wide(f10, f21); + FStar_UInt128_uint128 o20 = FStar_UInt128_mul_wide(f10, f22); + FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23); + FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24); + FStar_UInt128_uint128 o01 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp4)); + FStar_UInt128_uint128 o11 = FStar_UInt128_add(o10, FStar_UInt128_mul_wide(f11, f20)); + FStar_UInt128_uint128 o21 = FStar_UInt128_add(o20, FStar_UInt128_mul_wide(f11, f21)); + FStar_UInt128_uint128 o31 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22)); + FStar_UInt128_uint128 o41 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23)); + FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f12, tmp3)); + FStar_UInt128_uint128 o12 = FStar_UInt128_add(o11, FStar_UInt128_mul_wide(f12, tmp4)); + FStar_UInt128_uint128 o22 = FStar_UInt128_add(o21, FStar_UInt128_mul_wide(f12, f20)); + FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f12, f21)); + FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f12, f22)); + FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f13, tmp2)); + FStar_UInt128_uint128 o13 = FStar_UInt128_add(o12, FStar_UInt128_mul_wide(f13, tmp3)); + FStar_UInt128_uint128 o23 = FStar_UInt128_add(o22, FStar_UInt128_mul_wide(f13, tmp4)); + FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f13, f20)); + FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f13, f21)); + FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f14, tmp1)); + FStar_UInt128_uint128 o14 = FStar_UInt128_add(o13, FStar_UInt128_mul_wide(f14, tmp2)); + FStar_UInt128_uint128 o24 = 
FStar_UInt128_add(o23, FStar_UInt128_mul_wide(f14, tmp3)); + FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f14, tmp4)); + FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f14, f20)); + FStar_UInt128_uint128 tmp_w0 = o04; + FStar_UInt128_uint128 tmp_w1 = o14; + FStar_UInt128_uint128 tmp_w2 = o24; + FStar_UInt128_uint128 tmp_w3 = o34; + FStar_UInt128_uint128 tmp_w4 = o44; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp01 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp11 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp21 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp31 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp41 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp01 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp11 + c5; + uint64_t o2 = tmp21; + uint64_t o3 = tmp31; + uint64_t o4 = tmp41; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +inline static void +Hacl_Impl_Curve25519_Field51_fmul2( + uint64_t *out, + uint64_t *f1, + uint64_t *f2, + FStar_UInt128_uint128 *uu____4281) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + uint64_t f20 = f2[0U]; + uint64_t f21 = f2[1U]; + uint64_t f22 = f2[2U]; + uint64_t f23 = f2[3U]; + uint64_t f24 = f2[4U]; + uint64_t f30 = f1[5U]; + uint64_t f31 = f1[6U]; + uint64_t f32 = f1[7U]; + uint64_t f33 = f1[8U]; + uint64_t f34 = f1[9U]; + uint64_t f40 = f2[5U]; + uint64_t f41 = f2[6U]; + uint64_t f42 = f2[7U]; + uint64_t f43 = f2[8U]; + uint64_t f44 = f2[9U]; + uint64_t tmp11 = f21 * (uint64_t)19U; + uint64_t tmp12 = f22 * (uint64_t)19U; + uint64_t tmp13 = f23 * (uint64_t)19U; + uint64_t tmp14 = f24 * (uint64_t)19U; + uint64_t tmp21 = f41 * (uint64_t)19U; + uint64_t tmp22 = f42 * (uint64_t)19U; + uint64_t tmp23 = f43 * (uint64_t)19U; + uint64_t tmp24 = f44 * (uint64_t)19U; + FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); + FStar_UInt128_uint128 o15 = FStar_UInt128_mul_wide(f10, f21); + FStar_UInt128_uint128 o25 = FStar_UInt128_mul_wide(f10, f22); + FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23); + FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24); + FStar_UInt128_uint128 o010 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp14)); + 
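+ /* Descriptive note (not in upstream HACL*): partial products that would spill past the
+    fifth 51-bit limb wrap around into the low limbs pre-multiplied by 19, since
+    2^255 = 19 (mod 2^255 - 19); that is why tmp11..tmp14 and tmp21..tmp24 above are the
+    f2/f4 limbs scaled by 19. */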
FStar_UInt128_uint128 o110 = FStar_UInt128_add(o15, FStar_UInt128_mul_wide(f11, f20)); + FStar_UInt128_uint128 o210 = FStar_UInt128_add(o25, FStar_UInt128_mul_wide(f11, f21)); + FStar_UInt128_uint128 o310 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22)); + FStar_UInt128_uint128 o410 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23)); + FStar_UInt128_uint128 o020 = FStar_UInt128_add(o010, FStar_UInt128_mul_wide(f12, tmp13)); + FStar_UInt128_uint128 o120 = FStar_UInt128_add(o110, FStar_UInt128_mul_wide(f12, tmp14)); + FStar_UInt128_uint128 o220 = FStar_UInt128_add(o210, FStar_UInt128_mul_wide(f12, f20)); + FStar_UInt128_uint128 o320 = FStar_UInt128_add(o310, FStar_UInt128_mul_wide(f12, f21)); + FStar_UInt128_uint128 o420 = FStar_UInt128_add(o410, FStar_UInt128_mul_wide(f12, f22)); + FStar_UInt128_uint128 o030 = FStar_UInt128_add(o020, FStar_UInt128_mul_wide(f13, tmp12)); + FStar_UInt128_uint128 o130 = FStar_UInt128_add(o120, FStar_UInt128_mul_wide(f13, tmp13)); + FStar_UInt128_uint128 o230 = FStar_UInt128_add(o220, FStar_UInt128_mul_wide(f13, tmp14)); + FStar_UInt128_uint128 o330 = FStar_UInt128_add(o320, FStar_UInt128_mul_wide(f13, f20)); + FStar_UInt128_uint128 o430 = FStar_UInt128_add(o420, FStar_UInt128_mul_wide(f13, f21)); + FStar_UInt128_uint128 o040 = FStar_UInt128_add(o030, FStar_UInt128_mul_wide(f14, tmp11)); + FStar_UInt128_uint128 o140 = FStar_UInt128_add(o130, FStar_UInt128_mul_wide(f14, tmp12)); + FStar_UInt128_uint128 o240 = FStar_UInt128_add(o230, FStar_UInt128_mul_wide(f14, tmp13)); + FStar_UInt128_uint128 o340 = FStar_UInt128_add(o330, FStar_UInt128_mul_wide(f14, tmp14)); + FStar_UInt128_uint128 o440 = FStar_UInt128_add(o430, FStar_UInt128_mul_wide(f14, f20)); + FStar_UInt128_uint128 tmp_w10 = o040; + FStar_UInt128_uint128 tmp_w11 = o140; + FStar_UInt128_uint128 tmp_w12 = o240; + FStar_UInt128_uint128 tmp_w13 = o340; + FStar_UInt128_uint128 tmp_w14 = o440; + FStar_UInt128_uint128 o0 = FStar_UInt128_mul_wide(f30, f40); + FStar_UInt128_uint128 o1 = FStar_UInt128_mul_wide(f30, f41); + FStar_UInt128_uint128 o2 = FStar_UInt128_mul_wide(f30, f42); + FStar_UInt128_uint128 o3 = FStar_UInt128_mul_wide(f30, f43); + FStar_UInt128_uint128 o4 = FStar_UInt128_mul_wide(f30, f44); + FStar_UInt128_uint128 o01 = FStar_UInt128_add(o0, FStar_UInt128_mul_wide(f31, tmp24)); + FStar_UInt128_uint128 o111 = FStar_UInt128_add(o1, FStar_UInt128_mul_wide(f31, f40)); + FStar_UInt128_uint128 o211 = FStar_UInt128_add(o2, FStar_UInt128_mul_wide(f31, f41)); + FStar_UInt128_uint128 o31 = FStar_UInt128_add(o3, FStar_UInt128_mul_wide(f31, f42)); + FStar_UInt128_uint128 o41 = FStar_UInt128_add(o4, FStar_UInt128_mul_wide(f31, f43)); + FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f32, tmp23)); + FStar_UInt128_uint128 o121 = FStar_UInt128_add(o111, FStar_UInt128_mul_wide(f32, tmp24)); + FStar_UInt128_uint128 o221 = FStar_UInt128_add(o211, FStar_UInt128_mul_wide(f32, f40)); + FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f32, f41)); + FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f32, f42)); + FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f33, tmp22)); + FStar_UInt128_uint128 o131 = FStar_UInt128_add(o121, FStar_UInt128_mul_wide(f33, tmp23)); + FStar_UInt128_uint128 o231 = FStar_UInt128_add(o221, FStar_UInt128_mul_wide(f33, tmp24)); + FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f33, f40)); + FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, 
FStar_UInt128_mul_wide(f33, f41)); + FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f34, tmp21)); + FStar_UInt128_uint128 o141 = FStar_UInt128_add(o131, FStar_UInt128_mul_wide(f34, tmp22)); + FStar_UInt128_uint128 o241 = FStar_UInt128_add(o231, FStar_UInt128_mul_wide(f34, tmp23)); + FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f34, tmp24)); + FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f34, f40)); + FStar_UInt128_uint128 tmp_w20 = o04; + FStar_UInt128_uint128 tmp_w21 = o141; + FStar_UInt128_uint128 tmp_w22 = o241; + FStar_UInt128_uint128 tmp_w23 = o34; + FStar_UInt128_uint128 tmp_w24 = o44; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w10, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w11, FStar_UInt128_uint64_to_uint128(c00)); + uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w12, FStar_UInt128_uint64_to_uint128(c10)); + uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w13, FStar_UInt128_uint64_to_uint128(c20)); + uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w14, FStar_UInt128_uint64_to_uint128(c30)); + uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c50 = l_4 >> (uint32_t)51U; + uint64_t o100 = tmp0_; + uint64_t o112 = tmp10 + c50; + uint64_t o122 = tmp20; + uint64_t o132 = tmp30; + uint64_t o142 = tmp40; + FStar_UInt128_uint128 + l_5 = FStar_UInt128_add(tmp_w20, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); + FStar_UInt128_uint128 l_6 = FStar_UInt128_add(tmp_w21, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U)); + FStar_UInt128_uint128 l_7 = FStar_UInt128_add(tmp_w22, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); + FStar_UInt128_uint128 l_8 = FStar_UInt128_add(tmp_w23, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); + FStar_UInt128_uint128 l_9 = FStar_UInt128_add(tmp_w24, 
FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); + uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_10 >> (uint32_t)51U; + uint64_t o200 = tmp0_0; + uint64_t o212 = tmp1 + c5; + uint64_t o222 = tmp2; + uint64_t o232 = tmp3; + uint64_t o242 = tmp4; + uint64_t o10 = o100; + uint64_t o11 = o112; + uint64_t o12 = o122; + uint64_t o13 = o132; + uint64_t o14 = o142; + uint64_t o20 = o200; + uint64_t o21 = o212; + uint64_t o22 = o222; + uint64_t o23 = o232; + uint64_t o24 = o242; + out[0U] = o10; + out[1U] = o11; + out[2U] = o12; + out[3U] = o13; + out[4U] = o14; + out[5U] = o20; + out[6U] = o21; + out[7U] = o22; + out[8U] = o23; + out[9U] = o24; +} + +inline static void +Hacl_Impl_Curve25519_Field51_fmul1(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + FStar_UInt128_uint128 tmp_w0 = FStar_UInt128_mul_wide(f2, f10); + FStar_UInt128_uint128 tmp_w1 = FStar_UInt128_mul_wide(f2, f11); + FStar_UInt128_uint128 tmp_w2 = FStar_UInt128_mul_wide(f2, f12); + FStar_UInt128_uint128 tmp_w3 = FStar_UInt128_mul_wide(f2, f13); + FStar_UInt128_uint128 tmp_w4 = FStar_UInt128_mul_wide(f2, f14); + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp1 + c5; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +inline static void +Hacl_Impl_Curve25519_Field51_fsqr( + uint64_t *out, + uint64_t *f, + FStar_UInt128_uint128 *uu____6941) +{ + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t f4 = f[4U]; + uint64_t d0 = (uint64_t)2U * f0; + uint64_t d1 = (uint64_t)2U * f1; + uint64_t d2 = (uint64_t)38U * f2; + uint64_t d3 = (uint64_t)19U * f3; + 
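+ /* Descriptive note (not in upstream HACL*): the dX operands fold the doubling of the
+    symmetric cross terms and the mod-p factor 19 into single scalars, so the squaring
+    below needs only 15 wide multiplications instead of 25. */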
uint64_t d419 = (uint64_t)19U * f4; + uint64_t d4 = (uint64_t)2U * d419; + FStar_UInt128_uint128 + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f0, f0), + FStar_UInt128_mul_wide(d4, f1)), + FStar_UInt128_mul_wide(d2, f3)); + FStar_UInt128_uint128 + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f1), + FStar_UInt128_mul_wide(d4, f2)), + FStar_UInt128_mul_wide(d3, f3)); + FStar_UInt128_uint128 + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f2), + FStar_UInt128_mul_wide(f1, f1)), + FStar_UInt128_mul_wide(d4, f3)); + FStar_UInt128_uint128 + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f3), + FStar_UInt128_mul_wide(d1, f2)), + FStar_UInt128_mul_wide(f4, d419)); + FStar_UInt128_uint128 + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f4), + FStar_UInt128_mul_wide(d1, f3)), + FStar_UInt128_mul_wide(f2, f2)); + FStar_UInt128_uint128 o00 = s0; + FStar_UInt128_uint128 o10 = s1; + FStar_UInt128_uint128 o20 = s2; + FStar_UInt128_uint128 o30 = s3; + FStar_UInt128_uint128 o40 = s4; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(o00, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o10, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o20, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o30, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o40, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp1 + c5; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +inline static void +Hacl_Impl_Curve25519_Field51_fsqr2( + uint64_t *out, + uint64_t *f, + FStar_UInt128_uint128 *uu____7692) +{ + uint64_t f10 = f[0U]; + uint64_t f11 = f[1U]; + uint64_t f12 = f[2U]; + uint64_t f13 = f[3U]; + uint64_t f14 = f[4U]; + uint64_t f20 = f[5U]; + uint64_t f21 = f[6U]; + uint64_t f22 = f[7U]; + uint64_t f23 = f[8U]; + uint64_t f24 = f[9U]; + uint64_t d00 = (uint64_t)2U * f10; + uint64_t d10 = (uint64_t)2U * f11; + uint64_t d20 = (uint64_t)38U * f12; + uint64_t d30 = (uint64_t)19U * f13; + uint64_t d4190 = (uint64_t)19U * f14; + uint64_t d40 = (uint64_t)2U * d4190; + FStar_UInt128_uint128 + s00 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f10, f10), + FStar_UInt128_mul_wide(d40, f11)), + 
FStar_UInt128_mul_wide(d20, f13)); + FStar_UInt128_uint128 + s10 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f11), + FStar_UInt128_mul_wide(d40, f12)), + FStar_UInt128_mul_wide(d30, f13)); + FStar_UInt128_uint128 + s20 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f12), + FStar_UInt128_mul_wide(f11, f11)), + FStar_UInt128_mul_wide(d40, f13)); + FStar_UInt128_uint128 + s30 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f13), + FStar_UInt128_mul_wide(d10, f12)), + FStar_UInt128_mul_wide(f14, d4190)); + FStar_UInt128_uint128 + s40 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f14), + FStar_UInt128_mul_wide(d10, f13)), + FStar_UInt128_mul_wide(f12, f12)); + FStar_UInt128_uint128 o100 = s00; + FStar_UInt128_uint128 o110 = s10; + FStar_UInt128_uint128 o120 = s20; + FStar_UInt128_uint128 o130 = s30; + FStar_UInt128_uint128 o140 = s40; + uint64_t d0 = (uint64_t)2U * f20; + uint64_t d1 = (uint64_t)2U * f21; + uint64_t d2 = (uint64_t)38U * f22; + uint64_t d3 = (uint64_t)19U * f23; + uint64_t d419 = (uint64_t)19U * f24; + uint64_t d4 = (uint64_t)2U * d419; + FStar_UInt128_uint128 + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f20, f20), + FStar_UInt128_mul_wide(d4, f21)), + FStar_UInt128_mul_wide(d2, f23)); + FStar_UInt128_uint128 + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f21), + FStar_UInt128_mul_wide(d4, f22)), + FStar_UInt128_mul_wide(d3, f23)); + FStar_UInt128_uint128 + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f22), + FStar_UInt128_mul_wide(f21, f21)), + FStar_UInt128_mul_wide(d4, f23)); + FStar_UInt128_uint128 + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f23), + FStar_UInt128_mul_wide(d1, f22)), + FStar_UInt128_mul_wide(f24, d419)); + FStar_UInt128_uint128 + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f24), + FStar_UInt128_mul_wide(d1, f23)), + FStar_UInt128_mul_wide(f22, f22)); + FStar_UInt128_uint128 o200 = s0; + FStar_UInt128_uint128 o210 = s1; + FStar_UInt128_uint128 o220 = s2; + FStar_UInt128_uint128 o230 = s3; + FStar_UInt128_uint128 o240 = s4; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(o100, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o110, FStar_UInt128_uint64_to_uint128(c00)); + uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o120, FStar_UInt128_uint64_to_uint128(c10)); + uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o130, FStar_UInt128_uint64_to_uint128(c20)); + uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o140, FStar_UInt128_uint64_to_uint128(c30)); + uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c40 = 
FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c50 = l_4 >> (uint32_t)51U; + uint64_t o101 = tmp0_; + uint64_t o111 = tmp10 + c50; + uint64_t o121 = tmp20; + uint64_t o131 = tmp30; + uint64_t o141 = tmp40; + FStar_UInt128_uint128 + l_5 = FStar_UInt128_add(o200, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); + FStar_UInt128_uint128 l_6 = FStar_UInt128_add(o210, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U)); + FStar_UInt128_uint128 l_7 = FStar_UInt128_add(o220, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); + FStar_UInt128_uint128 l_8 = FStar_UInt128_add(o230, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); + FStar_UInt128_uint128 l_9 = FStar_UInt128_add(o240, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); + uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_10 >> (uint32_t)51U; + uint64_t o201 = tmp0_0; + uint64_t o211 = tmp1 + c5; + uint64_t o221 = tmp2; + uint64_t o231 = tmp3; + uint64_t o241 = tmp4; + uint64_t o10 = o101; + uint64_t o11 = o111; + uint64_t o12 = o121; + uint64_t o13 = o131; + uint64_t o14 = o141; + uint64_t o20 = o201; + uint64_t o21 = o211; + uint64_t o22 = o221; + uint64_t o23 = o231; + uint64_t o24 = o241; + out[0U] = o10; + out[1U] = o11; + out[2U] = o12; + out[3U] = o13; + out[4U] = o14; + out[5U] = o20; + out[6U] = o21; + out[7U] = o22; + out[8U] = o23; + out[9U] = o24; +} + +static void +Hacl_Impl_Curve25519_Field51_store_felem(uint64_t *u64s, uint64_t *f) +{ + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t f4 = f[4U]; + uint64_t l_ = f0 + (uint64_t)0U; + uint64_t tmp0 = l_ & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = l_ >> (uint32_t)51U; + uint64_t l_0 = f1 + c0; + uint64_t tmp1 = l_0 & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = l_0 >> (uint32_t)51U; + uint64_t l_1 = f2 + c1; + uint64_t tmp2 = l_1 & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = l_1 >> (uint32_t)51U; + uint64_t l_2 = f3 + c2; + uint64_t tmp3 = l_2 & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = l_2 >> (uint32_t)51U; + uint64_t l_3 = f4 + c3; + uint64_t tmp4 = l_3 & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = l_3 >> (uint32_t)51U; + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t f01 = tmp0_; + uint64_t f11 = tmp1 + c5; + uint64_t f21 = tmp2; + uint64_t f31 = tmp3; + uint64_t f41 = tmp4; + uint64_t m0 = FStar_UInt64_gte_mask(f01, (uint64_t)0x7ffffffffffedU); + uint64_t m1 = 
FStar_UInt64_eq_mask(f11, (uint64_t)0x7ffffffffffffU); + uint64_t m2 = FStar_UInt64_eq_mask(f21, (uint64_t)0x7ffffffffffffU); + uint64_t m3 = FStar_UInt64_eq_mask(f31, (uint64_t)0x7ffffffffffffU); + uint64_t m4 = FStar_UInt64_eq_mask(f41, (uint64_t)0x7ffffffffffffU); + uint64_t mask = (((m0 & m1) & m2) & m3) & m4; + uint64_t f0_ = f01 - (mask & (uint64_t)0x7ffffffffffedU); + uint64_t f1_ = f11 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f2_ = f21 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f3_ = f31 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f4_ = f41 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f02 = f0_; + uint64_t f12 = f1_; + uint64_t f22 = f2_; + uint64_t f32 = f3_; + uint64_t f42 = f4_; + uint64_t o00 = f02 | f12 << (uint32_t)51U; + uint64_t o10 = f12 >> (uint32_t)13U | f22 << (uint32_t)38U; + uint64_t o20 = f22 >> (uint32_t)26U | f32 << (uint32_t)25U; + uint64_t o30 = f32 >> (uint32_t)39U | f42 << (uint32_t)12U; + uint64_t o0 = o00; + uint64_t o1 = o10; + uint64_t o2 = o20; + uint64_t o3 = o30; + u64s[0U] = o0; + u64s[1U] = o1; + u64s[2U] = o2; + u64s[3U] = o3; +} + +inline static void +Hacl_Impl_Curve25519_Field51_cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ + uint64_t mask = (uint64_t)0U - bit; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i = i + (uint32_t)1U) { + uint64_t dummy = mask & (p1[i] ^ p2[i]); + p1[i] = p1[i] ^ dummy; + p2[i] = p2[i] ^ dummy; + } +} + +static uint8_t + Hacl_Curve25519_51_g25519[32U] = + { + (uint8_t)9U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, + (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, + (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, + (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, + (uint8_t)0U, (uint8_t)0U, (uint8_t)0U, (uint8_t)0U + }; + +static void +Hacl_Curve25519_51_point_add_and_double( + uint64_t *q, + uint64_t *p01_tmp1, + FStar_UInt128_uint128 *tmp2) +{ + uint64_t *nq = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + (uint32_t)10U; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; + uint64_t *x1 = q; + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)5U; + uint64_t *z3 = nq_p1 + (uint32_t)5U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)5U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); + Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + uint64_t *x3 = nq_p1; + uint64_t *z31 = nq_p1 + (uint32_t)5U; + uint64_t *d0 = dc; + uint64_t *c0 = dc + (uint32_t)5U; + Hacl_Impl_Curve25519_Field51_fadd(c0, x3, z31); + Hacl_Impl_Curve25519_Field51_fsub(d0, x3, z31); + Hacl_Impl_Curve25519_Field51_fmul2(dc, dc, ab, tmp2); + Hacl_Impl_Curve25519_Field51_fadd(x3, d0, c0); + Hacl_Impl_Curve25519_Field51_fsub(z31, d0, c0); + uint64_t *a1 = tmp1; + uint64_t *b1 = tmp1 + (uint32_t)5U; + uint64_t *d = tmp1 + (uint32_t)10U; + uint64_t *c = tmp1 + (uint32_t)15U; + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab1, tmp2); + Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + a1[4U] = c[4U]; + Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + Hacl_Impl_Curve25519_Field51_fmul1(b1, c, (uint64_t)121665U); + Hacl_Impl_Curve25519_Field51_fadd(b1, b1, d); + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); + Hacl_Impl_Curve25519_Field51_fmul(z3, z3, x1, tmp2); +} + 
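+/* Descriptive note (not in upstream HACL*): the doubling below operates on a projective
+ * (X:Z) Montgomery-curve point; the constant 121665 is (A - 2) / 4 for Curve25519's
+ * coefficient A = 486662. */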
+static void +Hacl_Curve25519_51_point_double(uint64_t *nq, uint64_t *tmp1, FStar_UInt128_uint128 *tmp2) +{ + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)5U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)5U; + uint64_t *d = tmp1 + (uint32_t)10U; + uint64_t *c = tmp1 + (uint32_t)15U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); + Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + Hacl_Impl_Curve25519_Field51_fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + a[4U] = c[4U]; + Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + Hacl_Impl_Curve25519_Field51_fmul1(b, c, (uint64_t)121665U); + Hacl_Impl_Curve25519_Field51_fadd(b, b, d); + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc, ab, tmp2); +} + +static void +Hacl_Curve25519_51_montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init1) +{ + FStar_UInt128_uint128 tmp2[10U]; + for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i) + tmp2[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + uint64_t p01_tmp1_swap[41U] = { 0U }; + uint64_t *p0 = p01_tmp1_swap; + uint64_t *p01 = p01_tmp1_swap; + uint64_t *p03 = p01; + uint64_t *p11 = p01 + (uint32_t)10U; + memcpy(p11, init1, (uint32_t)10U * sizeof init1[0U]); + uint64_t *x0 = p03; + uint64_t *z0 = p03 + (uint32_t)5U; + x0[0U] = (uint64_t)1U; + x0[1U] = (uint64_t)0U; + x0[2U] = (uint64_t)0U; + x0[3U] = (uint64_t)0U; + x0[4U] = (uint64_t)0U; + z0[0U] = (uint64_t)0U; + z0[1U] = (uint64_t)0U; + z0[2U] = (uint64_t)0U; + z0[3U] = (uint64_t)0U; + z0[4U] = (uint64_t)0U; + uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *p01_tmp11 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp1_swap; + uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)10U; + uint64_t *swap1 = p01_tmp1_swap + (uint32_t)40U; + Hacl_Impl_Curve25519_Field51_cswap2((uint64_t)1U, nq1, nq_p11); + Hacl_Curve25519_51_point_add_and_double(init1, p01_tmp11, tmp2); + swap1[0U] = (uint64_t)1U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i = i + (uint32_t)1U) { + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *swap2 = p01_tmp1_swap + (uint32_t)40U; + uint64_t *nq2 = p01_tmp12; + uint64_t *nq_p12 = p01_tmp12 + (uint32_t)10U; + uint64_t + bit = + (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); + uint64_t sw = swap2[0U] ^ bit; + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq2, nq_p12); + Hacl_Curve25519_51_point_add_and_double(init1, p01_tmp12, tmp2); + swap2[0U] = bit; + } + uint64_t sw = swap1[0U]; + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); + uint64_t *nq10 = p01_tmp1; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; + Hacl_Curve25519_51_point_double(nq10, tmp1, tmp2); + Hacl_Curve25519_51_point_double(nq10, tmp1, tmp2); + Hacl_Curve25519_51_point_double(nq10, tmp1, tmp2); + memcpy(out, p0, (uint32_t)10U * sizeof p0[0U]); +} + +static void +Hacl_Curve25519_51_fsquare_times( + uint64_t *o, + uint64_t *inp, + FStar_UInt128_uint128 *tmp, + uint32_t n1) +{ + Hacl_Impl_Curve25519_Field51_fsqr(o, inp, tmp); + for (uint32_t i = (uint32_t)0U; i < n1 - (uint32_t)1U; i = i + (uint32_t)1U) { + Hacl_Impl_Curve25519_Field51_fsqr(o, o, tmp); + } +} + +static void +Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp) +{ + uint64_t t1[20U] = { 0U }; + uint64_t *a = t1; + uint64_t *b = t1 + (uint32_t)5U; + uint64_t *c = t1 + (uint32_t)10U; + uint64_t *t00 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp1 = tmp; + Hacl_Curve25519_51_fsquare_times(a, i, tmp1, (uint32_t)1U); 
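+ /* Descriptive note (not in upstream HACL*): the fixed square-and-multiply chain that
+    follows computes i^(2^255 - 21), i.e. i^(p - 2) for p = 2^255 - 19, which is the
+    field inverse by Fermat's little theorem. */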
+ Hacl_Curve25519_51_fsquare_times(t00, a, tmp1, (uint32_t)2U); + Hacl_Impl_Curve25519_Field51_fmul(b, t00, i, tmp); + Hacl_Impl_Curve25519_Field51_fmul(a, b, a, tmp); + Hacl_Curve25519_51_fsquare_times(t00, a, tmp1, (uint32_t)1U); + Hacl_Impl_Curve25519_Field51_fmul(b, t00, b, tmp); + Hacl_Curve25519_51_fsquare_times(t00, b, tmp1, (uint32_t)5U); + Hacl_Impl_Curve25519_Field51_fmul(b, t00, b, tmp); + Hacl_Curve25519_51_fsquare_times(t00, b, tmp1, (uint32_t)10U); + Hacl_Impl_Curve25519_Field51_fmul(c, t00, b, tmp); + Hacl_Curve25519_51_fsquare_times(t00, c, tmp1, (uint32_t)20U); + Hacl_Impl_Curve25519_Field51_fmul(t00, t00, c, tmp); + Hacl_Curve25519_51_fsquare_times(t00, t00, tmp1, (uint32_t)10U); + Hacl_Impl_Curve25519_Field51_fmul(b, t00, b, tmp); + Hacl_Curve25519_51_fsquare_times(t00, b, tmp1, (uint32_t)50U); + Hacl_Impl_Curve25519_Field51_fmul(c, t00, b, tmp); + Hacl_Curve25519_51_fsquare_times(t00, c, tmp1, (uint32_t)100U); + Hacl_Impl_Curve25519_Field51_fmul(t00, t00, c, tmp); + Hacl_Curve25519_51_fsquare_times(t00, t00, tmp1, (uint32_t)50U); + Hacl_Impl_Curve25519_Field51_fmul(t00, t00, b, tmp); + Hacl_Curve25519_51_fsquare_times(t00, t00, tmp1, (uint32_t)5U); + uint64_t *a0 = t1; + uint64_t *t0 = t1 + (uint32_t)15U; + Hacl_Impl_Curve25519_Field51_fmul(o, t0, a0, tmp); +} + +static void +Hacl_Curve25519_51_encode_point(uint8_t *o, uint64_t *i) +{ + uint64_t *x = i; + uint64_t *z = i + (uint32_t)5U; + uint64_t tmp[5U] = { 0U }; + uint64_t u64s[4U] = { 0U }; + FStar_UInt128_uint128 tmp_w[10U]; + for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i) + tmp_w[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + Hacl_Curve25519_51_finv(tmp, z, tmp_w); + Hacl_Impl_Curve25519_Field51_fmul(tmp, tmp, x, tmp_w); + Hacl_Impl_Curve25519_Field51_store_felem(u64s, tmp); + for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0 = i0 + (uint32_t)1U) { + store64_le(o + i0 * (uint32_t)8U, u64s[i0]); + } +} + +void +Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint64_t init1[10U] = { 0U }; + uint64_t tmp[4U] = { 0U }; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint64_t *os = tmp; + uint8_t *bj = pub + i * (uint32_t)8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x; + } + uint64_t tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; + uint64_t *x = init1; + uint64_t *z = init1 + (uint32_t)5U; + z[0U] = (uint64_t)1U; + z[1U] = (uint64_t)0U; + z[2U] = (uint64_t)0U; + z[3U] = (uint64_t)0U; + z[4U] = (uint64_t)0U; + uint64_t f0l = tmp[0U] & (uint64_t)0x7ffffffffffffU; + uint64_t f0h = tmp[0U] >> (uint32_t)51U; + uint64_t f1l = (tmp[1U] & (uint64_t)0x3fffffffffU) << (uint32_t)13U; + uint64_t f1h = tmp[1U] >> (uint32_t)38U; + uint64_t f2l = (tmp[2U] & (uint64_t)0x1ffffffU) << (uint32_t)26U; + uint64_t f2h = tmp[2U] >> (uint32_t)25U; + uint64_t f3l = (tmp[3U] & (uint64_t)0xfffU) << (uint32_t)39U; + uint64_t f3h = tmp[3U] >> (uint32_t)12U; + x[0U] = f0l; + x[1U] = f0h | f1l; + x[2U] = f1h | f2l; + x[3U] = f2h | f3l; + x[4U] = f3h; + Hacl_Curve25519_51_montgomery_ladder(init1, priv, init1); + Hacl_Curve25519_51_encode_point(out, init1); +} + +void +Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv) +{ + uint8_t basepoint[32U] = { 0U }; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i = i + (uint32_t)1U) { + uint8_t *os = basepoint; + uint8_t x = Hacl_Curve25519_51_g25519[i]; + os[i] = x; + } + Hacl_Curve25519_51_scalarmult(pub, priv, basepoint); +} + +bool 
+Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+ uint8_t zeros1[32U] = { 0U };
+ Hacl_Curve25519_51_scalarmult(out, priv, pub);
+ uint8_t res = (uint8_t)255U;
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i = i + (uint32_t)1U) {
+ uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros1[i]);
+ res = uu____0 & res;
+ }
+ uint8_t z = res;
+ bool r = z == (uint8_t)255U;
+ return !r;
+}
diff --git a/lib/freebl/verified/Hacl_Curve25519_51.h b/lib/freebl/verified/Hacl_Curve25519_51.h
new file mode 100644
index 0000000000..05050739cf
--- /dev/null
+++ b/lib/freebl/verified/Hacl_Curve25519_51.h
@@ -0,0 +1,41 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Curve25519_51_H
+#define __Hacl_Curve25519_51_H
+
+#include "Hacl_Kremlib.h"
+
+void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub);
+
+void Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv);
+
+bool Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub);
+
+#define __Hacl_Curve25519_51_H_DEFINED
+#endif
diff --git a/lib/freebl/verified/Hacl_Kremlib.h b/lib/freebl/verified/Hacl_Kremlib.h
new file mode 100644
index 0000000000..675fe4417e
--- /dev/null
+++ b/lib/freebl/verified/Hacl_Kremlib.h
@@ -0,0 +1,51 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Kremlib_H
+#define __Hacl_Kremlib_H
+
+inline static uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b);
+
+inline static uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b);
+
+inline static uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b);
+
+inline static FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s);
+
+inline static FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a);
+
+inline static uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a);
+
+inline static FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y);
+
+#define __Hacl_Kremlib_H_DEFINED
+#endif
diff --git a/lib/freebl/verified/Hacl_Poly1305_128.c b/lib/freebl/verified/Hacl_Poly1305_128.c
new file mode 100644
index 0000000000..2d1af23761
--- /dev/null
+++ b/lib/freebl/verified/Hacl_Poly1305_128.c
@@ -0,0 +1,1678 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#include "Hacl_Poly1305_128.h" + +void +Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b) +{ + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(b); + Lib_IntVector_Intrinsics_vec128 + b2 = Lib_IntVector_Intrinsics_vec128_load_le(b + (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); + Lib_IntVector_Intrinsics_vec128 hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + f00 = + Lib_IntVector_Intrinsics_vec128_and(lo, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f10 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f02 = f00; + Lib_IntVector_Intrinsics_vec128 f12 = f10; + Lib_IntVector_Intrinsics_vec128 f22 = f20; + Lib_IntVector_Intrinsics_vec128 f32 = f30; + Lib_IntVector_Intrinsics_vec128 f42 = f40; + e[0U] = f02; + e[1U] = f12; + e[2U] = f22; + e[3U] = f32; + e[4U] = f42; + uint64_t b10 = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b10); + Lib_IntVector_Intrinsics_vec128 f43 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f43, mask); + Lib_IntVector_Intrinsics_vec128 acc0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 acc1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 acc2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 acc3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 acc4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 e0 = e[0U]; + Lib_IntVector_Intrinsics_vec128 e1 = e[1U]; + Lib_IntVector_Intrinsics_vec128 e2 = e[2U]; + Lib_IntVector_Intrinsics_vec128 e3 = e[3U]; + Lib_IntVector_Intrinsics_vec128 e4 = e[4U]; + Lib_IntVector_Intrinsics_vec128 + f0 = Lib_IntVector_Intrinsics_vec128_insert64(acc0, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 + f1 = Lib_IntVector_Intrinsics_vec128_insert64(acc1, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 + f2 = Lib_IntVector_Intrinsics_vec128_insert64(acc2, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 + f3 = Lib_IntVector_Intrinsics_vec128_insert64(acc3, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 + f4 = Lib_IntVector_Intrinsics_vec128_insert64(acc4, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 f01 = Lib_IntVector_Intrinsics_vec128_add64(f0, e0); + Lib_IntVector_Intrinsics_vec128 f11 = Lib_IntVector_Intrinsics_vec128_add64(f1, e1); + Lib_IntVector_Intrinsics_vec128 f21 = Lib_IntVector_Intrinsics_vec128_add64(f2, e2); + Lib_IntVector_Intrinsics_vec128 f31 = 
Lib_IntVector_Intrinsics_vec128_add64(f3, e3); + Lib_IntVector_Intrinsics_vec128 f41 = Lib_IntVector_Intrinsics_vec128_add64(f4, e4); + Lib_IntVector_Intrinsics_vec128 acc01 = f01; + Lib_IntVector_Intrinsics_vec128 acc11 = f11; + Lib_IntVector_Intrinsics_vec128 acc21 = f21; + Lib_IntVector_Intrinsics_vec128 acc31 = f31; + Lib_IntVector_Intrinsics_vec128 acc41 = f41; + acc[0U] = acc01; + acc[1U] = acc11; + acc[2U] = acc21; + acc[3U] = acc31; + acc[4U] = acc41; +} + +void +Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize( + Lib_IntVector_Intrinsics_vec128 *out, + Lib_IntVector_Intrinsics_vec128 *p) +{ + Lib_IntVector_Intrinsics_vec128 *r = p; + Lib_IntVector_Intrinsics_vec128 *r2 = p + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec128 a0 = out[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = out[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = out[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = out[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = out[4U]; + Lib_IntVector_Intrinsics_vec128 r10 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r12 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r13 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r14 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r20 = r2[0U]; + Lib_IntVector_Intrinsics_vec128 r21 = r2[1U]; + Lib_IntVector_Intrinsics_vec128 r22 = r2[2U]; + Lib_IntVector_Intrinsics_vec128 r23 = r2[3U]; + Lib_IntVector_Intrinsics_vec128 r24 = r2[4U]; + Lib_IntVector_Intrinsics_vec128 + r201 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r20, r10); + Lib_IntVector_Intrinsics_vec128 + r211 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r21, r11); + Lib_IntVector_Intrinsics_vec128 + r221 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r22, r12); + Lib_IntVector_Intrinsics_vec128 + r231 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r23, r13); + Lib_IntVector_Intrinsics_vec128 + r241 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r24, r14); + Lib_IntVector_Intrinsics_vec128 + r251 = Lib_IntVector_Intrinsics_vec128_smul64(r211, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 + r252 = Lib_IntVector_Intrinsics_vec128_smul64(r221, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 + r253 = Lib_IntVector_Intrinsics_vec128_smul64(r231, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 + r254 = Lib_IntVector_Intrinsics_vec128_smul64(r241, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_mul64(r201, a0); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_mul64(r211, a0); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_mul64(r221, a0); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_mul64(r231, a0); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_mul64(r241, a0); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r254, a1)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r201, a1)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r211, a1)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r221, a1)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r231, a1)); + Lib_IntVector_Intrinsics_vec128 + a03 = + 
Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r253, a2)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r254, a2)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r201, a2)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r211, a2)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r221, a2)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r252, a3)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r253, a3)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r254, a3)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r201, a3)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r211, a3)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r251, a4)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r252, a4)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r253, a4)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r254, a4)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r201, a4)); + Lib_IntVector_Intrinsics_vec128 t0 = a05; + Lib_IntVector_Intrinsics_vec128 t1 = a15; + Lib_IntVector_Intrinsics_vec128 t2 = a25; + Lib_IntVector_Intrinsics_vec128 t3 = a35; + Lib_IntVector_Intrinsics_vec128 t4 = a45; + Lib_IntVector_Intrinsics_vec128 + l0 = Lib_IntVector_Intrinsics_vec128_add64(t0, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp00 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c00 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t1, c00); + Lib_IntVector_Intrinsics_vec128 + tmp10 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c10 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t2, c10); + Lib_IntVector_Intrinsics_vec128 + tmp20 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c20 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t3, c20); + Lib_IntVector_Intrinsics_vec128 + tmp30 = + Lib_IntVector_Intrinsics_vec128_and(l3, + 
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c30 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l4 = Lib_IntVector_Intrinsics_vec128_add64(t4, c30); + Lib_IntVector_Intrinsics_vec128 + tmp40 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c40 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l5 = + Lib_IntVector_Intrinsics_vec128_add64(tmp00, + Lib_IntVector_Intrinsics_vec128_smul64(c40, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l5, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c50 = Lib_IntVector_Intrinsics_vec128_shift_right64(l5, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp10, c50); + Lib_IntVector_Intrinsics_vec128 o00 = tmp01; + Lib_IntVector_Intrinsics_vec128 o10 = tmp11; + Lib_IntVector_Intrinsics_vec128 o20 = tmp20; + Lib_IntVector_Intrinsics_vec128 o30 = tmp30; + Lib_IntVector_Intrinsics_vec128 o40 = tmp40; + Lib_IntVector_Intrinsics_vec128 + o01 = + Lib_IntVector_Intrinsics_vec128_add64(o00, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o00, o00)); + Lib_IntVector_Intrinsics_vec128 + o11 = + Lib_IntVector_Intrinsics_vec128_add64(o10, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o10, o10)); + Lib_IntVector_Intrinsics_vec128 + o21 = + Lib_IntVector_Intrinsics_vec128_add64(o20, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o20, o20)); + Lib_IntVector_Intrinsics_vec128 + o31 = + Lib_IntVector_Intrinsics_vec128_add64(o30, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o30, o30)); + Lib_IntVector_Intrinsics_vec128 + o41 = + Lib_IntVector_Intrinsics_vec128_add64(o40, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o40, o40)); + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(o01, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l6 = Lib_IntVector_Intrinsics_vec128_add64(o11, c0); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l6, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l6, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l7 = Lib_IntVector_Intrinsics_vec128_add64(o21, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l7, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l7, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l8 = Lib_IntVector_Intrinsics_vec128_add64(o31, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l8, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l8, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l9 = Lib_IntVector_Intrinsics_vec128_add64(o41, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + 
Lib_IntVector_Intrinsics_vec128_and(l9, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l9, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l10 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp0_ = + Lib_IntVector_Intrinsics_vec128_and(l10, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l10, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 o0 = tmp0_; + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +uint32_t Hacl_Poly1305_128_blocklen = (uint32_t)16U; + +void +Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key) +{ + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + uint8_t *kr = key; + acc[0U] = Lib_IntVector_Intrinsics_vec128_zero; + acc[1U] = Lib_IntVector_Intrinsics_vec128_zero; + acc[2U] = Lib_IntVector_Intrinsics_vec128_zero; + acc[3U] = Lib_IntVector_Intrinsics_vec128_zero; + acc[4U] = Lib_IntVector_Intrinsics_vec128_zero; + uint64_t u0 = load64_le(kr); + uint64_t lo = u0; + uint64_t u = load64_le(kr + (uint32_t)8U); + uint64_t hi = u; + uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; + uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; + uint64_t lo1 = lo & mask0; + uint64_t hi1 = hi & mask1; + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec128 *rn_5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec128 r_vec0 = Lib_IntVector_Intrinsics_vec128_load64(lo1); + Lib_IntVector_Intrinsics_vec128 r_vec1 = Lib_IntVector_Intrinsics_vec128_load64(hi1); + Lib_IntVector_Intrinsics_vec128 + f00 = + Lib_IntVector_Intrinsics_vec128_and(r_vec0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(r_vec1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + r[0U] = f0; + r[1U] = f1; + r[2U] = f2; + r[3U] = f3; + r[4U] = f4; + Lib_IntVector_Intrinsics_vec128 f200 = r[0U]; + 
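/* Note: the low 16 key bytes were clamped above with the standard Poly1305
+ * masks (mask0/mask1) and split into five 26-bit limbs in r[]; the rest of the
+ * initialisation precomputes 5*r (r5), r^2 (rn) and 5*r^2 (rn_5), which
+ * Hacl_Poly1305_128_poly1305_update relies on to absorb two blocks at a time. */ +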
Lib_IntVector_Intrinsics_vec128 f210 = r[1U]; + Lib_IntVector_Intrinsics_vec128 f220 = r[2U]; + Lib_IntVector_Intrinsics_vec128 f230 = r[3U]; + Lib_IntVector_Intrinsics_vec128 f240 = r[4U]; + r5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f200, (uint64_t)5U); + r5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f210, (uint64_t)5U); + r5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f220, (uint64_t)5U); + r5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f230, (uint64_t)5U); + r5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f240, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = r[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = r[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = r[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = r[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = r[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f11)); + Lib_IntVector_Intrinsics_vec128 + a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, Lib_IntVector_Intrinsics_vec128_mul64(r0, f11)); + Lib_IntVector_Intrinsics_vec128 + a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, Lib_IntVector_Intrinsics_vec128_mul64(r1, f11)); + Lib_IntVector_Intrinsics_vec128 + a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, Lib_IntVector_Intrinsics_vec128_mul64(r2, f11)); + Lib_IntVector_Intrinsics_vec128 + a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, Lib_IntVector_Intrinsics_vec128_mul64(r3, f11)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f12)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f12)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f12)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f12)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f12)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f13)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f13)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f13)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f13)); 
+ Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f13)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f14)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f14)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f14)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f14)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f14)); + Lib_IntVector_Intrinsics_vec128 t0 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; + Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t0, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t1, c0); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o0 = tmp01; + Lib_IntVector_Intrinsics_vec128 o1 = tmp11; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + rn[0U] = o0; + rn[1U] = o1; 
+ rn[2U] = o2; + rn[3U] = o3; + rn[4U] = o4; + Lib_IntVector_Intrinsics_vec128 f201 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = rn[4U]; + rn_5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f201, (uint64_t)5U); + rn_5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f21, (uint64_t)5U); + rn_5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f22, (uint64_t)5U); + rn_5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f23, (uint64_t)5U); + rn_5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f24, (uint64_t)5U); +} + +void +Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + uint64_t u0 = load64_le(text); + uint64_t lo = u0; + uint64_t u = load64_le(text + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 
f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + 
Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t0, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t1, c0); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o0 = tmp01; + Lib_IntVector_Intrinsics_vec128 o1 = tmp11; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; +} + +void +Hacl_Poly1305_128_poly1305_update( + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t len, + uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + uint32_t sz_block = (uint32_t)32U; + uint32_t len0 = len / sz_block * sz_block; + uint8_t *t0 = text; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)32U; + uint8_t *text0 
= t0; + Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t0 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i = i + (uint32_t)1U) { + uint8_t *block = text1 + i * bs; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(block); + Lib_IntVector_Intrinsics_vec128 + b2 = Lib_IntVector_Intrinsics_vec128_load_le(block + (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + f00 = + Lib_IntVector_Intrinsics_vec128_and(lo, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f25 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f25; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f0; + e[1U] = f1; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec128 *rn5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec128 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f110 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f120 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f130 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f140 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + 
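/* Note: in this main loop both 64-bit lanes of the accumulator are multiplied
+ * by r^2 (rn) using a 5x5 schoolbook limb product, with the high limbs folded
+ * back through the precomputed 5*r^2 values (r51..r54); the carry chain that
+ * follows reduces every limb below 2^26 before the two freshly loaded blocks
+ * in e[] are added to the accumulator. */ +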
Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec128 + a11 = + Lib_IntVector_Intrinsics_vec128_add64(a1, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec128 + a21 = + Lib_IntVector_Intrinsics_vec128_add64(a2, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec128 + a31 = + Lib_IntVector_Intrinsics_vec128_add64(a3, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec128 + a41 = + Lib_IntVector_Intrinsics_vec128_add64(a4, + Lib_IntVector_Intrinsics_vec128_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f120)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f140)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec128 t01 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; + Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t01, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t1, c0); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + 
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o00 = tmp01; + Lib_IntVector_Intrinsics_vec128 o10 = tmp11; + Lib_IntVector_Intrinsics_vec128 o20 = tmp2; + Lib_IntVector_Intrinsics_vec128 o30 = tmp3; + Lib_IntVector_Intrinsics_vec128 o40 = tmp4; + acc[0U] = o00; + acc[1U] = o10; + acc[2U] = o20; + acc[3U] = o30; + acc[4U] = o40; + Lib_IntVector_Intrinsics_vec128 f100 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20); + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21); + Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22); + Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23); + Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24); + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc, pre); + } + uint32_t len1 = len - len0; + uint8_t *t1 = text + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem1 = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i = i + (uint32_t)1U) { + uint8_t *block = t1 + i * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = 
load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + 
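/* Note: tail processing of full 16-byte blocks; each block in e[] has been
+ * added to the accumulator (a01..a41) and is now multiplied by r, reusing the
+ * 5*r limbs (r51..r54) for the modular folding, followed by the usual 26-bit
+ * carry propagation. */ +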
Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t01, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t11, c0); + 
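/* Note: carry propagation; each limb's overflow above 26 bits is carried into
+ * the next limb, and the carry out of the top limb is multiplied by 5 and
+ * folded back into the lowest limb, because 2^130 mod (2^130 - 5) = 5. */ +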
Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(t4, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o0 = tmp01; + Lib_IntVector_Intrinsics_vec128 o1 = tmp11; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last1 = t1 + nb * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec128 e[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last1, rem1 * sizeof last1[0U]); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); 
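+ /* Note: final partial block; the rem1 leftover bytes were copied into a
+ * zero-padded 16-byte buffer, and once the limbs are assembled the 2^(8*rem1)
+ * padding bit is OR'ed into the matching 26-bit limb before one last
+ * (acc + block) * r step. */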
+ Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + 
Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(t01, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(t11, c0); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(t2, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(t3, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = 
Lib_IntVector_Intrinsics_vec128_add64(t4, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp01 = + Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 tmp11 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 o0 = tmp01; + Lib_IntVector_Intrinsics_vec128 o1 = tmp11; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +void +Hacl_Poly1305_128_poly1305_finish( + uint8_t *tag, + uint8_t *key, + Lib_IntVector_Intrinsics_vec128 *ctx) +{ + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + uint8_t *ks = key + (uint32_t)16U; + Lib_IntVector_Intrinsics_vec128 f0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f12 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f32 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f40 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(f0, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(f12, c0); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(f22, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(f32, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(f40, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + l4 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 + tmp0_ = + 
Lib_IntVector_Intrinsics_vec128_and(l4, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c5 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 f010 = tmp0_; + Lib_IntVector_Intrinsics_vec128 f110 = Lib_IntVector_Intrinsics_vec128_add64(tmp1, c5); + Lib_IntVector_Intrinsics_vec128 f210 = tmp2; + Lib_IntVector_Intrinsics_vec128 f310 = tmp3; + Lib_IntVector_Intrinsics_vec128 f410 = tmp4; + Lib_IntVector_Intrinsics_vec128 + mh = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + ml = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffffbU); + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_eq64(f410, mh); + Lib_IntVector_Intrinsics_vec128 + mask1 = + Lib_IntVector_Intrinsics_vec128_and(mask, + Lib_IntVector_Intrinsics_vec128_eq64(f310, mh)); + Lib_IntVector_Intrinsics_vec128 + mask2 = + Lib_IntVector_Intrinsics_vec128_and(mask1, + Lib_IntVector_Intrinsics_vec128_eq64(f210, mh)); + Lib_IntVector_Intrinsics_vec128 + mask3 = + Lib_IntVector_Intrinsics_vec128_and(mask2, + Lib_IntVector_Intrinsics_vec128_eq64(f110, mh)); + Lib_IntVector_Intrinsics_vec128 + mask4 = + Lib_IntVector_Intrinsics_vec128_and(mask3, + Lib_IntVector_Intrinsics_vec128_lognot(Lib_IntVector_Intrinsics_vec128_gt64(ml, f010))); + Lib_IntVector_Intrinsics_vec128 ph = Lib_IntVector_Intrinsics_vec128_and(mask4, mh); + Lib_IntVector_Intrinsics_vec128 pl = Lib_IntVector_Intrinsics_vec128_and(mask4, ml); + Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_sub64(f010, pl); + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_sub64(f110, ph); + Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_sub64(f210, ph); + Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_sub64(f310, ph); + Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_sub64(f410, ph); + Lib_IntVector_Intrinsics_vec128 f011 = o0; + Lib_IntVector_Intrinsics_vec128 f111 = o1; + Lib_IntVector_Intrinsics_vec128 f211 = o2; + Lib_IntVector_Intrinsics_vec128 f311 = o3; + Lib_IntVector_Intrinsics_vec128 f411 = o4; + acc[0U] = f011; + acc[1U] = f111; + acc[2U] = f211; + acc[3U] = f311; + acc[4U] = f411; + Lib_IntVector_Intrinsics_vec128 f00 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f4 = acc[4U]; + uint64_t f01 = Lib_IntVector_Intrinsics_vec128_extract64(f00, (uint32_t)0U); + uint64_t f112 = Lib_IntVector_Intrinsics_vec128_extract64(f1, (uint32_t)0U); + uint64_t f212 = Lib_IntVector_Intrinsics_vec128_extract64(f2, (uint32_t)0U); + uint64_t f312 = Lib_IntVector_Intrinsics_vec128_extract64(f3, (uint32_t)0U); + uint64_t f41 = Lib_IntVector_Intrinsics_vec128_extract64(f4, (uint32_t)0U); + uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; + uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; + uint64_t f10 = lo; + uint64_t f11 = hi; + uint64_t u0 = load64_le(ks); + uint64_t lo0 = u0; + uint64_t u = load64_le(ks + (uint32_t)8U); + uint64_t hi0 = u; + uint64_t f20 = lo0; + uint64_t f21 = hi0; + uint64_t r0 = f10 + f20; + uint64_t r1 = f11 + f21; + uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; + uint64_t r11 = r1 + c; + uint64_t f30 = r0; + uint64_t f31 = r11; + store64_le(tag, f30); + store64_le(tag + 
(uint32_t)8U, f31); +} + +void +Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) +{ + Lib_IntVector_Intrinsics_vec128 ctx[25U]; + for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i) + ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero; + Hacl_Poly1305_128_poly1305_init(ctx, key); + Hacl_Poly1305_128_poly1305_update(ctx, len, text); + Hacl_Poly1305_128_poly1305_finish(tag, key, ctx); +} diff --git a/lib/freebl/verified/Hacl_Poly1305_128.h b/lib/freebl/verified/Hacl_Poly1305_128.h new file mode 100644 index 0000000000..8e7cdc74dd --- /dev/null +++ b/lib/freebl/verified/Hacl_Poly1305_128.h @@ -0,0 +1,66 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "libintvector.h" +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include + +#ifndef __Hacl_Poly1305_128_H +#define __Hacl_Poly1305_128_H + +#include "Hacl_Kremlib.h" + +void +Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b); + +void +Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize( + Lib_IntVector_Intrinsics_vec128 *out, + Lib_IntVector_Intrinsics_vec128 *p); + +extern uint32_t Hacl_Poly1305_128_blocklen; + +typedef Lib_IntVector_Intrinsics_vec128 *Hacl_Poly1305_128_poly1305_ctx; + +void Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key); + +void Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text); + +void +Hacl_Poly1305_128_poly1305_update( + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t len, + uint8_t *text); + +void +Hacl_Poly1305_128_poly1305_finish( + uint8_t *tag, + uint8_t *key, + Lib_IntVector_Intrinsics_vec128 *ctx); + +void Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); + +#define __Hacl_Poly1305_128_H_DEFINED +#endif diff --git a/lib/freebl/verified/Hacl_Poly1305_32.c b/lib/freebl/verified/Hacl_Poly1305_32.c index d07a069bf4..a447d8307a 100644 --- a/lib/freebl/verified/Hacl_Poly1305_32.c +++ b/lib/freebl/verified/Hacl_Poly1305_32.c @@ -1,578 +1,542 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation +/* MIT License * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #include "Hacl_Poly1305_32.h" -inline static void -Hacl_Bignum_Modulo_reduce(uint32_t *b) -{ - uint32_t b0 = b[0U]; - b[0U] = (b0 << (uint32_t)2U) + b0; -} - -inline static void -Hacl_Bignum_Modulo_carry_top(uint32_t *b) -{ - uint32_t b4 = b[4U]; - uint32_t b0 = b[0U]; - uint32_t b4_26 = b4 >> (uint32_t)26U; - b[4U] = b4 & (uint32_t)0x3ffffffU; - b[0U] = (b4_26 << (uint32_t)2U) + b4_26 + b0; -} - -inline static void -Hacl_Bignum_Modulo_carry_top_wide(uint64_t *b) -{ - uint64_t b4 = b[4U]; - uint64_t b0 = b[0U]; - uint64_t b4_ = b4 & (uint64_t)(uint32_t)0x3ffffffU; - uint32_t b4_26 = (uint32_t)(b4 >> (uint32_t)26U); - uint64_t b0_ = b0 + (uint64_t)((b4_26 << (uint32_t)2U) + b4_26); - b[4U] = b4_; - b[0U] = b0_; -} - -inline static void -Hacl_Bignum_Fproduct_copy_from_wide_(uint32_t *output, uint64_t *input) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { - uint64_t xi = input[i]; - output[i] = (uint32_t)xi; - } -} - -inline static void -Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint64_t *output, uint32_t *input, uint32_t s) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { - uint64_t xi = output[i]; - uint32_t yi = input[i]; - uint64_t x_wide = (uint64_t)yi; - uint64_t y_wide = (uint64_t)s; - output[i] = xi + x_wide * y_wide; - } -} - -inline static void -Hacl_Bignum_Fproduct_carry_wide_(uint64_t *tmp) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { - uint32_t ctr = i; - uint64_t tctr = tmp[ctr]; - uint64_t tctrp1 = tmp[ctr + (uint32_t)1U]; - uint32_t r0 = (uint32_t)tctr & (uint32_t)0x3ffffffU; - uint64_t c = tctr >> (uint32_t)26U; - tmp[ctr] = (uint64_t)r0; - tmp[ctr + (uint32_t)1U] = tctrp1 + c; - } -} - -inline static void -Hacl_Bignum_Fproduct_carry_limb_(uint32_t *tmp) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { - uint32_t ctr = i; - uint32_t tctr = tmp[ctr]; - uint32_t tctrp1 = tmp[ctr + (uint32_t)1U]; - 
uint32_t r0 = tctr & (uint32_t)0x3ffffffU; - uint32_t c = tctr >> (uint32_t)26U; - tmp[ctr] = r0; - tmp[ctr + (uint32_t)1U] = tctrp1 + c; - } -} - -inline static void -Hacl_Bignum_Fmul_shift_reduce(uint32_t *output) -{ - uint32_t tmp = output[4U]; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { - uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U; - uint32_t z = output[ctr - (uint32_t)1U]; - output[ctr] = z; - } - output[0U] = tmp; - Hacl_Bignum_Modulo_reduce(output); -} - -static void -Hacl_Bignum_Fmul_mul_shift_reduce_(uint64_t *output, uint32_t *input, uint32_t *input2) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { - uint32_t input2i = input2[i]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); - Hacl_Bignum_Fmul_shift_reduce(input); - } - uint32_t i = (uint32_t)4U; - uint32_t input2i = input2[i]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); -} - -inline static void -Hacl_Bignum_Fmul_fmul(uint32_t *output, uint32_t *input, uint32_t *input2) -{ - uint32_t tmp[5U] = { 0U }; - memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]); - uint64_t t[5U] = { 0U }; - Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2); - Hacl_Bignum_Fproduct_carry_wide_(t); - Hacl_Bignum_Modulo_carry_top_wide(t); - Hacl_Bignum_Fproduct_copy_from_wide_(output, t); - uint32_t i0 = output[0U]; - uint32_t i1 = output[1U]; - uint32_t i0_ = i0 & (uint32_t)0x3ffffffU; - uint32_t i1_ = i1 + (i0 >> (uint32_t)26U); - output[0U] = i0_; - output[1U] = i1_; -} - -inline static void -Hacl_Bignum_AddAndMultiply_add_and_multiply(uint32_t *acc, uint32_t *block, uint32_t *r) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { - uint32_t xi = acc[i]; - uint32_t yi = block[i]; - acc[i] = xi + yi; - } - Hacl_Bignum_Fmul_fmul(acc, acc, r); -} - -inline static void -Hacl_Impl_Poly1305_32_poly1305_update( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m) -{ - Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; - uint32_t *h = scrut0.h; - uint32_t *acc = h; - Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; - uint32_t *r = scrut.r; - uint32_t *r5 = r; - uint32_t tmp[5U] = { 0U }; - uint8_t *s0 = m; - uint8_t *s1 = m + (uint32_t)3U; - uint8_t *s2 = m + (uint32_t)6U; - uint8_t *s3 = m + (uint32_t)9U; - uint8_t *s4 = m + (uint32_t)12U; - uint32_t i0 = load32_le(s0); - uint32_t i1 = load32_le(s1); - uint32_t i2 = load32_le(s2); - uint32_t i3 = load32_le(s3); - uint32_t i4 = load32_le(s4); - uint32_t r0 = i0 & (uint32_t)0x3ffffffU; - uint32_t r1 = i1 >> (uint32_t)2U & (uint32_t)0x3ffffffU; - uint32_t r2 = i2 >> (uint32_t)4U & (uint32_t)0x3ffffffU; - uint32_t r3 = i3 >> (uint32_t)6U & (uint32_t)0x3ffffffU; - uint32_t r4 = i4 >> (uint32_t)8U; - tmp[0U] = r0; - tmp[1U] = r1; - tmp[2U] = r2; - tmp[3U] = r3; - tmp[4U] = r4; - uint32_t b4 = tmp[4U]; - uint32_t b4_ = (uint32_t)0x1000000U | b4; - tmp[4U] = b4_; - Hacl_Bignum_AddAndMultiply_add_and_multiply(acc, tmp, r5); -} - -inline static void -Hacl_Impl_Poly1305_32_poly1305_process_last_block_( - uint8_t *block, - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m, - uint64_t rem_) -{ - uint32_t tmp[5U] = { 0U }; - uint8_t *s0 = block; - uint8_t *s1 = block + (uint32_t)3U; - uint8_t *s2 = block + (uint32_t)6U; - uint8_t *s3 = block + (uint32_t)9U; - uint8_t *s4 = block + (uint32_t)12U; - uint32_t i0 = load32_le(s0); - uint32_t i1 = load32_le(s1); - uint32_t i2 = load32_le(s2); - uint32_t i3 = load32_le(s3); - uint32_t i4 = 
load32_le(s4); - uint32_t r0 = i0 & (uint32_t)0x3ffffffU; - uint32_t r1 = i1 >> (uint32_t)2U & (uint32_t)0x3ffffffU; - uint32_t r2 = i2 >> (uint32_t)4U & (uint32_t)0x3ffffffU; - uint32_t r3 = i3 >> (uint32_t)6U & (uint32_t)0x3ffffffU; - uint32_t r4 = i4 >> (uint32_t)8U; - tmp[0U] = r0; - tmp[1U] = r1; - tmp[2U] = r2; - tmp[3U] = r3; - tmp[4U] = r4; - Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; - uint32_t *h = scrut0.h; - Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; - uint32_t *r = scrut.r; - Hacl_Bignum_AddAndMultiply_add_and_multiply(h, tmp, r); -} - -inline static void -Hacl_Impl_Poly1305_32_poly1305_process_last_block( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m, - uint64_t rem_) -{ - uint8_t zero1 = (uint8_t)0U; - KRML_CHECK_SIZE(zero1, (uint32_t)16U); - uint8_t block[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - block[_i] = zero1; - uint32_t i0 = (uint32_t)rem_; - uint32_t i = (uint32_t)rem_; - memcpy(block, m, i * sizeof m[0U]); - block[i0] = (uint8_t)1U; - Hacl_Impl_Poly1305_32_poly1305_process_last_block_(block, st, m, rem_); -} - -static void -Hacl_Impl_Poly1305_32_poly1305_last_pass(uint32_t *acc) -{ - Hacl_Bignum_Fproduct_carry_limb_(acc); - Hacl_Bignum_Modulo_carry_top(acc); - uint32_t t0 = acc[0U]; - uint32_t t10 = acc[1U]; - uint32_t t20 = acc[2U]; - uint32_t t30 = acc[3U]; - uint32_t t40 = acc[4U]; - uint32_t t1_ = t10 + (t0 >> (uint32_t)26U); - uint32_t mask_261 = (uint32_t)0x3ffffffU; - uint32_t t0_ = t0 & mask_261; - uint32_t t2_ = t20 + (t1_ >> (uint32_t)26U); - uint32_t t1__ = t1_ & mask_261; - uint32_t t3_ = t30 + (t2_ >> (uint32_t)26U); - uint32_t t2__ = t2_ & mask_261; - uint32_t t4_ = t40 + (t3_ >> (uint32_t)26U); - uint32_t t3__ = t3_ & mask_261; - acc[0U] = t0_; - acc[1U] = t1__; - acc[2U] = t2__; - acc[3U] = t3__; - acc[4U] = t4_; - Hacl_Bignum_Modulo_carry_top(acc); - uint32_t t00 = acc[0U]; - uint32_t t1 = acc[1U]; - uint32_t t2 = acc[2U]; - uint32_t t3 = acc[3U]; - uint32_t t4 = acc[4U]; - uint32_t t1_0 = t1 + (t00 >> (uint32_t)26U); - uint32_t t0_0 = t00 & (uint32_t)0x3ffffffU; - uint32_t t2_0 = t2 + (t1_0 >> (uint32_t)26U); - uint32_t t1__0 = t1_0 & (uint32_t)0x3ffffffU; - uint32_t t3_0 = t3 + (t2_0 >> (uint32_t)26U); - uint32_t t2__0 = t2_0 & (uint32_t)0x3ffffffU; - uint32_t t4_0 = t4 + (t3_0 >> (uint32_t)26U); - uint32_t t3__0 = t3_0 & (uint32_t)0x3ffffffU; - acc[0U] = t0_0; - acc[1U] = t1__0; - acc[2U] = t2__0; - acc[3U] = t3__0; - acc[4U] = t4_0; - Hacl_Bignum_Modulo_carry_top(acc); - uint32_t i0 = acc[0U]; - uint32_t i1 = acc[1U]; - uint32_t i0_ = i0 & (uint32_t)0x3ffffffU; - uint32_t i1_ = i1 + (i0 >> (uint32_t)26U); - acc[0U] = i0_; - acc[1U] = i1_; - uint32_t a0 = acc[0U]; - uint32_t a1 = acc[1U]; - uint32_t a2 = acc[2U]; - uint32_t a3 = acc[3U]; - uint32_t a4 = acc[4U]; - uint32_t mask0 = FStar_UInt32_gte_mask(a0, (uint32_t)0x3fffffbU); - uint32_t mask1 = FStar_UInt32_eq_mask(a1, (uint32_t)0x3ffffffU); - uint32_t mask2 = FStar_UInt32_eq_mask(a2, (uint32_t)0x3ffffffU); - uint32_t mask3 = FStar_UInt32_eq_mask(a3, (uint32_t)0x3ffffffU); - uint32_t mask4 = FStar_UInt32_eq_mask(a4, (uint32_t)0x3ffffffU); - uint32_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4; - uint32_t a0_ = a0 - ((uint32_t)0x3fffffbU & mask); - uint32_t a1_ = a1 - ((uint32_t)0x3ffffffU & mask); - uint32_t a2_ = a2 - ((uint32_t)0x3ffffffU & mask); - uint32_t a3_ = a3 - ((uint32_t)0x3ffffffU & mask); - uint32_t a4_ = a4 - ((uint32_t)0x3ffffffU & mask); - acc[0U] = a0_; - acc[1U] = a1_; - acc[2U] = a2_; - acc[3U] = a3_; - acc[4U] 
= a4_; -} - -static Hacl_Impl_Poly1305_32_State_poly1305_state -Hacl_Impl_Poly1305_32_mk_state(uint32_t *r, uint32_t *h) -{ - return ((Hacl_Impl_Poly1305_32_State_poly1305_state){.r = r, .h = h }); -} - -static void -Hacl_Standalone_Poly1305_32_poly1305_blocks( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m, - uint64_t len1) -{ - if (!(len1 == (uint64_t)0U)) { - uint8_t *block = m; - uint8_t *tail1 = m + (uint32_t)16U; - Hacl_Impl_Poly1305_32_poly1305_update(st, block); - uint64_t len2 = len1 - (uint64_t)1U; - Hacl_Standalone_Poly1305_32_poly1305_blocks(st, tail1, len2); - } -} - -static void -Hacl_Standalone_Poly1305_32_poly1305_partial( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *input, - uint64_t len1, - uint8_t *kr) -{ - Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; - uint32_t *r = scrut.r; - uint32_t *x0 = r; - FStar_UInt128_t k1 = load128_le(kr); - FStar_UInt128_t - k_clamped = - FStar_UInt128_logand(k1, - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), - (uint32_t)64U), - FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); - uint32_t r0 = (uint32_t)FStar_UInt128_uint128_to_uint64(k_clamped) & (uint32_t)0x3ffffffU; - uint32_t - r1 = - (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)26U)) & (uint32_t)0x3ffffffU; - uint32_t - r2 = - (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)52U)) & (uint32_t)0x3ffffffU; - uint32_t - r3 = - (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)78U)) & (uint32_t)0x3ffffffU; - uint32_t - r4 = - (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)104U)) & (uint32_t)0x3ffffffU; - x0[0U] = r0; - x0[1U] = r1; - x0[2U] = r2; - x0[3U] = r3; - x0[4U] = r4; - Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; - uint32_t *h = scrut0.h; - uint32_t *x00 = h; - x00[0U] = (uint32_t)0U; - x00[1U] = (uint32_t)0U; - x00[2U] = (uint32_t)0U; - x00[3U] = (uint32_t)0U; - x00[4U] = (uint32_t)0U; - Hacl_Standalone_Poly1305_32_poly1305_blocks(st, input, len1); -} - -static void -Hacl_Standalone_Poly1305_32_poly1305_complete( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m, - uint64_t len1, - uint8_t *k1) -{ - uint8_t *kr = k1; - uint64_t len16 = len1 >> (uint32_t)4U; - uint64_t rem16 = len1 & (uint64_t)0xfU; - uint8_t *part_input = m; - uint8_t *last_block = m + (uint32_t)((uint64_t)16U * len16); - Hacl_Standalone_Poly1305_32_poly1305_partial(st, part_input, len16, kr); - if (!(rem16 == (uint64_t)0U)) - Hacl_Impl_Poly1305_32_poly1305_process_last_block(st, last_block, rem16); - Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; - uint32_t *h = scrut.h; - uint32_t *acc = h; - Hacl_Impl_Poly1305_32_poly1305_last_pass(acc); -} - -static void -Hacl_Standalone_Poly1305_32_crypto_onetimeauth_( - uint8_t *output, - uint8_t *input, - uint64_t len1, - uint8_t *k1) -{ - uint32_t buf[10U] = { 0U }; - uint32_t *r = buf; - uint32_t *h = buf + (uint32_t)5U; - Hacl_Impl_Poly1305_32_State_poly1305_state st = Hacl_Impl_Poly1305_32_mk_state(r, h); - uint8_t *key_s = k1 + (uint32_t)16U; - Hacl_Standalone_Poly1305_32_poly1305_complete(st, input, len1, k1); - Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; - uint32_t *h5 = scrut.h; - uint32_t *acc = h5; - FStar_UInt128_t k_ = load128_le(key_s); - uint32_t h0 = acc[0U]; - uint32_t h1 = acc[1U]; - uint32_t h2 = acc[2U]; - uint32_t h3 = acc[3U]; - 
uint32_t h4 = acc[4U]; - FStar_UInt128_t - acc_ = - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h4), - (uint32_t)104U), - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h3), - (uint32_t)78U), - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h2), - (uint32_t)52U), - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h1), - (uint32_t)26U), - FStar_UInt128_uint64_to_uint128((uint64_t)h0))))); - FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); - store128_le(output, mac_); -} - -static void -Hacl_Standalone_Poly1305_32_crypto_onetimeauth( - uint8_t *output, - uint8_t *input, - uint64_t len1, - uint8_t *k1) -{ - Hacl_Standalone_Poly1305_32_crypto_onetimeauth_(output, input, len1, k1); -} - -void * -Hacl_Poly1305_32_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b) -{ - return (void *)(uint8_t)0U; -} - -Hacl_Impl_Poly1305_32_State_poly1305_state -Hacl_Poly1305_32_mk_state(uint32_t *r, uint32_t *acc) -{ - return Hacl_Impl_Poly1305_32_mk_state(r, acc); -} +uint32_t Hacl_Poly1305_32_blocklen = (uint32_t)16U; void -Hacl_Poly1305_32_init(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *k1) +Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key) { - Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; - uint32_t *r = scrut.r; - uint32_t *x0 = r; - FStar_UInt128_t k10 = load128_le(k1); - FStar_UInt128_t - k_clamped = - FStar_UInt128_logand(k10, - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), - (uint32_t)64U), - FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); - uint32_t r0 = (uint32_t)FStar_UInt128_uint128_to_uint64(k_clamped) & (uint32_t)0x3ffffffU; - uint32_t - r1 = - (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)26U)) & (uint32_t)0x3ffffffU; - uint32_t - r2 = - (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)52U)) & (uint32_t)0x3ffffffU; - uint32_t - r3 = - (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)78U)) & (uint32_t)0x3ffffffU; - uint32_t - r4 = - (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)104U)) & (uint32_t)0x3ffffffU; - x0[0U] = r0; - x0[1U] = r1; - x0[2U] = r2; - x0[3U] = r3; - x0[4U] = r4; - Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; - uint32_t *h = scrut0.h; - uint32_t *x00 = h; - x00[0U] = (uint32_t)0U; - x00[1U] = (uint32_t)0U; - x00[2U] = (uint32_t)0U; - x00[3U] = (uint32_t)0U; - x00[4U] = (uint32_t)0U; + uint64_t *acc = ctx; + uint64_t *pre = ctx + (uint32_t)5U; + uint8_t *kr = key; + acc[0U] = (uint64_t)0U; + acc[1U] = (uint64_t)0U; + acc[2U] = (uint64_t)0U; + acc[3U] = (uint64_t)0U; + acc[4U] = (uint64_t)0U; + uint64_t u0 = load64_le(kr); + uint64_t lo = u0; + uint64_t u = load64_le(kr + (uint32_t)8U); + uint64_t hi = u; + uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; + uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; + uint64_t lo1 = lo & mask0; + uint64_t hi1 = hi & mask1; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t *rn = pre + (uint32_t)10U; + uint64_t *rn_5 = pre + (uint32_t)15U; + uint64_t r_vec0 = lo1; + uint64_t r_vec1 = hi1; + uint64_t f00 = r_vec0 & (uint64_t)0x3ffffffU; + uint64_t f10 = r_vec0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = r_vec0 >> (uint32_t)52U | (r_vec1 & 
(uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = r_vec1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = r_vec1 >> (uint32_t)40U; + uint64_t f0 = f00; + uint64_t f1 = f10; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f4 = f40; + r[0U] = f0; + r[1U] = f1; + r[2U] = f2; + r[3U] = f3; + r[4U] = f4; + uint64_t f200 = r[0U]; + uint64_t f21 = r[1U]; + uint64_t f22 = r[2U]; + uint64_t f23 = r[3U]; + uint64_t f24 = r[4U]; + r5[0U] = f200 * (uint64_t)5U; + r5[1U] = f21 * (uint64_t)5U; + r5[2U] = f22 * (uint64_t)5U; + r5[3U] = f23 * (uint64_t)5U; + r5[4U] = f24 * (uint64_t)5U; + rn[0U] = r[0U]; + rn[1U] = r[1U]; + rn[2U] = r[2U]; + rn[3U] = r[3U]; + rn[4U] = r[4U]; + rn_5[0U] = r5[0U]; + rn_5[1U] = r5[1U]; + rn_5[2U] = r5[2U]; + rn_5[3U] = r5[3U]; + rn_5[4U] = r5[4U]; } -void *Hacl_Poly1305_32_empty_log = (void *)(uint8_t)0U; - void -Hacl_Poly1305_32_update_block(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *m) +Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text) { - Hacl_Impl_Poly1305_32_poly1305_update(st, m); + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(text); + uint64_t lo = u0; + uint64_t u = load64_le(text + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t l = t0 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c0 = l >> 
(uint32_t)26U; + uint64_t l0 = t1 + c0; + uint64_t tmp1 = l0 & (uint64_t)0x3ffffffU; + uint64_t c1 = l0 >> (uint32_t)26U; + uint64_t l1 = t2 + c1; + uint64_t tmp2 = l1 & (uint64_t)0x3ffffffU; + uint64_t c2 = l1 >> (uint32_t)26U; + uint64_t l2 = t3 + c2; + uint64_t tmp3 = l2 & (uint64_t)0x3ffffffU; + uint64_t c3 = l2 >> (uint32_t)26U; + uint64_t l3 = t4 + c3; + uint64_t tmp4 = l3 & (uint64_t)0x3ffffffU; + uint64_t c4 = l3 >> (uint32_t)26U; + uint64_t l4 = tmp0 + c4 * (uint64_t)5U; + uint64_t tmp01 = l4 & (uint64_t)0x3ffffffU; + uint64_t c5 = l4 >> (uint32_t)26U; + uint64_t tmp11 = tmp1 + c5; + uint64_t o0 = tmp01; + uint64_t o1 = tmp11; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; } void -Hacl_Poly1305_32_update( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m, - uint32_t len1) +Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text) { - if (!(len1 == (uint32_t)0U)) { - uint8_t *block = m; - uint8_t *m_ = m + (uint32_t)16U; - uint32_t len2 = len1 - (uint32_t)1U; - Hacl_Poly1305_32_update_block(st, block); - Hacl_Poly1305_32_update(st, m_, len2); + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint32_t nb = len / (uint32_t)16U; + uint32_t rem1 = len % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i = i + (uint32_t)1U) { + uint8_t *block = text + i * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + 
r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t l = t0 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c0 = l >> (uint32_t)26U; + uint64_t l0 = t1 + c0; + uint64_t tmp1 = l0 & (uint64_t)0x3ffffffU; + uint64_t c1 = l0 >> (uint32_t)26U; + uint64_t l1 = t2 + c1; + uint64_t tmp2 = l1 & (uint64_t)0x3ffffffU; + uint64_t c2 = l1 >> (uint32_t)26U; + uint64_t l2 = t3 + c2; + uint64_t tmp3 = l2 & (uint64_t)0x3ffffffU; + uint64_t c3 = l2 >> (uint32_t)26U; + uint64_t l3 = t4 + c3; + uint64_t tmp4 = l3 & (uint64_t)0x3ffffffU; + uint64_t c4 = l3 >> (uint32_t)26U; + uint64_t l4 = tmp0 + c4 * (uint64_t)5U; + uint64_t tmp01 = l4 & (uint64_t)0x3ffffffU; + uint64_t c5 = l4 >> (uint32_t)26U; + uint64_t tmp11 = tmp1 + c5; + uint64_t o0 = tmp01; + uint64_t o1 = tmp11; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last1 = text + nb * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last1, rem1 * sizeof last1[0U]); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + uint64_t mask = b; + uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 
= a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t l = t0 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c0 = l >> (uint32_t)26U; + uint64_t l0 = t1 + c0; + uint64_t tmp1 = l0 & (uint64_t)0x3ffffffU; + uint64_t c1 = l0 >> (uint32_t)26U; + uint64_t l1 = t2 + c1; + uint64_t tmp2 = l1 & (uint64_t)0x3ffffffU; + uint64_t c2 = l1 >> (uint32_t)26U; + uint64_t l2 = t3 + c2; + uint64_t tmp3 = l2 & (uint64_t)0x3ffffffU; + uint64_t c3 = l2 >> (uint32_t)26U; + uint64_t l3 = t4 + c3; + uint64_t tmp4 = l3 & (uint64_t)0x3ffffffU; + uint64_t c4 = l3 >> (uint32_t)26U; + uint64_t l4 = tmp0 + c4 * (uint64_t)5U; + uint64_t tmp01 = l4 & (uint64_t)0x3ffffffU; + uint64_t c5 = l4 >> (uint32_t)26U; + uint64_t tmp11 = tmp1 + c5; + uint64_t o0 = tmp01; + uint64_t o1 = tmp11; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; } } void -Hacl_Poly1305_32_update_last( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m, - uint32_t len1) -{ - if (!((uint64_t)len1 == (uint64_t)0U)) - Hacl_Impl_Poly1305_32_poly1305_process_last_block(st, m, (uint64_t)len1); - Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; - uint32_t *h = scrut.h; - uint32_t *acc = h; - Hacl_Impl_Poly1305_32_poly1305_last_pass(acc); -} - -void -Hacl_Poly1305_32_finish( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *mac, - uint8_t *k1) +Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx) { - Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; - uint32_t *h = scrut.h; - uint32_t *acc = h; - FStar_UInt128_t k_ = load128_le(k1); - uint32_t h0 = acc[0U]; - uint32_t h1 = acc[1U]; - uint32_t h2 = acc[2U]; - uint32_t h3 = acc[3U]; - uint32_t h4 = acc[4U]; - FStar_UInt128_t - acc_ = - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h4), - (uint32_t)104U), - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h3), - (uint32_t)78U), - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h2), - (uint32_t)52U), - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h1), - (uint32_t)26U), - FStar_UInt128_uint64_to_uint128((uint64_t)h0))))); - FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); - store128_le(mac, mac_); + uint64_t *acc = ctx; + uint8_t *ks = key + (uint32_t)16U; + uint64_t f0 = acc[0U]; + uint64_t f12 = acc[1U]; + uint64_t f22 = acc[2U]; + uint64_t f32 = acc[3U]; + uint64_t f40 = acc[4U]; + uint64_t l = f0 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c0 = l >> (uint32_t)26U; + uint64_t l0 = f12 + c0; + uint64_t tmp1 = l0 & (uint64_t)0x3ffffffU; + uint64_t c1 = l0 >> (uint32_t)26U; + uint64_t l1 = f22 + c1; + uint64_t tmp2 = l1 & (uint64_t)0x3ffffffU; + uint64_t c2 = l1 >> (uint32_t)26U; + uint64_t l2 = f32 + c2; + uint64_t tmp3 = l2 & (uint64_t)0x3ffffffU; + uint64_t c3 = l2 >> (uint32_t)26U; + uint64_t l3 = f40 + c3; + uint64_t tmp4 = l3 & (uint64_t)0x3ffffffU; + uint64_t c4 = l3 >> (uint32_t)26U; + uint64_t l4 = tmp0 + c4 * (uint64_t)5U; + uint64_t tmp0_ = l4 & (uint64_t)0x3ffffffU; + uint64_t c5 = l4 >> (uint32_t)26U; + uint64_t f010 = tmp0_; + uint64_t f110 = tmp1 + c5; + uint64_t f210 = tmp2; + uint64_t f310 = tmp3; + uint64_t f410 = tmp4; + uint64_t mh = (uint64_t)0x3ffffffU; + uint64_t ml = (uint64_t)0x3fffffbU; + 
uint64_t mask = FStar_UInt64_eq_mask(f410, mh); + uint64_t mask1 = mask & FStar_UInt64_eq_mask(f310, mh); + uint64_t mask2 = mask1 & FStar_UInt64_eq_mask(f210, mh); + uint64_t mask3 = mask2 & FStar_UInt64_eq_mask(f110, mh); + uint64_t mask4 = mask3 & ~~FStar_UInt64_gte_mask(f010, ml); + uint64_t ph = mask4 & mh; + uint64_t pl = mask4 & ml; + uint64_t o0 = f010 - pl; + uint64_t o1 = f110 - ph; + uint64_t o2 = f210 - ph; + uint64_t o3 = f310 - ph; + uint64_t o4 = f410 - ph; + uint64_t f011 = o0; + uint64_t f111 = o1; + uint64_t f211 = o2; + uint64_t f311 = o3; + uint64_t f411 = o4; + acc[0U] = f011; + acc[1U] = f111; + acc[2U] = f211; + acc[3U] = f311; + acc[4U] = f411; + uint64_t f00 = acc[0U]; + uint64_t f1 = acc[1U]; + uint64_t f2 = acc[2U]; + uint64_t f3 = acc[3U]; + uint64_t f4 = acc[4U]; + uint64_t f01 = f00; + uint64_t f112 = f1; + uint64_t f212 = f2; + uint64_t f312 = f3; + uint64_t f41 = f4; + uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; + uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; + uint64_t f10 = lo; + uint64_t f11 = hi; + uint64_t u0 = load64_le(ks); + uint64_t lo0 = u0; + uint64_t u = load64_le(ks + (uint32_t)8U); + uint64_t hi0 = u; + uint64_t f20 = lo0; + uint64_t f21 = hi0; + uint64_t r0 = f10 + f20; + uint64_t r1 = f11 + f21; + uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; + uint64_t r11 = r1 + c; + uint64_t f30 = r0; + uint64_t f31 = r11; + store64_le(tag, f30); + store64_le(tag + (uint32_t)8U, f31); } void -Hacl_Poly1305_32_crypto_onetimeauth( - uint8_t *output, - uint8_t *input, - uint64_t len1, - uint8_t *k1) +Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) { - Hacl_Standalone_Poly1305_32_crypto_onetimeauth(output, input, len1, k1); + uint64_t ctx[25U] = { 0U }; + Hacl_Poly1305_32_poly1305_init(ctx, key); + Hacl_Poly1305_32_poly1305_update(ctx, len, text); + Hacl_Poly1305_32_poly1305_finish(tag, key, ctx); } diff --git a/lib/freebl/verified/Hacl_Poly1305_32.h b/lib/freebl/verified/Hacl_Poly1305_32.h index f808bc6f97..442b5db429 100644 --- a/lib/freebl/verified/Hacl_Poly1305_32.h +++ b/lib/freebl/verified/Hacl_Poly1305_32.h @@ -1,103 +1,49 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation +/* MIT License * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation * - * http://www.apache.org/licenses/LICENSE-2.0 + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ -#include "kremlib.h" +#include "kremlin/internal/types.h" +#include "kremlin/lowstar_endianness.h" +#include +#include + #ifndef __Hacl_Poly1305_32_H #define __Hacl_Poly1305_32_H -typedef uint32_t Hacl_Bignum_Constants_limb; - -typedef uint64_t Hacl_Bignum_Constants_wide; - -typedef uint64_t Hacl_Bignum_Wide_t; - -typedef uint32_t Hacl_Bignum_Limb_t; - -typedef void *Hacl_Impl_Poly1305_32_State_log_t; - -typedef uint8_t *Hacl_Impl_Poly1305_32_State_uint8_p; - -typedef uint32_t *Hacl_Impl_Poly1305_32_State_bigint; - -typedef void *Hacl_Impl_Poly1305_32_State_seqelem; - -typedef uint32_t *Hacl_Impl_Poly1305_32_State_elemB; - -typedef uint8_t *Hacl_Impl_Poly1305_32_State_wordB; - -typedef uint8_t *Hacl_Impl_Poly1305_32_State_wordB_16; - -typedef struct -{ - uint32_t *r; - uint32_t *h; -} Hacl_Impl_Poly1305_32_State_poly1305_state; - -typedef void *Hacl_Impl_Poly1305_32_log_t; - -typedef uint32_t *Hacl_Impl_Poly1305_32_bigint; - -typedef uint8_t *Hacl_Impl_Poly1305_32_uint8_p; - -typedef uint32_t *Hacl_Impl_Poly1305_32_elemB; - -typedef uint8_t *Hacl_Impl_Poly1305_32_wordB; - -typedef uint8_t *Hacl_Impl_Poly1305_32_wordB_16; - -typedef uint8_t *Hacl_Poly1305_32_uint8_p; - -typedef uint64_t Hacl_Poly1305_32_uint64_t; - -void *Hacl_Poly1305_32_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b); - -typedef uint8_t *Hacl_Poly1305_32_key; - -typedef Hacl_Impl_Poly1305_32_State_poly1305_state Hacl_Poly1305_32_state; +#include "Hacl_Kremlib.h" -Hacl_Impl_Poly1305_32_State_poly1305_state -Hacl_Poly1305_32_mk_state(uint32_t *r, uint32_t *acc); +extern uint32_t Hacl_Poly1305_32_blocklen; -void Hacl_Poly1305_32_init(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *k1); +typedef uint64_t *Hacl_Poly1305_32_poly1305_ctx; -extern void *Hacl_Poly1305_32_empty_log; +void Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key); -void Hacl_Poly1305_32_update_block(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *m); +void Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text); -void -Hacl_Poly1305_32_update( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m, - uint32_t len1); +void Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text); -void -Hacl_Poly1305_32_update_last( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *m, - uint32_t len1); +void Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx); -void -Hacl_Poly1305_32_finish( - Hacl_Impl_Poly1305_32_State_poly1305_state st, - uint8_t *mac, - uint8_t *k1); +void Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); -void -Hacl_Poly1305_32_crypto_onetimeauth( - uint8_t *output, - uint8_t *input, - uint64_t len1, - uint8_t *k1); +#define __Hacl_Poly1305_32_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Poly1305_64.c b/lib/freebl/verified/Hacl_Poly1305_64.c deleted file mode 100644 index 4d3bdde19c..0000000000 --- a/lib/freebl/verified/Hacl_Poly1305_64.c +++ /dev/null @@ -1,485 +0,0 @@ -/* Copyright 2016-2018 INRIA and 
Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Hacl_Poly1305_64.h" - -inline static void -Hacl_Bignum_Modulo_reduce(uint64_t *b) -{ - uint64_t b0 = b[0U]; - b[0U] = (b0 << (uint32_t)4U) + (b0 << (uint32_t)2U); -} - -inline static void -Hacl_Bignum_Modulo_carry_top(uint64_t *b) -{ - uint64_t b2 = b[2U]; - uint64_t b0 = b[0U]; - uint64_t b2_42 = b2 >> (uint32_t)42U; - b[2U] = b2 & (uint64_t)0x3ffffffffffU; - b[0U] = (b2_42 << (uint32_t)2U) + b2_42 + b0; -} - -inline static void -Hacl_Bignum_Modulo_carry_top_wide(FStar_UInt128_t *b) -{ - FStar_UInt128_t b2 = b[2U]; - FStar_UInt128_t b0 = b[0U]; - FStar_UInt128_t - b2_ = FStar_UInt128_logand(b2, FStar_UInt128_uint64_to_uint128((uint64_t)0x3ffffffffffU)); - uint64_t b2_42 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b2, (uint32_t)42U)); - FStar_UInt128_t - b0_ = FStar_UInt128_add(b0, FStar_UInt128_uint64_to_uint128((b2_42 << (uint32_t)2U) + b2_42)); - b[2U] = b2_; - b[0U] = b0_; -} - -inline static void -Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, FStar_UInt128_t *input) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i = i + (uint32_t)1U) { - FStar_UInt128_t xi = input[i]; - output[i] = FStar_UInt128_uint128_to_uint64(xi); - } -} - -inline static void -Hacl_Bignum_Fproduct_sum_scalar_multiplication_( - FStar_UInt128_t *output, - uint64_t *input, - uint64_t s) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i = i + (uint32_t)1U) { - FStar_UInt128_t xi = output[i]; - uint64_t yi = input[i]; - output[i] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); - } -} - -inline static void -Hacl_Bignum_Fproduct_carry_wide_(FStar_UInt128_t *tmp) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)2U; i = i + (uint32_t)1U) { - uint32_t ctr = i; - FStar_UInt128_t tctr = tmp[ctr]; - FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; - uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0xfffffffffffU; - FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)44U); - tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); - tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); - } -} - -inline static void -Hacl_Bignum_Fproduct_carry_limb_(uint64_t *tmp) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)2U; i = i + (uint32_t)1U) { - uint32_t ctr = i; - uint64_t tctr = tmp[ctr]; - uint64_t tctrp1 = tmp[ctr + (uint32_t)1U]; - uint64_t r0 = tctr & (uint64_t)0xfffffffffffU; - uint64_t c = tctr >> (uint32_t)44U; - tmp[ctr] = r0; - tmp[ctr + (uint32_t)1U] = tctrp1 + c; - } -} - -inline static void -Hacl_Bignum_Fmul_shift_reduce(uint64_t *output) -{ - uint64_t tmp = output[2U]; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)2U; i = i + (uint32_t)1U) { - uint32_t ctr = (uint32_t)3U - i - (uint32_t)1U; - uint64_t z = output[ctr - (uint32_t)1U]; - output[ctr] = z; - } - output[0U] = tmp; - Hacl_Bignum_Modulo_reduce(output); -} - -static void -Hacl_Bignum_Fmul_mul_shift_reduce_(FStar_UInt128_t *output, uint64_t *input, uint64_t *input2) -{ - for (uint32_t i = 
(uint32_t)0U; i < (uint32_t)2U; i = i + (uint32_t)1U) { - uint64_t input2i = input2[i]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); - Hacl_Bignum_Fmul_shift_reduce(input); - } - uint32_t i = (uint32_t)2U; - uint64_t input2i = input2[i]; - Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); -} - -inline static void -Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2) -{ - uint64_t tmp[3U] = { 0U }; - memcpy(tmp, input, (uint32_t)3U * sizeof input[0U]); - KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)3U); - FStar_UInt128_t t[3U]; - for (uint32_t _i = 0U; _i < (uint32_t)3U; ++_i) - t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); - Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2); - Hacl_Bignum_Fproduct_carry_wide_(t); - Hacl_Bignum_Modulo_carry_top_wide(t); - Hacl_Bignum_Fproduct_copy_from_wide_(output, t); - uint64_t i0 = output[0U]; - uint64_t i1 = output[1U]; - uint64_t i0_ = i0 & (uint64_t)0xfffffffffffU; - uint64_t i1_ = i1 + (i0 >> (uint32_t)44U); - output[0U] = i0_; - output[1U] = i1_; -} - -inline static void -Hacl_Bignum_AddAndMultiply_add_and_multiply(uint64_t *acc, uint64_t *block, uint64_t *r) -{ - for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i = i + (uint32_t)1U) { - uint64_t xi = acc[i]; - uint64_t yi = block[i]; - acc[i] = xi + yi; - } - Hacl_Bignum_Fmul_fmul(acc, acc, r); -} - -inline static void -Hacl_Impl_Poly1305_64_poly1305_update( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *m) -{ - Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; - uint64_t *h = scrut0.h; - uint64_t *acc = h; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - uint64_t *r = scrut.r; - uint64_t *r3 = r; - uint64_t tmp[3U] = { 0U }; - FStar_UInt128_t m0 = load128_le(m); - uint64_t r0 = FStar_UInt128_uint128_to_uint64(m0) & (uint64_t)0xfffffffffffU; - uint64_t - r1 = - FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(m0, (uint32_t)44U)) & (uint64_t)0xfffffffffffU; - uint64_t r2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(m0, (uint32_t)88U)); - tmp[0U] = r0; - tmp[1U] = r1; - tmp[2U] = r2; - uint64_t b2 = tmp[2U]; - uint64_t b2_ = (uint64_t)0x10000000000U | b2; - tmp[2U] = b2_; - Hacl_Bignum_AddAndMultiply_add_and_multiply(acc, tmp, r3); -} - -inline static void -Hacl_Impl_Poly1305_64_poly1305_process_last_block_( - uint8_t *block, - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *m, - uint64_t rem_) -{ - uint64_t tmp[3U] = { 0U }; - FStar_UInt128_t m0 = load128_le(block); - uint64_t r0 = FStar_UInt128_uint128_to_uint64(m0) & (uint64_t)0xfffffffffffU; - uint64_t - r1 = - FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(m0, (uint32_t)44U)) & (uint64_t)0xfffffffffffU; - uint64_t r2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(m0, (uint32_t)88U)); - tmp[0U] = r0; - tmp[1U] = r1; - tmp[2U] = r2; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; - uint64_t *h = scrut0.h; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - uint64_t *r = scrut.r; - Hacl_Bignum_AddAndMultiply_add_and_multiply(h, tmp, r); -} - -inline static void -Hacl_Impl_Poly1305_64_poly1305_process_last_block( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *m, - uint64_t rem_) -{ - uint8_t zero1 = (uint8_t)0U; - KRML_CHECK_SIZE(zero1, (uint32_t)16U); - uint8_t block[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - block[_i] = zero1; - uint32_t i0 = (uint32_t)rem_; - uint32_t i = (uint32_t)rem_; - memcpy(block, 
m, i * sizeof m[0U]); - block[i0] = (uint8_t)1U; - Hacl_Impl_Poly1305_64_poly1305_process_last_block_(block, st, m, rem_); -} - -static void -Hacl_Impl_Poly1305_64_poly1305_last_pass(uint64_t *acc) -{ - Hacl_Bignum_Fproduct_carry_limb_(acc); - Hacl_Bignum_Modulo_carry_top(acc); - uint64_t a0 = acc[0U]; - uint64_t a10 = acc[1U]; - uint64_t a20 = acc[2U]; - uint64_t a0_ = a0 & (uint64_t)0xfffffffffffU; - uint64_t r0 = a0 >> (uint32_t)44U; - uint64_t a1_ = (a10 + r0) & (uint64_t)0xfffffffffffU; - uint64_t r1 = (a10 + r0) >> (uint32_t)44U; - uint64_t a2_ = a20 + r1; - acc[0U] = a0_; - acc[1U] = a1_; - acc[2U] = a2_; - Hacl_Bignum_Modulo_carry_top(acc); - uint64_t i0 = acc[0U]; - uint64_t i1 = acc[1U]; - uint64_t i0_ = i0 & (uint64_t)0xfffffffffffU; - uint64_t i1_ = i1 + (i0 >> (uint32_t)44U); - acc[0U] = i0_; - acc[1U] = i1_; - uint64_t a00 = acc[0U]; - uint64_t a1 = acc[1U]; - uint64_t a2 = acc[2U]; - uint64_t mask0 = FStar_UInt64_gte_mask(a00, (uint64_t)0xffffffffffbU); - uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0xfffffffffffU); - uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x3ffffffffffU); - uint64_t mask = (mask0 & mask1) & mask2; - uint64_t a0_0 = a00 - ((uint64_t)0xffffffffffbU & mask); - uint64_t a1_0 = a1 - ((uint64_t)0xfffffffffffU & mask); - uint64_t a2_0 = a2 - ((uint64_t)0x3ffffffffffU & mask); - acc[0U] = a0_0; - acc[1U] = a1_0; - acc[2U] = a2_0; -} - -static Hacl_Impl_Poly1305_64_State_poly1305_state -Hacl_Impl_Poly1305_64_mk_state(uint64_t *r, uint64_t *h) -{ - return ((Hacl_Impl_Poly1305_64_State_poly1305_state){.r = r, .h = h }); -} - -static void -Hacl_Standalone_Poly1305_64_poly1305_blocks( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *m, - uint64_t len1) -{ - if (!(len1 == (uint64_t)0U)) { - uint8_t *block = m; - uint8_t *tail1 = m + (uint32_t)16U; - Hacl_Impl_Poly1305_64_poly1305_update(st, block); - uint64_t len2 = len1 - (uint64_t)1U; - Hacl_Standalone_Poly1305_64_poly1305_blocks(st, tail1, len2); - } -} - -static void -Hacl_Standalone_Poly1305_64_poly1305_partial( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *input, - uint64_t len1, - uint8_t *kr) -{ - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - uint64_t *r = scrut.r; - uint64_t *x0 = r; - FStar_UInt128_t k1 = load128_le(kr); - FStar_UInt128_t - k_clamped = - FStar_UInt128_logand(k1, - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), - (uint32_t)64U), - FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); - uint64_t r0 = FStar_UInt128_uint128_to_uint64(k_clamped) & (uint64_t)0xfffffffffffU; - uint64_t - r1 = - FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)44U)) & (uint64_t)0xfffffffffffU; - uint64_t - r2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)88U)); - x0[0U] = r0; - x0[1U] = r1; - x0[2U] = r2; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; - uint64_t *h = scrut0.h; - uint64_t *x00 = h; - x00[0U] = (uint64_t)0U; - x00[1U] = (uint64_t)0U; - x00[2U] = (uint64_t)0U; - Hacl_Standalone_Poly1305_64_poly1305_blocks(st, input, len1); -} - -static void -Hacl_Standalone_Poly1305_64_poly1305_complete( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *m, - uint64_t len1, - uint8_t *k1) -{ - uint8_t *kr = k1; - uint64_t len16 = len1 >> (uint32_t)4U; - uint64_t rem16 = len1 & (uint64_t)0xfU; - uint8_t *part_input = m; - uint8_t *last_block = m + (uint32_t)((uint64_t)16U * len16); - 
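/* Illustrative aside, not part of the patch: the deleted 64-bit Poly1305 code
 * above works on three limbs in radix 2^44 -- each 16-byte block is loaded as
 * a 128-bit integer and cut at bits 44 and 88 (the mask 0xfffffffffff is
 * 2^44 - 1). A minimal sketch of that split, assuming a compiler that provides
 * unsigned __int128 (the non-verified uint128 path of the old kremlib.h): */
#include <stdint.h>

static void
split_radix_2_44(unsigned __int128 m0, uint64_t limbs[3])
{
    limbs[0] = (uint64_t)m0 & (uint64_t)0xfffffffffffU;         /* bits 0..43   */
    limbs[1] = (uint64_t)(m0 >> 44) & (uint64_t)0xfffffffffffU; /* bits 44..87  */
    limbs[2] = (uint64_t)(m0 >> 88);                            /* bits 88..127 */
    /* limbs[0] + 2^44*limbs[1] + 2^88*limbs[2] == m0; for a full block the
     * padding bit 2^128 is then OR-ed into limbs[2] as 2^40, which is the
     * 0x10000000000U constant in the update code above. */
}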
Hacl_Standalone_Poly1305_64_poly1305_partial(st, part_input, len16, kr); - if (!(rem16 == (uint64_t)0U)) - Hacl_Impl_Poly1305_64_poly1305_process_last_block(st, last_block, rem16); - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - uint64_t *h = scrut.h; - uint64_t *acc = h; - Hacl_Impl_Poly1305_64_poly1305_last_pass(acc); -} - -static void -Hacl_Standalone_Poly1305_64_crypto_onetimeauth_( - uint8_t *output, - uint8_t *input, - uint64_t len1, - uint8_t *k1) -{ - uint64_t buf[6U] = { 0U }; - uint64_t *r = buf; - uint64_t *h = buf + (uint32_t)3U; - Hacl_Impl_Poly1305_64_State_poly1305_state st = Hacl_Impl_Poly1305_64_mk_state(r, h); - uint8_t *key_s = k1 + (uint32_t)16U; - Hacl_Standalone_Poly1305_64_poly1305_complete(st, input, len1, k1); - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - uint64_t *h3 = scrut.h; - uint64_t *acc = h3; - FStar_UInt128_t k_ = load128_le(key_s); - uint64_t h0 = acc[0U]; - uint64_t h1 = acc[1U]; - uint64_t h2 = acc[2U]; - FStar_UInt128_t - acc_ = - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128(h2 - << (uint32_t)24U | - h1 >> (uint32_t)20U), - (uint32_t)64U), - FStar_UInt128_uint64_to_uint128(h1 << (uint32_t)44U | h0)); - FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); - store128_le(output, mac_); -} - -static void -Hacl_Standalone_Poly1305_64_crypto_onetimeauth( - uint8_t *output, - uint8_t *input, - uint64_t len1, - uint8_t *k1) -{ - Hacl_Standalone_Poly1305_64_crypto_onetimeauth_(output, input, len1, k1); -} - -Hacl_Impl_Poly1305_64_State_poly1305_state -Hacl_Poly1305_64_mk_state(uint64_t *r, uint64_t *acc) -{ - return Hacl_Impl_Poly1305_64_mk_state(r, acc); -} - -void -Hacl_Poly1305_64_init(Hacl_Impl_Poly1305_64_State_poly1305_state st, uint8_t *k1) -{ - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - uint64_t *r = scrut.r; - uint64_t *x0 = r; - FStar_UInt128_t k10 = load128_le(k1); - FStar_UInt128_t - k_clamped = - FStar_UInt128_logand(k10, - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), - (uint32_t)64U), - FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); - uint64_t r0 = FStar_UInt128_uint128_to_uint64(k_clamped) & (uint64_t)0xfffffffffffU; - uint64_t - r1 = - FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)44U)) & (uint64_t)0xfffffffffffU; - uint64_t - r2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)88U)); - x0[0U] = r0; - x0[1U] = r1; - x0[2U] = r2; - Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; - uint64_t *h = scrut0.h; - uint64_t *x00 = h; - x00[0U] = (uint64_t)0U; - x00[1U] = (uint64_t)0U; - x00[2U] = (uint64_t)0U; -} - -void -Hacl_Poly1305_64_update_block(Hacl_Impl_Poly1305_64_State_poly1305_state st, uint8_t *m) -{ - Hacl_Impl_Poly1305_64_poly1305_update(st, m); -} - -void -Hacl_Poly1305_64_update( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *m, - uint32_t num_blocks) -{ - if (!(num_blocks == (uint32_t)0U)) { - uint8_t *block = m; - uint8_t *m_ = m + (uint32_t)16U; - uint32_t n1 = num_blocks - (uint32_t)1U; - Hacl_Poly1305_64_update_block(st, block); - Hacl_Poly1305_64_update(st, m_, n1); - } -} - -void -Hacl_Poly1305_64_update_last( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *m, - uint32_t len1) -{ - if (!((uint64_t)len1 == (uint64_t)0U)) - Hacl_Impl_Poly1305_64_poly1305_process_last_block(st, m, (uint64_t)len1); - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - uint64_t *h = scrut.h; - 
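/* Illustrative aside, not part of the patch: the streaming entry points being
 * deleted here (mk_state/init/update_block/update/update_last/finish and the
 * one-shot crypto_onetimeauth) are superseded by the Hacl_Poly1305_32
 * functions declared earlier in this patch. A minimal sketch of the new
 * one-shot call; msg, msg_len and the 32-byte key (r || s) are hypothetical
 * caller-supplied values: */
#include <stdint.h>
#include "Hacl_Poly1305_32.h"

static void
poly1305_mac_example(uint8_t *msg, uint32_t msg_len, uint8_t *key /* 32 bytes */)
{
    uint8_t tag[16U] = { 0U };

    /* Old (deleted): Hacl_Poly1305_64_crypto_onetimeauth(tag, msg,
     *                    (uint64_t)msg_len, key);
     * New: tag first, then a 32-bit length, then the message and key. */
    Hacl_Poly1305_32_poly1305_mac(tag, msg_len, msg, key);
    (void)tag;
}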
uint64_t *acc = h; - Hacl_Impl_Poly1305_64_poly1305_last_pass(acc); -} - -void -Hacl_Poly1305_64_finish( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *mac, - uint8_t *k1) -{ - Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; - uint64_t *h = scrut.h; - uint64_t *acc = h; - FStar_UInt128_t k_ = load128_le(k1); - uint64_t h0 = acc[0U]; - uint64_t h1 = acc[1U]; - uint64_t h2 = acc[2U]; - FStar_UInt128_t - acc_ = - FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128(h2 - << (uint32_t)24U | - h1 >> (uint32_t)20U), - (uint32_t)64U), - FStar_UInt128_uint64_to_uint128(h1 << (uint32_t)44U | h0)); - FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); - store128_le(mac, mac_); -} - -void -Hacl_Poly1305_64_crypto_onetimeauth( - uint8_t *output, - uint8_t *input, - uint64_t len1, - uint8_t *k1) -{ - Hacl_Standalone_Poly1305_64_crypto_onetimeauth(output, input, len1, k1); -} diff --git a/lib/freebl/verified/Hacl_Poly1305_64.h b/lib/freebl/verified/Hacl_Poly1305_64.h deleted file mode 100644 index e2e62193c7..0000000000 --- a/lib/freebl/verified/Hacl_Poly1305_64.h +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kremlib.h" -#ifndef __Hacl_Poly1305_64_H -#define __Hacl_Poly1305_64_H - -typedef uint64_t Hacl_Bignum_Constants_limb; - -typedef FStar_UInt128_t Hacl_Bignum_Constants_wide; - -typedef FStar_UInt128_t Hacl_Bignum_Wide_t; - -typedef uint64_t Hacl_Bignum_Limb_t; - -typedef void *Hacl_Impl_Poly1305_64_State_log_t; - -typedef uint8_t *Hacl_Impl_Poly1305_64_State_uint8_p; - -typedef uint64_t *Hacl_Impl_Poly1305_64_State_bigint; - -typedef void *Hacl_Impl_Poly1305_64_State_seqelem; - -typedef uint64_t *Hacl_Impl_Poly1305_64_State_elemB; - -typedef uint8_t *Hacl_Impl_Poly1305_64_State_wordB; - -typedef uint8_t *Hacl_Impl_Poly1305_64_State_wordB_16; - -typedef struct -{ - uint64_t *r; - uint64_t *h; -} Hacl_Impl_Poly1305_64_State_poly1305_state; - -typedef void *Hacl_Impl_Poly1305_64_log_t; - -typedef uint64_t *Hacl_Impl_Poly1305_64_bigint; - -typedef uint8_t *Hacl_Impl_Poly1305_64_uint8_p; - -typedef uint64_t *Hacl_Impl_Poly1305_64_elemB; - -typedef uint8_t *Hacl_Impl_Poly1305_64_wordB; - -typedef uint8_t *Hacl_Impl_Poly1305_64_wordB_16; - -typedef uint8_t *Hacl_Poly1305_64_uint8_p; - -typedef uint64_t Hacl_Poly1305_64_uint64_t; - -typedef uint8_t *Hacl_Poly1305_64_key; - -typedef Hacl_Impl_Poly1305_64_State_poly1305_state Hacl_Poly1305_64_state; - -Hacl_Impl_Poly1305_64_State_poly1305_state -Hacl_Poly1305_64_mk_state(uint64_t *r, uint64_t *acc); - -void Hacl_Poly1305_64_init(Hacl_Impl_Poly1305_64_State_poly1305_state st, uint8_t *k1); - -void Hacl_Poly1305_64_update_block(Hacl_Impl_Poly1305_64_State_poly1305_state st, uint8_t *m); - -void -Hacl_Poly1305_64_update( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *m, - uint32_t num_blocks); - -void -Hacl_Poly1305_64_update_last( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t 
*m, - uint32_t len1); - -void -Hacl_Poly1305_64_finish( - Hacl_Impl_Poly1305_64_State_poly1305_state st, - uint8_t *mac, - uint8_t *k1); - -void -Hacl_Poly1305_64_crypto_onetimeauth( - uint8_t *output, - uint8_t *input, - uint64_t len1, - uint8_t *k1); -#endif diff --git a/lib/freebl/verified/kremlib.h b/lib/freebl/verified/kremlib.h deleted file mode 100644 index 69b5845201..0000000000 --- a/lib/freebl/verified/kremlib.h +++ /dev/null @@ -1,672 +0,0 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __KREMLIB_H -#define __KREMLIB_H - -#include "kremlib_base.h" - -/* For tests only: we might need this function to be forward-declared, because - * the dependency on WasmSupport appears very late, after SimplifyWasm, and - * sadly, after the topological order has been done. */ -void WasmSupport_check_buffer_size(uint32_t s); - -/******************************************************************************/ -/* Stubs to ease compilation of non-Low* code */ -/******************************************************************************/ - -/* Some types that KreMLin has no special knowledge of; many of them appear in - * signatures of ghost functions, meaning that it suffices to give them (any) - * definition. */ -typedef void *FStar_Seq_Base_seq, *Prims_prop, *FStar_HyperStack_mem, - *FStar_Set_set, *Prims_st_pre_h, *FStar_Heap_heap, *Prims_all_pre_h, - *FStar_TSet_set, *Prims_list, *FStar_Map_t, *FStar_UInt63_t_, - *FStar_Int63_t_, *FStar_UInt63_t, *FStar_Int63_t, *FStar_UInt_uint_t, - *FStar_Int_int_t, *FStar_HyperStack_stackref, *FStar_Bytes_bytes, - *FStar_HyperHeap_rid, *FStar_Heap_aref, *FStar_Monotonic_Heap_heap, - *FStar_Monotonic_Heap_aref, *FStar_Monotonic_HyperHeap_rid, - *FStar_Monotonic_HyperStack_mem, *FStar_Char_char_; - -typedef const char *Prims_string; - -/* For "bare" targets that do not have a C stdlib, the user might want to use - * [-add-include '"mydefinitions.h"'] and override these. */ -#ifndef KRML_HOST_PRINTF -#define KRML_HOST_PRINTF printf -#endif - -#ifndef KRML_HOST_EXIT -#define KRML_HOST_EXIT exit -#endif - -#ifndef KRML_HOST_MALLOC -#define KRML_HOST_MALLOC malloc -#endif - -/* In statement position, exiting is easy. */ -#define KRML_EXIT \ - do { \ - KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \ - KRML_HOST_EXIT(254); \ - } while (0) - -/* In expression position, use the comma-operator and a malloc to return an - * expression of the right size. KreMLin passes t as the parameter to the macro. - */ -#define KRML_EABORT(t, msg) \ - (KRML_HOST_PRINTF("KreMLin abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \ - KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t)))) - -/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of - * *elements*. Do an ugly, run-time check (some of which KreMLin can eliminate). 
- */ -#define KRML_CHECK_SIZE(elt, size) \ - if (((size_t)size) > SIZE_MAX / sizeof(elt)) { \ - KRML_HOST_PRINTF( \ - "Maximum allocatable size exceeded, aborting before overflow at " \ - "%s:%d\n", \ - __FILE__, __LINE__); \ - KRML_HOST_EXIT(253); \ - } - -/* A series of GCC atrocities to trace function calls (kremlin's [-d c-calls] - * option). Useful when trying to debug, say, Wasm, to compare traces. */ -/* clang-format off */ -#ifdef __GNUC__ -#define KRML_FORMAT(X) _Generic((X), \ - uint8_t : "0x%08" PRIx8, \ - uint16_t: "0x%08" PRIx16, \ - uint32_t: "0x%08" PRIx32, \ - uint64_t: "0x%08" PRIx64, \ - int8_t : "0x%08" PRIx8, \ - int16_t : "0x%08" PRIx16, \ - int32_t : "0x%08" PRIx32, \ - int64_t : "0x%08" PRIx64, \ - default : "%s") - -#define KRML_FORMAT_ARG(X) _Generic((X), \ - uint8_t : X, \ - uint16_t: X, \ - uint32_t: X, \ - uint64_t: X, \ - int8_t : X, \ - int16_t : X, \ - int32_t : X, \ - int64_t : X, \ - default : "unknown") -/* clang-format on */ - -#define KRML_DEBUG_RETURN(X) \ - ({ \ - __auto_type _ret = (X); \ - KRML_HOST_PRINTF("returning: "); \ - KRML_HOST_PRINTF(KRML_FORMAT(_ret), KRML_FORMAT_ARG(_ret)); \ - KRML_HOST_PRINTF(" \n"); \ - _ret; \ - }) -#endif - -#define FStar_Buffer_eqb(b1, b2, n) \ - (memcmp((b1), (b2), (n) * sizeof((b1)[0])) == 0) - -/* Stubs to make ST happy. Important note: you must generate a use of the macro - * argument, otherwise, you may have FStar_ST_recall(f) as the only use of f; - * KreMLin will think that this is a valid use, but then the C compiler, after - * macro expansion, will error out. */ -#define FStar_HyperHeap_root 0 -#define FStar_Pervasives_Native_fst(x) (x).fst -#define FStar_Pervasives_Native_snd(x) (x).snd -#define FStar_Seq_Base_createEmpty(x) 0 -#define FStar_Seq_Base_create(len, init) 0 -#define FStar_Seq_Base_upd(s, i, e) 0 -#define FStar_Seq_Base_eq(l1, l2) 0 -#define FStar_Seq_Base_length(l1) 0 -#define FStar_Seq_Base_append(x, y) 0 -#define FStar_Seq_Base_slice(x, y, z) 0 -#define FStar_Seq_Properties_snoc(x, y) 0 -#define FStar_Seq_Properties_cons(x, y) 0 -#define FStar_Seq_Base_index(x, y) 0 -#define FStar_HyperStack_is_eternal_color(x) 0 -#define FStar_Monotonic_HyperHeap_root 0 -#define FStar_Buffer_to_seq_full(x) 0 -#define FStar_Buffer_recall(x) -#define FStar_HyperStack_ST_op_Colon_Equals(x, v) KRML_EXIT -#define FStar_HyperStack_ST_op_Bang(x) 0 -#define FStar_HyperStack_ST_salloc(x) 0 -#define FStar_HyperStack_ST_ralloc(x, y) 0 -#define FStar_HyperStack_ST_new_region(x) (0) -#define FStar_Monotonic_RRef_m_alloc(x) \ - { \ - 0 \ - } - -#define FStar_HyperStack_ST_recall(x) \ - do { \ - (void)(x); \ - } while (0) - -#define FStar_HyperStack_ST_recall_region(x) \ - do { \ - (void)(x); \ - } while (0) - -#define FStar_Monotonic_RRef_m_recall(x1, x2) \ - do { \ - (void)(x1); \ - (void)(x2); \ - } while (0) - -#define FStar_Monotonic_RRef_m_write(x1, x2, x3, x4, x5) \ - do { \ - (void)(x1); \ - (void)(x2); \ - (void)(x3); \ - (void)(x4); \ - (void)(x5); \ - } while (0) - -/******************************************************************************/ -/* Endian-ness macros that can only be implemented in C */ -/******************************************************************************/ - -/* ... for Linux */ -#if defined(__linux__) || defined(__CYGWIN__) -#include - -/* ... 
for OSX */ -#elif defined(__APPLE__) -#include -#define htole64(x) OSSwapHostToLittleInt64(x) -#define le64toh(x) OSSwapLittleToHostInt64(x) -#define htobe64(x) OSSwapHostToBigInt64(x) -#define be64toh(x) OSSwapBigToHostInt64(x) - -#define htole16(x) OSSwapHostToLittleInt16(x) -#define le16toh(x) OSSwapLittleToHostInt16(x) -#define htobe16(x) OSSwapHostToBigInt16(x) -#define be16toh(x) OSSwapBigToHostInt16(x) - -#define htole32(x) OSSwapHostToLittleInt32(x) -#define le32toh(x) OSSwapLittleToHostInt32(x) -#define htobe32(x) OSSwapHostToBigInt32(x) -#define be32toh(x) OSSwapBigToHostInt32(x) - -/* ... for Solaris */ -#elif defined(__sun__) -#include -#define htole64(x) LE_64(x) -#define le64toh(x) LE_64(x) -#define htobe64(x) BE_64(x) -#define be64toh(x) BE_64(x) - -#define htole16(x) LE_16(x) -#define le16toh(x) LE_16(x) -#define htobe16(x) BE_16(x) -#define be16toh(x) BE_16(x) - -#define htole32(x) LE_32(x) -#define le32toh(x) LE_32(x) -#define htobe32(x) BE_32(x) -#define be32toh(x) BE_32(x) - -/* ... for the BSDs */ -#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) -#include -#elif defined(__OpenBSD__) -#include - -/* ... for Windows (MSVC)... not targeting XBOX 360! */ -#elif defined(_MSC_VER) - -#include -#define htobe16(x) _byteswap_ushort(x) -#define htole16(x) (x) -#define be16toh(x) _byteswap_ushort(x) -#define le16toh(x) (x) - -#define htobe32(x) _byteswap_ulong(x) -#define htole32(x) (x) -#define be32toh(x) _byteswap_ulong(x) -#define le32toh(x) (x) - -#define htobe64(x) _byteswap_uint64(x) -#define htole64(x) (x) -#define be64toh(x) _byteswap_uint64(x) -#define le64toh(x) (x) - -/* ... for Windows (GCC-like, e.g. mingw or clang) */ -#elif (defined(_WIN32) || defined(_WIN64)) && \ - (defined(__GNUC__) || defined(__clang__)) - -#define htobe16(x) __builtin_bswap16(x) -#define htole16(x) (x) -#define be16toh(x) __builtin_bswap16(x) -#define le16toh(x) (x) - -#define htobe32(x) __builtin_bswap32(x) -#define htole32(x) (x) -#define be32toh(x) __builtin_bswap32(x) -#define le32toh(x) (x) - -#define htobe64(x) __builtin_bswap64(x) -#define htole64(x) (x) -#define be64toh(x) __builtin_bswap64(x) -#define le64toh(x) (x) - -/* ... generic big-endian fallback code */ -#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - -/* byte swapping code inspired by: - * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h - * */ - -#define htobe32(x) (x) -#define be32toh(x) (x) -#define htole32(x) \ - (__extension__({ \ - uint32_t _temp = (x); \ - ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ - ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ - })) -#define le32toh(x) (htole32((x))) - -#define htobe64(x) (x) -#define be64toh(x) (x) -#define htole64(x) \ - (__extension__({ \ - uint64_t __temp = (x); \ - uint32_t __low = htobe32((uint32_t)__temp); \ - uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ - (((uint64_t)__low) << 32) | __high; \ - })) -#define le64toh(x) (htole64((x))) - -/* ... 
generic little-endian fallback code */ -#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - -#define htole32(x) (x) -#define le32toh(x) (x) -#define htobe32(x) \ - (__extension__({ \ - uint32_t _temp = (x); \ - ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ - ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ - })) -#define be32toh(x) (htobe32((x))) - -#define htole64(x) (x) -#define le64toh(x) (x) -#define htobe64(x) \ - (__extension__({ \ - uint64_t __temp = (x); \ - uint32_t __low = htobe32((uint32_t)__temp); \ - uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ - (((uint64_t)__low) << 32) | __high; \ - })) -#define be64toh(x) (htobe64((x))) - -/* ... couldn't determine endian-ness of the target platform */ -#else -#error "Please define __BYTE_ORDER__!" - -#endif /* defined(__linux__) || ... */ - -/* Loads and stores. These avoid undefined behavior due to unaligned memory - * accesses, via memcpy. */ - -inline static uint16_t -load16(uint8_t *b) -{ - uint16_t x; - memcpy(&x, b, 2); - return x; -} - -inline static uint32_t -load32(uint8_t *b) -{ - uint32_t x; - memcpy(&x, b, 4); - return x; -} - -inline static uint64_t -load64(uint8_t *b) -{ - uint64_t x; - memcpy(&x, b, 8); - return x; -} - -inline static void -store16(uint8_t *b, uint16_t i) -{ - memcpy(b, &i, 2); -} - -inline static void -store32(uint8_t *b, uint32_t i) -{ - memcpy(b, &i, 4); -} - -inline static void -store64(uint8_t *b, uint64_t i) -{ - memcpy(b, &i, 8); -} - -#define load16_le(b) (le16toh(load16(b))) -#define store16_le(b, i) (store16(b, htole16(i))) -#define load16_be(b) (be16toh(load16(b))) -#define store16_be(b, i) (store16(b, htobe16(i))) - -#define load32_le(b) (le32toh(load32(b))) -#define store32_le(b, i) (store32(b, htole32(i))) -#define load32_be(b) (be32toh(load32(b))) -#define store32_be(b, i) (store32(b, htobe32(i))) - -#define load64_le(b) (le64toh(load64(b))) -#define store64_le(b, i) (store64(b, htole64(i))) -#define load64_be(b) (be64toh(load64(b))) -#define store64_be(b, i) (store64(b, htobe64(i))) - -/******************************************************************************/ -/* Checked integers to ease the compilation of non-Low* code */ -/******************************************************************************/ - -typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int, - krml_checked_int_t; - -inline static bool -Prims_op_GreaterThanOrEqual(int32_t x, int32_t y) -{ - return x >= y; -} - -inline static bool -Prims_op_LessThanOrEqual(int32_t x, int32_t y) -{ - return x <= y; -} - -inline static bool -Prims_op_GreaterThan(int32_t x, int32_t y) -{ - return x > y; -} - -inline static bool -Prims_op_LessThan(int32_t x, int32_t y) -{ - return x < y; -} - -#define RETURN_OR(x) \ - do { \ - int64_t __ret = x; \ - if (__ret < INT32_MIN || INT32_MAX < __ret) { \ - KRML_HOST_PRINTF("Prims.{int,nat,pos} integer overflow at %s:%d\n", \ - __FILE__, __LINE__); \ - KRML_HOST_EXIT(252); \ - } \ - return (int32_t)__ret; \ - } while (0) - -inline static int32_t -Prims_pow2(int32_t x) -{ - RETURN_OR((int64_t)1 << (int64_t)x); -} - -inline static int32_t -Prims_op_Multiply(int32_t x, int32_t y) -{ - RETURN_OR((int64_t)x * (int64_t)y); -} - -inline static int32_t -Prims_op_Addition(int32_t x, int32_t y) -{ - RETURN_OR((int64_t)x + (int64_t)y); -} - -inline static int32_t -Prims_op_Subtraction(int32_t x, int32_t y) -{ - RETURN_OR((int64_t)x - (int64_t)y); -} - -inline static int32_t -Prims_op_Division(int32_t x, int32_t y) -{ - 
RETURN_OR((int64_t)x / (int64_t)y); -} - -inline static int32_t -Prims_op_Modulus(int32_t x, int32_t y) -{ - RETURN_OR((int64_t)x % (int64_t)y); -} - -inline static int8_t -FStar_UInt8_uint_to_t(int8_t x) -{ - return x; -} -inline static int16_t -FStar_UInt16_uint_to_t(int16_t x) -{ - return x; -} -inline static int32_t -FStar_UInt32_uint_to_t(int32_t x) -{ - return x; -} -inline static int64_t -FStar_UInt64_uint_to_t(int64_t x) -{ - return x; -} - -inline static int8_t -FStar_UInt8_v(int8_t x) -{ - return x; -} -inline static int16_t -FStar_UInt16_v(int16_t x) -{ - return x; -} -inline static int32_t -FStar_UInt32_v(int32_t x) -{ - return x; -} -inline static int64_t -FStar_UInt64_v(int64_t x) -{ - return x; -} - -/* Platform-specific 128-bit arithmetic. These are static functions in a header, - * so that each translation unit gets its own copy and the C compiler can - * optimize. */ -#ifndef KRML_NOUINT128 -typedef unsigned __int128 FStar_UInt128_t, FStar_UInt128_t_, uint128_t; - -static inline void -print128(const char *where, uint128_t n) -{ - KRML_HOST_PRINTF("%s: [%" PRIu64 ",%" PRIu64 "]\n", where, - (uint64_t)(n >> 64), (uint64_t)n); -} - -static inline uint128_t -load128_le(uint8_t *b) -{ - uint128_t l = (uint128_t)load64_le(b); - uint128_t h = (uint128_t)load64_le(b + 8); - return (h << 64 | l); -} - -static inline void -store128_le(uint8_t *b, uint128_t n) -{ - store64_le(b, (uint64_t)n); - store64_le(b + 8, (uint64_t)(n >> 64)); -} - -static inline uint128_t -load128_be(uint8_t *b) -{ - uint128_t h = (uint128_t)load64_be(b); - uint128_t l = (uint128_t)load64_be(b + 8); - return (h << 64 | l); -} - -static inline void -store128_be(uint8_t *b, uint128_t n) -{ - store64_be(b, (uint64_t)(n >> 64)); - store64_be(b + 8, (uint64_t)n); -} - -#define FStar_UInt128_add(x, y) ((x) + (y)) -#define FStar_UInt128_mul(x, y) ((x) * (y)) -#define FStar_UInt128_add_mod(x, y) ((x) + (y)) -#define FStar_UInt128_sub(x, y) ((x) - (y)) -#define FStar_UInt128_sub_mod(x, y) ((x) - (y)) -#define FStar_UInt128_logand(x, y) ((x) & (y)) -#define FStar_UInt128_logor(x, y) ((x) | (y)) -#define FStar_UInt128_logxor(x, y) ((x) ^ (y)) -#define FStar_UInt128_lognot(x) (~(x)) -#define FStar_UInt128_shift_left(x, y) ((x) << (y)) -#define FStar_UInt128_shift_right(x, y) ((x) >> (y)) -#define FStar_UInt128_uint64_to_uint128(x) ((uint128_t)(x)) -#define FStar_UInt128_uint128_to_uint64(x) ((uint64_t)(x)) -#define FStar_UInt128_mul_wide(x, y) ((uint128_t)(x) * (y)) -#define FStar_UInt128_op_Hat_Hat(x, y) ((x) ^ (y)) - -static inline uint128_t -FStar_UInt128_eq_mask(uint128_t x, uint128_t y) -{ - uint64_t mask = - FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64)) & - FStar_UInt64_eq_mask(x, y); - return ((uint128_t)mask) << 64 | mask; -} - -static inline uint128_t -FStar_UInt128_gte_mask(uint128_t x, uint128_t y) -{ - uint64_t mask = - (FStar_UInt64_gte_mask(x >> 64, y >> 64) & - ~(FStar_UInt64_eq_mask(x >> 64, y >> 64))) | - (FStar_UInt64_eq_mask(x >> 64, y >> 64) & FStar_UInt64_gte_mask(x, y)); - return ((uint128_t)mask) << 64 | mask; -} - -#else /* !defined(KRML_NOUINT128) */ - -/* This is a bad circular dependency... should fix it properly. */ -#include "FStar.h" - -typedef FStar_UInt128_uint128 FStar_UInt128_t_, uint128_t; - -/* A series of definitions written using pointers. 
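/* Illustrative aside, not part of the patch: the *_eq_mask / *_gte_mask
 * helpers used above return an all-ones word when the relation holds and zero
 * otherwise, which is what lets the Poly1305 final reduction subtract the
 * modulus without branching. A small sketch of the idiom, assuming
 * FStar_UInt64_eq_mask from kremlib_base.h is in scope: */
#include <stdint.h>

static uint64_t
ct_select_if_eq(uint64_t a, uint64_t b, uint64_t x, uint64_t y)
{
    uint64_t mask = FStar_UInt64_eq_mask(a, b); /* 0xfff...f if a == b, else 0 */
    return (x & mask) | (y & ~mask);            /* branch-free: a == b ? x : y */
}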
*/ -static inline void -print128_(const char *where, uint128_t *n) -{ - KRML_HOST_PRINTF("%s: [0x%08" PRIx64 ",0x%08" PRIx64 "]\n", where, n->high, n->low); -} - -static inline void -load128_le_(uint8_t *b, uint128_t *r) -{ - r->low = load64_le(b); - r->high = load64_le(b + 8); -} - -static inline void -store128_le_(uint8_t *b, uint128_t *n) -{ - store64_le(b, n->low); - store64_le(b + 8, n->high); -} - -static inline void -load128_be_(uint8_t *b, uint128_t *r) -{ - r->high = load64_be(b); - r->low = load64_be(b + 8); -} - -static inline void -store128_be_(uint8_t *b, uint128_t *n) -{ - store64_be(b, n->high); - store64_be(b + 8, n->low); -} - -#ifndef KRML_NOSTRUCT_PASSING - -static inline void -print128(const char *where, uint128_t n) -{ - print128_(where, &n); -} - -static inline uint128_t -load128_le(uint8_t *b) -{ - uint128_t r; - load128_le_(b, &r); - return r; -} - -static inline void -store128_le(uint8_t *b, uint128_t n) -{ - store128_le_(b, &n); -} - -static inline uint128_t -load128_be(uint8_t *b) -{ - uint128_t r; - load128_be_(b, &r); - return r; -} - -static inline void -store128_be(uint8_t *b, uint128_t n) -{ - store128_be_(b, &n); -} - -#else /* !defined(KRML_STRUCT_PASSING) */ - -#define print128 print128_ -#define load128_le load128_le_ -#define store128_le store128_le_ -#define load128_be load128_be_ -#define store128_be store128_be_ - -#endif /* KRML_STRUCT_PASSING */ -#endif /* KRML_UINT128 */ -#endif /* __KREMLIB_H */ diff --git a/lib/freebl/verified/kremlib_base.h b/lib/freebl/verified/kremlib_base.h deleted file mode 100644 index f88f02aa52..0000000000 --- a/lib/freebl/verified/kremlib_base.h +++ /dev/null @@ -1,192 +0,0 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __KREMLIB_BASE_H -#define __KREMLIB_BASE_H - -#include -#include -#include -#include -#include -#include -#include - -/******************************************************************************/ -/* Some macros to ease compatibility */ -/******************************************************************************/ - -/* Define __cdecl and friends when using GCC, so that we can safely compile code - * that contains __cdecl on all platforms. Note that this is in a separate - * header so that Dafny-generated code can include just this file. */ -#ifndef _MSC_VER -/* Use the gcc predefined macros if on a platform/architectures that set them. - * Otherwise define them to be empty. */ -#ifndef __cdecl -#define __cdecl -#endif -#ifndef __stdcall -#define __stdcall -#endif -#ifndef __fastcall -#define __fastcall -#endif -#endif - -#ifdef __GNUC__ -#define inline __inline__ -#endif - -/* GCC-specific attribute syntax; everyone else gets the standard C inline - * attribute. 
*/ -#ifdef __GNU_C__ -#ifndef __clang__ -#define force_inline inline __attribute__((always_inline)) -#else -#define force_inline inline -#endif -#else -#define force_inline inline -#endif - -/******************************************************************************/ -/* Implementing C.fst */ -/******************************************************************************/ - -/* Uppercase issue; we have to define lowercase versions of the C macros (as we - * have no way to refer to an uppercase *variable* in F*). */ -extern int exit_success; -extern int exit_failure; - -/* This one allows the user to write C.EXIT_SUCCESS. */ -typedef int exit_code; - -void print_string(const char *s); -void print_bytes(uint8_t *b, uint32_t len); - -/* The universal null pointer defined in C.Nullity.fst */ -#define C_Nullity_null(X) 0 - -/* If some globals need to be initialized before the main, then kremlin will - * generate and try to link last a function with this type: */ -void kremlinit_globals(void); - -/******************************************************************************/ -/* Implementation of machine integers (possibly of 128-bit integers) */ -/******************************************************************************/ - -/* Integer types */ -typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_; -typedef int64_t FStar_Int64_t, FStar_Int64_t_; -typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_; -typedef int32_t FStar_Int32_t, FStar_Int32_t_; -typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_; -typedef int16_t FStar_Int16_t, FStar_Int16_t_; -typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_; -typedef int8_t FStar_Int8_t, FStar_Int8_t_; - -static inline uint32_t -rotate32_left(uint32_t x, uint32_t n) -{ - /* assert (n<32); */ - return (x << n) | (x >> (32 - n)); -} -static inline uint32_t -rotate32_right(uint32_t x, uint32_t n) -{ - /* assert (n<32); */ - return (x >> n) | (x << (32 - n)); -} - -/* Constant time comparisons */ -static inline uint8_t -FStar_UInt8_eq_mask(uint8_t x, uint8_t y) -{ - x = ~(x ^ y); - x &= x << 4; - x &= x << 2; - x &= x << 1; - return (int8_t)x >> 7; -} - -static inline uint8_t -FStar_UInt8_gte_mask(uint8_t x, uint8_t y) -{ - return ~(uint8_t)(((int32_t)x - y) >> 31); -} - -static inline uint16_t -FStar_UInt16_eq_mask(uint16_t x, uint16_t y) -{ - x = ~(x ^ y); - x &= x << 8; - x &= x << 4; - x &= x << 2; - x &= x << 1; - return (int16_t)x >> 15; -} - -static inline uint16_t -FStar_UInt16_gte_mask(uint16_t x, uint16_t y) -{ - return ~(uint16_t)(((int32_t)x - y) >> 31); -} - -static inline uint32_t -FStar_UInt32_eq_mask(uint32_t x, uint32_t y) -{ - x = ~(x ^ y); - x &= x << 16; - x &= x << 8; - x &= x << 4; - x &= x << 2; - x &= x << 1; - return ((int32_t)x) >> 31; -} - -static inline uint32_t -FStar_UInt32_gte_mask(uint32_t x, uint32_t y) -{ - return ~((uint32_t)(((int64_t)x - y) >> 63)); -} - -static inline uint64_t -FStar_UInt64_eq_mask(uint64_t x, uint64_t y) -{ - x = ~(x ^ y); - x &= x << 32; - x &= x << 16; - x &= x << 8; - x &= x << 4; - x &= x << 2; - x &= x << 1; - return ((int64_t)x) >> 63; -} - -static inline uint64_t -FStar_UInt64_gte_mask(uint64_t x, uint64_t y) -{ - uint64_t low63 = - ~((uint64_t)((int64_t)((int64_t)(x & UINT64_C(0x7fffffffffffffff)) - - (int64_t)(y & UINT64_C(0x7fffffffffffffff))) >> - 63)); - uint64_t high_bit = - ~((uint64_t)((int64_t)((int64_t)(x & UINT64_C(0x8000000000000000)) - - (int64_t)(y & UINT64_C(0x8000000000000000))) >> - 63)); - return low63 & high_bit; -} - -#endif diff --git 
a/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h b/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h new file mode 100644 index 0000000000..8278b157d3 --- /dev/null +++ b/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h @@ -0,0 +1,46 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KREMLIN_CALLCONV_H +#define __KREMLIN_CALLCONV_H + +/******************************************************************************/ +/* Some macros to ease compatibility */ +/******************************************************************************/ + +/* We want to generate __cdecl safely without worrying about it being undefined. + * When using MSVC, these are always defined. When using MinGW, these are + * defined too. They have no meaning for other platforms, so we define them to + * be empty macros in other situations. */ +#ifndef _MSC_VER +#ifndef __cdecl +#define __cdecl +#endif +#ifndef __stdcall +#define __stdcall +#endif +#ifndef __fastcall +#define __fastcall +#endif +#endif + +/* Since KreMLin emits the inline keyword unconditionally, we follow the + * guidelines at https://gcc.gnu.org/onlinedocs/gcc/Inline.html and make this + * __inline__ to ensure the code compiles with -std=c90 and earlier. */ +#ifdef __GNUC__ +#define inline __inline__ +#endif + +/* GCC-specific attribute syntax; everyone else gets the standard C inline + * attribute. */ +#ifdef __GNU_C__ +#ifndef __clang__ +#define force_inline inline __attribute__((always_inline)) +#else +#define force_inline inline +#endif +#else +#define force_inline inline +#endif + +#endif diff --git a/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h b/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h new file mode 100644 index 0000000000..964d1c52aa --- /dev/null +++ b/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h @@ -0,0 +1,32 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef KRML_COMPAT_H +#define KRML_COMPAT_H + +#include + +/* A series of macros that define C implementations of types that are not Low*, + * to facilitate porting programs to Low*. */ + +typedef struct { + uint32_t length; + const char *data; +} FStar_Bytes_bytes; + +typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int, + krml_checked_int_t; + +#define RETURN_OR(x) \ + do { \ + int64_t __ret = x; \ + if (__ret < INT32_MIN || INT32_MAX < __ret) { \ + KRML_HOST_PRINTF( \ + "Prims.{int,nat,pos} integer overflow at %s:%d\n", __FILE__, \ + __LINE__); \ + KRML_HOST_EXIT(252); \ + } \ + return (int32_t)__ret; \ + } while (0) + +#endif diff --git a/lib/freebl/verified/kremlin/include/kremlin/internal/target.h b/lib/freebl/verified/kremlin/include/kremlin/internal/target.h new file mode 100644 index 0000000000..25f0fd0ac4 --- /dev/null +++ b/lib/freebl/verified/kremlin/include/kremlin/internal/target.h @@ -0,0 +1,113 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KREMLIN_TARGET_H +#define __KREMLIN_TARGET_H + +#include +#include +#include +#include +#include + +#include "kremlin/internal/callconv.h" + +/******************************************************************************/ +/* Macros that KreMLin will generate. 
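/* Illustrative aside, not part of the patch: RETURN_OR in the new compat.h
 * just above is used the same way the deleted kremlib.h used it for the
 * checked Prims_* arithmetic -- compute in 64 bits, then abort if the result
 * does not fit in int32_t. checked_add is a hypothetical name; the KRML_HOST_*
 * macros it relies on come from target.h below: */
#include <stdint.h>
#include "kremlin/internal/target.h"
#include "kremlin/internal/compat.h"

static inline int32_t
checked_add(int32_t x, int32_t y)
{
    RETURN_OR((int64_t)x + (int64_t)y);
}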
*/ +/******************************************************************************/ + +/* For "bare" targets that do not have a C stdlib, the user might want to use + * [-add-early-include '"mydefinitions.h"'] and override these. */ +#ifndef KRML_HOST_PRINTF +#define KRML_HOST_PRINTF printf +#endif + +#if ( \ + (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + (!(defined KRML_HOST_EPRINTF))) +#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__) +#endif + +#ifndef KRML_HOST_EXIT +#define KRML_HOST_EXIT exit +#endif + +#ifndef KRML_HOST_MALLOC +#define KRML_HOST_MALLOC malloc +#endif + +#ifndef KRML_HOST_CALLOC +#define KRML_HOST_CALLOC calloc +#endif + +#ifndef KRML_HOST_FREE +#define KRML_HOST_FREE free +#endif + +#ifndef KRML_HOST_TIME + +#include + +/* Prims_nat not yet in scope */ +inline static int32_t +krml_time() +{ + return (int32_t)time(NULL); +} + +#define KRML_HOST_TIME krml_time +#endif + +/* In statement position, exiting is easy. */ +#define KRML_EXIT \ + do { \ + KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \ + KRML_HOST_EXIT(254); \ + } while (0) + +/* In expression position, use the comma-operator and a malloc to return an + * expression of the right size. KreMLin passes t as the parameter to the macro. + */ +#define KRML_EABORT(t, msg) \ + (KRML_HOST_PRINTF("KreMLin abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \ + KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t)))) + +/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of + * *elements*. Do an ugly, run-time check (some of which KreMLin can eliminate). + */ + +#ifdef __GNUC__ +#define _KRML_CHECK_SIZE_PRAGMA \ + _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") +#else +#define _KRML_CHECK_SIZE_PRAGMA +#endif + +#define KRML_CHECK_SIZE(size_elt, sz) \ + do { \ + _KRML_CHECK_SIZE_PRAGMA \ + if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \ + KRML_HOST_PRINTF( \ + "Maximum allocatable size exceeded, aborting before overflow at " \ + "%s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(253); \ + } \ + } while (0) + +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) _snprintf_s(buf, sz, _TRUNCATE, fmt, arg) +#else +#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) snprintf(buf, sz, fmt, arg) +#endif + +#if defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ > 4 +#define KRML_DEPRECATED(x) __attribute__((deprecated(x))) +#elif defined(__GNUC__) +/* deprecated attribute is not defined in GCC < 4.5. */ +#define KRML_DEPRECATED(x) +#elif defined(_MSC_VER) +#define KRML_DEPRECATED(x) __declspec(deprecated(x)) +#endif + +#endif diff --git a/lib/freebl/verified/kremlin/include/kremlin/internal/types.h b/lib/freebl/verified/kremlin/include/kremlin/internal/types.h new file mode 100644 index 0000000000..67d5af5f0a --- /dev/null +++ b/lib/freebl/verified/kremlin/include/kremlin/internal/types.h @@ -0,0 +1,85 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef KRML_TYPES_H +#define KRML_TYPES_H + +#include +#include +#include +#include + +/* Types which are either abstract, meaning that have to be implemented in C, or + * which are models, meaning that they are swapped out at compile-time for + * hand-written C types (in which case they're marked as noextract). 
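/* Illustrative aside, not part of the patch: the new KRML_CHECK_SIZE above
 * takes the element *size* as its first argument (the old kremlib.h macro took
 * an element expression and applied sizeof itself). A minimal sketch of
 * guarding an allocation with it; alloc_limbs and n are hypothetical: */
#include <stdint.h>
#include <stdlib.h>
#include "kremlin/internal/target.h"

static uint64_t *
alloc_limbs(size_t n)
{
    /* Aborts via KRML_HOST_EXIT(253) if n * sizeof(uint64_t) would overflow. */
    KRML_CHECK_SIZE(sizeof(uint64_t), n);
    return (uint64_t *)KRML_HOST_MALLOC(n * sizeof(uint64_t));
}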
*/ + +typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_; +typedef int64_t FStar_Int64_t, FStar_Int64_t_; +typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_; +typedef int32_t FStar_Int32_t, FStar_Int32_t_; +typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_; +typedef int16_t FStar_Int16_t, FStar_Int16_t_; +typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_; +typedef int8_t FStar_Int8_t, FStar_Int8_t_; + +/* Only useful when building Kremlib, because it's in the dependency graph of + * FStar.Int.Cast. */ +typedef uint64_t FStar_UInt63_t, FStar_UInt63_t_; +typedef int64_t FStar_Int63_t, FStar_Int63_t_; + +typedef double FStar_Float_float; +typedef uint32_t FStar_Char_char; +typedef FILE *FStar_IO_fd_read, *FStar_IO_fd_write; + +typedef void *FStar_Dyn_dyn; + +typedef const char *C_String_t, *C_String_t_, *C_Compat_String_t, *C_Compat_String_t_; + +typedef int exit_code; +typedef FILE *channel; + +typedef unsigned long long TestLib_cycles; + +typedef uint64_t FStar_Date_dateTime, FStar_Date_timeSpan; + +/* Now Prims.string is no longer illegal with the new model in LowStar.Printf; + * it's operations that produce Prims_string which are illegal. Bring the + * definition into scope by default. */ +typedef const char *Prims_string; + +/* The great static header headache. */ + +/* The uint128 type is a special case since we offer several implementations of + * it, depending on the compiler and whether the user wants the verified + * implementation or not. */ +#if !defined(KRML_VERIFIED_UINT128) && defined(_MSC_VER) && defined(_M_X64) +#include +typedef __m128i FStar_UInt128_uint128; +#elif !defined(KRML_VERIFIED_UINT128) && !defined(_MSC_VER) && \ + (defined(__x86_64__) || defined(__x86_64) || defined(__aarch64__)) +typedef unsigned __int128 FStar_UInt128_uint128; +#else +typedef struct FStar_UInt128_uint128_s { + uint64_t low; + uint64_t high; +} FStar_UInt128_uint128; +#endif + +/* The former is defined once, here (otherwise, conflicts for test-c89. The + * latter is for internal use. */ +typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t; + +#include "kremlin/lowstar_endianness.h" + +/* This one is always included, because it defines C.Endianness functions too. */ +#if !defined(_MSC_VER) +#include "fstar_uint128_gcc64.h" +#endif + +#if !defined(KRML_VERIFIED_UINT128) && defined(_MSC_VER) +#include "fstar_uint128_msvc.h" +#elif defined(KRML_VERIFIED_UINT128) +#include "FStar_UInt128_Verified.h" +#endif + +#endif diff --git a/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h b/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h new file mode 100644 index 0000000000..2a13cc9f15 --- /dev/null +++ b/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h @@ -0,0 +1,242 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __LOWSTAR_ENDIANNESS_H +#define __LOWSTAR_ENDIANNESS_H + +#include +#include + +/******************************************************************************/ +/* Implementing C.fst (part 2: endian-ness macros) */ +/******************************************************************************/ + +/* ... for Linux */ +#if defined(__linux__) || defined(__CYGWIN__) || defined(__USE_SYSTEM_ENDIAN_H__) +#include + +/* ... 
for OSX */ +#elif defined(__APPLE__) +#include +#define htole64(x) OSSwapHostToLittleInt64(x) +#define le64toh(x) OSSwapLittleToHostInt64(x) +#define htobe64(x) OSSwapHostToBigInt64(x) +#define be64toh(x) OSSwapBigToHostInt64(x) + +#define htole16(x) OSSwapHostToLittleInt16(x) +#define le16toh(x) OSSwapLittleToHostInt16(x) +#define htobe16(x) OSSwapHostToBigInt16(x) +#define be16toh(x) OSSwapBigToHostInt16(x) + +#define htole32(x) OSSwapHostToLittleInt32(x) +#define le32toh(x) OSSwapLittleToHostInt32(x) +#define htobe32(x) OSSwapHostToBigInt32(x) +#define be32toh(x) OSSwapBigToHostInt32(x) + +/* ... for Solaris */ +#elif defined(__sun__) +#include +#define htole64(x) LE_64(x) +#define le64toh(x) LE_64(x) +#define htobe64(x) BE_64(x) +#define be64toh(x) BE_64(x) + +#define htole16(x) LE_16(x) +#define le16toh(x) LE_16(x) +#define htobe16(x) BE_16(x) +#define be16toh(x) BE_16(x) + +#define htole32(x) LE_32(x) +#define le32toh(x) LE_32(x) +#define htobe32(x) BE_32(x) +#define be32toh(x) BE_32(x) + +/* ... for the BSDs */ +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +#include +#elif defined(__OpenBSD__) +#include + +/* ... for Windows (MSVC)... not targeting XBOX 360! */ +#elif defined(_MSC_VER) + +#include +#define htobe16(x) _byteswap_ushort(x) +#define htole16(x) (x) +#define be16toh(x) _byteswap_ushort(x) +#define le16toh(x) (x) + +#define htobe32(x) _byteswap_ulong(x) +#define htole32(x) (x) +#define be32toh(x) _byteswap_ulong(x) +#define le32toh(x) (x) + +#define htobe64(x) _byteswap_uint64(x) +#define htole64(x) (x) +#define be64toh(x) _byteswap_uint64(x) +#define le64toh(x) (x) + +/* ... for Windows (GCC-like, e.g. mingw or clang) */ +#elif (defined(_WIN32) || defined(_WIN64)) && \ + (defined(__GNUC__) || defined(__clang__)) + +#define htobe16(x) __builtin_bswap16(x) +#define htole16(x) (x) +#define be16toh(x) __builtin_bswap16(x) +#define le16toh(x) (x) + +#define htobe32(x) __builtin_bswap32(x) +#define htole32(x) (x) +#define be32toh(x) __builtin_bswap32(x) +#define le32toh(x) (x) + +#define htobe64(x) __builtin_bswap64(x) +#define htole64(x) (x) +#define be64toh(x) __builtin_bswap64(x) +#define le64toh(x) (x) + +/* ... generic big-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* byte swapping code inspired by: + * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h + * */ + +#define htobe32(x) (x) +#define be32toh(x) (x) +#define htole32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define le32toh(x) (htole32((x))) + +#define htobe64(x) (x) +#define be64toh(x) (x) +#define htole64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define le64toh(x) (htole64((x))) + +/* ... 
generic little-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +#define htole32(x) (x) +#define le32toh(x) (x) +#define htobe32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define be32toh(x) (htobe32((x))) + +#define htole64(x) (x) +#define le64toh(x) (x) +#define htobe64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define be64toh(x) (htobe64((x))) + +/* ... couldn't determine endian-ness of the target platform */ +#else +#error "Please define __BYTE_ORDER__!" + +#endif /* defined(__linux__) || ... */ + +/* Loads and stores. These avoid undefined behavior due to unaligned memory + * accesses, via memcpy. */ + +inline static uint16_t +load16(uint8_t *b) +{ + uint16_t x; + memcpy(&x, b, 2); + return x; +} + +inline static uint32_t +load32(uint8_t *b) +{ + uint32_t x; + memcpy(&x, b, 4); + return x; +} + +inline static uint64_t +load64(uint8_t *b) +{ + uint64_t x; + memcpy(&x, b, 8); + return x; +} + +inline static void +store16(uint8_t *b, uint16_t i) +{ + memcpy(b, &i, 2); +} + +inline static void +store32(uint8_t *b, uint32_t i) +{ + memcpy(b, &i, 4); +} + +inline static void +store64(uint8_t *b, uint64_t i) +{ + memcpy(b, &i, 8); +} + +/* Legacy accessors so that this header can serve as an implementation of + * C.Endianness */ +#define load16_le(b) (le16toh(load16(b))) +#define store16_le(b, i) (store16(b, htole16(i))) +#define load16_be(b) (be16toh(load16(b))) +#define store16_be(b, i) (store16(b, htobe16(i))) + +#define load32_le(b) (le32toh(load32(b))) +#define store32_le(b, i) (store32(b, htole32(i))) +#define load32_be(b) (be32toh(load32(b))) +#define store32_be(b, i) (store32(b, htobe32(i))) + +#define load64_le(b) (le64toh(load64(b))) +#define store64_le(b, i) (store64(b, htole64(i))) +#define load64_be(b) (be64toh(load64(b))) +#define store64_be(b, i) (store64(b, htobe64(i))) + +/* Co-existence of LowStar.Endianness and FStar.Endianness generates name + * conflicts, because of course both insist on having no prefixes. Until a + * prefix is added, or until we truly retire FStar.Endianness, solve this issue + * in an elegant way. */ +#define load16_le0 load16_le +#define store16_le0 store16_le +#define load16_be0 load16_be +#define store16_be0 store16_be + +#define load32_le0 load32_le +#define store32_le0 store32_le +#define load32_be0 load32_be +#define store32_be0 store32_be + +#define load64_le0 load64_le +#define store64_le0 store64_le +#define load64_be0 load64_be +#define store64_be0 store64_be + +#define load128_le0 load128_le +#define store128_le0 store128_le +#define load128_be0 load128_be +#define store128_be0 store128_be + +#endif diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h new file mode 100644 index 0000000000..824728b9e4 --- /dev/null +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h @@ -0,0 +1,87 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
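/* Illustrative aside, not part of the patch: the memcpy-based load/store
 * helpers plus the htole/letoh macros above give alignment-safe accessors with
 * an explicit byte order. A minimal sketch: */
#include <stdint.h>
#include "kremlin/lowstar_endianness.h"

static uint32_t
endianness_demo(void)
{
    uint8_t buf[8U] = { 0U };

    store32_le(buf, 0x11223344U);     /* writes bytes 44 33 22 11                 */
    store32_be(buf + 4, 0x11223344U); /* writes bytes 11 22 33 44                 */
    return load32_le(buf);            /* 0x11223344 regardless of host endianness */
}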
+ + This file was generated by KreMLin + KreMLin invocation: ../krml -fparentheses -fcurly-braces -fno-shadow -header copyright-header.txt -minimal -skip-compilation -extract-uints -tmpdir dist/minimal -bundle FStar.UInt64+FStar.UInt32+FStar.UInt16+FStar.UInt8=[rename=FStar_UInt_8_16_32_64] -library FStar.UInt128 -add-include -add-include -add-include "kremlin/internal/compat.h" -add-include "kremlin/lowstar_endianness.h" -add-include "kremlin/internal/types.h" -add-include "kremlin/internal/target.h" -bundle LowStar.Endianness= -bundle FStar.UInt128= -bundle *,WindowsWorkaroundSigh fstar_uint128.c -o libkremlib.a .extract/prims.krml .extract/FStar_Pervasives_Native.krml .extract/FStar_Pervasives.krml .extract/FStar_Preorder.krml .extract/FStar_Calc.krml .extract/FStar_Squash.krml .extract/FStar_Classical.krml .extract/FStar_StrongExcludedMiddle.krml .extract/FStar_FunctionalExtensionality.krml .extract/FStar_List_Tot_Base.krml .extract/FStar_List_Tot_Properties.krml .extract/FStar_List_Tot.krml .extract/FStar_Seq_Base.krml .extract/FStar_Seq_Properties.krml .extract/FStar_Seq.krml .extract/FStar_Mul.krml .extract/FStar_Math_Lib.krml .extract/FStar_Math_Lemmas.krml .extract/FStar_BitVector.krml .extract/FStar_UInt.krml .extract/FStar_UInt32.krml .extract/FStar_Int.krml .extract/FStar_Int16.krml .extract/FStar_Reflection_Types.krml .extract/FStar_Reflection_Data.krml .extract/FStar_Order.krml .extract/FStar_Reflection_Basic.krml .extract/FStar_Ghost.krml .extract/FStar_ErasedLogic.krml .extract/FStar_UInt64.krml .extract/FStar_UInt8.krml .extract/FStar_Endianness.krml .extract/FStar_Set.krml .extract/FStar_PropositionalExtensionality.krml .extract/FStar_PredicateExtensionality.krml .extract/FStar_TSet.krml .extract/FStar_Monotonic_Heap.krml .extract/FStar_Heap.krml .extract/FStar_Map.krml .extract/FStar_Monotonic_HyperHeap.krml .extract/FStar_Monotonic_HyperStack.krml .extract/FStar_HyperStack.krml .extract/FStar_Monotonic_Witnessed.krml .extract/FStar_HyperStack_ST.krml .extract/FStar_HyperStack_All.krml .extract/FStar_Char.krml .extract/FStar_Exn.krml .extract/FStar_ST.krml .extract/FStar_All.krml .extract/FStar_List.krml .extract/FStar_String.krml .extract/FStar_Reflection_Const.krml .extract/FStar_Reflection_Derived.krml .extract/FStar_Reflection_Derived_Lemmas.krml .extract/FStar_Date.krml .extract/FStar_Universe.krml .extract/FStar_GSet.krml .extract/FStar_ModifiesGen.krml .extract/FStar_Range.krml .extract/FStar_Tactics_Types.krml .extract/FStar_Tactics_Result.krml .extract/FStar_Tactics_Effect.krml .extract/FStar_Tactics_Builtins.krml .extract/FStar_Reflection.krml .extract/FStar_Tactics_SyntaxHelpers.krml .extract/FStar_Tactics_Util.krml .extract/FStar_Reflection_Formula.krml .extract/FStar_Tactics_Derived.krml .extract/FStar_Tactics_Logic.krml .extract/FStar_Tactics.krml .extract/FStar_BigOps.krml .extract/LowStar_Monotonic_Buffer.krml .extract/LowStar_Buffer.krml .extract/Spec_Loops.krml .extract/LowStar_BufferOps.krml .extract/C_Loops.krml .extract/FStar_Kremlin_Endianness.krml .extract/FStar_UInt63.krml .extract/FStar_Dyn.krml .extract/FStar_Int63.krml .extract/FStar_Int64.krml .extract/FStar_Int32.krml .extract/FStar_Int8.krml .extract/FStar_UInt16.krml .extract/FStar_Int_Cast.krml .extract/FStar_UInt128.krml .extract/C_Endianness.krml .extract/WasmSupport.krml .extract/FStar_Float.krml .extract/FStar_IO.krml .extract/C.krml .extract/LowStar_Modifies.krml .extract/C_String.krml .extract/FStar_Bytes.krml .extract/FStar_HyperStack_IO.krml .extract/LowStar_Printf.krml 
.extract/LowStar_Endianness.krml .extract/C_Failure.krml .extract/TestLib.krml .extract/FStar_Int_Cast_Full.krml + F* version: b0467796 + KreMLin version: ab4c97c6 +*/ + +#include +#include +#include "kremlin/internal/compat.h" +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + +#ifndef __FStar_UInt128_H +#define __FStar_UInt128_H + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__low(FStar_UInt128_uint128 projectee); + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__high(FStar_UInt128_uint128 projectee); + +inline static FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a); + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); + +inline static bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +inline static FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); + +inline static uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); + +inline static FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y); + +inline static FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); + +#define __FStar_UInt128_H_DEFINED +#endif diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h new file mode 100644 index 0000000000..1ffe8435ce --- /dev/null +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h @@ -0,0 +1,346 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
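[Editor's note] The FStar_UInt128.h interface above deliberately exposes 128-bit values only through these functions; callers never look at the representation, so the same client code works whether the build selects the native __int128 backend, the MSVC-intrinsic backend, or the verified portable one that follows. A short sketch of the calling convention; the helper names are illustrative only, and the fragment assumes the kremlib headers from this patch are on the include path with one of the implementations selected by the build.

#include <stdint.h>
#include "FStar_UInt128.h"

/* Assemble a 128-bit value from two 64-bit halves. */
static FStar_UInt128_uint128
make_uint128(uint64_t hi, uint64_t lo)
{
    return FStar_UInt128_logor(
        FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128(hi), (uint32_t)64U),
        FStar_UInt128_uint64_to_uint128(lo));
}

/* High 64 bits of a full 64x64 -> 128 multiply. */
static uint64_t
mul_high64(uint64_t a, uint64_t b)
{
    return FStar_UInt128___proj__Mkuint128__item__high(FStar_UInt128_mul_wide(a, b));
}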
+ + This file was generated by KreMLin + KreMLin invocation: ../krml -fparentheses -fcurly-braces -fno-shadow -header copyright-header.txt -minimal -skip-compilation -extract-uints -tmpdir dist/uint128 -add-include -add-include -add-include "kremlin/internal/types.h" -add-include "kremlin/internal/target.h" -bundle FStar.UInt64[rename=FStar_UInt_8_16_32_64] -bundle FStar.UInt128=[rename=FStar_UInt128_Verified] -fc89 -bundle *,WindowsWorkaroundSigh -static-header FStar.UInt128,FStar.UInt64 -ccopt -DKRML_VERIFIED_UINT128 -o libkremlib.a .extract/prims.krml .extract/FStar_Pervasives_Native.krml .extract/FStar_Pervasives.krml .extract/FStar_Preorder.krml .extract/FStar_Calc.krml .extract/FStar_Squash.krml .extract/FStar_Classical.krml .extract/FStar_StrongExcludedMiddle.krml .extract/FStar_FunctionalExtensionality.krml .extract/FStar_List_Tot_Base.krml .extract/FStar_List_Tot_Properties.krml .extract/FStar_List_Tot.krml .extract/FStar_Seq_Base.krml .extract/FStar_Seq_Properties.krml .extract/FStar_Seq.krml .extract/FStar_Mul.krml .extract/FStar_Math_Lib.krml .extract/FStar_Math_Lemmas.krml .extract/FStar_BitVector.krml .extract/FStar_UInt.krml .extract/FStar_UInt32.krml .extract/FStar_Int.krml .extract/FStar_Int16.krml .extract/FStar_Reflection_Types.krml .extract/FStar_Reflection_Data.krml .extract/FStar_Order.krml .extract/FStar_Reflection_Basic.krml .extract/FStar_Ghost.krml .extract/FStar_ErasedLogic.krml .extract/FStar_UInt64.krml .extract/FStar_UInt8.krml .extract/FStar_Endianness.krml .extract/FStar_Set.krml .extract/FStar_PropositionalExtensionality.krml .extract/FStar_PredicateExtensionality.krml .extract/FStar_TSet.krml .extract/FStar_Monotonic_Heap.krml .extract/FStar_Heap.krml .extract/FStar_Map.krml .extract/FStar_Monotonic_HyperHeap.krml .extract/FStar_Monotonic_HyperStack.krml .extract/FStar_HyperStack.krml .extract/FStar_Monotonic_Witnessed.krml .extract/FStar_HyperStack_ST.krml .extract/FStar_HyperStack_All.krml .extract/FStar_Char.krml .extract/FStar_Exn.krml .extract/FStar_ST.krml .extract/FStar_All.krml .extract/FStar_List.krml .extract/FStar_String.krml .extract/FStar_Reflection_Const.krml .extract/FStar_Reflection_Derived.krml .extract/FStar_Reflection_Derived_Lemmas.krml .extract/FStar_Date.krml .extract/FStar_Universe.krml .extract/FStar_GSet.krml .extract/FStar_ModifiesGen.krml .extract/FStar_Range.krml .extract/FStar_Tactics_Types.krml .extract/FStar_Tactics_Result.krml .extract/FStar_Tactics_Effect.krml .extract/FStar_Tactics_Builtins.krml .extract/FStar_Reflection.krml .extract/FStar_Tactics_SyntaxHelpers.krml .extract/FStar_Tactics_Util.krml .extract/FStar_Reflection_Formula.krml .extract/FStar_Tactics_Derived.krml .extract/FStar_Tactics_Logic.krml .extract/FStar_Tactics.krml .extract/FStar_BigOps.krml .extract/LowStar_Monotonic_Buffer.krml .extract/LowStar_Buffer.krml .extract/Spec_Loops.krml .extract/LowStar_BufferOps.krml .extract/C_Loops.krml .extract/FStar_Kremlin_Endianness.krml .extract/FStar_UInt63.krml .extract/FStar_Dyn.krml .extract/FStar_Int63.krml .extract/FStar_Int64.krml .extract/FStar_Int32.krml .extract/FStar_Int8.krml .extract/FStar_UInt16.krml .extract/FStar_Int_Cast.krml .extract/FStar_UInt128.krml .extract/C_Endianness.krml .extract/WasmSupport.krml .extract/FStar_Float.krml .extract/FStar_IO.krml .extract/C.krml .extract/LowStar_Modifies.krml .extract/C_String.krml .extract/FStar_Bytes.krml .extract/FStar_HyperStack_IO.krml .extract/LowStar_Printf.krml .extract/LowStar_Endianness.krml .extract/C_Failure.krml .extract/TestLib.krml 
.extract/FStar_Int_Cast_Full.krml + F* version: b0467796 + KreMLin version: ab4c97c6 +*/ + +#include +#include +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + +#ifndef __FStar_UInt128_Verified_H +#define __FStar_UInt128_Verified_H + +#include "FStar_UInt_8_16_32_64.h" + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__low(FStar_UInt128_uint128 projectee) +{ + return projectee.low; +} + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__high(FStar_UInt128_uint128 projectee) +{ + return projectee.high; +} + +inline static uint64_t +FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U; +} + +inline static uint64_t +FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return FStar_UInt128_sub_mod_impl(a, b); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +} + +static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +inline static uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, 
uint32_t s) +{ + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) { + return a; + } else { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = a.low << (s - FStar_UInt128_u32_64); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) { + return FStar_UInt128_shift_left_small(a, s); + } else { + return FStar_UInt128_shift_left_large(a, s); + } +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) { + return a; + } else { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = a.high >> (s - FStar_UInt128_u32_64); + lit.high = (uint64_t)0U; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) { + return FStar_UInt128_shift_right_small(a, s); + } else { + return FStar_UInt128_shift_right_large(a, s); + } +} + +inline static bool +FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.low == b.low && a.high == b.high; +} + +inline static bool +FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low > b.low); +} + +inline static bool +FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low < b.low); +} + +inline static bool +FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low >= b.low); +} + +inline static bool +FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low <= b.low); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, 
b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + lit.high = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_uint64_to_uint128(uint64_t a) +{ + FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +} + +inline static uint64_t +FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +inline static uint64_t +FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +inline static uint64_t +FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) * (uint64_t)y + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = + ((x >> FStar_UInt128_u32_32) * (uint64_t)y + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32; + return lit; +} + +inline static uint64_t +FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y))); + lit.high = + (x >> FStar_UInt128_u32_32) * (y >> FStar_UInt128_u32_32) + + (((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32) + + ((FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) >> + FStar_UInt128_u32_32); + return lit; +} + +#define __FStar_UInt128_Verified_H_DEFINED +#endif diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h new file mode 100644 index 0000000000..68cd217a8c --- /dev/null +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h @@ -0,0 +1,176 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
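[Editor's note] The eq_mask/gte_mask helpers used in the 128-bit code above, and their 8/16/32/64-bit counterparts defined in the FStar_UInt_8_16_32_64.h file that follows, return all-ones or all-zero words computed without any secret-dependent branch; HACL* builds constant-time comparison and selection out of them. A sketch of the usual select idiom; ct_select64 is an illustrative name, not part of the patch, and the snippet assumes the kremlib headers above are on the include path.

#include <assert.h>
#include <stdint.h>
#include "FStar_UInt_8_16_32_64.h"

/* Returns x when a == b and y otherwise, with no data-dependent branch. */
static uint64_t
ct_select64(uint64_t a, uint64_t b, uint64_t x, uint64_t y)
{
    uint64_t mask = FStar_UInt64_eq_mask(a, b); /* all ones or all zeros */
    return (mask & x) | (~mask & y);
}

int
main(void)
{
    assert(FStar_UInt64_eq_mask(7U, 7U) == UINT64_MAX);
    assert(FStar_UInt64_eq_mask(7U, 8U) == 0U);
    assert(FStar_UInt64_gte_mask(9U, 3U) == UINT64_MAX);
    assert(ct_select64(1U, 1U, 42U, 13U) == 42U);
    return 0;
}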
+ + This file was generated by KreMLin + KreMLin invocation: ../krml -fparentheses -fcurly-braces -fno-shadow -header copyright-header.txt -minimal -skip-compilation -extract-uints -tmpdir dist/minimal -bundle FStar.UInt64+FStar.UInt32+FStar.UInt16+FStar.UInt8=[rename=FStar_UInt_8_16_32_64] -library FStar.UInt128 -add-include -add-include -add-include "kremlin/internal/compat.h" -add-include "kremlin/lowstar_endianness.h" -add-include "kremlin/internal/types.h" -add-include "kremlin/internal/target.h" -bundle LowStar.Endianness= -bundle FStar.UInt128= -bundle *,WindowsWorkaroundSigh fstar_uint128.c -o libkremlib.a .extract/prims.krml .extract/FStar_Pervasives_Native.krml .extract/FStar_Pervasives.krml .extract/FStar_Preorder.krml .extract/FStar_Calc.krml .extract/FStar_Squash.krml .extract/FStar_Classical.krml .extract/FStar_StrongExcludedMiddle.krml .extract/FStar_FunctionalExtensionality.krml .extract/FStar_List_Tot_Base.krml .extract/FStar_List_Tot_Properties.krml .extract/FStar_List_Tot.krml .extract/FStar_Seq_Base.krml .extract/FStar_Seq_Properties.krml .extract/FStar_Seq.krml .extract/FStar_Mul.krml .extract/FStar_Math_Lib.krml .extract/FStar_Math_Lemmas.krml .extract/FStar_BitVector.krml .extract/FStar_UInt.krml .extract/FStar_UInt32.krml .extract/FStar_Int.krml .extract/FStar_Int16.krml .extract/FStar_Reflection_Types.krml .extract/FStar_Reflection_Data.krml .extract/FStar_Order.krml .extract/FStar_Reflection_Basic.krml .extract/FStar_Ghost.krml .extract/FStar_ErasedLogic.krml .extract/FStar_UInt64.krml .extract/FStar_UInt8.krml .extract/FStar_Endianness.krml .extract/FStar_Set.krml .extract/FStar_PropositionalExtensionality.krml .extract/FStar_PredicateExtensionality.krml .extract/FStar_TSet.krml .extract/FStar_Monotonic_Heap.krml .extract/FStar_Heap.krml .extract/FStar_Map.krml .extract/FStar_Monotonic_HyperHeap.krml .extract/FStar_Monotonic_HyperStack.krml .extract/FStar_HyperStack.krml .extract/FStar_Monotonic_Witnessed.krml .extract/FStar_HyperStack_ST.krml .extract/FStar_HyperStack_All.krml .extract/FStar_Char.krml .extract/FStar_Exn.krml .extract/FStar_ST.krml .extract/FStar_All.krml .extract/FStar_List.krml .extract/FStar_String.krml .extract/FStar_Reflection_Const.krml .extract/FStar_Reflection_Derived.krml .extract/FStar_Reflection_Derived_Lemmas.krml .extract/FStar_Date.krml .extract/FStar_Universe.krml .extract/FStar_GSet.krml .extract/FStar_ModifiesGen.krml .extract/FStar_Range.krml .extract/FStar_Tactics_Types.krml .extract/FStar_Tactics_Result.krml .extract/FStar_Tactics_Effect.krml .extract/FStar_Tactics_Builtins.krml .extract/FStar_Reflection.krml .extract/FStar_Tactics_SyntaxHelpers.krml .extract/FStar_Tactics_Util.krml .extract/FStar_Reflection_Formula.krml .extract/FStar_Tactics_Derived.krml .extract/FStar_Tactics_Logic.krml .extract/FStar_Tactics.krml .extract/FStar_BigOps.krml .extract/LowStar_Monotonic_Buffer.krml .extract/LowStar_Buffer.krml .extract/Spec_Loops.krml .extract/LowStar_BufferOps.krml .extract/C_Loops.krml .extract/FStar_Kremlin_Endianness.krml .extract/FStar_UInt63.krml .extract/FStar_Dyn.krml .extract/FStar_Int63.krml .extract/FStar_Int64.krml .extract/FStar_Int32.krml .extract/FStar_Int8.krml .extract/FStar_UInt16.krml .extract/FStar_Int_Cast.krml .extract/FStar_UInt128.krml .extract/C_Endianness.krml .extract/WasmSupport.krml .extract/FStar_Float.krml .extract/FStar_IO.krml .extract/C.krml .extract/LowStar_Modifies.krml .extract/C_String.krml .extract/FStar_Bytes.krml .extract/FStar_HyperStack_IO.krml .extract/LowStar_Printf.krml 
.extract/LowStar_Endianness.krml .extract/C_Failure.krml .extract/TestLib.krml .extract/FStar_Int_Cast_Full.krml + F* version: b0467796 + KreMLin version: ab4c97c6 +*/ + +#include +#include +#include "kremlin/internal/compat.h" +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + +#ifndef __FStar_UInt_8_16_32_64_H +#define __FStar_UInt_8_16_32_64_H + +extern Prims_int FStar_UInt64_n; + +extern Prims_int FStar_UInt64_v(uint64_t x); + +extern uint64_t FStar_UInt64_uint_to_t(Prims_int x); + +extern uint64_t FStar_UInt64_minus(uint64_t a); + +extern uint32_t FStar_UInt64_n_minus_one; + +inline static uint64_t +FStar_UInt64_eq_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a ^ b; + uint64_t minus_x = ~x + (uint64_t)1U; + uint64_t x_or_minus_x = x | minus_x; + uint64_t xnx = x_or_minus_x >> (uint32_t)63U; + return xnx - (uint64_t)1U; +} + +inline static uint64_t +FStar_UInt64_gte_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a; + uint64_t y = b; + uint64_t x_xor_y = x ^ y; + uint64_t x_sub_y = x - y; + uint64_t x_sub_y_xor_y = x_sub_y ^ y; + uint64_t q = x_xor_y | x_sub_y_xor_y; + uint64_t x_xor_q = x ^ q; + uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U; + return x_xor_q_ - (uint64_t)1U; +} + +extern Prims_string FStar_UInt64_to_string(uint64_t uu____716); + +extern uint64_t FStar_UInt64_of_string(Prims_string uu____728); + +extern Prims_int FStar_UInt32_n; + +extern Prims_int FStar_UInt32_v(uint32_t x); + +extern uint32_t FStar_UInt32_uint_to_t(Prims_int x); + +extern uint32_t FStar_UInt32_minus(uint32_t a); + +extern uint32_t FStar_UInt32_n_minus_one; + +inline static uint32_t +FStar_UInt32_eq_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a ^ b; + uint32_t minus_x = ~x + (uint32_t)1U; + uint32_t x_or_minus_x = x | minus_x; + uint32_t xnx = x_or_minus_x >> (uint32_t)31U; + return xnx - (uint32_t)1U; +} + +inline static uint32_t +FStar_UInt32_gte_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a; + uint32_t y = b; + uint32_t x_xor_y = x ^ y; + uint32_t x_sub_y = x - y; + uint32_t x_sub_y_xor_y = x_sub_y ^ y; + uint32_t q = x_xor_y | x_sub_y_xor_y; + uint32_t x_xor_q = x ^ q; + uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U; + return x_xor_q_ - (uint32_t)1U; +} + +extern Prims_string FStar_UInt32_to_string(uint32_t uu____716); + +extern uint32_t FStar_UInt32_of_string(Prims_string uu____728); + +extern Prims_int FStar_UInt16_n; + +extern Prims_int FStar_UInt16_v(uint16_t x); + +extern uint16_t FStar_UInt16_uint_to_t(Prims_int x); + +extern uint16_t FStar_UInt16_minus(uint16_t a); + +extern uint32_t FStar_UInt16_n_minus_one; + +inline static uint16_t +FStar_UInt16_eq_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a ^ b; + uint16_t minus_x = ~x + (uint16_t)1U; + uint16_t x_or_minus_x = x | minus_x; + uint16_t xnx = x_or_minus_x >> (uint32_t)15U; + return xnx - (uint16_t)1U; +} + +inline static uint16_t +FStar_UInt16_gte_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a; + uint16_t y = b; + uint16_t x_xor_y = x ^ y; + uint16_t x_sub_y = x - y; + uint16_t x_sub_y_xor_y = x_sub_y ^ y; + uint16_t q = x_xor_y | x_sub_y_xor_y; + uint16_t x_xor_q = x ^ q; + uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U; + return x_xor_q_ - (uint16_t)1U; +} + +extern Prims_string FStar_UInt16_to_string(uint16_t uu____716); + +extern uint16_t FStar_UInt16_of_string(Prims_string uu____728); + +extern Prims_int FStar_UInt8_n; + +extern Prims_int FStar_UInt8_v(uint8_t x); + +extern uint8_t FStar_UInt8_uint_to_t(Prims_int x); + +extern uint8_t 
FStar_UInt8_minus(uint8_t a); + +extern uint32_t FStar_UInt8_n_minus_one; + +inline static uint8_t +FStar_UInt8_eq_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a ^ b; + uint8_t minus_x = ~x + (uint8_t)1U; + uint8_t x_or_minus_x = x | minus_x; + uint8_t xnx = x_or_minus_x >> (uint32_t)7U; + return xnx - (uint8_t)1U; +} + +inline static uint8_t +FStar_UInt8_gte_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a; + uint8_t y = b; + uint8_t x_xor_y = x ^ y; + uint8_t x_sub_y = x - y; + uint8_t x_sub_y_xor_y = x_sub_y ^ y; + uint8_t q = x_xor_y | x_sub_y_xor_y; + uint8_t x_xor_q = x ^ q; + uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U; + return x_xor_q_ - (uint8_t)1U; +} + +extern Prims_string FStar_UInt8_to_string(uint8_t uu____716); + +extern uint8_t FStar_UInt8_of_string(Prims_string uu____728); + +typedef uint8_t FStar_UInt8_byte; + +#define __FStar_UInt_8_16_32_64_H_DEFINED +#endif diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h new file mode 100644 index 0000000000..89106c3d71 --- /dev/null +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h @@ -0,0 +1,32 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. + + This file was generated by KreMLin + KreMLin invocation: ../krml -fparentheses -fcurly-braces -fno-shadow -header copyright-header.txt -minimal -skip-compilation -extract-uints -tmpdir dist/minimal -bundle FStar.UInt64+FStar.UInt32+FStar.UInt16+FStar.UInt8=[rename=FStar_UInt_8_16_32_64] -library FStar.UInt128 -add-include -add-include -add-include "kremlin/internal/compat.h" -add-include "kremlin/lowstar_endianness.h" -add-include "kremlin/internal/types.h" -add-include "kremlin/internal/target.h" -bundle LowStar.Endianness= -bundle FStar.UInt128= -bundle *,WindowsWorkaroundSigh fstar_uint128.c -o libkremlib.a .extract/prims.krml .extract/FStar_Pervasives_Native.krml .extract/FStar_Pervasives.krml .extract/FStar_Preorder.krml .extract/FStar_Calc.krml .extract/FStar_Squash.krml .extract/FStar_Classical.krml .extract/FStar_StrongExcludedMiddle.krml .extract/FStar_FunctionalExtensionality.krml .extract/FStar_List_Tot_Base.krml .extract/FStar_List_Tot_Properties.krml .extract/FStar_List_Tot.krml .extract/FStar_Seq_Base.krml .extract/FStar_Seq_Properties.krml .extract/FStar_Seq.krml .extract/FStar_Mul.krml .extract/FStar_Math_Lib.krml .extract/FStar_Math_Lemmas.krml .extract/FStar_BitVector.krml .extract/FStar_UInt.krml .extract/FStar_UInt32.krml .extract/FStar_Int.krml .extract/FStar_Int16.krml .extract/FStar_Reflection_Types.krml .extract/FStar_Reflection_Data.krml .extract/FStar_Order.krml .extract/FStar_Reflection_Basic.krml .extract/FStar_Ghost.krml .extract/FStar_ErasedLogic.krml .extract/FStar_UInt64.krml .extract/FStar_UInt8.krml .extract/FStar_Endianness.krml .extract/FStar_Set.krml .extract/FStar_PropositionalExtensionality.krml .extract/FStar_PredicateExtensionality.krml .extract/FStar_TSet.krml .extract/FStar_Monotonic_Heap.krml .extract/FStar_Heap.krml .extract/FStar_Map.krml .extract/FStar_Monotonic_HyperHeap.krml .extract/FStar_Monotonic_HyperStack.krml .extract/FStar_HyperStack.krml .extract/FStar_Monotonic_Witnessed.krml .extract/FStar_HyperStack_ST.krml .extract/FStar_HyperStack_All.krml .extract/FStar_Char.krml .extract/FStar_Exn.krml .extract/FStar_ST.krml .extract/FStar_All.krml .extract/FStar_List.krml .extract/FStar_String.krml .extract/FStar_Reflection_Const.krml 
.extract/FStar_Reflection_Derived.krml .extract/FStar_Reflection_Derived_Lemmas.krml .extract/FStar_Date.krml .extract/FStar_Universe.krml .extract/FStar_GSet.krml .extract/FStar_ModifiesGen.krml .extract/FStar_Range.krml .extract/FStar_Tactics_Types.krml .extract/FStar_Tactics_Result.krml .extract/FStar_Tactics_Effect.krml .extract/FStar_Tactics_Builtins.krml .extract/FStar_Reflection.krml .extract/FStar_Tactics_SyntaxHelpers.krml .extract/FStar_Tactics_Util.krml .extract/FStar_Reflection_Formula.krml .extract/FStar_Tactics_Derived.krml .extract/FStar_Tactics_Logic.krml .extract/FStar_Tactics.krml .extract/FStar_BigOps.krml .extract/LowStar_Monotonic_Buffer.krml .extract/LowStar_Buffer.krml .extract/Spec_Loops.krml .extract/LowStar_BufferOps.krml .extract/C_Loops.krml .extract/FStar_Kremlin_Endianness.krml .extract/FStar_UInt63.krml .extract/FStar_Dyn.krml .extract/FStar_Int63.krml .extract/FStar_Int64.krml .extract/FStar_Int32.krml .extract/FStar_Int8.krml .extract/FStar_UInt16.krml .extract/FStar_Int_Cast.krml .extract/FStar_UInt128.krml .extract/C_Endianness.krml .extract/WasmSupport.krml .extract/FStar_Float.krml .extract/FStar_IO.krml .extract/C.krml .extract/LowStar_Modifies.krml .extract/C_String.krml .extract/FStar_Bytes.krml .extract/FStar_HyperStack_IO.krml .extract/LowStar_Printf.krml .extract/LowStar_Endianness.krml .extract/C_Failure.krml .extract/TestLib.krml .extract/FStar_Int_Cast_Full.krml + F* version: b0467796 + KreMLin version: ab4c97c6 +*/ + +#include +#include +#include "kremlin/internal/compat.h" +#include "kremlin/lowstar_endianness.h" +#include "kremlin/internal/types.h" +#include "kremlin/internal/target.h" + +#ifndef __LowStar_Endianness_H +#define __LowStar_Endianness_H + +#include "FStar_UInt128.h" + +inline static void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1); + +inline static FStar_UInt128_uint128 load128_le(uint8_t *x0); + +inline static void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1); + +inline static FStar_UInt128_uint128 load128_be(uint8_t *x0); + +#define __LowStar_Endianness_H_DEFINED +#endif diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h new file mode 100644 index 0000000000..ba01362a9e --- /dev/null +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h @@ -0,0 +1,303 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +/******************************************************************************/ +/* Machine integers (128-bit arithmetic) */ +/******************************************************************************/ + +/* This header contains two things. + * + * First, an implementation of 128-bit arithmetic suitable for 64-bit GCC and + * Clang, i.e. all the operations from FStar.UInt128. + * + * Second, 128-bit operations from C.Endianness (or LowStar.Endianness), + * suitable for any compiler and platform (via a series of ifdefs). This second + * part is unfortunate, and should be fixed by moving {load,store}128_{be,le} to + * FStar.UInt128 to avoid a maze of preprocessor guards and hand-written code. + * */ + +/* This file is used for both the minimal and generic kremlib distributions. As + * such, it assumes that the machine integers have been bundled the exact same + * way in both cases. 
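[Editor's note] Concretely, on the 64-bit GCC/Clang targets this header is written for, uint128_t is the compiler's unsigned __int128 and the FStar_UInt128_* wrappers below reduce to single arithmetic operations. A standalone check of that equivalence for mul_wide; this is an illustrative sketch, not part of the change, and it assumes such a compiler, the kremlib headers on the include path, and the default configuration in which this header supplies the implementation. The native __int128 value here is only the reference; the projection functions give the same answer with any backend.

#include <assert.h>
#include <stdint.h>
#include "FStar_UInt128.h"

int
main(void)
{
#if defined(__SIZEOF_INT128__)
    uint64_t a = 0xfedcba9876543210ULL;
    uint64_t b = 0x0123456789abcdefULL;
    unsigned __int128 ref = (unsigned __int128)a * b;
    FStar_UInt128_uint128 p = FStar_UInt128_mul_wide(a, b);
    assert(FStar_UInt128___proj__Mkuint128__item__low(p) == (uint64_t)ref);
    assert(FStar_UInt128___proj__Mkuint128__item__high(p) == (uint64_t)(ref >> 64));
#endif
    return 0;
}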
*/ + +#include "FStar_UInt128.h" +#include "FStar_UInt_8_16_32_64.h" +#include "LowStar_Endianness.h" + +#if !defined(KRML_VERIFIED_UINT128) && !defined(_MSC_VER) && \ + (defined(__x86_64__) || defined(__x86_64) || defined(__aarch64__)) + +/* GCC + using native unsigned __int128 support */ + +inline static uint128_t +load128_le(uint8_t *b) +{ + uint128_t l = (uint128_t)load64_le(b); + uint128_t h = (uint128_t)load64_le(b + 8); + return (h << 64 | l); +} + +inline static void +store128_le(uint8_t *b, uint128_t n) +{ + store64_le(b, (uint64_t)n); + store64_le(b + 8, (uint64_t)(n >> 64)); +} + +inline static uint128_t +load128_be(uint8_t *b) +{ + uint128_t h = (uint128_t)load64_be(b); + uint128_t l = (uint128_t)load64_be(b + 8); + return (h << 64 | l); +} + +inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store64_be(b, (uint64_t)(n >> 64)); + store64_be(b + 8, (uint64_t)n); +} + +inline static uint128_t +FStar_UInt128_add(uint128_t x, uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_mul(uint128_t x, uint128_t y) +{ + return x * y; +} + +inline static uint128_t +FStar_UInt128_add_mod(uint128_t x, uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_sub(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static uint128_t +FStar_UInt128_sub_mod(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static uint128_t +FStar_UInt128_logand(uint128_t x, uint128_t y) +{ + return x & y; +} + +inline static uint128_t +FStar_UInt128_logor(uint128_t x, uint128_t y) +{ + return x | y; +} + +inline static uint128_t +FStar_UInt128_logxor(uint128_t x, uint128_t y) +{ + return x ^ y; +} + +inline static uint128_t +FStar_UInt128_lognot(uint128_t x) +{ + return ~x; +} + +inline static uint128_t +FStar_UInt128_shift_left(uint128_t x, uint32_t y) +{ + return x << y; +} + +inline static uint128_t +FStar_UInt128_shift_right(uint128_t x, uint32_t y) +{ + return x >> y; +} + +inline static uint128_t +FStar_UInt128_uint64_to_uint128(uint64_t x) +{ + return (uint128_t)x; +} + +inline static uint64_t +FStar_UInt128_uint128_to_uint64(uint128_t x) +{ + return (uint64_t)x; +} + +inline static uint128_t +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + return ((uint128_t)x) * y; +} + +inline static uint128_t +FStar_UInt128_eq_mask(uint128_t x, uint128_t y) +{ + uint64_t mask = + FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64)) & + FStar_UInt64_eq_mask(x, y); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint128_t +FStar_UInt128_gte_mask(uint128_t x, uint128_t y) +{ + uint64_t mask = + (FStar_UInt64_gte_mask(x >> 64, y >> 64) & + ~(FStar_UInt64_eq_mask(x >> 64, y >> 64))) | + (FStar_UInt64_eq_mask(x >> 64, y >> 64) & FStar_UInt64_gte_mask(x, y)); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__low(uint128_t x) +{ + return (uint64_t)x; +} + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__high(uint128_t x) +{ + return (uint64_t)(x >> 64); +} + +inline static uint128_t +FStar_UInt128_add_underspec(uint128_t x, uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_sub_underspec(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static bool +FStar_UInt128_eq(uint128_t x, uint128_t y) +{ + return x == y; +} + +inline static bool +FStar_UInt128_gt(uint128_t x, uint128_t y) +{ + return x > y; +} + +inline static bool +FStar_UInt128_lt(uint128_t x, uint128_t y) +{ + return x < y; +} + +inline static bool 
+FStar_UInt128_gte(uint128_t x, uint128_t y) +{ + return x >= y; +} + +inline static bool +FStar_UInt128_lte(uint128_t x, uint128_t y) +{ + return x <= y; +} + +inline static uint128_t +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ + return (uint128_t)x * (uint128_t)y; +} + +#elif !defined(_MSC_VER) && defined(KRML_VERIFIED_UINT128) + +/* Verified uint128 implementation. */ + +/* Access 64-bit fields within the int128. */ +#define HIGH64_OF(x) ((x)->high) +#define LOW64_OF(x) ((x)->low) + +/* A series of definitions written using pointers. */ + +inline static void +load128_le_(uint8_t *b, uint128_t *r) +{ + LOW64_OF(r) = load64_le(b); + HIGH64_OF(r) = load64_le(b + 8); +} + +inline static void +store128_le_(uint8_t *b, uint128_t *n) +{ + store64_le(b, LOW64_OF(n)); + store64_le(b + 8, HIGH64_OF(n)); +} + +inline static void +load128_be_(uint8_t *b, uint128_t *r) +{ + HIGH64_OF(r) = load64_be(b); + LOW64_OF(r) = load64_be(b + 8); +} + +inline static void +store128_be_(uint8_t *b, uint128_t *n) +{ + store64_be(b, HIGH64_OF(n)); + store64_be(b + 8, LOW64_OF(n)); +} + +#ifndef KRML_NOSTRUCT_PASSING + +inline static uint128_t +load128_le(uint8_t *b) +{ + uint128_t r; + load128_le_(b, &r); + return r; +} + +inline static void +store128_le(uint8_t *b, uint128_t n) +{ + store128_le_(b, &n); +} + +inline static uint128_t +load128_be(uint8_t *b) +{ + uint128_t r; + load128_be_(b, &r); + return r; +} + +inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store128_be_(b, &n); +} + +#else /* !defined(KRML_STRUCT_PASSING) */ + +#define print128 print128_ +#define load128_le load128_le_ +#define store128_le store128_le_ +#define load128_be load128_be_ +#define store128_be store128_be_ + +#endif /* KRML_STRUCT_PASSING */ + +#endif diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h b/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h new file mode 100644 index 0000000000..5a05332fc3 --- /dev/null +++ b/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h @@ -0,0 +1,528 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +/* This file was generated by KreMLin + * then hand-edited to use MSVC intrinsics KreMLin invocation: + * C:\users\barrybo\mitls2c\kremlin\_build\src\Kremlin.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "kremlib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims + * F* version: 15104ff8 + * KreMLin version: 318b7fa8 + */ +#include "kremlin/internal/types.h" +#include "FStar_UInt128.h" +#include "FStar_UInt_8_16_32_64.h" + +#ifndef _MSC_VER +#error This file only works with the MSVC compiler +#endif + +#if defined(_M_X64) && !defined(KRML_VERIFIED_UINT128) +#define HAS_OPTIMIZED 1 +#else +#define HAS_OPTIMIZED 0 +#endif + +// Define .low and .high in terms of the __m128i fields, to reduce +// the amount of churn in this file. 
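[Editor's note] The two defines that follow map the portable .low/.high field syntax onto the lanes of an __m128i, which MSVC exposes as m128i_u64[0] and m128i_u64[1]; that is what lets the rest of this file keep the same structure as the portable code while the 128-bit value travels in a vector register. The sketch below spells out what FStar_UInt128_add further down does once those macros are expanded. It is an illustrative fragment only, not part of the patch, assumes an x64 MSVC toolchain (where <intrin.h> provides _addcarry_u64), and uses the made-up name add128.

#include <stdint.h>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#include <emmintrin.h>

static __m128i
add128(__m128i a, __m128i b)
{
    uint64_t lo;
    uint64_t hi;
    /* lo = a.low + b.low, with the hardware carry flag captured... */
    unsigned char carry = _addcarry_u64(0, a.m128i_u64[0], b.m128i_u64[0], &lo);
    /* ...and folded into hi = a.high + b.high + carry. */
    _addcarry_u64(carry, a.m128i_u64[1], b.m128i_u64[1], &hi);
    /* _mm_set_epi64x takes (high, low). */
    return _mm_set_epi64x((int64_t)hi, (int64_t)lo);
}
#endif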
+#if HAS_OPTIMIZED +#include +#include +#define low m128i_u64[0] +#define high m128i_u64[1] +#endif + +inline static FStar_UInt128_uint128 +load128_le(uint8_t *b) +{ +#if HAS_OPTIMIZED + return _mm_loadu_si128((__m128i *)b); +#else + return ( + (FStar_UInt128_uint128){.low = load64_le(b), .high = load64_le(b + 8) }); +#endif +} + +inline static void +store128_le(uint8_t *b, FStar_UInt128_uint128 n) +{ + store64_le(b, n.low); + store64_le(b + 8, n.high); +} + +inline static FStar_UInt128_uint128 +load128_be(uint8_t *b) +{ + uint64_t l = load64_be(b + 8); + uint64_t h = load64_be(b); +#if HAS_OPTIMIZED + return _mm_set_epi64x(h, l); +#else + return ((FStar_UInt128_uint128){.low = l, .high = h }); +#endif +} + +inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store64_be(b, n.high); + store64_be(b + 8, n.low); +} + +inline static uint64_t +FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ (a ^ b | a - b ^ b)) >> (uint32_t)63U; +} + +inline static uint64_t +FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + + unsigned char carry = + _addcarry_u64(0, a.low, b.low, &l); // low/CF = a.low+b.low+0 + _addcarry_u64(carry, a.high, b.high, &h); // high = a.high+b.high+CF + return _mm_set_epi64x(h, l); +#else + return ((FStar_UInt128_uint128){ + .low = a.low + b.low, + .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_add(a, b); +#else + return ((FStar_UInt128_uint128){ + .low = a.low + b.low, + .high = FStar_UInt64_add_underspec( + FStar_UInt64_add_underspec(a.high, b.high), + FStar_UInt128_carry(a.low + b.low, b.low)) }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_add(a, b); +#else + return ((FStar_UInt128_uint128){ + .low = a.low + b.low, + .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + + unsigned char borrow = _subborrow_u64(0, a.low, b.low, &l); + _subborrow_u64(borrow, a.high, b.high, &h); + return _mm_set_epi64x(h, l); +#else + return ((FStar_UInt128_uint128){ + .low = a.low - b.low, + .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + return ((FStar_UInt128_uint128){ + .low = a.low - b.low, + .high = FStar_UInt64_sub_underspec( + FStar_UInt64_sub_underspec(a.high, b.high), + FStar_UInt128_carry(a.low, a.low - b.low)) }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ((FStar_UInt128_uint128){ + .low = a.low - b.low, + .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + 
return FStar_UInt128_sub_mod_impl(a, b); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return _mm_and_si128(a, b); +#else + return ( + (FStar_UInt128_uint128){.low = a.low & b.low, .high = a.high & b.high }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return _mm_xor_si128(a, b); +#else + return ( + (FStar_UInt128_uint128){.low = a.low ^ b.low, .high = a.high ^ b.high }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return _mm_or_si128(a, b); +#else + return ( + (FStar_UInt128_uint128){.low = a.low | b.low, .high = a.high | b.high }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ +#if HAS_OPTIMIZED + return _mm_andnot_si128(a, a); +#else + return ((FStar_UInt128_uint128){.low = ~a.low, .high = ~a.high }); +#endif +} + +static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +inline static uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (hi << s) + (lo >> FStar_UInt128_u32_64 - s); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + return a; + else + return ((FStar_UInt128_uint128){ + .low = a.low << s, + .high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s) }); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + return ((FStar_UInt128_uint128){.low = (uint64_t)0U, + .high = a.low << s - FStar_UInt128_u32_64 }); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = a.low << s; + uint64_t h = __shiftleft128(a.low, a.high, (unsigned char)s); + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(a.low << (s - FStar_UInt128_u32_64), 0); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_left_small(a, s); + else + return FStar_UInt128_shift_left_large(a, s); +#endif +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << FStar_UInt128_u32_64 - s); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + return a; + else + return ((FStar_UInt128_uint128){ + .low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s), + .high = a.high >> s }); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + return ((FStar_UInt128_uint128){.low = a.high >> s - FStar_UInt128_u32_64, + .high = (uint64_t)0U }); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + 
uint64_t l = __shiftright128(a.low, a.high, (unsigned char)s); + uint64_t h = a.high >> s; + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(0, a.high >> (s - FStar_UInt128_u32_64)); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_right_small(a, s); + else + return FStar_UInt128_shift_right_large(a, s); +#endif +} + +inline static bool +FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.low == b.low && a.high == b.high; +} + +inline static bool +FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || a.high == b.high && a.low > b.low; +} + +inline static bool +FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || a.high == b.high && a.low < b.low; +} + +inline static bool +FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || a.high == b.high && a.low >= b.low; +} + +inline static bool +FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || a.high == b.high && a.low <= b.low; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + // PCMPW to produce 4 32-bit values, all either 0x0 or 0xffffffff + __m128i r32 = _mm_cmpeq_epi32(a, b); + // Shuffle 3,2,1,0 into 2,3,0,1 (swapping dwords inside each half) + __m128i s32 = _mm_shuffle_epi32(r32, _MM_SHUFFLE(2, 3, 0, 1)); + // Bitwise and to compute (3&2),(2&3),(1&0),(0&1) + __m128i ret64 = _mm_and_si128(r32, s32); + // Swap the two 64-bit values to form s64 + __m128i s64 = + _mm_shuffle_epi32(ret64, _MM_SHUFFLE(1, 0, 3, 2)); // 3,2,1,0 -> 1,0,3,2 + // And them together + return _mm_and_si128(ret64, s64); +#else + return ( + (FStar_UInt128_uint128){.low = FStar_UInt64_eq_mask(a.low, b.low) & + FStar_UInt64_eq_mask(a.high, b.high), + .high = FStar_UInt64_eq_mask(a.low, b.low) & + FStar_UInt64_eq_mask(a.high, b.high) }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED && 0 + // ge - compare 3,2,1,0 for >= and generating 0 or 0xffffffff for each + // eq - compare 3,2,1,0 for == and generating 0 or 0xffffffff for each + // slot 0 = ge0 | (eq0 & ge1) | (eq0 & eq1 & ge2) | (eq0 & eq1 & eq2 & ge3) + // then splat slot 0 to 3,2,1,0 + __m128i gt = _mm_cmpgt_epi32(a, b); + __m128i eq = _mm_cmpeq_epi32(a, b); + __m128i ge = _mm_or_si128(gt, eq); + __m128i ge0 = ge; + __m128i eq0 = eq; + __m128i ge1 = _mm_srli_si128(ge, 4); // shift ge from 3,2,1,0 to 0x0,3,2,1 + __m128i t1 = _mm_and_si128(eq0, ge1); + __m128i ret = _mm_or_si128(ge, t1); // ge0 | (eq0 & ge1) is now in 0 + __m128i eq1 = _mm_srli_si128(eq, 4); // shift eq from 3,2,1,0 to 0x0,3,2,1 + __m128i ge2 = + _mm_srli_si128(ge1, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,3,2 + __m128i t2 = + _mm_and_si128(eq0, _mm_and_si128(eq1, ge2)); // t2 = (eq0 & eq1 & ge2) + ret = _mm_or_si128(ret, t2); + __m128i eq2 = _mm_srli_si128(eq1, 4); // shift eq from 3,2,1,0 to 0x0,00,00,3 + __m128i ge3 = + _mm_srli_si128(ge2, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,0x0,3 + __m128i t3 = _mm_and_si128( + eq0, _mm_and_si128( + eq1, _mm_and_si128(eq2, ge3))); // t3 = (eq0 & eq1 & eq2 & ge3) + ret = _mm_or_si128(ret, t3); + return _mm_shuffle_epi32( + ret, + _MM_SHUFFLE(0, 0, 0, 0)); // the result is in 0. Shuffle into all dwords. 
+#else + return ((FStar_UInt128_uint128){ + .low = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low), + .high = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low) }); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_uint64_to_uint128(uint64_t a) +{ +#if HAS_OPTIMIZED + return _mm_set_epi64x(0, a); +#else + return ((FStar_UInt128_uint128){.low = a, .high = (uint64_t)0U }); +#endif +} + +inline static uint64_t +FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +inline static uint64_t +FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +inline static uint64_t +FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, (uint64_t)y, &h); + return _mm_set_epi64x(h, l); +#else + return ((FStar_UInt128_uint128){ + .low = FStar_UInt128_u32_combine( + (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)), + .high = (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32) >> + FStar_UInt128_u32_32 }); +#endif +} + +/* Note: static headers bring scope collision issues when they define types! + * Because now client (kremlin-generated) code will include this header and + * there might be type collisions if the client code uses quadruples of uint64s. + * So, we cannot use the kremlin-generated name. 
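[Editor's note] Because the portable path cannot assume a native 128-bit type, the mul_wide implementation that follows splits each operand into 32-bit limbs and recombines the four partial products, using the quadruple type defined just below to carry the intermediate values. The recombination implements the identity x*y = xh*yh*2^64 + (xh*yl + xl*yh)*2^32 + xl*yl, with the extra u32_combine_ plumbing handling carries in 64-bit variables. A standalone check of that identity; this is an illustrative sketch, not part of the change, and the unsigned __int128 reference is GCC/Clang-only.

#include <assert.h>
#include <stdint.h>

int
main(void)
{
#if defined(__SIZEOF_INT128__)
    uint64_t x = 0x0123456789abcdefULL;
    uint64_t y = 0xfedcba9876543210ULL;
    uint64_t xl = x & 0xffffffffULL, xh = x >> 32;
    uint64_t yl = y & 0xffffffffULL, yh = y >> 32;
    /* Schoolbook recombination of the four 32x32 -> 64 partial products. */
    unsigned __int128 recombined =
        ((unsigned __int128)(xh * yh) << 64) +
        ((unsigned __int128)(xh * yl) << 32) +
        ((unsigned __int128)(xl * yh) << 32) +
        (unsigned __int128)(xl * yl);
    assert(recombined == (unsigned __int128)x * y);
#endif
    return 0;
}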
*/ +typedef struct K_quad_s { + uint64_t fst; + uint64_t snd; + uint64_t thd; + uint64_t f3; +} K_quad; + +inline static K_quad +FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y) +{ + return ((K_quad){ + .fst = FStar_UInt128_u64_mod_32(x), + .snd = FStar_UInt128_u64_mod_32( + FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)), + .thd = x >> FStar_UInt128_u32_32, + .f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> + FStar_UInt128_u32_32) }); +} + +static uint64_t +FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) +{ + K_quad scrut = + FStar_UInt128_mul_wide_impl_t_(x, y); + uint64_t u1 = scrut.fst; + uint64_t w3 = scrut.snd; + uint64_t x_ = scrut.thd; + uint64_t t_ = scrut.f3; + return ((FStar_UInt128_uint128){ + .low = FStar_UInt128_u32_combine_( + u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3), + .high = + x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + + (u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_) >> + FStar_UInt128_u32_32) }); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, y, &h); + return _mm_set_epi64x(h, l); +#else + return FStar_UInt128_mul_wide_impl(x, y); +#endif +} diff --git a/lib/freebl/verified/libintvector.h b/lib/freebl/verified/libintvector.h new file mode 100644 index 0000000000..f11c9308a5 --- /dev/null +++ b/lib/freebl/verified/libintvector.h @@ -0,0 +1,335 @@ +#ifndef __Vec_Intrin_H +#define __Vec_Intrin_H + +#include +#include +#include +#include + +typedef __m128i Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \ + (_mm_aesenc_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \ + (_mm_aesenclast_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \ + (_mm_aeskeygenassist_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \ + (_mm_clmulepi64_si128(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (_mm_xor_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (_mm_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (_mm_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + (_mm_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (_mm_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (_mm_or_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (_mm_and_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (_mm_xor_si128(x0, _mm_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (_mm_slli_si128(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (_mm_srli_si128(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (_mm_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (_mm_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (_mm_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (_mm_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \ + 
(_mm_shuffle_epi8(x0, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((x1 == 8 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : _mm_xor_si128(_mm_slli_epi32(x0, x1), _mm_srli_epi32(x0, 32 - x1))))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, 32 - x1)) + +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1, x2, x3, x4))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2 * x1 + 1, 2 * x1, 2 * x2 + 1, 2 * x2))) + +#define Lib_IntVector_Intrinsics_vec128_load_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_store_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))) + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ + (_mm_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (_mm_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (_mm_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (_mm_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (_mm_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (_mm_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (_mm_set1_epi16((uint16_t)0)) + +#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (_mm_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (_mm_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (_mm_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (_mm_mul_epu32(x0, _mm_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (_mm_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (_mm_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + 
(_mm_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (_mm_mullo_epi32(x0, _mm_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((__m128i)x) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (_mm_set1_epi64x(x)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load64s(x1, x2) \ + (_mm_set_epi64x(x1, x2)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (_mm_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x3, x2, x1, x0) \ + (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (_mm_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (_mm_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ + (_mm_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ + (_mm_unpackhi_epi64(x1, x2)) + +typedef __m256i Lib_IntVector_Intrinsics_vec256; + +#define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \ + (_mm256_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \ + (_mm256_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \ + (_mm256_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \ + (_mm256_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \ + (_mm256_xor_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \ + (_mm256_or_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \ + (_mm256_and_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_lognot(x0) \ + (_mm256_xor_si256(x0, _mm256_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \ + (_mm256_slli_si256(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \ + (_mm256_srli_si256(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \ + (_mm256_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \ + (_mm256_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \ + (_mm256_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \ + (_mm256_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, x1) \ + ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : (x1 == 16 ? 
Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : _mm256_or_si256(_mm256_slli_epi32(x0, x1), _mm256_srli_epi32(x0, 32 - x1))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, 32 - x1)) + +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1, x2, x3, x4))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x1, x2, x3, x4))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x1, x2, x3, x4, x5, x6, x7, x8))) + +#define Lib_IntVector_Intrinsics_vec256_load_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_store_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \ + (_mm256_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \ + (_mm256_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \ + (_mm256_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \ + (_mm256_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \ + (_mm256_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \ + (_mm256_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_zero \ + (_mm256_set1_epi16((uint16_t)0)) + +#define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \ + (_mm256_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \ + (_mm256_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \ + (_mm256_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) \ + (_mm256_mul_epu32(x0, _mm256_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \ + (_mm256_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \ + (_mm256_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \ + (_mm256_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \ + (_mm256_mullo_epi32(x0, _mm256_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec256_load64(x1) \ + (_mm256_set1_epi64x(x1)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load64s(x1, x2, x3, x4) \ + (_mm256_set_epi64x(x1, x2, x3, x4)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load32(x) \ + (_mm256_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec256_load32s(x7, x6, x5, x4, x3, x2, x1, x0) \ + (_mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load128(x) \ + (_mm256_set_m128i((__m128i)x)) + +#define Lib_IntVector_Intrinsics_vec256_load128s(x1, x0) \ + (_mm256_set_m128i((__m128i)x1, (__m128i)x0)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \ + (_mm256_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \ + (_mm256_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \ + (_mm256_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \ + (_mm256_unpackhi_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x20)) + +#define 
Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x31)) + +#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1) + +#endif diff --git a/lib/freebl/verified/specs/Spec.CTR.fst b/lib/freebl/verified/specs/Spec.CTR.fst deleted file mode 100644 index e411cd3535..0000000000 --- a/lib/freebl/verified/specs/Spec.CTR.fst +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright 2016-2017 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module Spec.CTR - -module ST = FStar.HyperStack.ST - -open FStar.Mul -open FStar.Seq -open Spec.Lib - -#reset-options "--initial_fuel 0 --max_fuel 0 --initial_ifuel 0 --max_ifuel 0" - -type block_cipher_ctx = { - keylen: nat ; - blocklen: (x:nat{x>0}); - noncelen: nat; - counterbits: nat; - incr: pos} - -type key (c:block_cipher_ctx) = lbytes c.keylen -type nonce (c:block_cipher_ctx) = lbytes c.noncelen -type block (c:block_cipher_ctx) = lbytes (c.blocklen*c.incr) -type counter (c:block_cipher_ctx) = UInt.uint_t c.counterbits -type block_cipher (c:block_cipher_ctx) = key c -> nonce c -> counter c -> block c - -val xor: #len:nat -> x:lbytes len -> y:lbytes len -> Tot (lbytes len) -let xor #len x y = map2 FStar.UInt8.(fun x y -> x ^^ y) x y - - -val counter_mode_blocks: - ctx: block_cipher_ctx -> - bc: block_cipher ctx -> - k:key ctx -> n:nonce ctx -> c:counter ctx -> - plain:seq UInt8.t{c + ctx.incr * (length plain / ctx.blocklen) < pow2 ctx.counterbits /\ - length plain % (ctx.blocklen * ctx.incr) = 0} -> - Tot (lbytes (length plain)) - (decreases (length plain)) -#reset-options "--z3rlimit 200 --max_fuel 0" -let rec counter_mode_blocks ctx block_enc key nonce counter plain = - let len = length plain in - let len' = len / (ctx.blocklen * ctx.incr) in - Math.Lemmas.lemma_div_mod len (ctx.blocklen * ctx.incr) ; - if len = 0 then Seq.createEmpty #UInt8.t - else ( - let prefix, block = split plain (len - ctx.blocklen * ctx.incr) in - (* TODO: move to a single lemma for clarify *) - Math.Lemmas.lemma_mod_plus (length prefix) 1 (ctx.blocklen * ctx.incr); - Math.Lemmas.lemma_div_le (length prefix) len ctx.blocklen; - Spec.CTR.Lemmas.lemma_div len (ctx.blocklen * ctx.incr); - (* End TODO *) - let cipher = counter_mode_blocks ctx block_enc key nonce counter prefix in - let mask = block_enc key nonce (counter + (len / ctx.blocklen - 1) * ctx.incr) in - let eb = xor block mask in - cipher @| eb - ) - - -val counter_mode: - ctx: block_cipher_ctx -> - bc: block_cipher ctx -> - k:key ctx -> n:nonce ctx -> c:counter ctx -> - plain:seq UInt8.t{c + ctx.incr * (length plain / ctx.blocklen) < pow2 ctx.counterbits} -> - Tot (lbytes (length plain)) - (decreases (length plain)) -#reset-options "--z3rlimit 200 --max_fuel 0" -let counter_mode ctx block_enc key nonce counter plain = - let len = length plain in - let blocks_len = (ctx.incr * ctx.blocklen) * (len / (ctx.blocklen * ctx.incr)) in - let part_len = len % (ctx.blocklen * ctx.incr) in - (* TODO: move to a single lemma for clarify *) - 
Math.Lemmas.lemma_div_mod len (ctx.blocklen * ctx.incr); - Math.Lemmas.multiple_modulo_lemma (len / (ctx.blocklen * ctx.incr)) (ctx.blocklen * ctx.incr); - Math.Lemmas.lemma_div_le (blocks_len) len ctx.blocklen; - (* End TODO *) - let blocks, last_block = split plain blocks_len in - let cipher_blocks = counter_mode_blocks ctx block_enc key nonce counter blocks in - let cipher_last_block = - if part_len > 0 - then (* encrypt final partial block(s) *) - let mask = block_enc key nonce (counter+ctx.incr*(length plain / ctx.blocklen)) in - let mask = slice mask 0 part_len in - assert(length last_block = part_len); - xor #part_len last_block mask - else createEmpty in - cipher_blocks @| cipher_last_block diff --git a/lib/freebl/verified/specs/Spec.Chacha20.fst b/lib/freebl/verified/specs/Spec.Chacha20.fst deleted file mode 100644 index 0bdc697254..0000000000 --- a/lib/freebl/verified/specs/Spec.Chacha20.fst +++ /dev/null @@ -1,169 +0,0 @@ -/* Copyright 2016-2017 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module Spec.Chacha20 - -module ST = FStar.HyperStack.ST - -open FStar.Mul -open FStar.Seq -open FStar.UInt32 -open FStar.Endianness -open Spec.Lib -open Spec.Chacha20.Lemmas -open Seq.Create - -#set-options "--max_fuel 0 --z3rlimit 100" - -(* Constants *) -let keylen = 32 (* in bytes *) -let blocklen = 64 (* in bytes *) -let noncelen = 12 (* in bytes *) - -type key = lbytes keylen -type block = lbytes blocklen -type nonce = lbytes noncelen -type counter = UInt.uint_t 32 - -// using @ as a functional substitute for ; -// internally, blocks are represented as 16 x 4-byte integers -type state = m:seq UInt32.t {length m = 16} -type idx = n:nat{n < 16} -type shuffle = state -> Tot state - -let line (a:idx) (b:idx) (d:idx) (s:t{0 < v s /\ v s < 32}) (m:state) : Tot state = - let m = m.[a] <- (m.[a] +%^ m.[b]) in - let m = m.[d] <- ((m.[d] ^^ m.[a]) <<< s) in m - -let quarter_round a b c d : shuffle = - line a b d 16ul @ - line c d b 12ul @ - line a b d 8ul @ - line c d b 7ul - -let column_round : shuffle = - quarter_round 0 4 8 12 @ - quarter_round 1 5 9 13 @ - quarter_round 2 6 10 14 @ - quarter_round 3 7 11 15 - -let diagonal_round : shuffle = - quarter_round 0 5 10 15 @ - quarter_round 1 6 11 12 @ - quarter_round 2 7 8 13 @ - quarter_round 3 4 9 14 - -let double_round: shuffle = - column_round @ diagonal_round (* 2 rounds *) - -let rounds : shuffle = - iter 10 double_round (* 20 rounds *) - -let chacha20_core (s:state) : Tot state = - let s' = rounds s in - Spec.Loops.seq_map2 (fun x y -> x +%^ y) s' s - -(* state initialization *) -let c0 = 0x61707865ul -let c1 = 0x3320646eul -let c2 = 0x79622d32ul -let c3 = 0x6b206574ul - -let setup (k:key) (n:nonce) (c:counter): Tot state = - create_4 c0 c1 c2 c3 @| - uint32s_from_le 8 k @| - create_1 (UInt32.uint_to_t c) @| - uint32s_from_le 3 n - -let chacha20_block (k:key) (n:nonce) (c:counter): Tot block = - let st = setup k n c in - let st' = chacha20_core st in - uint32s_to_le 16 st' - -let chacha20_ctx: 
Spec.CTR.block_cipher_ctx = - let open Spec.CTR in - { - keylen = keylen; - blocklen = blocklen; - noncelen = noncelen; - counterbits = 32; - incr = 1 - } - -let chacha20_cipher: Spec.CTR.block_cipher chacha20_ctx = chacha20_block - -let chacha20_encrypt_bytes key nonce counter m = - Spec.CTR.counter_mode chacha20_ctx chacha20_cipher key nonce counter m - - -unfold let test_plaintext = [ - 0x4cuy; 0x61uy; 0x64uy; 0x69uy; 0x65uy; 0x73uy; 0x20uy; 0x61uy; - 0x6euy; 0x64uy; 0x20uy; 0x47uy; 0x65uy; 0x6euy; 0x74uy; 0x6cuy; - 0x65uy; 0x6duy; 0x65uy; 0x6euy; 0x20uy; 0x6fuy; 0x66uy; 0x20uy; - 0x74uy; 0x68uy; 0x65uy; 0x20uy; 0x63uy; 0x6cuy; 0x61uy; 0x73uy; - 0x73uy; 0x20uy; 0x6fuy; 0x66uy; 0x20uy; 0x27uy; 0x39uy; 0x39uy; - 0x3auy; 0x20uy; 0x49uy; 0x66uy; 0x20uy; 0x49uy; 0x20uy; 0x63uy; - 0x6fuy; 0x75uy; 0x6cuy; 0x64uy; 0x20uy; 0x6fuy; 0x66uy; 0x66uy; - 0x65uy; 0x72uy; 0x20uy; 0x79uy; 0x6fuy; 0x75uy; 0x20uy; 0x6fuy; - 0x6euy; 0x6cuy; 0x79uy; 0x20uy; 0x6fuy; 0x6euy; 0x65uy; 0x20uy; - 0x74uy; 0x69uy; 0x70uy; 0x20uy; 0x66uy; 0x6fuy; 0x72uy; 0x20uy; - 0x74uy; 0x68uy; 0x65uy; 0x20uy; 0x66uy; 0x75uy; 0x74uy; 0x75uy; - 0x72uy; 0x65uy; 0x2cuy; 0x20uy; 0x73uy; 0x75uy; 0x6euy; 0x73uy; - 0x63uy; 0x72uy; 0x65uy; 0x65uy; 0x6euy; 0x20uy; 0x77uy; 0x6fuy; - 0x75uy; 0x6cuy; 0x64uy; 0x20uy; 0x62uy; 0x65uy; 0x20uy; 0x69uy; - 0x74uy; 0x2euy -] - -unfold let test_ciphertext = [ - 0x6euy; 0x2euy; 0x35uy; 0x9auy; 0x25uy; 0x68uy; 0xf9uy; 0x80uy; - 0x41uy; 0xbauy; 0x07uy; 0x28uy; 0xdduy; 0x0duy; 0x69uy; 0x81uy; - 0xe9uy; 0x7euy; 0x7auy; 0xecuy; 0x1duy; 0x43uy; 0x60uy; 0xc2uy; - 0x0auy; 0x27uy; 0xafuy; 0xccuy; 0xfduy; 0x9fuy; 0xaeuy; 0x0buy; - 0xf9uy; 0x1buy; 0x65uy; 0xc5uy; 0x52uy; 0x47uy; 0x33uy; 0xabuy; - 0x8fuy; 0x59uy; 0x3duy; 0xabuy; 0xcduy; 0x62uy; 0xb3uy; 0x57uy; - 0x16uy; 0x39uy; 0xd6uy; 0x24uy; 0xe6uy; 0x51uy; 0x52uy; 0xabuy; - 0x8fuy; 0x53uy; 0x0cuy; 0x35uy; 0x9fuy; 0x08uy; 0x61uy; 0xd8uy; - 0x07uy; 0xcauy; 0x0duy; 0xbfuy; 0x50uy; 0x0duy; 0x6auy; 0x61uy; - 0x56uy; 0xa3uy; 0x8euy; 0x08uy; 0x8auy; 0x22uy; 0xb6uy; 0x5euy; - 0x52uy; 0xbcuy; 0x51uy; 0x4duy; 0x16uy; 0xccuy; 0xf8uy; 0x06uy; - 0x81uy; 0x8cuy; 0xe9uy; 0x1auy; 0xb7uy; 0x79uy; 0x37uy; 0x36uy; - 0x5auy; 0xf9uy; 0x0buy; 0xbfuy; 0x74uy; 0xa3uy; 0x5buy; 0xe6uy; - 0xb4uy; 0x0buy; 0x8euy; 0xeduy; 0xf2uy; 0x78uy; 0x5euy; 0x42uy; - 0x87uy; 0x4duy -] - -unfold let test_key = [ - 0uy; 1uy; 2uy; 3uy; 4uy; 5uy; 6uy; 7uy; - 8uy; 9uy; 10uy; 11uy; 12uy; 13uy; 14uy; 15uy; - 16uy; 17uy; 18uy; 19uy; 20uy; 21uy; 22uy; 23uy; - 24uy; 25uy; 26uy; 27uy; 28uy; 29uy; 30uy; 31uy - ] -unfold let test_nonce = [ - 0uy; 0uy; 0uy; 0uy; 0uy; 0uy; 0uy; 0x4auy; 0uy; 0uy; 0uy; 0uy - ] - -unfold let test_counter = 1 - -let test() = - assert_norm(List.Tot.length test_plaintext = 114); - assert_norm(List.Tot.length test_ciphertext = 114); - assert_norm(List.Tot.length test_key = 32); - assert_norm(List.Tot.length test_nonce = 12); - let test_plaintext = createL test_plaintext in - let test_ciphertext = createL test_ciphertext in - let test_key = createL test_key in - let test_nonce = createL test_nonce in - chacha20_encrypt_bytes test_key test_nonce test_counter test_plaintext - = test_ciphertext diff --git a/lib/freebl/verified/specs/Spec.Curve25519.fst b/lib/freebl/verified/specs/Spec.Curve25519.fst deleted file mode 100644 index af4035b09f..0000000000 --- a/lib/freebl/verified/specs/Spec.Curve25519.fst +++ /dev/null @@ -1,168 +0,0 @@ -/* Copyright 2016-2017 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file 
except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module Spec.Curve25519 - -module ST = FStar.HyperStack.ST - -open FStar.Mul -open FStar.Seq -open FStar.UInt8 -open FStar.Endianness -open Spec.Lib -open Spec.Curve25519.Lemmas - -#reset-options "--initial_fuel 0 --max_fuel 0 --z3rlimit 20" - -(* Field types and parameters *) -let prime = pow2 255 - 19 -type elem : Type0 = e:int{e >= 0 /\ e < prime} -let fadd e1 e2 = (e1 + e2) % prime -let fsub e1 e2 = (e1 - e2) % prime -let fmul e1 e2 = (e1 * e2) % prime -let zero : elem = 0 -let one : elem = 1 -let ( +@ ) = fadd -let ( *@ ) = fmul - -(** Exponentiation *) -let rec ( ** ) (e:elem) (n:pos) : Tot elem (decreases n) = - if n = 1 then e - else - if n % 2 = 0 then op_Star_Star (e `fmul` e) (n / 2) - else e `fmul` (op_Star_Star (e `fmul` e) ((n-1)/2)) - -(* Type aliases *) -type scalar = lbytes 32 -type serialized_point = lbytes 32 -type proj_point = | Proj: x:elem -> z:elem -> proj_point - -let decodeScalar25519 (k:scalar) = - let k = k.[0] <- (k.[0] &^ 248uy) in - let k = k.[31] <- ((k.[31] &^ 127uy) |^ 64uy) in k - -let decodePoint (u:serialized_point) = - (little_endian u % pow2 255) % prime - -let add_and_double qx nq nqp1 = - let x_1 = qx in - let x_2, z_2 = nq.x, nq.z in - let x_3, z_3 = nqp1.x, nqp1.z in - let a = x_2 `fadd` z_2 in - let aa = a**2 in - let b = x_2 `fsub` z_2 in - let bb = b**2 in - let e = aa `fsub` bb in - let c = x_3 `fadd` z_3 in - let d = x_3 `fsub` z_3 in - let da = d `fmul` a in - let cb = c `fmul` b in - let x_3 = (da `fadd` cb)**2 in - let z_3 = x_1 `fmul` ((da `fsub` cb)**2) in - let x_2 = aa `fmul` bb in - let z_2 = e `fmul` (aa `fadd` (121665 `fmul` e)) in - Proj x_2 z_2, Proj x_3 z_3 - -let ith_bit (k:scalar) (i:nat{i < 256}) = - let q = i / 8 in let r = i % 8 in - (v (k.[q]) / pow2 r) % 2 - -let rec montgomery_ladder_ (init:elem) x xp1 (k:scalar) (ctr:nat{ctr<=256}) - : Tot proj_point (decreases ctr) = - if ctr = 0 then x - else ( - let ctr' = ctr - 1 in - let (x', xp1') = - if ith_bit k ctr' = 1 then ( - let nqp2, nqp1 = add_and_double init xp1 x in - nqp1, nqp2 - ) else add_and_double init x xp1 in - montgomery_ladder_ init x' xp1' k ctr' - ) - -let montgomery_ladder (init:elem) (k:scalar) : Tot proj_point = - montgomery_ladder_ init (Proj one zero) (Proj init one) k 256 - -let encodePoint (p:proj_point) : Tot serialized_point = - let p = p.x `fmul` (p.z ** (prime - 2)) in - little_bytes 32ul p - -let scalarmult (k:scalar) (u:serialized_point) : Tot serialized_point = - let k = decodeScalar25519 k in - let u = decodePoint u in - let res = montgomery_ladder u k in - encodePoint res - - -(* ********************* *) -(* RFC 7748 Test Vectors *) -(* ********************* *) - -let scalar1 = [ - 0xa5uy; 0x46uy; 0xe3uy; 0x6buy; 0xf0uy; 0x52uy; 0x7cuy; 0x9duy; - 0x3buy; 0x16uy; 0x15uy; 0x4buy; 0x82uy; 0x46uy; 0x5euy; 0xdduy; - 0x62uy; 0x14uy; 0x4cuy; 0x0auy; 0xc1uy; 0xfcuy; 0x5auy; 0x18uy; - 0x50uy; 0x6auy; 0x22uy; 0x44uy; 0xbauy; 0x44uy; 0x9auy; 0xc4uy -] - -let scalar2 = [ - 0x4buy; 0x66uy; 0xe9uy; 0xd4uy; 0xd1uy; 0xb4uy; 0x67uy; 0x3cuy; - 0x5auy; 0xd2uy; 0x26uy; 0x91uy; 0x95uy; 0x7duy; 0x6auy; 
0xf5uy; - 0xc1uy; 0x1buy; 0x64uy; 0x21uy; 0xe0uy; 0xeauy; 0x01uy; 0xd4uy; - 0x2cuy; 0xa4uy; 0x16uy; 0x9euy; 0x79uy; 0x18uy; 0xbauy; 0x0duy -] - -let input1 = [ - 0xe6uy; 0xdbuy; 0x68uy; 0x67uy; 0x58uy; 0x30uy; 0x30uy; 0xdbuy; - 0x35uy; 0x94uy; 0xc1uy; 0xa4uy; 0x24uy; 0xb1uy; 0x5fuy; 0x7cuy; - 0x72uy; 0x66uy; 0x24uy; 0xecuy; 0x26uy; 0xb3uy; 0x35uy; 0x3buy; - 0x10uy; 0xa9uy; 0x03uy; 0xa6uy; 0xd0uy; 0xabuy; 0x1cuy; 0x4cuy -] - -let input2 = [ - 0xe5uy; 0x21uy; 0x0fuy; 0x12uy; 0x78uy; 0x68uy; 0x11uy; 0xd3uy; - 0xf4uy; 0xb7uy; 0x95uy; 0x9duy; 0x05uy; 0x38uy; 0xaeuy; 0x2cuy; - 0x31uy; 0xdbuy; 0xe7uy; 0x10uy; 0x6fuy; 0xc0uy; 0x3cuy; 0x3euy; - 0xfcuy; 0x4cuy; 0xd5uy; 0x49uy; 0xc7uy; 0x15uy; 0xa4uy; 0x93uy -] - -let expected1 = [ - 0xc3uy; 0xdauy; 0x55uy; 0x37uy; 0x9duy; 0xe9uy; 0xc6uy; 0x90uy; - 0x8euy; 0x94uy; 0xeauy; 0x4duy; 0xf2uy; 0x8duy; 0x08uy; 0x4fuy; - 0x32uy; 0xecuy; 0xcfuy; 0x03uy; 0x49uy; 0x1cuy; 0x71uy; 0xf7uy; - 0x54uy; 0xb4uy; 0x07uy; 0x55uy; 0x77uy; 0xa2uy; 0x85uy; 0x52uy -] -let expected2 = [ - 0x95uy; 0xcbuy; 0xdeuy; 0x94uy; 0x76uy; 0xe8uy; 0x90uy; 0x7duy; - 0x7auy; 0xaduy; 0xe4uy; 0x5cuy; 0xb4uy; 0xb8uy; 0x73uy; 0xf8uy; - 0x8buy; 0x59uy; 0x5auy; 0x68uy; 0x79uy; 0x9fuy; 0xa1uy; 0x52uy; - 0xe6uy; 0xf8uy; 0xf7uy; 0x64uy; 0x7auy; 0xacuy; 0x79uy; 0x57uy -] - -let test () = - assert_norm(List.Tot.length scalar1 = 32); - assert_norm(List.Tot.length scalar2 = 32); - assert_norm(List.Tot.length input1 = 32); - assert_norm(List.Tot.length input2 = 32); - assert_norm(List.Tot.length expected1 = 32); - assert_norm(List.Tot.length expected2 = 32); - let scalar1 = createL scalar1 in - let scalar2 = createL scalar2 in - let input1 = createL input1 in - let input2 = createL input2 in - let expected1 = createL expected1 in - let expected2 = createL expected2 in - scalarmult scalar1 input1 = expected1 - && scalarmult scalar2 input2 = expected2 diff --git a/lib/freebl/verified/specs/Spec.Poly1305.fst b/lib/freebl/verified/specs/Spec.Poly1305.fst deleted file mode 100644 index f9d8a4cb2a..0000000000 --- a/lib/freebl/verified/specs/Spec.Poly1305.fst +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright 2016-2017 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -module Spec.Poly1305 - -module ST = FStar.HyperStack.ST - -open FStar.Math.Lib -open FStar.Mul -open FStar.Seq -open FStar.UInt8 -open FStar.Endianness -open Spec.Poly1305.Lemmas - -#set-options "--initial_fuel 0 --max_fuel 0 --initial_ifuel 0 --max_ifuel 0" - -(* Field types and parameters *) -let prime = pow2 130 - 5 -type elem = e:int{e >= 0 /\ e < prime} -let fadd (e1:elem) (e2:elem) = (e1 + e2) % prime -let fmul (e1:elem) (e2:elem) = (e1 * e2) % prime -let zero : elem = 0 -let one : elem = 1 -let op_Plus_At = fadd -let op_Star_At = fmul -(* Type aliases *) -let op_Amp_Bar = UInt.logand #128 -type word = w:bytes{length w <= 16} -type word_16 = w:bytes{length w = 16} -type tag = word_16 -type key = lbytes 32 -type text = seq word - -(* Specification code *) -let encode (w:word) = - (pow2 (8 * length w)) `fadd` (little_endian w) - -let rec poly (txt:text) (r:e:elem) : Tot elem (decreases (length txt)) = - if length txt = 0 then zero - else - let a = poly (Seq.tail txt) r in - let n = encode (Seq.head txt) in - (n `fadd` a) `fmul` r - -let encode_r (rb:word_16) = - (little_endian rb) &| 0x0ffffffc0ffffffc0ffffffc0fffffff - -let finish (a:elem) (s:word_16) : Tot tag = - let n = (a + little_endian s) % pow2 128 in - little_bytes 16ul n - -let rec encode_bytes (txt:bytes) : Tot text (decreases (length txt)) = - if length txt = 0 then createEmpty - else - let w, txt = split txt (min (length txt) 16) in - append_last (encode_bytes txt) w - -let poly1305 (msg:bytes) (k:key) : Tot tag = - let text = encode_bytes msg in - let r = encode_r (slice k 0 16) in - let s = slice k 16 32 in - finish (poly text r) s - - -(* ********************* *) -(* RFC 7539 Test Vectors *) -(* ********************* *) - -#reset-options "--initial_fuel 0 --max_fuel 0 --z3rlimit 20" - -unfold let msg = [ - 0x43uy; 0x72uy; 0x79uy; 0x70uy; 0x74uy; 0x6fuy; 0x67uy; 0x72uy; - 0x61uy; 0x70uy; 0x68uy; 0x69uy; 0x63uy; 0x20uy; 0x46uy; 0x6fuy; - 0x72uy; 0x75uy; 0x6duy; 0x20uy; 0x52uy; 0x65uy; 0x73uy; 0x65uy; - 0x61uy; 0x72uy; 0x63uy; 0x68uy; 0x20uy; 0x47uy; 0x72uy; 0x6fuy; - 0x75uy; 0x70uy ] - -unfold let k = [ - 0x85uy; 0xd6uy; 0xbeuy; 0x78uy; 0x57uy; 0x55uy; 0x6duy; 0x33uy; - 0x7fuy; 0x44uy; 0x52uy; 0xfeuy; 0x42uy; 0xd5uy; 0x06uy; 0xa8uy; - 0x01uy; 0x03uy; 0x80uy; 0x8auy; 0xfbuy; 0x0duy; 0xb2uy; 0xfduy; - 0x4auy; 0xbfuy; 0xf6uy; 0xafuy; 0x41uy; 0x49uy; 0xf5uy; 0x1buy ] - -unfold let expected = [ - 0xa8uy; 0x06uy; 0x1duy; 0xc1uy; 0x30uy; 0x51uy; 0x36uy; 0xc6uy; - 0xc2uy; 0x2buy; 0x8buy; 0xafuy; 0x0cuy; 0x01uy; 0x27uy; 0xa9uy ] - -let test () : Tot bool = - assert_norm(List.Tot.length msg = 34); - assert_norm(List.Tot.length k = 32); - assert_norm(List.Tot.length expected = 16); - let msg = createL msg in - let k = createL k in - let expected = createL expected in - poly1305 msg k = expected diff --git a/lib/freebl/verified/vec128.h b/lib/freebl/verified/vec128.h deleted file mode 100644 index 6ccecc9ecd..0000000000 --- a/lib/freebl/verified/vec128.h +++ /dev/null @@ -1,345 +0,0 @@ -/* Copyright 2016-2018 INRIA and Microsoft Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __Vec_H -#define __Vec_H - -#ifdef __MSVC__ -#define forceinline __forceinline inline -#elif (defined(__GNUC__) || defined(__clang__)) -#define forceinline __attribute__((always_inline)) inline -#else -#define forceinline inline -#endif - -#if defined(__SSSE3__) || defined(__AVX2__) || defined(__AVX__) - -#include <emmintrin.h> -#include <tmmintrin.h> - -#define VEC128 -#define vec_size 4 - -typedef __m128i vec; - -static forceinline vec -vec_rotate_left_8(vec v) -{ - __m128i x = _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); - return _mm_shuffle_epi8(v, x); -} - -static forceinline vec -vec_rotate_left_16(vec v) -{ - __m128i x = _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); - return _mm_shuffle_epi8(v, x); -} - -static forceinline vec -vec_rotate_left(vec v, unsigned int n) -{ - if (n == 8) - return vec_rotate_left_8(v); - if (n == 16) - return vec_rotate_left_16(v); - return _mm_xor_si128(_mm_slli_epi32(v, n), - _mm_srli_epi32(v, 32 - n)); -} - -static forceinline vec -vec_rotate_right(vec v, unsigned int n) -{ - return (vec_rotate_left(v, 32 - n)); -} - -#define vec_shuffle_right(x, n) \ - _mm_shuffle_epi32(x, _MM_SHUFFLE((3 + (n)) % 4, (2 + (n)) % 4, (1 + (n)) % 4, (n) % 4)) - -#define vec_shuffle_left(x, n) vec_shuffle_right((x), 4 - (n)) - -static forceinline vec -vec_load_32x4(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) -{ - return _mm_set_epi32(x4, x3, x2, x1); -} - -static forceinline vec -vec_load_32x8(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8) -{ - return _mm_set_epi32(x4, x3, x2, x1); -} - -static forceinline vec -vec_load_le(const unsigned char* in) -{ - return _mm_loadu_si128((__m128i*)(in)); -} - -static forceinline vec -vec_load128_le(const unsigned char* in) -{ - return vec_load_le(in); -} - -static forceinline void -vec_store_le(unsigned char* out, vec v) -{ - _mm_storeu_si128((__m128i*)(out), v); -} - -static forceinline vec -vec_add(vec v1, vec v2) -{ - return _mm_add_epi32(v1, v2); -} - -static forceinline vec -vec_add_u32(vec v1, uint32_t x) -{ - vec v2 = vec_load_32x4(x, 0, 0, 0); - return _mm_add_epi32(v1, v2); -} - -static forceinline vec -vec_increment(vec v1) -{ - vec one = vec_load_32x4(1, 0, 0, 0); - return _mm_add_epi32(v1, one); -} - -static forceinline vec -vec_xor(vec v1, vec v2) -{ - return _mm_xor_si128(v1, v2); -} - -#define vec_zero() _mm_set_epi32(0, 0, 0, 0) - -#elif defined(__ARM_NEON__) || defined(__ARM_NEON) -#include <arm_neon.h> - -typedef uint32x4_t vec; - -static forceinline vec -vec_xor(vec v1, vec v2) -{ - return veorq_u32(v1, v2); -} - -#define vec_rotate_left(x, n) \ - vsriq_n_u32(vshlq_n_u32((x), (n)), (x), 32 - (n)) - -#define vec_rotate_right(a, b) \ - vec_rotate_left((b), 32 - (b)) - -#define vec_shuffle_right(x, n) \ - vextq_u32((x), (x), (n)) - -#define vec_shuffle_left(a, b) \ - vec_shuffle_right((a), 4 - (b)) - -static forceinline vec -vec_load_32x4(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) -{ - uint32_t a[4] = { x1, x2, x3, x4 }; - return vld1q_u32(a); -} - -static forceinline vec -vec_load_32(uint32_t x1) -{ - uint32_t a[4] = { x1, x1, x1, x1 }; - return vld1q_u32(a); -} - -static forceinline vec -vec_load_32x8(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8) -{ - return vec_load_32x4(x1, x2, x3, x4); -} - -static forceinline vec -vec_load_le(const unsigned char* in) -{ - 
return vld1q_u32((uint32_t*)in); -} - -static forceinline vec -vec_load128_le(const unsigned char* in) -{ - return vec_load_le(in); -} - -static forceinline void -vec_store_le(unsigned char* out, vec v) -{ - vst1q_u32((uint32_t*)out, v); -} - -static forceinline vec -vec_add(vec v1, vec v2) -{ - return vaddq_u32(v1, v2); -} - -static forceinline vec -vec_add_u32(vec v1, uint32_t x) -{ - vec v2 = vec_load_32x4(x, 0, 0, 0); - return vec_add(v1, v2); -} - -static forceinline vec -vec_increment(vec v1) -{ - vec one = vec_load_32x4(1, 0, 0, 0); - return vec_add(v1, one); -} - -#define vec_zero() vec_load_32x4(0, 0, 0, 0) - -#else - -#define VEC128 -#define vec_size 4 - -typedef struct { - uint32_t v[4]; -} vec; - -static forceinline vec -vec_xor(vec v1, vec v2) -{ - vec r; - r.v[0] = v1.v[0] ^ v2.v[0]; - r.v[1] = v1.v[1] ^ v2.v[1]; - r.v[2] = v1.v[2] ^ v2.v[2]; - r.v[3] = v1.v[3] ^ v2.v[3]; - return r; -} - -static forceinline vec -vec_rotate_left(vec v, unsigned int n) -{ - vec r; - r.v[0] = (v.v[0] << n) ^ (v.v[0] >> (32 - n)); - r.v[1] = (v.v[1] << n) ^ (v.v[1] >> (32 - n)); - r.v[2] = (v.v[2] << n) ^ (v.v[2] >> (32 - n)); - r.v[3] = (v.v[3] << n) ^ (v.v[3] >> (32 - n)); - return r; -} - -static forceinline vec -vec_rotate_right(vec v, unsigned int n) -{ - return (vec_rotate_left(v, 32 - n)); -} - -static forceinline vec -vec_shuffle_right(vec v, unsigned int n) -{ - vec r; - r.v[0] = v.v[n % 4]; - r.v[1] = v.v[(n + 1) % 4]; - r.v[2] = v.v[(n + 2) % 4]; - r.v[3] = v.v[(n + 3) % 4]; - return r; -} - -static forceinline vec -vec_shuffle_left(vec x, unsigned int n) -{ - return vec_shuffle_right(x, 4 - n); -} - -static forceinline vec -vec_load_32x4(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) -{ - vec v; - v.v[0] = x0; - v.v[1] = x1; - v.v[2] = x2; - v.v[3] = x3; - return v; -} - -static forceinline vec -vec_load_32(uint32_t x0) -{ - vec v; - v.v[0] = x0; - v.v[1] = x0; - v.v[2] = x0; - v.v[3] = x0; - return v; -} - -static forceinline vec -vec_load_le(const uint8_t* in) -{ - vec r; - r.v[0] = load32_le((uint8_t*)in); - r.v[1] = load32_le((uint8_t*)in + 4); - r.v[2] = load32_le((uint8_t*)in + 8); - r.v[3] = load32_le((uint8_t*)in + 12); - return r; -} - -static forceinline void -vec_store_le(unsigned char* out, vec r) -{ - store32_le(out, r.v[0]); - store32_le(out + 4, r.v[1]); - store32_le(out + 8, r.v[2]); - store32_le(out + 12, r.v[3]); -} - -static forceinline vec -vec_load128_le(const unsigned char* in) -{ - return vec_load_le(in); -} - -static forceinline vec -vec_add(vec v1, vec v2) -{ - vec r; - r.v[0] = v1.v[0] + v2.v[0]; - r.v[1] = v1.v[1] + v2.v[1]; - r.v[2] = v1.v[2] + v2.v[2]; - r.v[3] = v1.v[3] + v2.v[3]; - return r; -} - -static forceinline vec -vec_add_u32(vec v1, uint32_t x) -{ - vec v2 = vec_load_32x4(x, 0, 0, 0); - return vec_add(v1, v2); -} - -static forceinline vec -vec_increment(vec v1) -{ - vec one = vec_load_32x4(1, 0, 0, 0); - return vec_add(v1, one); -} - -#define vec_zero() vec_load_32x4(0, 0, 0, 0) - -#endif - -#endif
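
Note (illustrative sketch, not part of the patch): the portable FStar_UInt128_mul_wide_impl path added above assembles a 64x64->128-bit product from 32-bit partial products. The standalone C sketch below shows the same schoolbook decomposition and checks it against the compiler's 128-bit type; the names mul64_wide_portable and u128_parts are made up for illustration, and the self-check assumes a GCC/Clang-style unsigned __int128.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t low; uint64_t high; } u128_parts; /* illustrative type */

/* Schoolbook 64x64->128 multiply via 32-bit halves, mirroring the shape of
 * FStar_UInt128_mul_wide_impl above (names are not taken from the patch). */
static u128_parts
mul64_wide_portable(uint64_t x, uint64_t y)
{
    uint64_t xl = x & 0xffffffffu, xh = x >> 32;
    uint64_t yl = y & 0xffffffffu, yh = y >> 32;
    uint64_t ll = xl * yl;                      /* low half x low half          */
    uint64_t hl = xh * yl + (ll >> 32);         /* cross product plus carry     */
    uint64_t lh = xl * yh + (hl & 0xffffffffu); /* other cross product          */
    u128_parts r;
    r.low = (lh << 32) | (ll & 0xffffffffu);
    r.high = xh * yh + (hl >> 32) + (lh >> 32);
    return r;
}

int
main(void)
{
    uint64_t x = 0xdeadbeefcafebabeULL, y = 0x0123456789abcdefULL;
    u128_parts r = mul64_wide_portable(x, y);
    unsigned __int128 ref = (unsigned __int128)x * y; /* GCC/Clang extension */
    assert(r.low == (uint64_t)ref && r.high == (uint64_t)(ref >> 64));
    printf("high=%016llx low=%016llx\n",
           (unsigned long long)r.high, (unsigned long long)r.low);
    return 0;
}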
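
Note (illustrative sketch, not part of the patch): the Lib_IntVector_Intrinsics_vec128 macros introduced in libintvector.h take over the role of the hand-written vec helpers removed with vec128.h. As a rough example of how a caller uses them, here is one ChaCha20 quarter round written against the new names; the function name is invented for illustration, the include path assumes the file sits next to libintvector.h, and an SSSE3/SSE4.1-capable build is assumed so the header's intrinsics are available.

#include "libintvector.h"

/* One ChaCha20 quarter round on four 128-bit lanes, expressed with the new
 * Lib_IntVector_Intrinsics_vec128 macros (illustrative only). */
static inline void
chacha20_quarter_round_vec128(Lib_IntVector_Intrinsics_vec128 *a,
                              Lib_IntVector_Intrinsics_vec128 *b,
                              Lib_IntVector_Intrinsics_vec128 *c,
                              Lib_IntVector_Intrinsics_vec128 *d)
{
    *a = Lib_IntVector_Intrinsics_vec128_add32(*a, *b);
    *d = Lib_IntVector_Intrinsics_vec128_xor(*d, *a);
    *d = Lib_IntVector_Intrinsics_vec128_rotate_left32(*d, 16);
    *c = Lib_IntVector_Intrinsics_vec128_add32(*c, *d);
    *b = Lib_IntVector_Intrinsics_vec128_xor(*b, *c);
    *b = Lib_IntVector_Intrinsics_vec128_rotate_left32(*b, 12);
    *a = Lib_IntVector_Intrinsics_vec128_add32(*a, *b);
    *d = Lib_IntVector_Intrinsics_vec128_xor(*d, *a);
    *d = Lib_IntVector_Intrinsics_vec128_rotate_left32(*d, 8);
    *c = Lib_IntVector_Intrinsics_vec128_add32(*c, *d);
    *b = Lib_IntVector_Intrinsics_vec128_xor(*b, *c);
    *b = Lib_IntVector_Intrinsics_vec128_rotate_left32(*b, 7);
}

The rotation counts 16, 12, 8 and 7 are compile-time constants, so the rotate_left32 macro resolves the 8- and 16-bit cases to the byte-shuffle variants and the others to the shift/xor pair, matching how the deleted vec_rotate_left helper behaved.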
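
Note (illustrative sketch, not part of the patch): the removed Spec.Curve25519.fst is only the F* specification; its decodeScalar25519 step is the standard RFC 7748 scalar clamping, which the verified scalar multiplication now provided by Hacl_Curve25519_51.c performs as part of X25519. In plain C the clamping looks roughly like this (the helper name is made up):

#include <stdint.h>

/* RFC 7748 / decodeScalar25519-style clamping of a 32-byte X25519 scalar
 * (illustrative helper, not taken from the patch). */
static void
clamp_scalar25519(uint8_t k[32])
{
    k[0] &= 248;  /* clear the three least significant bits */
    k[31] &= 127; /* clear the most significant bit          */
    k[31] |= 64;  /* set bit 254                             */
}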