#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdlib.h>
static int lock = -1;
typedef unsigned uint128_t __attribute__((mode(TI)));
#define RADIX 64
typedef uint64_t digit_t; // Unsigned 64-bit digit
#define NWORDS_FIELD 2 // Number of words of a field element
#define NWORDS_ORDER 4
// Digit multiplication
#define MUL(multiplier, multiplicand, hi, lo) \
{ \
uint128_t tempReg = (uint128_t)(multiplier) * (uint128_t)(multiplicand); \
*(hi) = (digit_t)(tempReg >> RADIX); \
(lo) = (digit_t)tempReg; \
}
// Digit addition with carry
#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \
{ \
uint128_t tempReg = (uint128_t)(addend1) + (uint128_t)(addend2) + (uint128_t)(carryIn); \
(carryOut) = (digit_t)(tempReg >> RADIX); \
(sumOut) = (digit_t)tempReg; \
}
// Digit subtraction with borrow
#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \
{ \
uint128_t tempReg = \
(uint128_t)(minuend) - (uint128_t)(subtrahend) - (uint128_t)(borrowIn); \
(borrowOut) = (digit_t)(tempReg >> (sizeof(uint128_t) * 8 - 1)); \
(differenceOut) = (digit_t)tempReg; \
}
static void multiply(const digit_t *a, const digit_t *b, digit_t *c)
{ // Schoolbook multiprecision multiply, c = a*b
unsigned int i, j;
digit_t u, v, UV[2];
unsigned char carry = 0;
for (i = 0; i < (2 * NWORDS_ORDER); i++)
c[i] = 0;
for (i = 0; i < NWORDS_ORDER; i++) {
u = 0;
for (j = 0; j < NWORDS_ORDER; j++) {
MUL(a[i], b[j], UV + 1, UV[0]);
ADDC(0, UV[0], u, carry, v);
u = UV[1] + carry;
ADDC(0, c[i + j], v, carry, v);
u = u + carry;
c[i + j] = v;
}
c[NWORDS_ORDER + i] = u;
}
}
static unsigned char add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords)
{ // Multiprecision addition, c = a+b. Returns the carry bit
unsigned int i;
unsigned char carry = 0;
for (i = 0; i < nwords; i++) {
ADDC(carry, a[i], b[i], carry, c[i]);
}
return carry;
}
unsigned char subtract(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords)
{ // Multiprecision subtraction, c = a-b. Returns the borrow bit
unsigned int i;
unsigned char borrow = 0;
for (i = 0; i < nwords; i++) {
SUBC(borrow, a[i], b[i], borrow, c[i]);
}
return borrow;
}
static const uint64_t curve_order[4] = {
0x2FB2540EC7768CE7, 0xDFBD004DFE0F7999, 0xF05397829CBC14E5, 0x0029CBC14E5E0A72
};
static const uint64_t Montgomery_Rprime[4] = {
0xC81DB8795FF3D621, 0x173EA5AAEA6B387D, 0x3D01B7C72136F61C, 0x0006A5F16AC8F9D3
};
static const uint64_t Montgomery_rprime[4] = {
0xE12FE5F079BC3929, 0xD75E78B8D1FCDCF3, 0xBCE409ED76B5DB21, 0xF32702FDAFC1C074
};
void Montgomery_multiply_mod_order(const digit_t *ma, const digit_t *mb, digit_t *mc)
{ // 256-bit Montgomery multiplication modulo the curve order, mc = ma*mb*r' mod order, where
// ma,mb,mc in [0, order-1] ma, mb and mc are assumed to be in Montgomery representation The
// Montgomery constant r' = -r^(-1) mod 2^(log_2(r)) is the global value "Montgomery_rprime",
// where r is the order
unsigned int i;
digit_t mask, P[2 * NWORDS_ORDER], Q[2 * NWORDS_ORDER], temp[2 * NWORDS_ORDER];
digit_t *order = (digit_t *)curve_order;
unsigned char cout = 0, bout = 0;
multiply(ma, mb, P); // P = ma * mb
multiply(P, (digit_t *)&Montgomery_rprime, Q); // Q = P * r' mod 2^(log_2(r))
multiply(Q, (digit_t *)&curve_order, temp); // temp = Q * r
cout = add(P, temp, temp, 2 * NWORDS_ORDER); // (cout, temp) = P + Q * r
for (i = 0; i < NWORDS_ORDER; i++) { // (cout, mc) = (P + Q * r)/2^(log_2(r))
mc[i] = temp[NWORDS_ORDER + i];
}
// Final, constant-time subtraction
bout = subtract(mc, (digit_t *)&curve_order, mc, NWORDS_ORDER); // (cout, mc) = (cout, mc) - r
mask = (digit_t)(
cout -
bout); // if (cout, mc) >= 0 then mask = 0x00..0, else if (cout, mc) < 0 then mask = 0xFF..F
for (i = 0; i < NWORDS_ORDER; i++) { // temp = mask & r
temp[i] = (order[i] & mask);
}
add(mc, temp, mc, NWORDS_ORDER); // mc = mc + (mask & r)
return;
}
void modulo_order(digit_t *a, digit_t *c)
{ // Reduction modulo the order using Montgomery arithmetic
// ma = a*Montgomery_Rprime mod r, where a,ma in [0, r-1], a,ma,r < 2^256
// c = ma*1*Montgomery_Rprime^(-1) mod r, where ma,c in [0, r-1], ma,c,r < 2^256
digit_t ma[NWORDS_ORDER], one[NWORDS_ORDER] = { 0 };
one[0] = 1;
Montgomery_multiply_mod_order(a, (digit_t *)&Montgomery_Rprime, ma);
Montgomery_multiply_mod_order(ma, one, c);
}
static __inline void delay(unsigned int count)
{
while (count--) {
}
}
int random_bytes(unsigned char *random_array, unsigned int nbytes)
{
int r, n = nbytes, count = 0;
if (lock == -1) {
do {
lock = open("/dev/urandom", O_RDONLY);
if (lock == -1) {
delay(0xFFFFF);
}
} while (lock == -1);
}
while (n > 0) {
do {
r = read(lock, random_array + count, n);
if (r == -1) {
delay(0xFFFF);
}
} while (r == -1);
count += r;
n -= r;
}
return true;
}
int main()
{
digit_t temp[NWORDS_ORDER], temp2[NWORDS_ORDER];
random_bytes((unsigned char *)temp, sizeof(digit_t) * NWORDS_ORDER);
random_bytes((unsigned char *)temp2, sizeof(digit_t) * NWORDS_ORDER);
modulo_order(temp, temp2);
}