Commit 2712648b authored by Michael Hamburg's avatar Michael Hamburg
Browse files

Second commit. Still very preliminary.

Changed the formats of private keys and shared secrets.

Added SHA512 support.  It's slow and probably has endian bugs.

Signatures are now supported.

Renamed a bunch of internal functions to be more readable and
consistent.

Began documenting functions with Doxygen.

See HISTORY.txt for more details.
parent 25697caf
This diff is collapsed.
March 5, 2014:
First revision.
Private keys are now longer. They now store a copy of the public key, and
a secret symmetric key for signing purposes.
Signatures are now supported, though like everything else in this library,
their format is not stable. They use a deterministic Schnorr mode,
similar to EdDSA. Precomputed low-latency signing is not supported (yet?).
The hash function is SHA-512.
The deterministic hashing mode needs to be changed to HMAC (TODO!). It's
currently envelope-MAC.
Probably in the future there will be a distinction between ECDH keys and
signing keys (and possibly also MQV keys etc).
Began renaming internal functions. Removing p448_ prefixes from EC point
operations. Trying to put the verb first. For example,
"p448_isogeny_un_to_tw" is now called "twist_and_double".
Began documenting with Doxygen. Use "make doc" to make a very incomplete
documentation directory.
There have been many other internal changes.
Feb 21, 2014:
Initial import and benchmarking scripts.
Keygen and ECDH are implemented, but there's no hash function.
......@@ -3,19 +3,20 @@
CC = clang
CFLAGS = -O3 -std=c99 -pedantic -Wall -Wextra -Werror \
-mavx2 -DMUST_HAVE_SSSE3 -mbmi2 \
-ffunction-sections -fdata-sections -fomit-frame-pointer -fPIC
-mssse3 -maes -mavx2 -DMUST_HAVE_AVX -mbmi2 \
-ffunction-sections -fdata-sections -fomit-frame-pointer -fPIC \
-DEXPERIMENT_ECDH_OBLITERATE_CT=1 -DEXPERIMENT_ECDH_STIR_IN_PUBKEYS=1
.PHONY: clean all runbench
.PHONY: clean all runbench todo doc
.PRECIOUS: build/%.s
HEADERS= Makefile $(shell find . -name "*.h") build/timestamp
LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \
build/p448.o build/ec_point.o build/scalarmul.o
build/p448.o build/ec_point.o build/scalarmul.o build/sha512.o
all: bench
bench: *.h *.c
$(CC) $(CFLAGS) -o $@ *.c
......@@ -34,7 +35,26 @@ build/goldilocks.so: $(LIBCOMPONENTS)
libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \
-exported_symbols_list exported.sym \
$(LIBCOMPONENTS)
doc/timestamp:
mkdir -p doc
touch $@
doc: Doxyfile doc/timestamp *.c *.h
doxygen
todo::
@egrep --color=auto -w -i 'hack|todo|fixme|bug|xxx|perf|future|remove' *.h *.c
@echo '============================='
@(for i in FIXME BUG XXX TODO HACK PERF FUTURE REMOVE; do \
egrep -w -i $$i *.h *.c > /dev/null || continue; \
/bin/echo -n $$i' ' | head -c 10; \
egrep -w -i $$i *.h *.c | wc -l; \
done)
@echo '============================='
@echo -n 'Total '
@egrep -w -i 'hack|todo|fixme|bug|xxx|perf|future|remove' *.h *.c | wc -l
runbench: bench
./$<
......
Important work items for Ed448-Goldilocks:
* Import SHA-512 or SHA-3.
* Decide which.
* Get a public-domain version which is 64-bit and 32-bit clean.
* Update LICENSE and README to reflect that SHA is not my code.
* Incorporate hashing into goldilocks_shared_secret.
* It's a pretty terrible shared secret right now.
* Decide on output size
* Documentation: write high-level API docs, and internal docs to help
other implementors.
* Partial progress on Doxygenating the code.
* Documentation: write a spec or add to Watson's
......@@ -37,12 +30,13 @@ Important work items for Ed448-Goldilocks:
* Testing:
* Corner-case testing
* more bulk random testing
* More bulk random testing
* Negative testing.
* SAGE-(auto?)-generated test vectors
* Test the Barrett fields
* Safety: add static analysis attributes for compilers that support them
* EG, warn on ignored return types
* Most functions now warn on ignored return values.
* Safety:
* Check for init() if it's still required once we've done the above
......@@ -65,17 +59,19 @@ Important work items for Ed448-Goldilocks:
* Scalarmul with other cofactor modes.
* High-level API:
* Signatures.
* Decide on strictness level.
* SPAKE2 Elligator Edition? Maybe write a paper first.
* Elligator.
* Need to write Elligator inverse. Might not be Elligator-2S.
* FHMQV? Is this patented?
* What low-level APIs to expose?
* Edwards points with add, sub, scalarmul, =, ==, ser/deser?
* Portability: test and make clean with other compilers
* Using a fair amount of __attribute__ code.
* Portability: try to make the vector code as portable as possible
* Currently using clang ext_vector_length.
* I can't get a simple for-loop to autovectorize :-/
......@@ -89,8 +85,7 @@ Important work items for Ed448-Goldilocks:
* Run through the SAGE tool to generate new bias & bound.
* Portability: make the outer layers of the code 32-bit clean.
* I don't think that there are endian bugs, but who knows?
* There are endian bugs in the signing algorithm.
* NEON and vectorless constant-time comparison.
* Performance: write and incorporate some extra routines
......@@ -99,6 +94,11 @@ Important work items for Ed448-Goldilocks:
* Performance: fixed parameters?
* Perhaps useful for comb precomputation.
* Performance: Improve SHA512.
* Improve portability.
* Improve speed.
* Decide what things to stir into hashes for various functions.
* Performance: improve the Barrett field code.
* Support other primes?
......
......@@ -109,6 +109,42 @@ widemac(
return carry;
}
/*
 * Negate a modulo p, in place.
 *
 * The modulus has the form p = 2^big - p_lo, where the top bit position is
 * big = WORD_BITS*(nwords_p-1) + p_shift.  a is first passed through
 * barrett_reduce() and is then overwritten with p - a.  No further reduction
 * is done afterwards, so an input that reduces to zero comes out as p itself.
 *
 * a         value to negate, nwords_a words; overwritten with the result.
 * nwords_a  number of words in a.  Words at index nwords_p and above are
 *           asserted to be zero after the reduction.
 * p_lo      the low part of the modulus (p = 2^big - p_lo), nwords_lo words.
 * nwords_p  number of words occupied by p.
 * nwords_lo number of words in p_lo.
 * p_shift   bit position of 2^big within word nwords_p-1.
 *
 * NOTE(review): the top-word fix-up below appears to require
 * nwords_lo < nwords_p; with nwords_lo == nwords_p the borrow would already
 * have been shifted out before the last word is rewritten.  Confirm against
 * callers.
 */
void
barrett_negate (
    word_t *a,
    int nwords_a,
    const word_t *p_lo,
    int nwords_p,
    int nwords_lo,
    int p_shift
) {
    int i;
    /* Running borrow.  The code relies on dsword_t being a signed
     * double-width type so that >> carries the borrow into the next word. */
    dsword_t carry = 0;

    barrett_reduce(a,nwords_a,0,p_lo,nwords_p,nwords_lo,p_shift);

    /* Have p = 2^big - p_lo.  Want p - a = 2^big - p_lo - a */

    /* Words covered by p_lo: compute -p_lo - a with borrow propagation. */
    for (i=0; i<nwords_lo; i++) {
        a[i] = carry = carry - p_lo[i] - a[i];
        carry >>= WORD_BITS;
    }

    /* Remaining words of p: only -a is left to subtract.  The borrow is NOT
     * shifted out after the top word, so that 2^big can be added to it. */
    for (; i<nwords_p; i++) {
        a[i] = carry = carry - a[i];
        if (i<nwords_p-1) {
            carry >>= WORD_BITS;
        }
    }

    /* Add the 2^big term into the top word of p. */
    a[nwords_p-1] = carry = carry + (((word_t)1) << p_shift);

    /* Anything above p's width must already be clear after the reduction. */
    for (; i<nwords_a; i++) {
        assert(!a[i]);
    }

    /* The top word must fit in a single word.  Use WORD_BITS rather than a
     * literal 64 so the check stays valid (and the shift stays defined) when
     * word_t is narrower than 64 bits. */
    assert(!(carry>>WORD_BITS));
}
void
barrett_reduce(
word_t *a,
......@@ -195,14 +231,6 @@ barrett_mul_or_mac(
tmp[i] = 0;
}
if (doMac) {
for (i=0; i<nwords_accum; i++) {
tmp[i] = accum[i];
}
barrett_reduce(tmp, nwords_tmp, 0, p_lo, nwords_p, nwords_lo, p_shift);
}
for (bpos=nwords_b-1; bpos >= 0; bpos--) {
/* Invariant at the beginning of the loop: the high word is unused. */
assert(tmp[nwords_tmp-1] == 0);
......@@ -211,6 +239,7 @@ barrett_mul_or_mac(
for (i=nwords_tmp-2; i>=0; i--) {
tmp[i+1] = tmp[i];
}
tmp[0] = 0;
/* mac and reduce */
word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0);
......@@ -223,6 +252,11 @@ barrett_mul_or_mac(
* so the high word is again clear */
}
if (doMac) {
word_t cout = add_nr_packed(tmp, accum, nwords_accum);
barrett_reduce(tmp, nwords_tmp, cout, p_lo, nwords_p, nwords_lo, p_shift);
}
for (i=0; i<nwords_tmp && i<nwords_accum; i++) {
accum[i] = tmp[i];
}
......
......@@ -44,6 +44,16 @@ sub_nr_ext_packed(
int nwords_c,
word_t mask
);
/*
 * Negate a modulo p, in place: a is reduced and then replaced by p - a,
 * where p = 2^big - p_lo and big = WORD_BITS*(nwords_p-1) + p_shift.
 *
 * a         value to negate (nwords_a words); overwritten with the result.
 * nwords_a  number of words in a.
 * p_lo      low part of the modulus (nwords_lo words).
 * nwords_p  number of words occupied by p.
 * nwords_lo number of words in p_lo.
 * p_shift   bit position of 2^big within the top word of p.
 */
void
barrett_negate (
word_t *a,
int nwords_a,
const word_t *p_lo,
int nwords_p,
int nwords_lo,
int p_shift
);
/*
* If doMac, accum = accum + a*b mod p.
......
......@@ -14,6 +14,7 @@
#include "barrett_field.h"
#include "crandom.h"
#include "goldilocks.h"
#include "sha512.h"
word_t q448_lo[4] = {
0xdc873d6d54a7bb0dull,
......@@ -129,6 +130,23 @@ int main(int argc, char **argv) {
when = now() - when;
printf("rand448: %5.1fns\n", when * 1e9 / i);
struct sha512_ctx_t sha;
uint8_t hashout[128];
when = now();
for (i=0; i<10000; i++) {
sha512_init(&sha);
sha512_final(&sha, hashout);
}
when = now() - when;
printf("sha512 1blk: %5.1fns\n", when * 1e9 / i);
when = now();
for (i=0; i<10000; i++) {
sha512_update(&sha, hashout, 128);
}
when = now() - when;
printf("sha512 blk: %5.1fns (%0.2f MB/s)\n", when * 1e9 / i, 128*i/when/1e6);
when = now();
for (i=0; i<10000; i++) {
p448_isr(&c, &a);
......@@ -161,7 +179,7 @@ int main(int argc, char **argv) {
for (i=0; i<100; i++) {
p448_randomize(&crand, &a);
elligator_2s_inject(&affine, &a);
if (!p448_affine_validate(&affine)) {
if (!validate_affine(&affine)) {
printf("Elligator validation failure!\n");
p448_print("a", &a);
p448_print("x", &affine.x);
......@@ -171,14 +189,14 @@ int main(int argc, char **argv) {
when = now();
for (i=0; i<10000; i++) {
affine_deserialize(&affine, &a);
deserialize_affine(&affine, &a);
}
when = now() - when;
printf("decompress: %5.1fµs\n", when * 1e6 / i);
when = now();
for (i=0; i<10000; i++) {
extensible_serialize(&a, &exta);
serialize_extensible(&a, &exta);
}
when = now() - when;
printf("compress: %5.1fµs\n", when * 1e6 / i);
......@@ -186,8 +204,8 @@ int main(int argc, char **argv) {
int goods = 0;
for (i=0; i<100; i++) {
p448_randomize(&crand, &a);
mask_t good = affine_deserialize(&affine, &a);
if (good & !p448_affine_validate(&affine)) {
mask_t good = deserialize_affine(&affine, &a);
if (good & !validate_affine(&affine)) {
printf("Deserialize validation failure!\n");
p448_print("a", &a);
p448_print("x", &affine.x);
......@@ -195,7 +213,7 @@ int main(int argc, char **argv) {
} else if (good) {
goods++;
convert_affine_to_extensible(&exta,&affine);
extensible_serialize(&b, &exta);
serialize_extensible(&b, &exta);
p448_sub(&c,&b,&a);
p448_bias(&c,2);
if (!p448_is_zero(&c)) {
......@@ -203,7 +221,7 @@ int main(int argc, char **argv) {
p448_print("a", &a);
p448_print("x", &affine.x);
p448_print("y", &affine.y);
affine_deserialize(&affine, &b);
deserialize_affine(&affine, &b);
p448_print("b", &b);
p448_print("x", &affine.x);
p448_print("y", &affine.y);
......@@ -230,52 +248,52 @@ int main(int argc, char **argv) {
}
when = now() - when;
printf("barrett red: %5.1fns\n", when * 1e9 / i);
when = now();
for (i=0; i<100000; i++) {
barrett_mac(lsk,7,lsk,7,lsk,7,q448_lo,7,4,62);
}
when = now() - when;
printf("barrett mac: %5.1fns\n", when * 1e9 / i);
//
// when = now();
// for (i=0; i<100000; i++) {
// barrett_mac(lsk,7,lsk,7,lsk,7,q448_lo,7,4,62);
// }
// when = now() - when;
// printf("barrett mac: %5.1fns\n", when * 1e9 / i);
when = now();
for (i=0; i<1000000; i++) {
p448_tw_extensible_add_niels(&ext, &niels);
add_tw_niels_to_tw_extensible(&ext, &niels);
}
when = now() - when;
printf("exti+niels: %5.1fns\n", when * 1e9 / i);
when = now();
for (i=0; i<1000000; i++) {
p448_tw_extensible_add_pniels(&ext, &pniels);
add_tw_pniels_to_tw_extensible(&ext, &pniels);
}
when = now() - when;
printf("exti+pniels: %5.1fns\n", when * 1e9 / i);
when = now();
for (i=0; i<1000000; i++) {
p448_tw_extensible_double(&ext);
double_tw_extensible(&ext);
}
when = now() - when;
printf("exti dbl: %5.1fns\n", when * 1e9 / i);
when = now();
for (i=0; i<1000000; i++) {
p448_isogeny_tw_to_un(&exta, &ext);
untwist_and_double(&exta, &ext);
}
when = now() - when;
printf("i->a isog: %5.1fns\n", when * 1e9 / i);
when = now();
for (i=0; i<1000000; i++) {
p448_isogeny_un_to_tw(&ext, &exta);
twist_and_double(&ext, &exta);
}
when = now() - when;
printf("a->i isog: %5.1fns\n", when * 1e9 / i);
when = now();
for (i=0; i<1000000; i++) {
p448_montgomery_step(&mb);
montgomery_step(&mb);
}
when = now() - when;
printf("monty step: %5.1fns\n", when * 1e9 / i);
......@@ -295,14 +313,20 @@ int main(int argc, char **argv) {
printf("edwards smz: %5.1fµs\n", when * 1e6 / i);
when = now();
int sum = 0;
for (i=0; i<1000; i++) {
edwards_scalar_multiply_vlook(&ext,sk);
untwist_and_double_and_serialize(&a,&ext);
}
when = now() - when;
printf("edwards svl: %5.1fµs\n", when * 1e6 / i);
when = now();
for (i=0; i<1000; i++) {
q448_randomize(&crand, sk);
sum += edwards_scalar_multiply_vt(&ext,sk);
edwards_scalar_multiply_vt(&ext,sk);
}
when = now() - when;
printf("edwards vtm: %5.1fµs (%0.2f avg bits = 1.5 + 448/%0.2f)\n",
when * 1e6 / i, 1.0*sum/i, 448.0*i/(sum-1.5*i));
printf("edwards vtm: %5.1fµs\n", when * 1e6 / i);
struct tw_niels_t wnaft[1<<6];
when = now();
......@@ -351,23 +375,22 @@ int main(int argc, char **argv) {
printf("edwards vt5: %5.1fµs\n", when * 1e6 / i);
when = now();
sum = 0;
for (i=0; i<1000; i++) {
q448_randomize(&crand, sk);
q448_randomize(&crand, tk);
sum += edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5);
edwards_combo_var_fixed_vt(&ext,sk,tk,wnaft,5);
}
when = now() - when;
printf("vt vf combo: %5.1fµs (avg = %0.3f)\n", when * 1e6 / i, 1.0*sum/i);
printf("vt vf combo: %5.1fµs\n", when * 1e6 / i);
when = now();
for (i=0; i<1000; i++) {
affine_deserialize(&affine, &a);
deserialize_affine(&affine, &a);
convert_affine_to_extensible(&exta,&affine);
p448_isogeny_un_to_tw(&ext,&exta);
twist_and_double(&ext,&exta);
edwards_scalar_multiply(&ext,sk);
p448_isogeny_tw_to_un(&exta,&ext);
extensible_serialize(&b, &exta);
untwist_and_double(&exta,&ext);
serialize_extensible(&b, &exta);
}
when = now() - when;
printf("edwards sm: %5.1fµs\n", when * 1e6 / i);
......@@ -376,10 +399,10 @@ int main(int argc, char **argv) {
while (1) {
p448_randomize(&crand, &a);
if (affine_deserialize(&affine, &a)) break;
if (deserialize_affine(&affine, &a)) break;
}
convert_affine_to_extensible(&exta,&affine);
p448_isogeny_un_to_tw(&ext,&exta);
twist_and_double(&ext,&exta);
when = now();
for (i=0; i<1000; i++) {
precompute_for_combs(table, &ext, 5, 5, 18);
......@@ -400,13 +423,6 @@ int main(int argc, char **argv) {
}
when = now() - when;
printf("com(3,5,30): %5.1fµs\n", when * 1e6 / i);
when = now();
for (i=0; i<10000; i++) {
edwards_comb(&ext, sk, table, 2, 5, 45);
}
when = now() - when;
printf("com(2,5,45): %5.1fµs\n", when * 1e6 / i);
when = now();
for (i=0; i<10000; i++) {
......@@ -419,8 +435,8 @@ int main(int argc, char **argv) {
for (i=0; i<10000; i++) {
q448_randomize(&crand, sk);
edwards_comb(&ext, sk, table, 5, 5, 18);
p448_isogeny_tw_to_un(&exta,&ext);
extensible_serialize(&b, &exta);
untwist_and_double(&exta,&ext);
serialize_extensible(&b, &exta);
}
when = now() - when;
printf("keygen: %5.1fµs\n", when * 1e6 / i);
......@@ -430,14 +446,15 @@ int main(int argc, char **argv) {
int res = goldilocks_init();
assert(!res);
uint8_t gpk[56],gsk[56],hsk[56],hpk[56];
struct goldilocks_public_key_t gpk,hpk;
struct goldilocks_private_key_t gsk,hsk;
when = now();
for (i=0; i<10000; i++) {
if (i&1) {
res = goldilocks_keygen(gsk,gpk);
res = goldilocks_keygen(&gsk,&gpk);
} else {
res = goldilocks_keygen(hsk,hpk);
res = goldilocks_keygen(&hsk,&hpk);
}
assert(!res);
}
......@@ -449,14 +466,14 @@ int main(int argc, char **argv) {
when = now();
for (i=0; i<10000; i++) {
if (i&1) {
gres1 = goldilocks_shared_secret(ss1,gsk,hpk);
gres1 = goldilocks_shared_secret(ss1,&gsk,&hpk);
} else {
gres2 = goldilocks_shared_secret(ss2,hsk,gpk);
gres2 = goldilocks_shared_secret(ss2,&hsk,&gpk);
}
}
when = now() - when;
printf("ecdh: %5.1fµs\n", when * 1e6 / i);
if (gres1 || gres2 || memcmp(ss1,ss2,56)) {
if (gres1 || gres2 || memcmp(ss1,ss2,64)) {
printf("[FAIL] %d %d\n",gres1,gres2);
printf("ss1 = ");
......@@ -470,9 +487,39 @@ int main(int argc, char **argv) {
printf("\n");
}
uint8_t sout[56*2];
const char *message = "hello world";
uint64_t message_len = strlen(message);
when = now();
for (i=0; i<10000; i++) {
res = goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk);
assert(!res);
}
when = now() - when;
printf("sign: %5.1fµs\n", when * 1e6 / i);
when = now();
for (i=0; i<10000; i++) {
res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk);
}
when = now() - when;
printf("verify: %5.1fµs\n", when * 1e6 / i);
printf("\nTesting...\n");
int failures=0, successes = 0;
for (i=0; i<1000; i++) {
(void)goldilocks_keygen(&gsk,&gpk);
goldilocks_sign(sout,(const unsigned char *)message,message_len,&gsk);
res = goldilocks_verify(sout,(const unsigned char *)message,message_len,&gpk);
if (res) failures++;
}
if (failures) {
printf("FAIL %d/%d signature checks!\n", failures, i);
}
failures=0; successes = 0;
for (i=0; i<1000; i++) {
p448_randomize(&crand, &a);
uint64_t two = 2;
......@@ -501,14 +548,14 @@ int main(int argc, char **argv) {
mask_t good;
do {
p448_randomize(&crand, &a);
good = affine_deserialize(&affine, &a);
good = deserialize_affine(&affine, &a);
} while (!good);
convert_affine_to_extensible(&exta,&affine);
p448_isogeny_un_to_tw(&ext,&exta);
p448_isogeny_tw_to_un(&exta,&ext);
extensible_serialize(&b, &exta);
isogeny_and_serialize(&c, &ext);
twist_and_double(&ext,&exta);
untwist_and_double(&exta,&ext);
serialize_extensible(&b, &exta);
untwist_and_double_and_serialize(&c, &ext);
p448_sub(&d,&b,&c);
p448_bias(&d,2);
......@@ -536,12 +583,12 @@ int main(int argc, char **argv) {
mask_t good = p448_montgomery_ladder(&b,&a,&four,3,0);
good &= p448_montgomery_ladder(&c,&b,sk,448,0);
mask_t goodb = affine_deserialize(&affine, &a);
mask_t goodb = deserialize_affine(&affine, &a);
convert_affine_to_extensible(&exta,&affine);
p448_isogeny_un_to_tw(&ext,&exta);
twist_and_double(&ext,&exta);
edwards_scalar_multiply(&ext,sk);
p448_isogeny_tw_to_un(&exta,&ext);
extensible_serialize(&b, &exta);
untwist_and_double(&exta,&ext);
serialize_extensible(&b, &exta);
p448_sub(&d,&b,&c);
p448_bias(&d,2);
......@@ -573,14 +620,14 @@ int main(int argc, char **argv) {
good &= p448_montgomery_ladder(&c,&b,sk,448,0);
if (!good) continue;
affine_deserialize(&affine, &a);
deserialize_affine(&affine, &a);
convert_affine_to_extensible(&exta,&affine);
p448_isogeny_un_to_tw(&ext,&exta);
twist_and_double(&ext,&exta);
precompute_for_combs(table, &ext, 5, 5, 18);
edwards_comb(&ext, sk, table, 5, 5, 18);
p448_isogeny_tw_to_un(&exta,&ext);
extensible_serialize(&b, &exta);
untwist_and_double(&exta,&ext);
serialize_extensible(&b, &exta);