Commit 4eb210cd authored by Michael Hamburg's avatar Michael Hamburg
Browse files

Mostly a cleanup release. Clean up old code, improve documentation,

improve GCC-cleanness, etc.

Disable the crandom output buffer so that it won't return duplicate
data across fork().  I should still stir more entropy into the
buffer at least when RDRAND is available, but this should prevent
disasters for now.

The Elligator code in the current version is incompatible with past
versions due to a minor tweak.  It wasn't being called by any of
the API functions, though.

Removing "magic" constants and type names.  So for example p448_t
is now field_t (though maybe it should really be felem_t?).  This
should enable other curves with the Goldilocks code in the not-too-
distant future.

Added CRANDOM_MIGHT_IS_MUST so that you don't have to -D a bunch of
things on the command line.

You can `make bat` to make an eBAT which probably doesn't work.

I haven't implemented the improved nonce generation from the
curves@moderncrypto.org thread yet.
parent 8ebdfaee
May 3, 2104:
July 11, 2014:
This is mostly a cleanup release.
Added CRANDOM_MIGHT_IS_MUST config flag (default: 1). When set, this
causes crandom to assume that all features in the target arch will
be available, instead of detecting them. This makes sense because
the rest of the Goldilocks code is not (yet?) able to detect features.
Also, I'd like to submit this to SUPERCOP eventually, and SUPERCOP won't
pass -DMUST_HAVE_XXX on the command line the way the Makefile here did.
Use the EXPERIMENT_CRANDOM_BUFFER_CUTOFF_BYTES flag to disable the crandom
output buffer. This buffer improves performance (very marginally at
Goldilocks sizes), but can cause problems with forking and VM
snapshotting. By default, the buffer is now disabled.
I've slightly tweaked the Elligator implementation (which is still
unused) to make it easier to invert. This makes anything using Elligator
(i.e. nothing) incompatible with previous releases.
I've been factoring "magic" constants such as curve orders, window sizes,
etc into a few headers, to reduce the effort to port the code to other
primes, curves, etc. For example, I could test the Microsoft curves, and
something like:
x^2 + y^2 = 1 +- 5382[45] x^2 y^2 mod 2^480-2^240-1
("Goldeneye"? "Ridinghood"?) might be a reasonable thing to try for
64-bit CPUs.
In a similar vein, most of the internal code has been changed to say
"field" instead of p448, so that a future version of magic.h can decide
which field header to include.
You can now `make bat` to create an eBAT in build/ed448-goldilocks. This
is only minimally tested, though, because SUPERCOP doesn't work on my
machine and I'm too lazy to reverse engineer it. It sets a new macro,
SUPERCOP_WONT_LET_ME_OPEN_FILES, which causes goldilocks_init() to fall
back to something horribly insecure if crandom_init_from_file raises
EMFILE.
Slightly improved documentation.
Removed some old commented-out code; restored the /* C-style */ comment
discipline.
The AMD-64 version should now be GCC clean, at least for reasonably
recent GCC (tested on OS X 10.9.3, Haswell, gcc-4.9).
History no longer says "2104".
May 3, 2014:
Minor changes to internal routines mean that this version is not
compatible with the previous one.
......
......@@ -39,7 +39,7 @@ endif
ARCHFLAGS += -mcpu=cortex-a9 # FIXME
GENFLAGS = -DN_TESTS_BASE=1000 # sooooo sloooooow
else
ARCHFLAGS += -mssse3 -maes -mavx -mavx2 -DMUST_HAVE_AVX2 -mbmi2 #TODO
ARCHFLAGS += -maes -mavx2 -mbmi2 #TODO
endif
ifeq ($(CC),clang)
......@@ -48,26 +48,28 @@ endif
ifeq (,$(findstring 64,$(ARCH))$(findstring gcc,$(CC)))
# ARCHFLAGS += -m32
ARCHFLAGS += -DGOLDI_FORCE_32_BIT=1
XCFLAGS += -DGOLDI_FORCE_32_BIT=1
endif
CFLAGS = $(LANGFLAGS) $(WARNFLAGS) $(INCFLAGS) $(OFLAGS) $(ARCHFLAGS) $(GENFLAGS) $(XCFLAGS)
LDFLAGS = $(ARCHFLAGS) $(XLDFLAGS)
ASFLAGS = $(ARCHFLAGS)
.PHONY: clean all test bench todo doc lib
.PHONY: clean all test bench todo doc lib bat
.PRECIOUS: build/%.s
HEADERS= Makefile $(shell find . -name "*.h") build/timestamp
LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \
build/p448.o build/ec_point.o build/scalarmul.o build/sha512.o
build/p448.o build/ec_point.o build/scalarmul.o build/sha512.o build/magic.o
TESTCOMPONENTS=build/test.o build/test_scalarmul.o build/test_sha512.o \
build/test_pointops.o build/test_arithmetic.o build/test_goldilocks.o
build/test_pointops.o build/test_arithmetic.o build/test_goldilocks.o build/magic.o
BENCHCOMPONENTS=build/bench.o
BATNAME=build/ed448-goldilocks
all: lib build/test build/bench
scan: clean
......@@ -118,6 +120,19 @@ doc/timestamp:
doc: Doxyfile doc/timestamp src/*.c src/include/*.h src/$(ARCH)/*.c src/$(ARCH)/*.h
doxygen
bat: $(BATNAME)
$(BATNAME): include/* src/* src/*/*
rm -fr $@
for arch in src/arch*; do \
mkdir -p $@/`basename $$arch`; \
cp include/* src/*.c src/include/* $$arch/* $@/`basename $$arch`; \
perl -p -i -e 's/.*endif.*GOLDILOCKS_CONFIG_H/#define SUPERCOP_WONT_LET_ME_OPEN_FILES 1\n\n$$&/' $@/`basename $$arch`/config.h; \
done
echo 'Mike Hamburg' > $@/designers
echo 'Ed448-Goldilocks sign and dh' > $@/description
todo::
@(find * -name '*.h'; find * -name '*.c') | xargs egrep --color=auto -w \
'HACK|TODO|FIXME|BUG|XXX|PERF|FUTURE|REMOVE|MAGIC'
......@@ -139,4 +154,4 @@ test: build/test
./$<
clean:
rm -fr build doc
rm -fr build doc $(BATNAME)
......@@ -25,8 +25,8 @@ Important work items for Ed448-Goldilocks:
* [DONE] Bugfix: make sure that init() and randomization are thread-safe.
* Security: check on deserialization that points are < p.
* Check also that they're nonzero or otherwise non-pathological?
* [DONE] Security: check on deserialization that points are < p.
* [NEEDS TESTING] Check also that they're nonzero or otherwise non-pathological?
* Testing:
* Corner-case testing
......@@ -39,16 +39,16 @@ Important work items for Ed448-Goldilocks:
* Most functions now have warn on ignored return.
* Safety:
* Check for init() if it's still required once we've done the above
* [DONE] Check for init() if it's still required once we've done the above
* Decide what to do about RNG failures
* abort
* return error and zeroize
* return error but continue if RNG is kind of mostly OK
* Flexibility: decide which API options are good.
* Eg, should functions take nbits and table sizes?
* [DONE?] Eg, should functions take nbits and table sizes?
* Remove hardcoded adjustments from comb control.
* [DONE] Remove hardcoded adjustments from comb control.
* These adjustments make the output wrong when it's not 450 bits.
* Other slow Barrett fields? Montgomery fields?
......@@ -71,6 +71,7 @@ Important work items for Ed448-Goldilocks:
* Portability: test and make clean with other compilers
* Using a fair amount of __attribute__ code.
* [DONE] Should work for GCC now.
* Portability: try to make the vector code as portable as possible
* Currently using clang ext_vector_length.
......@@ -79,15 +80,15 @@ Important work items for Ed448-Goldilocks:
* Portability: make the inner layers of the code 32-bit clean.
* Write new versions of the field code.
* 28-bit limbs give less headroom for carries.
* Now have a vectorless ARM version; need NEON.
* [DONE] 28-bit limbs give less headroom for carries.
* [DONE] Now have a vectorless ARM version; need NEON.
* Improve speed of 32-bit field code.
* Run through the SAGE tool to generate new bias & bound.
* [DONE] Run through the SAGE tool to generate new bias & bound.
* [DONE] Portability: make the outer layers of the code 32-bit clean.
* Performance/flexibility: decide which parameters should be hard-coded.
* [DONE] Performance/flexibility: decide which parameters should be hard-coded.
* Perhaps useful for comb precomputation.
* Performance: Improve SHA512.
......@@ -120,4 +121,4 @@ Important work items for Ed448-Goldilocks:
* Clear other TODO/FIXME/HACK/PERF items in the code
* Submit to SUPERCOP
* [DONE?] Submit to SUPERCOP
......@@ -380,55 +380,55 @@ serialize_montgomery (
const struct montgomery_t* a,
const struct p448_t* sbz
) {
mask_t L0, L1, L2;
struct p448_t L3, L4, L5, L6;
p448_mul ( &L6, &a->z0, &a->zd );
p448_sub ( &L4, &L6, &a->xd );
p448_bias ( &L4, 2 );
p448_weak_reduce( &L4 );
p448_mul ( &L6, &a->za, &L4 );
p448_mul ( &L5, &a->z0, &a->xd );
p448_sub ( &L4, &L5, &a->zd );
p448_bias ( &L4, 2 );
p448_weak_reduce( &L4 );
p448_mul ( &L3, &a->xa, &L4 );
p448_add ( &L5, &L3, &L6 );
p448_sub ( &L4, &L6, &L3 );
p448_bias ( &L4, 2 );
p448_weak_reduce( &L4 );
p448_mul ( &L6, &L4, &L5 );
p448_copy ( &L5, &a->z0 );
p448_addw ( &L5, 1 );
p448_sqr ( &L4, &L5 );
p448_mulw ( &L5, &L4, 39082 );
p448_neg ( &L4, &L5 );
p448_add ( &L5, &a->z0, &a->z0 );
p448_bias ( &L5, 1 );
p448_add ( &L3, &L5, &L5 );
p448_add ( &L5, &L3, &L4 );
p448_weak_reduce( &L5 );
p448_mul ( &L3, &a->xd, &L5 );
L1 = p448_is_zero( &a->zd );
L2 = - L1;
p448_mask ( &L4, &L3, L1 );
p448_add ( &L5, &L4, &a->zd );
L0 = ~ L1;
p448_mul ( &L4, sbz, &L6 );
p448_addw ( &L4, L2 );
p448_mul ( &L6, &L5, &L4 );
p448_mul ( &L4, &L6, &L5 );
p448_mul ( &L5, &L6, &a->xd );
p448_mul ( &L6, &L4, &L5 );
p448_isr ( &L3, &L6 );
p448_mul ( &L5, &L4, &L3 );
p448_sqr ( &L4, &L3 );
p448_mul ( &L3, &L6, &L4 );
p448_mask ( b, &L5, L0 );
p448_subw ( &L3, 1 );
p448_bias ( &L3, 1 );
L1 = p448_is_zero( &L3 );
L0 = p448_is_zero( sbz );
return L1 | L0;
mask_t L4, L5, L6;
struct p448_t L0, L1, L2, L3;
p448_mul ( &L3, &a->z0, &a->zd );
p448_sub ( &L1, &L3, &a->xd );
p448_bias ( &L1, 2 );
p448_weak_reduce( &L1 );
p448_mul ( &L3, &a->za, &L1 );
p448_mul ( &L2, &a->z0, &a->xd );
p448_sub ( &L1, &L2, &a->zd );
p448_bias ( &L1, 2 );
p448_weak_reduce( &L1 );
p448_mul ( &L0, &a->xa, &L1 );
p448_add ( &L2, &L0, &L3 );
p448_sub ( &L1, &L3, &L0 );
p448_bias ( &L1, 2 );
p448_weak_reduce( &L1 );
p448_mul ( &L3, &L1, &L2 );
p448_copy ( &L2, &a->z0 );
p448_addw ( &L2, 1 );
p448_sqr ( &L1, &L2 );
p448_mulw ( &L2, &L1, 39082 );
p448_neg ( &L1, &L2 );
p448_add ( &L2, &a->z0, &a->z0 );
p448_bias ( &L2, 1 );
p448_add ( &L0, &L2, &L2 );
p448_add ( &L2, &L0, &L1 );
p448_weak_reduce( &L2 );
p448_mul ( &L0, &a->xd, &L2 );
L5 = p448_is_zero( &a->zd );
L6 = - L5;
p448_mask ( &L1, &L0, L5 );
p448_add ( &L2, &L1, &a->zd );
L4 = ~ L5;
p448_mul ( &L1, sbz, &L3 );
p448_addw ( &L1, L6 );
p448_mul ( &L3, &L2, &L1 );
p448_mul ( &L1, &L3, &L2 );
p448_mul ( &L2, &L3, &a->xd );
p448_mul ( &L3, &L1, &L2 );
p448_isr ( &L0, &L3 );
p448_mul ( &L2, &L1, &L0 );
p448_sqr ( &L1, &L0 );
p448_mul ( &L0, &L3, &L1 );
p448_mask ( b, &L2, L4 );
p448_subw ( &L0, 1 );
p448_bias ( &L0, 1 );
L5 = p448_is_zero( &L0 );
L4 = p448_is_zero( sbz );
return L5 | L4;
}
void
......@@ -524,8 +524,8 @@ test_only_twist (
struct tw_extensible_t* b,
const struct extensible_t* a
) {
mask_t L0, L1;
struct p448_t L2, L3;
mask_t L2, L3;
struct p448_t L0, L1;
p448_sqr ( &b->u, &a->z );
p448_sqr ( &b->y, &a->x );
p448_sub ( &b->z, &b->u, &b->y );
......@@ -541,35 +541,35 @@ test_only_twist (
p448_bias ( &b->z, 2 );
p448_weak_reduce( &b->z );
p448_mul ( &b->t, &b->z, &b->x );
p448_mul ( &L3, &b->t, &b->u );
p448_mul ( &b->x, &b->t, &L3 );
p448_isr ( &L2, &b->x );
p448_mul ( &b->u, &b->t, &L2 );
p448_sqr ( &L3, &L2 );
p448_mul ( &b->t, &b->x, &L3 );
p448_add ( &b->x, &a->y, &a->x );
p448_weak_reduce( &b->x );
p448_sub ( &L2, &a->x, &a->y );
p448_bias ( &L2, 2 );
p448_weak_reduce( &L2 );
p448_mul ( &L3, &b->t, &L2 );
p448_add ( &L2, &L3, &b->x );
p448_sub ( &b->t, &b->x, &L3 );
p448_mul ( &L1, &b->t, &b->u );
p448_mul ( &b->x, &b->t, &L1 );
p448_isr ( &L0, &b->x );
p448_mul ( &b->u, &b->t, &L0 );
p448_sqr ( &L1, &L0 );
p448_mul ( &b->t, &b->x, &L1 );
p448_add ( &L1, &a->y, &a->x );
p448_weak_reduce( &L1 );
p448_sub ( &L0, &a->x, &a->y );
p448_bias ( &L0, 2 );
p448_weak_reduce( &L0 );
p448_mul ( &b->x, &b->t, &L0 );
p448_add ( &L0, &b->x, &L1 );
p448_sub ( &b->t, &L1, &b->x );
p448_bias ( &b->t, 2 );
p448_weak_reduce( &b->t );
p448_mul ( &b->x, &L2, &b->u );
L0 = p448_is_zero( &b->y );
L1 = - L0;
p448_addw ( &b->x, L1 );
p448_mul ( &b->x, &L0, &b->u );
L2 = p448_is_zero( &b->y );
L3 = - L2;
p448_addw ( &b->x, L3 );
p448_weak_reduce( &b->x );
p448_mul ( &b->y, &b->t, &b->u );
L0 = p448_is_zero( &b->z );
L1 = - L0;
p448_addw ( &b->y, L1 );
L2 = p448_is_zero( &b->z );
L3 = - L2;
p448_addw ( &b->y, L3 );
p448_weak_reduce( &b->y );
L1 = p448_is_zero( &a->y );
L0 = L1 + 1;
p448_set_ui( &b->z, L0 );
L3 = p448_is_zero( &a->y );
L2 = L3 + 1;
p448_set_ui( &b->z, L2 );
p448_copy ( &b->t, &b->x );
p448_copy ( &b->u, &b->y );
}
......@@ -578,16 +578,16 @@ mask_t
is_square (
const struct p448_t* x
) {
mask_t L0, L1;
struct p448_t L2, L3;
p448_isr ( &L2, x );
p448_sqr ( &L3, &L2 );
p448_mul ( &L2, x, &L3 );
p448_subw ( &L2, 1 );
p448_bias ( &L2, 1 );
L1 = p448_is_zero( &L2 );
L0 = p448_is_zero( x );
return L1 | L0;
mask_t L2, L3;
struct p448_t L0, L1;
p448_isr ( &L0, x );
p448_sqr ( &L1, &L0 );
p448_mul ( &L0, x, &L1 );
p448_subw ( &L0, 1 );
p448_bias ( &L0, 1 );
L3 = p448_is_zero( &L0 );
L2 = p448_is_zero( x );
return L3 | L2;
}
mask_t
......@@ -744,15 +744,15 @@ eq_affine (
const struct affine_t* a,
const struct affine_t* b
) {
mask_t L0, L1;
struct p448_t L2;
p448_sub ( &L2, &a->x, &b->x );
p448_bias ( &L2, 2 );
L1 = p448_is_zero( &L2 );
p448_sub ( &L2, &a->y, &b->y );
p448_bias ( &L2, 2 );
L0 = p448_is_zero( &L2 );
return L1 & L0;
mask_t L1, L2;
struct p448_t L0;
p448_sub ( &L0, &a->x, &b->x );
p448_bias ( &L0, 2 );
L2 = p448_is_zero( &L0 );
p448_sub ( &L0, &a->y, &b->y );
p448_bias ( &L0, 2 );
L1 = p448_is_zero( &L0 );
return L2 & L1;
}
mask_t
......@@ -760,19 +760,19 @@ eq_extensible (
const struct extensible_t* a,
const struct extensible_t* b
) {
mask_t L0, L1;
struct p448_t L2, L3, L4;
p448_mul ( &L4, &b->z, &a->x );
p448_mul ( &L3, &a->z, &b->x );
p448_sub ( &L2, &L4, &L3 );
p448_bias ( &L2, 2 );
L1 = p448_is_zero( &L2 );
p448_mul ( &L4, &b->z, &a->y );
p448_mul ( &L3, &a->z, &b->y );
p448_sub ( &L2, &L4, &L3 );
p448_bias ( &L2, 2 );
L0 = p448_is_zero( &L2 );
return L1 & L0;
mask_t L3, L4;
struct p448_t L0, L1, L2;
p448_mul ( &L2, &b->z, &a->x );
p448_mul ( &L1, &a->z, &b->x );
p448_sub ( &L0, &L2, &L1 );
p448_bias ( &L0, 2 );
L4 = p448_is_zero( &L0 );
p448_mul ( &L2, &b->z, &a->y );
p448_mul ( &L1, &a->z, &b->y );
p448_sub ( &L0, &L2, &L1 );
p448_bias ( &L0, 2 );
L3 = p448_is_zero( &L0 );
return L4 & L3;
}
mask_t
......@@ -780,19 +780,19 @@ eq_tw_extensible (
const struct tw_extensible_t* a,
const struct tw_extensible_t* b
) {
mask_t L0, L1;
struct p448_t L2, L3, L4;
p448_mul ( &L4, &b->z, &a->x );
p448_mul ( &L3, &a->z, &b->x );
p448_sub ( &L2, &L4, &L3 );
p448_bias ( &L2, 2 );
L1 = p448_is_zero( &L2 );
p448_mul ( &L4, &b->z, &a->y );
p448_mul ( &L3, &a->z, &b->y );
p448_sub ( &L2, &L4, &L3 );
p448_bias ( &L2, 2 );
L0 = p448_is_zero( &L2 );
return L1 & L0;
mask_t L3, L4;
struct p448_t L0, L1, L2;
p448_mul ( &L2, &b->z, &a->x );
p448_mul ( &L1, &a->z, &b->x );
p448_sub ( &L0, &L2, &L1 );
p448_bias ( &L0, 2 );
L4 = p448_is_zero( &L0 );
p448_mul ( &L2, &b->z, &a->y );
p448_mul ( &L1, &a->z, &b->y );
p448_sub ( &L0, &L2, &L1 );
p448_bias ( &L0, 2 );
L3 = p448_is_zero( &L0 );
return L4 & L3;
}
void
......@@ -801,38 +801,41 @@ elligator_2s_inject (
const struct p448_t* r
) {
mask_t L0, L1;
struct p448_t L2, L3, L4, L5, L6, L7, L8, L9;
struct p448_t L2, L3, L4, L5, L6, L7, L8;
p448_sqr ( &a->x, r );
p448_sqr ( &L3, &a->x );
p448_copy ( &a->y, &L3 );
p448_subw ( &a->y, 1 );
p448_neg ( &L9, &a->y );
p448_bias ( &L9, 2 );
p448_weak_reduce( &L9 );
p448_sqr ( &L2, &L9 );
p448_mulw ( &L8, &L2, 1527402724 );
p448_mulw ( &L7, &L3, 6108985600 );
p448_add ( &a->y, &L7, &L8 );
p448_neg ( &L4, &a->y );
p448_bias ( &L4, 2 );
p448_weak_reduce( &L4 );
p448_sqr ( &L2, &L4 );
p448_mulw ( &L7, &L2, 1527402724 );
p448_mulw ( &L8, &L3, 6108985600 );
p448_add ( &a->y, &L8, &L7 );
p448_weak_reduce( &a->y );
p448_mulw ( &L8, &L2, 6109454568 );
p448_sub ( &L7, &a->y, &L8 );
p448_bias ( &L7, 2 );
p448_weak_reduce( &L7 );
p448_mulw ( &L4, &a->y, 78160 );
p448_mul ( &L6, &L7, &L9 );
p448_mul ( &L8, &L6, &L4 );
p448_mulw ( &L6, &a->y, 78160 );
p448_mul ( &L5, &L7, &L6 );
p448_mul ( &L8, &L5, &L4 );
p448_mul ( &L4, &L5, &L6 );
p448_mul ( &L5, &L7, &L8 );
p448_mul ( &L8, &L5, &L4 );
p448_mul ( &L4, &L7, &L8 );
p448_isr ( &L5, &L4 );
p448_mul ( &L4, &L6, &L5 );
p448_sqr ( &L6, &L5 );
p448_mul ( &L5, &L8, &L6 );
p448_mul ( &L8, &L7, &L5 );
p448_mul ( &L7, &L8, &L5 );
p448_copy ( &L5, &a->x );
p448_subw ( &L5, 1 );
p448_isr ( &L6, &L4 );
p448_mul ( &L4, &L5, &L6 );
p448_sqr ( &L5, &L6 );
p448_mul ( &L6, &L8, &L5 );
p448_mul ( &L8, &L7, &L6 );
p448_mul ( &L7, &L8, &L6 );
p448_copy ( &L6, &a->x );
p448_subw ( &L6, 1 );
p448_addw ( &a->x, 1 );
p448_mul ( &L6, &a->x, &L8 );
p448_sub ( &a->x, &L5, &L6 );
p448_mul ( &L5, &a->x, &L8 );
p448_sub ( &a->x, &L6, &L5 );
p448_bias ( &a->x, 3 );
p448_weak_reduce( &a->x );
p448_mul ( &L5, &L4, &a->x );
......@@ -849,7 +852,7 @@ elligator_2s_inject (
p448_mulw ( &L3, &L2, 3054649120 );
p448_add ( &L2, &L3, &a->y );
p448_mul ( &a->y, &L7, &L2 );
L1 = p448_is_zero( &L9 );
L1 = p448_is_zero( &L8 );
L0 = - L1;
p448_addw ( &a->y, L0 );
p448_weak_reduce( &a->y );
......@@ -877,83 +880,83 @@ mask_t
validate_tw_extensible (
const struct tw_extensible_t* ext
) {
mask_t L0, L1;
struct p448_t L2, L3, L4, L5;
mask_t L4, L5;
struct p448_t L0, L1, L2, L3;
/*
* Check invariant:
* 0 = -x*y + z*t*u
*/
p448_mul ( &L2, &ext->t, &ext->u );
p448_mul ( &L4, &ext->z, &L2 );
p448_addw ( &L4, 0 );
p448_mul ( &L3, &ext->x, &ext->y );
p448_neg ( &L2, &L3 );
p448_add ( &L3, &L2, &L4 );
p448_bias ( &L3, 2 );
L1 = p448_is_zero( &L3 );
p448_mul ( &L1, &ext->t, &ext->u );
p448_mul ( &L2, &ext->z, &L1 );
p448_addw ( &L2, 0 );
p448_mul ( &L0, &ext->x, &ext->y );
p448_neg ( &L1, &L0 );
p448_add ( &L0, &L1, &L2 );
p448_bias ( &L0, 2 );
L5 = p448_is_zero( &L0 );
/*
* Check invariant:
* 0 = d*t^2*u^2 + x^2 - y^2 + z^2 - t^2*u^2
*/
p448_sqr ( &L4, &ext->y );
p448_neg ( &L2, &L4 );
p448_addw ( &L2, 0 );
p448_sqr ( &L3, &ext->x );
p448_add ( &L4, &L3, &L2 );
p448_sqr ( &L5, &ext->u );
p448_sqr ( &L3, &ext->t );
p448_mul ( &L2, &L3, &L5 );
p448_mulw ( &L3, &L2, 39081 );
p448_neg ( &L5, &L3 );
p448_add ( &L3, &L5, &L4 );