From 9d09322b41776a0d6ecde182f731eff77d0f052b Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter.wuille@gmail.com>
Date: Fri, 27 Mar 2015 14:03:36 -0700
Subject: [PATCH] Squashed 'src/secp256k1/' changes from 50cc6ab..1897b8e

1897b8e Merge pull request #229
efc571c Add simple testcases for signing with rfc6979 extra entropy.
1573a10 Add ability to pass extra entropy to rfc6979
3087bc4 Merge pull request #228
d9b9f11 Merge pull request #218
0065a8f Eliminate multiple-returns from secp256k1.c.
354ffa3 Make secp256k1_ec_pubkey_create reject oversized secrets.
27bc131 Silence some warnings from pedantic static analysis tools, improve compatibility with C++.
3b7ea63 Merge pull request #221
f789c5b Merge pull request #215
4bc273b Merge pull request #222
137a8ec Merge pull request #216
7c3771d Disable overlength-strings warnings.
8956111 use 128-bit hex seed
02efd06 Use RFC6979 for test PRNGs
ae55e85 Use faster byteswapping and avoid alignment-increasing casts.
443cd4b Get rid of hex format and some binary conversions
0bada0e Merge #214: Improve signing API documentation & specification
8030d7c Improve signing API documentation & specification
7b2fc1c Merge #213: Removed gotos, which are hard to trace and maintain.
11690d3 Removed gotos, which are hard to trace and maintain.
122a1ec Merge pull request #205
035406d Merge pull request #206
2d4cd53 Merge pull request #161
34b898d Additional comments for the testing PRNG and a seeding fix.
6efd6e7 Some comments explaining some of the constants in the code.
ffccfd2 x86_64 assembly optimization for scalar_4x64
67cbdf0 Merge pull request #207
039723d Benchmarks for all internal operations
6cc8425 Include a comment on secp256k1_ecdsa_sign explaining low-s.
f88343f Merge pull request #203
d61e899 Add group operation counts
2473f17 Merge pull request #202
b5bbce6 Some readme updates, e.g. removal of the GMP field.
f0d851e Merge pull request #201
a0ea884 Merge pull request #200
f735446 Convert the rest of the codebase to C89.
bf2e1ac Convert tests to C89. (also fixes a use of bare "inline" in field)
fc8285f Merge pull request #199
fff412e Merge pull request #197
4be8d6f Centralize the definition of uint128_t and use it uniformly.
d9543c9 Switch scalar code to C89.
fcc48c4 Remove the non-storage cmov
55422b6 Switch ecmult_gen to use storage types
41f8455 Use group element storage type in EC multiplications
e68d720 Add group element storage type
ff889f7 Field storage type
7137be8 Merge pull request #196
0768bd5 Get rid of variable-length hex string conversions
e84e761 Merge pull request #195
792bcdb Covert several more files to C89.
45cdf44 Merge pull request #193
17db09e Merge pull request #194
402878a fix ifdef/ifndef
25b35c7 Convert field code to strict C89 (+ long long, +__int128)
3627437 C89 nits and dead code removal.
a9f350d Merge pull request #191
4732d26 Convert the field/group/ecdsa constant initialization to static consts
19f3e76 Remove unused secp256k1_fe_inner_{start, stop} functions
f1ebfe3 Convert the scalar constant initialization to static consts

git-subtree-dir: src/secp256k1
git-subtree-split: 1897b8e90bbbdcd919427c9a8ae35b420e919d8f
---
 Makefile.am                  |  10 +-
 README.md                    |  20 +-
 configure.ac                 |  12 +-
 include/secp256k1.h          |  46 ++-
 src/bench.h                  |  29 +-
 src/bench_internal.c         | 318 +++++++++++++++++
 src/bench_inv.c              |  52 ---
 src/bench_recover.c          |  17 +-
 src/bench_sign.c             |  15 +-
 src/bench_verify.c           |  14 +-
 src/ecdsa.h                  |   4 -
 src/ecdsa_impl.h             | 173 +++++-----
 src/eckey_impl.h             |  19 +-
 src/ecmult_gen_impl.h        |  57 +--
 src/ecmult_impl.h            | 144 +++++---
 src/field.h                  |  27 +-
 src/field_10x26.h            |  26 ++
 src/field_10x26_impl.h       | 185 ++++++----
 src/field_5x52.h             |  26 ++
 src/field_5x52_impl.h        |  82 +++--
 src/field_5x52_int128_impl.h | 120 +++----
 src/field_impl.h             | 187 ++++------
 src/group.h                  |  42 ++-
 src/group_impl.h             | 250 +++++++-------
 src/hash.h                   |   4 +-
 src/hash_impl.h              | 117 +++----
 src/num_gmp_impl.h           |  29 +-
 src/scalar.h                 |   7 +-
 src/scalar_4x64.h            |   2 +
 src/scalar_4x64_impl.h       | 551 +++++++++++++++++++++++++++--
 src/scalar_8x32.h            |   2 +
 src/scalar_8x32_impl.h       |  87 +++--
 src/scalar_impl.h            | 279 +++++++--------
 src/secp256k1.c              | 311 +++++++++--------
 src/testrand.h               |   6 +-
 src/testrand_impl.h          |  48 +--
 src/tests.c                  | 651 ++++++++++++++++++++++-------------
 src/util.h                   |  21 +-
 38 files changed, 2525 insertions(+), 1465 deletions(-)
 create mode 100644 src/bench_internal.c
 delete mode 100644 src/bench_inv.c

diff --git a/Makefile.am b/Makefile.am
index 985c172eb..cc15338b7 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -49,7 +49,7 @@ libsecp256k1_la_LIBADD = $(SECP_LIBS)
 
 noinst_PROGRAMS =
 if USE_BENCHMARK
-noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_inv
+noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_internal
 bench_verify_SOURCES = src/bench_verify.c
 bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS)
 bench_verify_LDFLAGS = -static
@@ -59,10 +59,10 @@ bench_recover_LDFLAGS = -static
 bench_sign_SOURCES = src/bench_sign.c
 bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS)
 bench_sign_LDFLAGS = -static
-bench_inv_SOURCES = src/bench_inv.c
-bench_inv_LDADD = $(SECP_LIBS)
-bench_inv_LDFLAGS = -static
-bench_inv_CPPFLAGS = $(SECP_INCLUDES)
+bench_internal_SOURCES = src/bench_internal.c
+bench_internal_LDADD = $(SECP_LIBS)
+bench_internal_LDFLAGS = -static
+bench_internal_CPPFLAGS = $(SECP_INCLUDES)
 endif
 
 if USE_TESTS
diff --git a/README.md b/README.md
index 1e49f4941..6095db422 100644
--- a/README.md
+++ b/README.md
@@ -5,25 +5,29 @@ libsecp256k1
 
 Optimized C library for EC operations on curve secp256k1.
 
-This library is experimental, so use at your own risk.
+This library is a work in progress and is being used to research best practices. Use at your own risk.
 
 Features:
-* Low-level field and group operations on secp256k1.
-* ECDSA signing/verification and key generation.
+* secp256k1 ECDSA signing/verification and key generation.
 * Adding/multiplying private/public keys.
 * Serialization/parsing of private keys, public keys, signatures.
+* Constant time, constant memory access signing and pubkey generation.
+* Derandomized DSA (via RFC6979 or with a caller provided function.)
 * Very efficient implementation.
 
 Implementation details
 ----------------------
 
 * General
-  * Avoid dynamic memory usage almost everywhere.
+  * No runtime heap allocation.
+  * Extensive testing infrastructure.
+  * Structured to facilitate review and analysis.
+  * Intended to be portable to any system with a C89 compiler and uint64_t support.
+  * Expose only higher level interfaces to minimize the API surface and improve application security. ("Be difficult to use insecurely.")
 * Field operations
   * Optimized implementation of arithmetic modulo the curve's field size (2^256 - 0x1000003D1).
     * Using 5 52-bit limbs (including hand-optimized assembly for x86_64, by Diederik Huys).
     * Using 10 26-bit limbs.
-    * Using GMP.
   * Field inverses and square roots using a sliding window over blocks of 1s (by Peter Dettman).
 * Scalar operations
   * Optimized implementation without data-dependent branches of arithmetic modulo the curve's order.
@@ -33,14 +37,15 @@ Implementation details
   * Point addition formula specifically simplified for the curve equation (y^2 = x^3 + 7).
   * Use addition between points in Jacobian and affine coordinates where possible.
   * Use a unified addition/doubling formula where necessary to avoid data-dependent branches.
+  * Point/x comparison without a field inversion by comparison in the Jacobian coordinate space.
 * Point multiplication for verification (a*P + b*G).
   * Use wNAF notation for point multiplicands.
   * Use a much larger window for multiples of G, using precomputed multiples.
   * Use Shamir's trick to do the multiplication with the public key and the generator simultaneously.
-  * Optionally use secp256k1's efficiently-computable endomorphism to split the multiplicands into 4 half-sized ones first.
+  * Optionally (off by default) use secp256k1's efficiently-computable endomorphism to split the P multiplicand into 2 half-sized ones.
 * Point multiplication for signing
   * Use a precomputed table of multiples of powers of 16 multiplied with the generator, so general multiplication becomes a series of additions.
-  * Slice the precomputed table in memory per byte, so memory access to the table becomes uniform.
+  * Access the table with branch-free conditional moves so memory access is uniform.
   * No data-dependent branches
   * The precomputed tables add and eventually subtract points for which no known scalar (private key) is known, preventing even an attacker with control over the private key used to control the data internally.
 
@@ -52,4 +57,5 @@ libsecp256k1 is built using autotools:
     $ ./autogen.sh
     $ ./configure
     $ make
+    $ ./tests
     $ sudo make install  # optional
diff --git a/configure.ac b/configure.ac
index f691156ff..3dc182951 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,7 +5,7 @@ AC_CONFIG_MACRO_DIR([build-aux/m4])
 AC_CANONICAL_HOST
 AH_TOP([#ifndef LIBSECP256K1_CONFIG_H])
 AH_TOP([#define LIBSECP256K1_CONFIG_H])
-AH_BOTTOM([#endif //LIBSECP256K1_CONFIG_H])
+AH_BOTTOM([#endif /*LIBSECP256K1_CONFIG_H*/])
 AM_INIT_AUTOMAKE([foreign subdir-objects])
 LT_INIT
 
@@ -22,9 +22,9 @@ if test "x$CFLAGS" = "x"; then
   CFLAGS="-O3 -g"
 fi
 
-AC_PROG_CC_C99
-if test x"$ac_cv_prog_cc_c99" = x"no"; then
-  AC_MSG_ERROR([c99 compiler support required])
+AC_PROG_CC_C89
+if test x"$ac_cv_prog_cc_c89" = x"no"; then
+  AC_MSG_ERROR([c89 compiler support required])
 fi
 
 case $host in
@@ -70,7 +70,7 @@ esac
 
 CFLAGS="$CFLAGS -W"
 
-warn_CFLAGS="-Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -Wno-unused-function"
+warn_CFLAGS="-std=c89 -pedantic -Wall -Wextra -Wcast-align -Wnested-externs -Wshadow -Wstrict-prototypes -Wno-unused-function -Wno-long-long -Wno-overlength-strings"
 saved_CFLAGS="$CFLAGS"
 CFLAGS="$CFLAGS $warn_CFLAGS"
 AC_MSG_CHECKING([if ${CC} supports ${warn_CFLAGS}])
@@ -305,6 +305,8 @@ if test x"$use_endomorphism" = x"yes"; then
   AC_DEFINE(USE_ENDOMORPHISM, 1, [Define this symbol to use endomorphism optimization])
 fi
 
+AC_C_BIGENDIAN()
+
 AC_MSG_NOTICE([Using assembly optimizations: $set_asm])
 AC_MSG_NOTICE([Using field implementation: $set_field])
 AC_MSG_NOTICE([Using bignum implementation: $set_bignum])
diff --git a/include/secp256k1.h b/include/secp256k1.h
index cfdae31ea..a6e39d13d 100644
--- a/include/secp256k1.h
+++ b/include/secp256k1.h
@@ -78,7 +78,7 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify(
 ) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4);
 
 /** A pointer to a function to deterministically generate a nonce.
- * Returns: 1 if a nonce was succesfully generated. 0 will cause signing to fail.
+ * Returns: 1 if a nonce was successfully generated. 0 will cause signing to fail.
  * In:      msg32:     the 32-byte message hash being verified (will not be NULL)
  *          key32:     pointer to a 32-byte secret key (will not be NULL)
  *          attempt:   how many iterations we have tried to find a nonce.
@@ -97,7 +97,10 @@ typedef int (*secp256k1_nonce_function_t)(
   const void *data
 );
 
-/** An implementation of RFC6979 (using HMAC-SHA256) as nonce generation function. */
+/** An implementation of RFC6979 (using HMAC-SHA256) as nonce generation function.
+ * If a data pointer is passed, it is assumed to be a pointer to 32 bytes of
+ * extra entropy.
+ */
 extern const secp256k1_nonce_function_t secp256k1_nonce_function_rfc6979;
 
 /** A default safe nonce generation function (currently equal to secp256k1_nonce_function_rfc6979). */
@@ -106,15 +109,43 @@ extern const secp256k1_nonce_function_t secp256k1_nonce_function_default;
 
 /** Create an ECDSA signature.
  *  Returns: 1: signature created
- *           0: the nonce generation function failed
+ *           0: the nonce generation function failed, the private key was invalid, or there is not
+ *              enough space in the signature (as indicated by siglen).
  *  In:      msg32:  the 32-byte message hash being signed (cannot be NULL)
- *           seckey: pointer to a 32-byte secret key (cannot be NULL, assumed to be valid)
+ *           seckey: pointer to a 32-byte secret key (cannot be NULL)
  *           noncefp:pointer to a nonce generation function. If NULL, secp256k1_nonce_function_default is used
  *           ndata:  pointer to arbitrary data used by the nonce generation function (can be NULL)
  *  Out:     sig:    pointer to an array where the signature will be placed (cannot be NULL)
  *  In/Out:  siglen: pointer to an int with the length of sig, which will be updated
- *                   to contain the actual signature length (<=72).
+ *                   to contain the actual signature length (<=72). If 0 is returned, this will be
+ *                   set to zero.
  * Requires starting using SECP256K1_START_SIGN.
+ *
+ * The sig always has an s value in the lower half of the range (From 0x1
+ * to 0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0,
+ * inclusive), unlike many other implementations.
+ * With ECDSA a third-party can can forge a second distinct signature
+ * of the same message given a single initial signature without knowing
+ * the key by setting s to its additive inverse mod-order, 'flipping' the
+ * sign of the random point R which is not included in the signature.
+ * Since the forgery is of the same message this isn't universally
+ * problematic, but in systems where message malleability or uniqueness
+ * of signatures is important this can cause issues.  This forgery can be
+ * blocked by all verifiers forcing signers to use a canonical form. The
+ * lower-S form reduces the size of signatures slightly on average when
+ * variable length encodings (such as DER) are used and is cheap to
+ * verify, making it a good choice. Security of always using lower-S is
+ * assured because anyone can trivially modify a signature after the
+ * fact to enforce this property.  Adjusting it inside the signing
+ * function avoids the need to re-serialize or have curve specific
+ * constants outside of the library.  By always using a canonical form
+ * even in applications where it isn't needed it becomes possible to
+ * impose a requirement later if a need is discovered.
+ * No other forms of ECDSA malleability are known and none seem likely,
+ * but there is no formal proof that ECDSA, even with this additional
+ * restriction, is free of other malleability.  Commonly used serialization
+ * schemes will also accept various non-unique encodings, so care should
+ * be taken when this property is required for an application.
  */
 int secp256k1_ecdsa_sign(
   const unsigned char *msg32,
@@ -127,12 +158,13 @@ int secp256k1_ecdsa_sign(
 
 /** Create a compact ECDSA signature (64 byte + recovery id).
  *  Returns: 1: signature created
- *           0: the nonce generation function failed
+ *           0: the nonce generation function failed, or the secret key was invalid.
  *  In:      msg32:  the 32-byte message hash being signed (cannot be NULL)
- *           seckey: pointer to a 32-byte secret key (cannot be NULL, assumed to be valid)
+ *           seckey: pointer to a 32-byte secret key (cannot be NULL)
  *           noncefp:pointer to a nonce generation function. If NULL, secp256k1_nonce_function_default is used
  *           ndata:  pointer to arbitrary data used by the nonce generation function (can be NULL)
  *  Out:     sig:    pointer to a 64-byte array where the signature will be placed (cannot be NULL)
+ *                   In case 0 is returned, the returned signature length will be zero.
  *           recid:  pointer to an int, which will be updated to contain the recovery id (can be NULL)
  * Requires starting using SECP256K1_START_SIGN.
  */
diff --git a/src/bench.h b/src/bench.h
index 668ec39f7..0559b3e85 100644
--- a/src/bench.h
+++ b/src/bench.h
@@ -17,21 +17,40 @@ static double gettimedouble(void) {
     return tv.tv_usec * 0.000001 + tv.tv_sec;
 }
 
-void run_benchmark(void (*benchmark)(void*), void (*setup)(void*), void (*teardown)(void*), void* data, int count, int iter) {
+void print_number(double x) {
+    double y = x;
+    int c = 0;
+    if (y < 0.0) y = -y;
+    while (y < 100.0) {
+        y *= 10.0;
+        c++;
+    }
+    printf("%.*f", c, x);
+}
+
+void run_benchmark(char *name, void (*benchmark)(void*), void (*setup)(void*), void (*teardown)(void*), void* data, int count, int iter) {
+    int i;
     double min = HUGE_VAL;
     double sum = 0.0;
     double max = 0.0;
-    for (int i = 0; i < count; i++) {
+    for (i = 0; i < count; i++) {
+        double begin, total;
         if (setup) setup(data);
-        double begin = gettimedouble();
+        begin = gettimedouble();
         benchmark(data);
-        double total = gettimedouble() - begin;
+        total = gettimedouble() - begin;
         if (teardown) teardown(data);
         if (total < min) min = total;
         if (total > max) max = total;
         sum += total;
     }
-    printf("min %.3fus / avg %.3fus / max %.3fus\n", min * 1000000.0 / iter, (sum / count) * 1000000.0 / iter, max * 1000000.0 / iter);
+    printf("%s: min ", name);
+    print_number(min * 1000000.0 / iter);
+    printf("us / avg ");
+    print_number((sum / count) * 1000000.0 / iter);
+    printf("us / avg ");
+    print_number(max * 1000000.0 / iter);
+    printf("us\n");
 }
 
 #endif
diff --git a/src/bench_internal.c b/src/bench_internal.c
new file mode 100644
index 000000000..a960549b9
--- /dev/null
+++ b/src/bench_internal.c
@@ -0,0 +1,318 @@
+/**********************************************************************
+ * Copyright (c) 2014-2015 Pieter Wuille                              *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+#include <stdio.h>
+
+#include "include/secp256k1.h"
+
+#include "util.h"
+#include "hash_impl.h"
+#include "num_impl.h"
+#include "field_impl.h"
+#include "group_impl.h"
+#include "scalar_impl.h"
+#include "ecmult_impl.h"
+#include "bench.h"
+
+typedef struct {
+    secp256k1_scalar_t scalar_x, scalar_y;
+    secp256k1_fe_t fe_x, fe_y;
+    secp256k1_ge_t ge_x, ge_y;
+    secp256k1_gej_t gej_x, gej_y;
+    unsigned char data[32];
+    int wnaf[256];
+} bench_inv_t;
+
+void bench_setup(void* arg) {
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    static const unsigned char init_x[32] = {
+        0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
+        0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
+        0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
+        0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
+    };
+
+    static const unsigned char init_y[32] = {
+        0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
+        0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
+        0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
+        0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
+    };
+
+    secp256k1_scalar_set_b32(&data->scalar_x, init_x, NULL);
+    secp256k1_scalar_set_b32(&data->scalar_y, init_y, NULL);
+    secp256k1_fe_set_b32(&data->fe_x, init_x);
+    secp256k1_fe_set_b32(&data->fe_y, init_y);
+    CHECK(secp256k1_ge_set_xo_var(&data->ge_x, &data->fe_x, 0));
+    CHECK(secp256k1_ge_set_xo_var(&data->ge_y, &data->fe_y, 1));
+    secp256k1_gej_set_ge(&data->gej_x, &data->ge_x);
+    secp256k1_gej_set_ge(&data->gej_y, &data->ge_y);
+    memcpy(data->data, init_x, 32);
+}
+
+void bench_scalar_add(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 2000000; i++) {
+        secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+    }
+}
+
+void bench_scalar_negate(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 2000000; i++) {
+        secp256k1_scalar_negate(&data->scalar_x, &data->scalar_x);
+    }
+}
+
+void bench_scalar_sqr(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 200000; i++) {
+        secp256k1_scalar_sqr(&data->scalar_x, &data->scalar_x);
+    }
+}
+
+void bench_scalar_mul(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 200000; i++) {
+        secp256k1_scalar_mul(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+    }
+}
+
+#ifdef USE_ENDOMORPHISM
+void bench_scalar_split(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_scalar_t l, r;
+        secp256k1_scalar_split_lambda_var(&l, &r, &data->scalar_x);
+        secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+    }
+}
+#endif
+
+void bench_scalar_inverse(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 2000; i++) {
+        secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x);
+        secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+    }
+}
+
+void bench_scalar_inverse_var(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 2000; i++) {
+        secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x);
+        secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+    }
+}
+
+void bench_field_normalize(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 2000000; i++) {
+        secp256k1_fe_normalize(&data->fe_x);
+    }
+}
+
+void bench_field_normalize_weak(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 2000000; i++) {
+        secp256k1_fe_normalize_weak(&data->fe_x);
+    }
+}
+
+void bench_field_mul(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 200000; i++) {
+        secp256k1_fe_mul(&data->fe_x, &data->fe_x, &data->fe_y);
+    }
+}
+
+void bench_field_sqr(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 200000; i++) {
+        secp256k1_fe_sqr(&data->fe_x, &data->fe_x);
+    }
+}
+
+void bench_field_inverse(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_fe_inv(&data->fe_x, &data->fe_x);
+        secp256k1_fe_add(&data->fe_x, &data->fe_y);
+    }
+}
+
+void bench_field_inverse_var(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_fe_inv_var(&data->fe_x, &data->fe_x);
+        secp256k1_fe_add(&data->fe_x, &data->fe_y);
+    }
+}
+
+void bench_field_sqrt_var(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_fe_sqrt_var(&data->fe_x, &data->fe_x);
+        secp256k1_fe_add(&data->fe_x, &data->fe_y);
+    }
+}
+
+void bench_group_double_var(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 200000; i++) {
+        secp256k1_gej_double_var(&data->gej_x, &data->gej_x);
+    }
+}
+
+void bench_group_add_var(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 200000; i++) {
+        secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y);
+    }
+}
+
+void bench_group_add_affine(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 200000; i++) {
+        secp256k1_gej_add_ge(&data->gej_x, &data->gej_x, &data->ge_y);
+    }
+}
+
+void bench_group_add_affine_var(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 200000; i++) {
+        secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y);
+    }
+}
+
+void bench_ecmult_wnaf(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_ecmult_wnaf(data->wnaf, &data->scalar_x, WINDOW_A);
+        secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+    }
+}
+
+
+void bench_sha256(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+    secp256k1_sha256_t sha;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_sha256_initialize(&sha);
+        secp256k1_sha256_write(&sha, data->data, 32);
+        secp256k1_sha256_finalize(&sha, data->data);
+    }
+}
+
+void bench_hmac_sha256(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+    secp256k1_hmac_sha256_t hmac;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_hmac_sha256_initialize(&hmac, data->data, 32);
+        secp256k1_hmac_sha256_write(&hmac, data->data, 32);
+        secp256k1_hmac_sha256_finalize(&hmac, data->data);
+    }
+}
+
+void bench_rfc6979_hmac_sha256(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+    secp256k1_rfc6979_hmac_sha256_t rng;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_rfc6979_hmac_sha256_initialize(&rng, data->data, 32, data->data, 32, NULL, 0);
+        secp256k1_rfc6979_hmac_sha256_generate(&rng, data->data, 32);
+    }
+}
+
+
+int have_flag(int argc, char** argv, char *flag) {
+    char** argm = argv + argc;
+    argv++;
+    if (argv == argm) {
+        return 1;
+    }
+    while (argv != NULL && argv != argm) {
+        if (strcmp(*argv, flag) == 0) return 1;
+        argv++;
+    }
+    return 0;
+}
+
+int main(int argc, char **argv) {
+    bench_inv_t data;
+    if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, 2000000);
+    if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, 2000000);
+    if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "sqr")) run_benchmark("scalar_sqr", bench_scalar_sqr, bench_setup, NULL, &data, 10, 200000);
+    if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, 200000);
+#ifdef USE_ENDOMORPHISM
+    if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "split")) run_benchmark("scalar_split", bench_scalar_split, bench_setup, NULL, &data, 10, 20000);
+#endif
+    if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, 2000);
+    if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, 2000);
+
+    if (have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize", bench_field_normalize, bench_setup, NULL, &data, 10, 2000000);
+    if (have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize_weak", bench_field_normalize_weak, bench_setup, NULL, &data, 10, 2000000);
+    if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqr")) run_benchmark("field_sqr", bench_field_sqr, bench_setup, NULL, &data, 10, 200000);
+    if (have_flag(argc, argv, "field") || have_flag(argc, argv, "mul")) run_benchmark("field_mul", bench_field_mul, bench_setup, NULL, &data, 10, 200000);
+    if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, 20000);
+    if (have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, 20000);
+    if (have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt_var", bench_field_sqrt_var, bench_setup, NULL, &data, 10, 20000);
+
+    if (have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, 200000);
+    if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, 200000);
+    if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, 200000);
+    if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, 200000);
+
+    if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, 20000);
+
+    if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "sha256")) run_benchmark("hash_sha256", bench_sha256, bench_setup, NULL, &data, 10, 20000);
+    if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "hmac")) run_benchmark("hash_hmac_sha256", bench_hmac_sha256, bench_setup, NULL, &data, 10, 20000);
+    if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "rng6979")) run_benchmark("hash_rfc6979_hmac_sha256", bench_rfc6979_hmac_sha256, bench_setup, NULL, &data, 10, 20000);
+    return 0;
+}
diff --git a/src/bench_inv.c b/src/bench_inv.c
deleted file mode 100644
index 3bdedea30..000000000
--- a/src/bench_inv.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/**********************************************************************
- * Copyright (c) 2014 Pieter Wuille                                   *
- * Distributed under the MIT software license, see the accompanying   *
- * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
- **********************************************************************/
-#include <stdio.h>
-
-#include "include/secp256k1.h"
-
-#include "util.h"
-#include "num_impl.h"
-#include "field_impl.h"
-#include "group_impl.h"
-#include "scalar_impl.h"
-#include "bench.h"
-
-typedef struct {
-    secp256k1_scalar_t base, x;
-} bench_inv_t;
-
-void bench_inv_setup(void* arg) {
-    bench_inv_t *data = (bench_inv_t*)arg;
-
-    static const unsigned char init[32] = {
-        0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
-        0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
-        0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
-        0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
-    };
-
-    secp256k1_scalar_set_b32(&data->base, init, NULL);
-    secp256k1_scalar_set_b32(&data->x, init, NULL);
-}
-
-void bench_inv(void* arg) {
-    bench_inv_t *data = (bench_inv_t*)arg;
-
-    for (int i=0; i<20000; i++) {
-        secp256k1_scalar_inverse(&data->x, &data->x);
-        secp256k1_scalar_add(&data->x, &data->x, &data->base);
-    }
-}
-
-int main(void) {
-    secp256k1_ge_start();
-
-    bench_inv_t data;
-    run_benchmark(bench_inv, bench_inv_setup, NULL, &data, 10, 20000);
-
-    secp256k1_ge_stop();
-    return 0;
-}
diff --git a/src/bench_recover.c b/src/bench_recover.c
index b1e0f33ef..6991cc9d6 100644
--- a/src/bench_recover.c
+++ b/src/bench_recover.c
@@ -14,13 +14,15 @@ typedef struct {
 } bench_recover_t;
 
 void bench_recover(void* arg) {
+    int i;
     bench_recover_t *data = (bench_recover_t*)arg;
-
     unsigned char pubkey[33];
-    for (int i=0; i<20000; i++) {
+
+    for (i = 0; i < 20000; i++) {
+        int j;
         int pubkeylen = 33;
         CHECK(secp256k1_ecdsa_recover_compact(data->msg, data->sig, pubkey, &pubkeylen, 1, i % 2));
-        for (int j = 0; j < 32; j++) {
+        for (j = 0; j < 32; j++) {
             data->sig[j + 32] = data->msg[j];    /* Move former message to S. */
             data->msg[j] = data->sig[j];         /* Move former R to message. */
             data->sig[j] = pubkey[j + 1];        /* Move recovered pubkey X coordinate to R (which must be a valid X coordinate). */
@@ -29,17 +31,18 @@ void bench_recover(void* arg) {
 }
 
 void bench_recover_setup(void* arg) {
+    int i;
     bench_recover_t *data = (bench_recover_t*)arg;
 
-    for (int i = 0; i < 32; i++) data->msg[i] = 1 + i;
-    for (int i = 0; i < 64; i++) data->sig[i] = 65 + i;
+    for (i = 0; i < 32; i++) data->msg[i] = 1 + i;
+    for (i = 0; i < 64; i++) data->sig[i] = 65 + i;
 }
 
 int main(void) {
+    bench_recover_t data;
     secp256k1_start(SECP256K1_START_VERIFY);
 
-    bench_recover_t data;
-    run_benchmark(bench_recover, bench_recover_setup, NULL, &data, 10, 20000);
+    run_benchmark("ecdsa_recover", bench_recover, bench_recover_setup, NULL, &data, 10, 20000);
 
     secp256k1_stop();
     return 0;
diff --git a/src/bench_sign.c b/src/bench_sign.c
index 2276f00b9..c5b6829a8 100644
--- a/src/bench_sign.c
+++ b/src/bench_sign.c
@@ -14,20 +14,23 @@ typedef struct {
 } bench_sign_t;
 
 static void bench_sign_setup(void* arg) {
+    int i;
     bench_sign_t *data = (bench_sign_t*)arg;
 
-    for (int i = 0; i < 32; i++) data->msg[i] = i + 1;
-    for (int i = 0; i < 32; i++) data->key[i] = i + 65;
+    for (i = 0; i < 32; i++) data->msg[i] = i + 1;
+    for (i = 0; i < 32; i++) data->key[i] = i + 65;
 }
 
 static void bench_sign(void* arg) {
+    int i;
     bench_sign_t *data = (bench_sign_t*)arg;
 
     unsigned char sig[64];
-    for (int i=0; i<20000; i++) {
+    for (i = 0; i < 20000; i++) {
+        int j;
         int recid = 0;
         CHECK(secp256k1_ecdsa_sign_compact(data->msg, sig, data->key, NULL, NULL, &recid));
-        for (int j = 0; j < 32; j++) {
+        for (j = 0; j < 32; j++) {
             data->msg[j] = sig[j];             /* Move former R to message. */
             data->key[j] = sig[j + 32];        /* Move former S to key.     */
         }
@@ -35,10 +38,10 @@ static void bench_sign(void* arg) {
 }
 
 int main(void) {
+    bench_sign_t data;
     secp256k1_start(SECP256K1_START_SIGN);
 
-    bench_sign_t data;
-    run_benchmark(bench_sign, bench_sign_setup, NULL, &data, 10, 20000);
+    run_benchmark("ecdsa_sign", bench_sign, bench_sign_setup, NULL, &data, 10, 20000);
 
     secp256k1_stop();
     return 0;
diff --git a/src/bench_verify.c b/src/bench_verify.c
index a58ca8434..c279305a0 100644
--- a/src/bench_verify.c
+++ b/src/bench_verify.c
@@ -21,9 +21,10 @@ typedef struct {
 } benchmark_verify_t;
 
 static void benchmark_verify(void* arg) {
+    int i;
     benchmark_verify_t* data = (benchmark_verify_t*)arg;
 
-    for (int i=0; i<20000; i++) {
+    for (i = 0; i < 20000; i++) {
         data->sig[data->siglen - 1] ^= (i & 0xFF);
         data->sig[data->siglen - 2] ^= ((i >> 8) & 0xFF);
         data->sig[data->siglen - 3] ^= ((i >> 16) & 0xFF);
@@ -35,18 +36,19 @@ static void benchmark_verify(void* arg) {
 }
 
 int main(void) {
-    secp256k1_start(SECP256K1_START_VERIFY | SECP256K1_START_SIGN);
-
+    int i;
     benchmark_verify_t data;
 
-    for (int i = 0; i < 32; i++) data.msg[i] = 1 + i;
-    for (int i = 0; i < 32; i++) data.key[i] = 33 + i;
+    secp256k1_start(SECP256K1_START_VERIFY | SECP256K1_START_SIGN);
+
+    for (i = 0; i < 32; i++) data.msg[i] = 1 + i;
+    for (i = 0; i < 32; i++) data.key[i] = 33 + i;
     data.siglen = 72;
     secp256k1_ecdsa_sign(data.msg, data.sig, &data.siglen, data.key, NULL, NULL);
     data.pubkeylen = 33;
     CHECK(secp256k1_ec_pubkey_create(data.pubkey, &data.pubkeylen, data.key, 1));
 
-    run_benchmark(benchmark_verify, NULL, NULL, &data, 10, 20000);
+    run_benchmark("ecdsa_verify", benchmark_verify, NULL, NULL, &data, 10, 20000);
 
     secp256k1_stop();
     return 0;
diff --git a/src/ecdsa.h b/src/ecdsa.h
index 5fc5230c3..c195e7afc 100644
--- a/src/ecdsa.h
+++ b/src/ecdsa.h
@@ -10,9 +10,6 @@
 #include "scalar.h"
 #include "group.h"
 
-static void secp256k1_ecsda_start(void);
-static void secp256k1_ecdsa_stop(void);
-
 typedef struct {
     secp256k1_scalar_t r, s;
 } secp256k1_ecdsa_sig_t;
@@ -22,6 +19,5 @@ static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const se
 static int secp256k1_ecdsa_sig_verify(const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message);
 static int secp256k1_ecdsa_sig_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *seckey, const secp256k1_scalar_t *message, const secp256k1_scalar_t *nonce, int *recid);
 static int secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message, int recid);
-static void secp256k1_ecdsa_sig_set_rs(secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *r, const secp256k1_scalar_t *s);
 
 #endif
diff --git a/src/ecdsa_impl.h b/src/ecdsa_impl.h
index 674650c1e..1a7764939 100644
--- a/src/ecdsa_impl.h
+++ b/src/ecdsa_impl.h
@@ -15,71 +15,69 @@
 #include "ecmult_gen.h"
 #include "ecdsa.h"
 
-typedef struct {
-    secp256k1_fe_t order_as_fe;
-    secp256k1_fe_t p_minus_order;
-} secp256k1_ecdsa_consts_t;
-
-static const secp256k1_ecdsa_consts_t *secp256k1_ecdsa_consts = NULL;
-
-static void secp256k1_ecdsa_start(void) {
-    if (secp256k1_ecdsa_consts != NULL)
-        return;
-
-    /* Allocate. */
-    secp256k1_ecdsa_consts_t *ret = (secp256k1_ecdsa_consts_t*)checked_malloc(sizeof(secp256k1_ecdsa_consts_t));
-
-    static const unsigned char order[] = {
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
-        0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
-        0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41
-    };
-
-    secp256k1_fe_set_b32(&ret->order_as_fe, order);
-    secp256k1_fe_negate(&ret->p_minus_order, &ret->order_as_fe, 1);
-    secp256k1_fe_normalize_var(&ret->p_minus_order);
-
-    /* Set the global pointer. */
-    secp256k1_ecdsa_consts = ret;
-}
-
-static void secp256k1_ecdsa_stop(void) {
-    if (secp256k1_ecdsa_consts == NULL)
-        return;
-
-    secp256k1_ecdsa_consts_t *c = (secp256k1_ecdsa_consts_t*)secp256k1_ecdsa_consts;
-    secp256k1_ecdsa_consts = NULL;
-    free(c);
-}
+/** Group order for secp256k1 defined as 'n' in "Standards for Efficient Cryptography" (SEC2) 2.7.1
+ *  sage: for t in xrange(1023, -1, -1):
+ *     ..   p = 2**256 - 2**32 - t
+ *     ..   if p.is_prime():
+ *     ..     print '%x'%p
+ *     ..     break
+ *   'fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f'
+ *  sage: a = 0
+ *  sage: b = 7
+ *  sage: F = FiniteField (p)
+ *  sage: '%x' % (EllipticCurve ([F (a), F (b)]).order())
+ *   'fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141'
+ */
+static const secp256k1_fe_t secp256k1_ecdsa_const_order_as_fe = SECP256K1_FE_CONST(
+    0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+    0xBAAEDCE6UL, 0xAF48A03BUL, 0xBFD25E8CUL, 0xD0364141UL
+);
+
+/** Difference between field and order, values 'p' and 'n' values defined in
+ *  "Standards for Efficient Cryptography" (SEC2) 2.7.1.
+ *  sage: p = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F
+ *  sage: a = 0
+ *  sage: b = 7
+ *  sage: F = FiniteField (p)
+ *  sage: '%x' % (p - EllipticCurve ([F (a), F (b)]).order())
+ *   '14551231950b75fc4402da1722fc9baee'
+ */
+static const secp256k1_fe_t secp256k1_ecdsa_const_p_minus_order = SECP256K1_FE_CONST(
+    0, 0, 0, 1, 0x45512319UL, 0x50B75FC4UL, 0x402DA172UL, 0x2FC9BAEEUL
+);
 
 static int secp256k1_ecdsa_sig_parse(secp256k1_ecdsa_sig_t *r, const unsigned char *sig, int size) {
+    unsigned char ra[32] = {0}, sa[32] = {0};
+    const unsigned char *rp;
+    const unsigned char *sp;
+    int lenr;
+    int lens;
+    int overflow;
     if (sig[0] != 0x30) return 0;
-    int lenr = sig[3];
+    lenr = sig[3];
     if (5+lenr >= size) return 0;
-    int lens = sig[lenr+5];
+    lens = sig[lenr+5];
     if (sig[1] != lenr+lens+4) return 0;
     if (lenr+lens+6 > size) return 0;
     if (sig[2] != 0x02) return 0;
     if (lenr == 0) return 0;
     if (sig[lenr+4] != 0x02) return 0;
     if (lens == 0) return 0;
-    const unsigned char *sp = sig + 6 + lenr;
+    sp = sig + 6 + lenr;
     while (lens > 0 && sp[0] == 0) {
         lens--;
         sp++;
     }
     if (lens > 32) return 0;
-    const unsigned char *rp = sig + 4;
+    rp = sig + 4;
     while (lenr > 0 && rp[0] == 0) {
         lenr--;
         rp++;
     }
     if (lenr > 32) return 0;
-    unsigned char ra[32] = {0}, sa[32] = {0};
     memcpy(ra + 32 - lenr, rp, lenr);
     memcpy(sa + 32 - lens, sp, lens);
-    int overflow = 0;
+    overflow = 0;
     secp256k1_scalar_set_b32(&r->r, ra, &overflow);
     if (overflow) return 0;
     secp256k1_scalar_set_b32(&r->s, sa, &overflow);
@@ -89,10 +87,10 @@ static int secp256k1_ecdsa_sig_parse(secp256k1_ecdsa_sig_t *r, const unsigned ch
 
 static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const secp256k1_ecdsa_sig_t *a) {
     unsigned char r[33] = {0}, s[33] = {0};
-    secp256k1_scalar_get_b32(&r[1], &a->r);
-    secp256k1_scalar_get_b32(&s[1], &a->s);
     unsigned char *rp = r, *sp = s;
     int lenR = 33, lenS = 33;
+    secp256k1_scalar_get_b32(&r[1], &a->r);
+    secp256k1_scalar_get_b32(&s[1], &a->s);
     while (lenR > 1 && rp[0] == 0 && rp[1] < 0x80) { lenR--; rp++; }
     while (lenS > 1 && sp[0] == 0 && sp[1] < 0x80) { lenS--; sp++; }
     if (*size < 6+lenS+lenR)
@@ -110,93 +108,100 @@ static int secp256k1_ecdsa_sig_serialize(unsigned char *sig, int *size, const se
 }
 
 static int secp256k1_ecdsa_sig_verify(const secp256k1_ecdsa_sig_t *sig, const secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message) {
+    unsigned char c[32];
+    secp256k1_scalar_t sn, u1, u2;
+    secp256k1_fe_t xr;
+    secp256k1_gej_t pubkeyj;
+    secp256k1_gej_t pr;
+
     if (secp256k1_scalar_is_zero(&sig->r) || secp256k1_scalar_is_zero(&sig->s))
         return 0;
 
-    secp256k1_scalar_t sn, u1, u2;
     secp256k1_scalar_inverse_var(&sn, &sig->s);
     secp256k1_scalar_mul(&u1, &sn, message);
     secp256k1_scalar_mul(&u2, &sn, &sig->r);
-    secp256k1_gej_t pubkeyj; secp256k1_gej_set_ge(&pubkeyj, pubkey);
-    secp256k1_gej_t pr; secp256k1_ecmult(&pr, &pubkeyj, &u2, &u1);
+    secp256k1_gej_set_ge(&pubkeyj, pubkey);
+    secp256k1_ecmult(&pr, &pubkeyj, &u2, &u1);
     if (secp256k1_gej_is_infinity(&pr)) {
         return 0;
     }
-    unsigned char c[32];
     secp256k1_scalar_get_b32(c, &sig->r);
-    secp256k1_fe_t xr;
     secp256k1_fe_set_b32(&xr, c);
 
-    // We now have the recomputed R point in pr, and its claimed x coordinate (modulo n)
-    // in xr. Naively, we would extract the x coordinate from pr (requiring a inversion modulo p),
-    // compute the remainder modulo n, and compare it to xr. However:
-    //
-    //       xr == X(pr) mod n
-    //   <=> exists h. (xr + h * n < p && xr + h * n == X(pr))
-    //   [Since 2 * n > p, h can only be 0 or 1]
-    //   <=> (xr == X(pr)) || (xr + n < p && xr + n == X(pr))
-    //   [In Jacobian coordinates, X(pr) is pr.x / pr.z^2 mod p]
-    //   <=> (xr == pr.x / pr.z^2 mod p) || (xr + n < p && xr + n == pr.x / pr.z^2 mod p)
-    //   [Multiplying both sides of the equations by pr.z^2 mod p]
-    //   <=> (xr * pr.z^2 mod p == pr.x) || (xr + n < p && (xr + n) * pr.z^2 mod p == pr.x)
-    //
-    // Thus, we can avoid the inversion, but we have to check both cases separately.
-    // secp256k1_gej_eq_x implements the (xr * pr.z^2 mod p == pr.x) test.
+    /** We now have the recomputed R point in pr, and its claimed x coordinate (modulo n)
+     *  in xr. Naively, we would extract the x coordinate from pr (requiring a inversion modulo p),
+     *  compute the remainder modulo n, and compare it to xr. However:
+     *
+     *        xr == X(pr) mod n
+     *    <=> exists h. (xr + h * n < p && xr + h * n == X(pr))
+     *    [Since 2 * n > p, h can only be 0 or 1]
+     *    <=> (xr == X(pr)) || (xr + n < p && xr + n == X(pr))
+     *    [In Jacobian coordinates, X(pr) is pr.x / pr.z^2 mod p]
+     *    <=> (xr == pr.x / pr.z^2 mod p) || (xr + n < p && xr + n == pr.x / pr.z^2 mod p)
+     *    [Multiplying both sides of the equations by pr.z^2 mod p]
+     *    <=> (xr * pr.z^2 mod p == pr.x) || (xr + n < p && (xr + n) * pr.z^2 mod p == pr.x)
+     *
+     *  Thus, we can avoid the inversion, but we have to check both cases separately.
+     *  secp256k1_gej_eq_x implements the (xr * pr.z^2 mod p == pr.x) test.
+     */
     if (secp256k1_gej_eq_x_var(&xr, &pr)) {
-        // xr.x == xr * xr.z^2 mod p, so the signature is valid.
+        /* xr.x == xr * xr.z^2 mod p, so the signature is valid. */
         return 1;
     }
-    if (secp256k1_fe_cmp_var(&xr, &secp256k1_ecdsa_consts->p_minus_order) >= 0) {
-        // xr + p >= n, so we can skip testing the second case.
+    if (secp256k1_fe_cmp_var(&xr, &secp256k1_ecdsa_const_p_minus_order) >= 0) {
+        /* xr + p >= n, so we can skip testing the second case. */
         return 0;
     }
-    secp256k1_fe_add(&xr, &secp256k1_ecdsa_consts->order_as_fe);
+    secp256k1_fe_add(&xr, &secp256k1_ecdsa_const_order_as_fe);
     if (secp256k1_gej_eq_x_var(&xr, &pr)) {
-        // (xr + n) * pr.z^2 mod p == pr.x, so the signature is valid.
+        /* (xr + n) * pr.z^2 mod p == pr.x, so the signature is valid. */
         return 1;
     }
     return 0;
 }
 
 static int secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256k1_ge_t *pubkey, const secp256k1_scalar_t *message, int recid) {
+    unsigned char brx[32];
+    secp256k1_fe_t fx;
+    secp256k1_ge_t x;
+    secp256k1_gej_t xj;
+    secp256k1_scalar_t rn, u1, u2;
+    secp256k1_gej_t qj;
+
     if (secp256k1_scalar_is_zero(&sig->r) || secp256k1_scalar_is_zero(&sig->s))
         return 0;
 
-    unsigned char brx[32];
     secp256k1_scalar_get_b32(brx, &sig->r);
-    secp256k1_fe_t fx;
     VERIFY_CHECK(secp256k1_fe_set_b32(&fx, brx)); /* brx comes from a scalar, so is less than the order; certainly less than p */
     if (recid & 2) {
-        if (secp256k1_fe_cmp_var(&fx, &secp256k1_ecdsa_consts->p_minus_order) >= 0)
+        if (secp256k1_fe_cmp_var(&fx, &secp256k1_ecdsa_const_p_minus_order) >= 0)
             return 0;
-        secp256k1_fe_add(&fx, &secp256k1_ecdsa_consts->order_as_fe);
+        secp256k1_fe_add(&fx, &secp256k1_ecdsa_const_order_as_fe);
     }
-    secp256k1_ge_t x;
     if (!secp256k1_ge_set_xo_var(&x, &fx, recid & 1))
         return 0;
-    secp256k1_gej_t xj;
     secp256k1_gej_set_ge(&xj, &x);
-    secp256k1_scalar_t rn, u1, u2;
     secp256k1_scalar_inverse_var(&rn, &sig->r);
     secp256k1_scalar_mul(&u1, &rn, message);
     secp256k1_scalar_negate(&u1, &u1);
     secp256k1_scalar_mul(&u2, &rn, &sig->s);
-    secp256k1_gej_t qj;
     secp256k1_ecmult(&qj, &xj, &u2, &u1);
     secp256k1_ge_set_gej_var(pubkey, &qj);
     return !secp256k1_gej_is_infinity(&qj);
 }
 
 static int secp256k1_ecdsa_sig_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *seckey, const secp256k1_scalar_t *message, const secp256k1_scalar_t *nonce, int *recid) {
+    unsigned char b[32];
     secp256k1_gej_t rp;
-    secp256k1_ecmult_gen(&rp, nonce);
     secp256k1_ge_t r;
+    secp256k1_scalar_t n;
+    int overflow = 0;
+
+    secp256k1_ecmult_gen(&rp, nonce);
     secp256k1_ge_set_gej(&r, &rp);
-    unsigned char b[32];
     secp256k1_fe_normalize(&r.x);
     secp256k1_fe_normalize(&r.y);
     secp256k1_fe_get_b32(b, &r.x);
-    int overflow = 0;
     secp256k1_scalar_set_b32(&sig->r, b, &overflow);
     if (secp256k1_scalar_is_zero(&sig->r)) {
         /* P.x = order is on the curve, so technically sig->r could end up zero, which would be an invalid signature. */
@@ -206,7 +211,6 @@ static int secp256k1_ecdsa_sig_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_
     }
     if (recid)
         *recid = (overflow ? 2 : 0) | (secp256k1_fe_is_odd(&r.y) ? 1 : 0);
-    secp256k1_scalar_t n;
     secp256k1_scalar_mul(&n, &sig->r, seckey);
     secp256k1_scalar_add(&n, &n, message);
     secp256k1_scalar_inverse(&sig->s, nonce);
@@ -224,9 +228,4 @@ static int secp256k1_ecdsa_sig_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_
     return 1;
 }
 
-static void secp256k1_ecdsa_sig_set_rs(secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *r, const secp256k1_scalar_t *s) {
-    sig->r = *r;
-    sig->s = *s;
-}
-
 #endif
diff --git a/src/eckey_impl.h b/src/eckey_impl.h
index b3fa7d9bd..3e06d05b4 100644
--- a/src/eckey_impl.h
+++ b/src/eckey_impl.h
@@ -51,13 +51,16 @@ static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char
 }
 
 static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned char *privkey, int privkeylen) {
+    unsigned char c[32] = {0};
     const unsigned char *end = privkey + privkeylen;
+    int lenb = 0;
+    int len = 0;
+    int overflow = 0;
     /* sequence header */
     if (end < privkey+1 || *privkey != 0x30)
         return 0;
     privkey++;
     /* sequence length constructor */
-    int lenb = 0;
     if (end < privkey+1 || !(*privkey & 0x80))
         return 0;
     lenb = *privkey & ~0x80; privkey++;
@@ -66,7 +69,6 @@ static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned
     if (end < privkey+lenb)
         return 0;
     /* sequence length */
-    int len = 0;
     len = privkey[lenb-1] | (lenb > 1 ? privkey[lenb-2] << 8 : 0);
     privkey += lenb;
     if (end < privkey+len)
@@ -78,8 +80,6 @@ static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned
     /* sequence element 1: octet string, up to 32 bytes */
     if (end < privkey+2 || privkey[0] != 0x04 || privkey[1] > 0x20 || end < privkey+2+privkey[1])
         return 0;
-    int overflow = 0;
-    unsigned char c[32] = {0};
     memcpy(c + 32 - privkey[1], privkey + 2, privkey[1]);
     secp256k1_scalar_set_b32(key, c, &overflow);
     memset(c, 0, 32);
@@ -88,8 +88,9 @@ static int secp256k1_eckey_privkey_parse(secp256k1_scalar_t *key, const unsigned
 
 static int secp256k1_eckey_privkey_serialize(unsigned char *privkey, int *privkeylen, const secp256k1_scalar_t *key, int compressed) {
     secp256k1_gej_t rp;
-    secp256k1_ecmult_gen(&rp, key);
     secp256k1_ge_t r;
+    int pubkeylen = 0;
+    secp256k1_ecmult_gen(&rp, key);
     secp256k1_ge_set_gej(&r, &rp);
     if (compressed) {
         static const unsigned char begin[] = {
@@ -110,7 +111,6 @@ static int secp256k1_eckey_privkey_serialize(unsigned char *privkey, int *privke
         memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin);
         secp256k1_scalar_get_b32(ptr, key); ptr += 32;
         memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle);
-        int pubkeylen = 0;
         if (!secp256k1_eckey_pubkey_serialize(&r, ptr, &pubkeylen, 1)) {
             return 0;
         }
@@ -137,7 +137,6 @@ static int secp256k1_eckey_privkey_serialize(unsigned char *privkey, int *privke
         memcpy(ptr, begin, sizeof(begin)); ptr += sizeof(begin);
         secp256k1_scalar_get_b32(ptr, key); ptr += 32;
         memcpy(ptr, middle, sizeof(middle)); ptr += sizeof(middle);
-        int pubkeylen = 0;
         if (!secp256k1_eckey_pubkey_serialize(&r, ptr, &pubkeylen, 0)) {
             return 0;
         }
@@ -156,8 +155,8 @@ static int secp256k1_eckey_privkey_tweak_add(secp256k1_scalar_t *key, const secp
 
 static int secp256k1_eckey_pubkey_tweak_add(secp256k1_ge_t *key, const secp256k1_scalar_t *tweak) {
     secp256k1_gej_t pt;
-    secp256k1_gej_set_ge(&pt, key);
     secp256k1_scalar_t one;
+    secp256k1_gej_set_ge(&pt, key);
     secp256k1_scalar_set_int(&one, 1);
     secp256k1_ecmult(&pt, &pt, &one, tweak);
 
@@ -176,12 +175,12 @@ static int secp256k1_eckey_privkey_tweak_mul(secp256k1_scalar_t *key, const secp
 }
 
 static int secp256k1_eckey_pubkey_tweak_mul(secp256k1_ge_t *key, const secp256k1_scalar_t *tweak) {
+    secp256k1_scalar_t zero;
+    secp256k1_gej_t pt;
     if (secp256k1_scalar_is_zero(tweak))
         return 0;
 
-    secp256k1_scalar_t zero;
     secp256k1_scalar_set_int(&zero, 0);
-    secp256k1_gej_t pt;
     secp256k1_gej_set_ge(&pt, key);
     secp256k1_ecmult(&pt, &pt, tweak, &zero);
     secp256k1_ge_set_gej(key, &pt);
diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index 48436316e..849452c7a 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -24,49 +24,53 @@ typedef struct {
      * None of the resulting prec group elements have a known scalar, and neither do any of
      * the intermediate sums while computing a*G.
      */
-    secp256k1_fe_t prec[64][16][2]; /* prec[j][i] = (16^j * i * G + U_i).{x,y} */
+    secp256k1_ge_storage_t prec[64][16]; /* prec[j][i] = 16^j * i * G + U_i */
 } secp256k1_ecmult_gen_consts_t;
 
 static const secp256k1_ecmult_gen_consts_t *secp256k1_ecmult_gen_consts = NULL;
 
 static void secp256k1_ecmult_gen_start(void) {
+    secp256k1_ge_t prec[1024];
+    secp256k1_gej_t gj;
+    secp256k1_gej_t nums_gej;
+    secp256k1_ecmult_gen_consts_t *ret;
+    int i, j;
     if (secp256k1_ecmult_gen_consts != NULL)
         return;
 
     /* Allocate the precomputation table. */
-    secp256k1_ecmult_gen_consts_t *ret = (secp256k1_ecmult_gen_consts_t*)checked_malloc(sizeof(secp256k1_ecmult_gen_consts_t));
+    ret = (secp256k1_ecmult_gen_consts_t*)checked_malloc(sizeof(secp256k1_ecmult_gen_consts_t));
 
     /* get the generator */
-    const secp256k1_ge_t *g = &secp256k1_ge_consts->g;
-    secp256k1_gej_t gj; secp256k1_gej_set_ge(&gj, g);
+    secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
 
     /* Construct a group element with no known corresponding scalar (nothing up my sleeve). */
-    secp256k1_gej_t nums_gej;
     {
-        static const unsigned char nums_b32[32] = "The scalar for this x is unknown";
+        static const unsigned char nums_b32[33] = "The scalar for this x is unknown";
         secp256k1_fe_t nums_x;
-        VERIFY_CHECK(secp256k1_fe_set_b32(&nums_x, nums_b32));
         secp256k1_ge_t nums_ge;
+        VERIFY_CHECK(secp256k1_fe_set_b32(&nums_x, nums_b32));
         VERIFY_CHECK(secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0));
         secp256k1_gej_set_ge(&nums_gej, &nums_ge);
         /* Add G to make the bits in x uniformly distributed. */
-        secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, g);
+        secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, &secp256k1_ge_const_g);
     }
 
     /* compute prec. */
-    secp256k1_ge_t prec[1024];
     {
         secp256k1_gej_t precj[1024]; /* Jacobian versions of prec. */
-        secp256k1_gej_t gbase; gbase = gj; /* 16^j * G */
-        secp256k1_gej_t numsbase; numsbase = nums_gej; /* 2^j * nums. */
-        for (int j=0; j<64; j++) {
+        secp256k1_gej_t gbase;
+        secp256k1_gej_t numsbase;
+        gbase = gj; /* 16^j * G */
+        numsbase = nums_gej; /* 2^j * nums. */
+        for (j = 0; j < 64; j++) {
             /* Set precj[j*16 .. j*16+15] to (numsbase, numsbase + gbase, ..., numsbase + 15*gbase). */
             precj[j*16] = numsbase;
-            for (int i=1; i<16; i++) {
+            for (i = 1; i < 16; i++) {
                 secp256k1_gej_add_var(&precj[j*16 + i], &precj[j*16 + i - 1], &gbase);
             }
             /* Multiply gbase by 16. */
-            for (int i=0; i<4; i++) {
+            for (i = 0; i < 4; i++) {
                 secp256k1_gej_double_var(&gbase, &gbase);
             }
             /* Multiply numbase by 2. */
@@ -79,11 +83,9 @@ static void secp256k1_ecmult_gen_start(void) {
         }
         secp256k1_ge_set_all_gej_var(1024, prec, precj);
     }
-    for (int j=0; j<64; j++) {
-        for (int i=0; i<16; i++) {
-            VERIFY_CHECK(!secp256k1_ge_is_infinity(&prec[j*16 + i]));
-            ret->prec[j][i][0] = prec[j*16 + i].x;
-            ret->prec[j][i][1] = prec[j*16 + i].y;
+    for (j = 0; j < 64; j++) {
+        for (i = 0; i < 16; i++) {
+            secp256k1_ge_to_storage(&ret->prec[j][i], &prec[j*16 + i]);
         }
     }
 
@@ -92,26 +94,29 @@ static void secp256k1_ecmult_gen_start(void) {
 }
 
 static void secp256k1_ecmult_gen_stop(void) {
+    secp256k1_ecmult_gen_consts_t *c;
     if (secp256k1_ecmult_gen_consts == NULL)
         return;
 
-    secp256k1_ecmult_gen_consts_t *c = (secp256k1_ecmult_gen_consts_t*)secp256k1_ecmult_gen_consts;
+    c = (secp256k1_ecmult_gen_consts_t*)secp256k1_ecmult_gen_consts;
     secp256k1_ecmult_gen_consts = NULL;
     free(c);
 }
 
 static void secp256k1_ecmult_gen(secp256k1_gej_t *r, const secp256k1_scalar_t *gn) {
     const secp256k1_ecmult_gen_consts_t *c = secp256k1_ecmult_gen_consts;
-    secp256k1_gej_set_infinity(r);
     secp256k1_ge_t add;
-    add.infinity = 0;
+    secp256k1_ge_storage_t adds;
     int bits;
-    for (int j=0; j<64; j++) {
+    int i, j;
+    secp256k1_gej_set_infinity(r);
+    add.infinity = 0;
+    for (j = 0; j < 64; j++) {
         bits = secp256k1_scalar_get_bits(gn, j * 4, 4);
-        for (int i=0; i<16; i++) {
-            secp256k1_fe_cmov(&add.x, &c->prec[j][i][0], i == bits);
-            secp256k1_fe_cmov(&add.y, &c->prec[j][i][1], i == bits);
+        for (i = 0; i < 16; i++) {
+            secp256k1_ge_storage_cmov(&adds, &c->prec[j][i], i == bits);
         }
+        secp256k1_ge_from_storage(&add, &adds);
         secp256k1_gej_add_ge(r, r, &add);
     }
     bits = 0;
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 345cfae73..ece0b0a45 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -37,22 +37,31 @@
  *  G is constant, so it only needs to be done once in advance.
  */
 static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const secp256k1_gej_t *a, int w) {
+    secp256k1_gej_t d;
+    int i;
     pre[0] = *a;
-    secp256k1_gej_t d; secp256k1_gej_double_var(&d, &pre[0]);
-    for (int i=1; i<(1 << (w-2)); i++)
+    secp256k1_gej_double_var(&d, &pre[0]);
+    for (i = 1; i < (1 << (w-2)); i++)
         secp256k1_gej_add_var(&pre[i], &d, &pre[i-1]);
 }
 
-static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const secp256k1_gej_t *a, int w) {
+static void secp256k1_ecmult_table_precomp_ge_storage_var(secp256k1_ge_storage_t *pre, const secp256k1_gej_t *a, int w) {
+    secp256k1_gej_t d;
+    int i;
     const int table_size = 1 << (w-2);
     secp256k1_gej_t *prej = checked_malloc(sizeof(secp256k1_gej_t) * table_size);
+    secp256k1_ge_t *prea = checked_malloc(sizeof(secp256k1_ge_t) * table_size);
     prej[0] = *a;
-    secp256k1_gej_t d; secp256k1_gej_double_var(&d, a);
-    for (int i=1; i<table_size; i++) {
+    secp256k1_gej_double_var(&d, a);
+    for (i = 1; i < table_size; i++) {
         secp256k1_gej_add_var(&prej[i], &d, &prej[i-1]);
     }
-    secp256k1_ge_set_all_gej_var(table_size, pre, prej);
+    secp256k1_ge_set_all_gej_var(table_size, prea, prej);
+    for (i = 0; i < table_size; i++) {
+        secp256k1_ge_to_storage(&pre[i], &prea[i]);
+    }
     free(prej);
+    free(prea);
 }
 
 /** The number of entries a table with precomputed multiples needs to have. */
@@ -60,51 +69,63 @@ static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const sec
 
 /** The following two macro retrieves a particular odd multiple from a table
  *  of precomputed multiples. */
-#define ECMULT_TABLE_GET(r,pre,n,w,neg) do { \
+#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) do { \
     VERIFY_CHECK(((n) & 1) == 1); \
     VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
     VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
     if ((n) > 0) \
         *(r) = (pre)[((n)-1)/2]; \
     else \
-        (neg)((r), &(pre)[(-(n)-1)/2]); \
+        secp256k1_gej_neg((r), &(pre)[(-(n)-1)/2]); \
+} while(0)
+#define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \
+    VERIFY_CHECK(((n) & 1) == 1); \
+    VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
+    VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
+    if ((n) > 0) \
+        secp256k1_ge_from_storage((r), &(pre)[((n)-1)/2]); \
+    else {\
+        secp256k1_ge_from_storage((r), &(pre)[(-(n)-1)/2]); \
+        secp256k1_ge_neg((r), (r)); \
+    } \
 } while(0)
-
-#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg)
-#define ECMULT_TABLE_GET_GE(r,pre,n,w)  ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg)
 
 typedef struct {
     /* For accelerating the computation of a*P + b*G: */
-    secp256k1_ge_t pre_g[ECMULT_TABLE_SIZE(WINDOW_G)];    /* odd multiples of the generator */
+    secp256k1_ge_storage_t pre_g[ECMULT_TABLE_SIZE(WINDOW_G)];    /* odd multiples of the generator */
 #ifdef USE_ENDOMORPHISM
-    secp256k1_ge_t pre_g_128[ECMULT_TABLE_SIZE(WINDOW_G)]; /* odd multiples of 2^128*generator */
+    secp256k1_ge_storage_t pre_g_128[ECMULT_TABLE_SIZE(WINDOW_G)]; /* odd multiples of 2^128*generator */
 #endif
 } secp256k1_ecmult_consts_t;
 
 static const secp256k1_ecmult_consts_t *secp256k1_ecmult_consts = NULL;
 
 static void secp256k1_ecmult_start(void) {
+    secp256k1_gej_t gj;
+    secp256k1_ecmult_consts_t *ret;
     if (secp256k1_ecmult_consts != NULL)
         return;
 
     /* Allocate the precomputation table. */
-    secp256k1_ecmult_consts_t *ret = (secp256k1_ecmult_consts_t*)checked_malloc(sizeof(secp256k1_ecmult_consts_t));
+    ret = (secp256k1_ecmult_consts_t*)checked_malloc(sizeof(secp256k1_ecmult_consts_t));
 
     /* get the generator */
-    const secp256k1_ge_t *g = &secp256k1_ge_consts->g;
-    secp256k1_gej_t gj; secp256k1_gej_set_ge(&gj, g);
+    secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
 
-#ifdef USE_ENDOMORPHISM
-    /* calculate 2^128*generator */
-    secp256k1_gej_t g_128j = gj;
-    for (int i=0; i<128; i++)
-        secp256k1_gej_double_var(&g_128j, &g_128j);
-#endif
 
     /* precompute the tables with odd multiples */
-    secp256k1_ecmult_table_precomp_ge_var(ret->pre_g, &gj, WINDOW_G);
+    secp256k1_ecmult_table_precomp_ge_storage_var(ret->pre_g, &gj, WINDOW_G);
+
 #ifdef USE_ENDOMORPHISM
-    secp256k1_ecmult_table_precomp_ge_var(ret->pre_g_128, &g_128j, WINDOW_G);
+    {
+        secp256k1_gej_t g_128j;
+        int i;
+        /* calculate 2^128*generator */
+        g_128j = gj;
+        for (i = 0; i < 128; i++)
+            secp256k1_gej_double_var(&g_128j, &g_128j);
+        secp256k1_ecmult_table_precomp_ge_storage_var(ret->pre_g_128, &g_128j, WINDOW_G);
+    }
 #endif
 
     /* Set the global pointer to the precomputation table. */
@@ -112,10 +133,11 @@ static void secp256k1_ecmult_start(void) {
 }
 
 static void secp256k1_ecmult_stop(void) {
+    secp256k1_ecmult_consts_t *c;
     if (secp256k1_ecmult_consts == NULL)
         return;
 
-    secp256k1_ecmult_consts_t *c = (secp256k1_ecmult_consts_t*)secp256k1_ecmult_consts;
+    c = (secp256k1_ecmult_consts_t*)secp256k1_ecmult_consts;
     secp256k1_ecmult_consts = NULL;
     free(c);
 }
@@ -129,16 +151,18 @@ static void secp256k1_ecmult_stop(void) {
  */
 static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w) {
     secp256k1_scalar_t s = *a;
-
+    int set_bits = 0;
+    int bit = 0;
     int sign = 1;
+
     if (secp256k1_scalar_get_bits(&s, 255, 1)) {
         secp256k1_scalar_negate(&s, &s);
         sign = -1;
     }
 
-    int set_bits = 0;
-    int bit = 0;
     while (bit < 256) {
+        int now;
+        int word;
         if (secp256k1_scalar_get_bits(&s, bit, 1) == 0) {
             bit++;
             continue;
@@ -146,11 +170,11 @@ static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w)
         while (set_bits < bit) {
             wnaf[set_bits++] = 0;
         }
-        int now = w;
+        now = w;
         if (bit + now > 256) {
             now = 256 - bit;
         }
-        int word = secp256k1_scalar_get_bits_var(&s, bit, now);
+        word = secp256k1_scalar_get_bits_var(&s, bit, now);
         if (word & (1 << (w-1))) {
             secp256k1_scalar_add_bit(&s, bit + w);
             wnaf[set_bits++] = sign * (word - (1 << w));
@@ -163,58 +187,74 @@ static int secp256k1_ecmult_wnaf(int *wnaf, const secp256k1_scalar_t *a, int w)
 }
 
 static void secp256k1_ecmult(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_scalar_t *na, const secp256k1_scalar_t *ng) {
+    secp256k1_gej_t tmpj;
+    secp256k1_gej_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_ge_t tmpa;
     const secp256k1_ecmult_consts_t *c = secp256k1_ecmult_consts;
-
 #ifdef USE_ENDOMORPHISM
+    secp256k1_gej_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_scalar_t na_1, na_lam;
+    /* Splitted G factors. */
+    secp256k1_scalar_t ng_1, ng_128;
+    int wnaf_na_1[130];
+    int wnaf_na_lam[130];
+    int bits_na_1;
+    int bits_na_lam;
+    int wnaf_ng_1[129];
+    int bits_ng_1;
+    int wnaf_ng_128[129];
+    int bits_ng_128;
+#else
+    int wnaf_na[256];
+    int bits_na;
+    int wnaf_ng[257];
+    int bits_ng;
+#endif
+    int i;
+    int bits;
+
+#ifdef USE_ENDOMORPHISM
     /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
     secp256k1_scalar_split_lambda_var(&na_1, &na_lam, na);
 
     /* build wnaf representation for na_1 and na_lam. */
-    int wnaf_na_1[130];   int bits_na_1   = secp256k1_ecmult_wnaf(wnaf_na_1,   &na_1,   WINDOW_A);
-    int wnaf_na_lam[130]; int bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, &na_lam, WINDOW_A);
+    bits_na_1   = secp256k1_ecmult_wnaf(wnaf_na_1,   &na_1,   WINDOW_A);
+    bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, &na_lam, WINDOW_A);
     VERIFY_CHECK(bits_na_1 <= 130);
     VERIFY_CHECK(bits_na_lam <= 130);
-    int bits = bits_na_1;
+    bits = bits_na_1;
     if (bits_na_lam > bits) bits = bits_na_lam;
 #else
     /* build wnaf representation for na. */
-    int wnaf_na[256];     int bits_na     = secp256k1_ecmult_wnaf(wnaf_na,     na,      WINDOW_A);
-    int bits = bits_na;
+    bits_na     = secp256k1_ecmult_wnaf(wnaf_na,     na,      WINDOW_A);
+    bits = bits_na;
 #endif
 
     /* calculate odd multiples of a */
-    secp256k1_gej_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_ecmult_table_precomp_gej_var(pre_a, a, WINDOW_A);
 
 #ifdef USE_ENDOMORPHISM
-    secp256k1_gej_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
-    for (int i=0; i<ECMULT_TABLE_SIZE(WINDOW_A); i++)
+    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++)
         secp256k1_gej_mul_lambda(&pre_a_lam[i], &pre_a[i]);
 
-    /* Splitted G factors. */
-    secp256k1_scalar_t ng_1, ng_128;
-
     /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */
     secp256k1_scalar_split_128(&ng_1, &ng_128, ng);
 
     /* Build wnaf representation for ng_1 and ng_128 */
-    int wnaf_ng_1[129];   int bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   &ng_1,   WINDOW_G);
-    int wnaf_ng_128[129]; int bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, &ng_128, WINDOW_G);
+    bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   &ng_1,   WINDOW_G);
+    bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, &ng_128, WINDOW_G);
     if (bits_ng_1 > bits) bits = bits_ng_1;
     if (bits_ng_128 > bits) bits = bits_ng_128;
 #else
-    int wnaf_ng[257];     int bits_ng     = secp256k1_ecmult_wnaf(wnaf_ng,     ng,      WINDOW_G);
+    bits_ng     = secp256k1_ecmult_wnaf(wnaf_ng,     ng,      WINDOW_G);
     if (bits_ng > bits) bits = bits_ng;
 #endif
 
     secp256k1_gej_set_infinity(r);
-    secp256k1_gej_t tmpj;
-    secp256k1_ge_t tmpa;
 
-    for (int i=bits-1; i>=0; i--) {
-        secp256k1_gej_double_var(r, r);
+    for (i = bits-1; i >= 0; i--) {
         int n;
+        secp256k1_gej_double_var(r, r);
 #ifdef USE_ENDOMORPHISM
         if (i < bits_na_1 && (n = wnaf_na_1[i])) {
             ECMULT_TABLE_GET_GEJ(&tmpj, pre_a, n, WINDOW_A);
@@ -225,11 +265,11 @@ static void secp256k1_ecmult(secp256k1_gej_t *r, const secp256k1_gej_t *a, const
             secp256k1_gej_add_var(r, r, &tmpj);
         }
         if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
-            ECMULT_TABLE_GET_GE(&tmpa, c->pre_g, n, WINDOW_G);
+            ECMULT_TABLE_GET_GE_STORAGE(&tmpa, c->pre_g, n, WINDOW_G);
             secp256k1_gej_add_ge_var(r, r, &tmpa);
         }
         if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
-            ECMULT_TABLE_GET_GE(&tmpa, c->pre_g_128, n, WINDOW_G);
+            ECMULT_TABLE_GET_GE_STORAGE(&tmpa, c->pre_g_128, n, WINDOW_G);
             secp256k1_gej_add_ge_var(r, r, &tmpa);
         }
 #else
@@ -238,7 +278,7 @@ static void secp256k1_ecmult(secp256k1_gej_t *r, const secp256k1_gej_t *a, const
             secp256k1_gej_add_var(r, r, &tmpj);
         }
         if (i < bits_ng && (n = wnaf_ng[i])) {
-            ECMULT_TABLE_GET_GE(&tmpa, c->pre_g, n, WINDOW_G);
+            ECMULT_TABLE_GET_GE_STORAGE(&tmpa, c->pre_g, n, WINDOW_G);
             secp256k1_gej_add_ge_var(r, r, &tmpa);
         }
 #endif
diff --git a/src/field.h b/src/field.h
index 14e2b813c..9e6d7d3c0 100644
--- a/src/field.h
+++ b/src/field.h
@@ -30,21 +30,6 @@
 #error "Please select field implementation"
 #endif
 
-typedef struct {
-#ifndef USE_NUM_NONE
-    secp256k1_num_t p;
-#endif
-    secp256k1_fe_t order;
-} secp256k1_fe_consts_t;
-
-static const secp256k1_fe_consts_t *secp256k1_fe_consts = NULL;
-
-/** Initialize field element precomputation data. */
-static void secp256k1_fe_start(void);
-
-/** Unload field element precomputation data. */
-static void secp256k1_fe_stop(void);
-
 /** Normalize a field element. */
 static void secp256k1_fe_normalize(secp256k1_fe_t *r);
 
@@ -117,15 +102,15 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a);
 /** Calculate the (modular) inverses of a batch of field elements. Requires the inputs' magnitudes to be
  *  at most 8. The output magnitudes are 1 (but not guaranteed to be normalized). The inputs and
  *  outputs must not overlap in memory. */
-static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]);
+static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp256k1_fe_t *a);
 
-/** Convert a field element to a hexadecimal string. */
-static void secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a);
+/** Convert a field element to the storage type. */
+static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t*);
 
-/** Convert a hexadecimal string to a field element. */
-static int secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen);
+/** Convert a field element back from the storage type. */
+static void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t*);
 
 /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
-static void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag);
+static void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag);
 
 #endif
diff --git a/src/field_10x26.h b/src/field_10x26.h
index 66fb3f256..44bce6525 100644
--- a/src/field_10x26.h
+++ b/src/field_10x26.h
@@ -18,4 +18,30 @@ typedef struct {
 #endif
 } secp256k1_fe_t;
 
+/* Unpacks a constant into a overlapping multi-limbed FE element. */
+#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \
+    (d0) & 0x3FFFFFFUL, \
+    ((d0) >> 26) | ((d1) & 0xFFFFFUL) << 6, \
+    ((d1) >> 20) | ((d2) & 0x3FFFUL) << 12, \
+    ((d2) >> 14) | ((d3) & 0xFFUL) << 18, \
+    ((d3) >> 8) | ((d4) & 0x3) << 24, \
+    ((d4) >> 2) & 0x3FFFFFFUL, \
+    ((d4) >> 28) | ((d5) & 0x3FFFFFUL) << 4, \
+    ((d5) >> 22) | ((d6) & 0xFFFF) << 10, \
+    ((d6) >> 16) | ((d7) & 0x3FF) << 16, \
+    ((d7) >> 10) \
+}
+
+#ifdef VERIFY
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0)), 1, 1}
+#else
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0))}
+#endif
+
+typedef struct {
+    uint32_t n[8];
+} secp256k1_fe_storage_t;
+
+#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}
+
 #endif
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index 9ef60a807..0afbb18a4 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -13,9 +13,6 @@
 #include "num.h"
 #include "field.h"
 
-static void secp256k1_fe_inner_start(void) {}
-static void secp256k1_fe_inner_stop(void) {}
-
 #ifdef VERIFY
 static void secp256k1_fe_verify(const secp256k1_fe_t *a) {
     const uint32_t *d = a->n;
@@ -54,8 +51,8 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
 
     /* Reduce t9 at the start so there will be at most a single carry from the first pass */
-    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
     uint32_t m;
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
 
     /* The first pass ensures the magnitude is 1, ... */
     t0 += x * 0x3D1UL; t1 += (x << 6);
@@ -140,8 +137,8 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
 
     /* Reduce t9 at the start so there will be at most a single carry from the first pass */
-    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
     uint32_t m;
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
 
     /* The first pass ensures the magnitude is 1, ... */
     t0 += x * 0x3D1UL; t1 += (x << 6);
@@ -195,12 +192,12 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
     uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
 
-    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
-    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
-
     /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
     uint32_t z0, z1;
 
+    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
+
     /* The first pass ensures the magnitude is 1, ... */
     t0 += x * 0x3D1UL; t1 += (x << 6);
     t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0  = t0; z1  = t0 ^ 0x3D0UL;
@@ -221,23 +218,36 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
 }
 
 static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) {
-    uint32_t t0 = r->n[0], t9 = r->n[9];
+    uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+    uint32_t z0, z1;
+    uint32_t x;
+
+    t0 = r->n[0];
+    t9 = r->n[9];
 
     /* Reduce t9 at the start so there will be at most a single carry from the first pass */
-    uint32_t x = t9 >> 22;
+    x = t9 >> 22;
 
     /* The first pass ensures the magnitude is 1, ... */
     t0 += x * 0x3D1UL;
 
     /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
-    uint32_t z0 = t0 & 0x3FFFFFFUL, z1 = z0 ^ 0x3D0UL;
+    z0 = t0 & 0x3FFFFFFUL;
+    z1 = z0 ^ 0x3D0UL;
 
     /* Fast return path should catch the majority of cases */
     if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL))
         return 0;
 
-    uint32_t t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
-             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8];
+    t1 = r->n[1];
+    t2 = r->n[2];
+    t3 = r->n[3];
+    t4 = r->n[4];
+    t5 = r->n[5];
+    t6 = r->n[6];
+    t7 = r->n[7];
+    t8 = r->n[8];
+
     t9 &= 0x03FFFFFUL;
     t1 += (x << 6);
 
@@ -269,11 +279,11 @@ SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
 }
 
 SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
+    const uint32_t *t = a->n;
 #ifdef VERIFY
     VERIFY_CHECK(a->normalized);
     secp256k1_fe_verify(a);
 #endif
-    const uint32_t *t = a->n;
     return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
 }
 
@@ -286,23 +296,25 @@ SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
 }
 
 SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) {
+    int i;
 #ifdef VERIFY
     a->magnitude = 0;
     a->normalized = 1;
 #endif
-    for (int i=0; i<10; i++) {
+    for (i=0; i<10; i++) {
         a->n[i] = 0;
     }
 }
 
 static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+    int i;
 #ifdef VERIFY
     VERIFY_CHECK(a->normalized);
     VERIFY_CHECK(b->normalized);
     secp256k1_fe_verify(a);
     secp256k1_fe_verify(b);
 #endif
-    for (int i = 9; i >= 0; i--) {
+    for (i = 9; i >= 0; i--) {
         if (a->n[i] > b->n[i]) return 1;
         if (a->n[i] < b->n[i]) return -1;
     }
@@ -310,10 +322,12 @@ static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b
 }
 
 static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
+    int i;
     r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
     r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
-    for (int i=0; i<32; i++) {
-        for (int j=0; j<4; j++) {
+    for (i=0; i<32; i++) {
+        int j;
+        for (j=0; j<4; j++) {
             int limb = (8*i+2*j)/26;
             int shift = (8*i+2*j)%26;
             r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
@@ -332,13 +346,15 @@ static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
 
 /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
 static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
+    int i;
 #ifdef VERIFY
     VERIFY_CHECK(a->normalized);
     secp256k1_fe_verify(a);
 #endif
-    for (int i=0; i<32; i++) {
+    for (i=0; i<32; i++) {
+        int j;
         int c = 0;
-        for (int j=0; j<4; j++) {
+        for (j=0; j<4; j++) {
             int limb = (8*i+2*j)/26;
             int shift = (8*i+2*j)%26;
             c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
@@ -415,6 +431,11 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1
 #endif
 
 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
+    uint64_t c, d;
+    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
+    uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
+    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
+
     VERIFY_BITS(a[0], 30);
     VERIFY_BITS(a[1], 30);
     VERIFY_BITS(a[2], 30);
@@ -436,14 +457,11 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
     VERIFY_BITS(b[8], 30);
     VERIFY_BITS(b[9], 26);
 
-    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
     /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
      *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
      *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
      */
 
-    uint64_t c, d;
-
     d  = (uint64_t)a[0] * b[9]
        + (uint64_t)a[1] * b[8]
        + (uint64_t)a[2] * b[7]
@@ -456,7 +474,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[0];
     /* VERIFY_BITS(d, 64); */
     /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
-    uint32_t t9 = d & M; d >>= 26;
+    t9 = d & M; d >>= 26;
     VERIFY_BITS(t9, 26);
     VERIFY_BITS(d, 38);
     /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
@@ -475,12 +493,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[1];
     VERIFY_BITS(d, 63);
     /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
-    uint64_t u0 = d & M; d >>= 26; c += u0 * R0;
+    u0 = d & M; d >>= 26; c += u0 * R0;
     VERIFY_BITS(u0, 26);
     VERIFY_BITS(d, 37);
     VERIFY_BITS(c, 61);
     /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
-    uint32_t t0 = c & M; c >>= 26; c += u0 * R1;
+    t0 = c & M; c >>= 26; c += u0 * R1;
     VERIFY_BITS(t0, 26);
     VERIFY_BITS(c, 37);
     /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
@@ -500,12 +518,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[2];
     VERIFY_BITS(d, 63);
     /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
-    uint64_t u1 = d & M; d >>= 26; c += u1 * R0;
+    u1 = d & M; d >>= 26; c += u1 * R0;
     VERIFY_BITS(u1, 26);
     VERIFY_BITS(d, 37);
     VERIFY_BITS(c, 63);
     /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
-    uint32_t t1 = c & M; c >>= 26; c += u1 * R1;
+    t1 = c & M; c >>= 26; c += u1 * R1;
     VERIFY_BITS(t1, 26);
     VERIFY_BITS(c, 38);
     /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
@@ -525,12 +543,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[3];
     VERIFY_BITS(d, 63);
     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
-    uint64_t u2 = d & M; d >>= 26; c += u2 * R0;
+    u2 = d & M; d >>= 26; c += u2 * R0;
     VERIFY_BITS(u2, 26);
     VERIFY_BITS(d, 37);
     VERIFY_BITS(c, 63);
     /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
-    uint32_t t2 = c & M; c >>= 26; c += u2 * R1;
+    t2 = c & M; c >>= 26; c += u2 * R1;
     VERIFY_BITS(t2, 26);
     VERIFY_BITS(c, 38);
     /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
@@ -550,12 +568,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[4];
     VERIFY_BITS(d, 63);
     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
-    uint64_t u3 = d & M; d >>= 26; c += u3 * R0;
+    u3 = d & M; d >>= 26; c += u3 * R0;
     VERIFY_BITS(u3, 26);
     VERIFY_BITS(d, 37);
     /* VERIFY_BITS(c, 64); */
     /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
-    uint32_t t3 = c & M; c >>= 26; c += u3 * R1;
+    t3 = c & M; c >>= 26; c += u3 * R1;
     VERIFY_BITS(t3, 26);
     VERIFY_BITS(c, 39);
     /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
@@ -575,12 +593,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[5];
     VERIFY_BITS(d, 62);
     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
-    uint64_t u4 = d & M; d >>= 26; c += u4 * R0;
+    u4 = d & M; d >>= 26; c += u4 * R0;
     VERIFY_BITS(u4, 26);
     VERIFY_BITS(d, 36);
     /* VERIFY_BITS(c, 64); */
     /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
-    uint32_t t4 = c & M; c >>= 26; c += u4 * R1;
+    t4 = c & M; c >>= 26; c += u4 * R1;
     VERIFY_BITS(t4, 26);
     VERIFY_BITS(c, 39);
     /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
@@ -600,12 +618,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[6];
     VERIFY_BITS(d, 62);
     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
-    uint64_t u5 = d & M; d >>= 26; c += u5 * R0;
+    u5 = d & M; d >>= 26; c += u5 * R0;
     VERIFY_BITS(u5, 26);
     VERIFY_BITS(d, 36);
     /* VERIFY_BITS(c, 64); */
     /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
-    uint32_t t5 = c & M; c >>= 26; c += u5 * R1;
+    t5 = c & M; c >>= 26; c += u5 * R1;
     VERIFY_BITS(t5, 26);
     VERIFY_BITS(c, 39);
     /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
@@ -625,12 +643,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[7];
     VERIFY_BITS(d, 61);
     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
-    uint64_t u6 = d & M; d >>= 26; c += u6 * R0;
+    u6 = d & M; d >>= 26; c += u6 * R0;
     VERIFY_BITS(u6, 26);
     VERIFY_BITS(d, 35);
     /* VERIFY_BITS(c, 64); */
     /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
-    uint32_t t6 = c & M; c >>= 26; c += u6 * R1;
+    t6 = c & M; c >>= 26; c += u6 * R1;
     VERIFY_BITS(t6, 26);
     VERIFY_BITS(c, 39);
     /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
@@ -651,13 +669,13 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[9] * b[8];
     VERIFY_BITS(d, 58);
     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
-    uint64_t u7 = d & M; d >>= 26; c += u7 * R0;
+    u7 = d & M; d >>= 26; c += u7 * R0;
     VERIFY_BITS(u7, 26);
     VERIFY_BITS(d, 32);
     /* VERIFY_BITS(c, 64); */
     VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
     /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
-    uint32_t t7 = c & M; c >>= 26; c += u7 * R1;
+    t7 = c & M; c >>= 26; c += u7 * R1;
     VERIFY_BITS(t7, 26);
     VERIFY_BITS(c, 38);
     /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
@@ -678,7 +696,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
     d += (uint64_t)a[9] * b[9];
     VERIFY_BITS(d, 57);
     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    uint64_t u8 = d & M; d >>= 26; c += u8 * R0;
+    u8 = d & M; d >>= 26; c += u8 * R0;
     VERIFY_BITS(u8, 26);
     VERIFY_BITS(d, 31);
     /* VERIFY_BITS(c, 64); */
@@ -742,6 +760,11 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
 }
 
 SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
+    uint64_t c, d;
+    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
+    uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
+    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
+
     VERIFY_BITS(a[0], 30);
     VERIFY_BITS(a[1], 30);
     VERIFY_BITS(a[2], 30);
@@ -753,14 +776,11 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
     VERIFY_BITS(a[8], 30);
     VERIFY_BITS(a[9], 26);
 
-    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
     /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
      *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
      *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
      */
 
-    uint64_t c, d;
-
     d  = (uint64_t)(a[0]*2) * a[9]
        + (uint64_t)(a[1]*2) * a[8]
        + (uint64_t)(a[2]*2) * a[7]
@@ -768,7 +788,7 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
        + (uint64_t)(a[4]*2) * a[5];
     /* VERIFY_BITS(d, 64); */
     /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
-    uint32_t t9 = d & M; d >>= 26;
+    t9 = d & M; d >>= 26;
     VERIFY_BITS(t9, 26);
     VERIFY_BITS(d, 38);
     /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
@@ -783,12 +803,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[5] * a[5];
     VERIFY_BITS(d, 63);
     /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
-    uint64_t u0 = d & M; d >>= 26; c += u0 * R0;
+    u0 = d & M; d >>= 26; c += u0 * R0;
     VERIFY_BITS(u0, 26);
     VERIFY_BITS(d, 37);
     VERIFY_BITS(c, 61);
     /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
-    uint32_t t0 = c & M; c >>= 26; c += u0 * R1;
+    t0 = c & M; c >>= 26; c += u0 * R1;
     VERIFY_BITS(t0, 26);
     VERIFY_BITS(c, 37);
     /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
@@ -803,12 +823,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
        + (uint64_t)(a[5]*2) * a[6];
     VERIFY_BITS(d, 63);
     /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
-    uint64_t u1 = d & M; d >>= 26; c += u1 * R0;
+    u1 = d & M; d >>= 26; c += u1 * R0;
     VERIFY_BITS(u1, 26);
     VERIFY_BITS(d, 37);
     VERIFY_BITS(c, 63);
     /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
-    uint32_t t1 = c & M; c >>= 26; c += u1 * R1;
+    t1 = c & M; c >>= 26; c += u1 * R1;
     VERIFY_BITS(t1, 26);
     VERIFY_BITS(c, 38);
     /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
@@ -824,12 +844,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[6] * a[6];
     VERIFY_BITS(d, 63);
     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
-    uint64_t u2 = d & M; d >>= 26; c += u2 * R0;
+    u2 = d & M; d >>= 26; c += u2 * R0;
     VERIFY_BITS(u2, 26);
     VERIFY_BITS(d, 37);
     VERIFY_BITS(c, 63);
     /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
-    uint32_t t2 = c & M; c >>= 26; c += u2 * R1;
+    t2 = c & M; c >>= 26; c += u2 * R1;
     VERIFY_BITS(t2, 26);
     VERIFY_BITS(c, 38);
     /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
@@ -844,12 +864,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
        + (uint64_t)(a[6]*2) * a[7];
     VERIFY_BITS(d, 63);
     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
-    uint64_t u3 = d & M; d >>= 26; c += u3 * R0;
+    u3 = d & M; d >>= 26; c += u3 * R0;
     VERIFY_BITS(u3, 26);
     VERIFY_BITS(d, 37);
     /* VERIFY_BITS(c, 64); */
     /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
-    uint32_t t3 = c & M; c >>= 26; c += u3 * R1;
+    t3 = c & M; c >>= 26; c += u3 * R1;
     VERIFY_BITS(t3, 26);
     VERIFY_BITS(c, 39);
     /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
@@ -865,12 +885,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[7] * a[7];
     VERIFY_BITS(d, 62);
     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
-    uint64_t u4 = d & M; d >>= 26; c += u4 * R0;
+    u4 = d & M; d >>= 26; c += u4 * R0;
     VERIFY_BITS(u4, 26);
     VERIFY_BITS(d, 36);
     /* VERIFY_BITS(c, 64); */
     /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
-    uint32_t t4 = c & M; c >>= 26; c += u4 * R1;
+    t4 = c & M; c >>= 26; c += u4 * R1;
     VERIFY_BITS(t4, 26);
     VERIFY_BITS(c, 39);
     /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
@@ -885,12 +905,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
        + (uint64_t)(a[7]*2) * a[8];
     VERIFY_BITS(d, 62);
     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
-    uint64_t u5 = d & M; d >>= 26; c += u5 * R0;
+    u5 = d & M; d >>= 26; c += u5 * R0;
     VERIFY_BITS(u5, 26);
     VERIFY_BITS(d, 36);
     /* VERIFY_BITS(c, 64); */
     /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
-    uint32_t t5 = c & M; c >>= 26; c += u5 * R1;
+    t5 = c & M; c >>= 26; c += u5 * R1;
     VERIFY_BITS(t5, 26);
     VERIFY_BITS(c, 39);
     /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
@@ -906,12 +926,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
        + (uint64_t)a[8] * a[8];
     VERIFY_BITS(d, 61);
     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
-    uint64_t u6 = d & M; d >>= 26; c += u6 * R0;
+    u6 = d & M; d >>= 26; c += u6 * R0;
     VERIFY_BITS(u6, 26);
     VERIFY_BITS(d, 35);
     /* VERIFY_BITS(c, 64); */
     /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
-    uint32_t t6 = c & M; c >>= 26; c += u6 * R1;
+    t6 = c & M; c >>= 26; c += u6 * R1;
     VERIFY_BITS(t6, 26);
     VERIFY_BITS(c, 39);
     /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
@@ -927,13 +947,13 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
     d += (uint64_t)(a[8]*2) * a[9];
     VERIFY_BITS(d, 58);
     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
-    uint64_t u7 = d & M; d >>= 26; c += u7 * R0;
+    u7 = d & M; d >>= 26; c += u7 * R0;
     VERIFY_BITS(u7, 26);
     VERIFY_BITS(d, 32);
     /* VERIFY_BITS(c, 64); */
     VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
     /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
-    uint32_t t7 = c & M; c >>= 26; c += u7 * R1;
+    t7 = c & M; c >>= 26; c += u7 * R1;
     VERIFY_BITS(t7, 26);
     VERIFY_BITS(c, 38);
     /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
@@ -950,7 +970,7 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
     d += (uint64_t)a[9] * a[9];
     VERIFY_BITS(d, 57);
     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
-    uint64_t u8 = d & M; d >>= 26; c += u8 * R0;
+    u8 = d & M; d >>= 26; c += u8 * R0;
     VERIFY_BITS(u8, 26);
     VERIFY_BITS(d, 31);
     /* VERIFY_BITS(c, 64); */
@@ -1043,8 +1063,10 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #endif
 }
 
-static void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) {
-    uint32_t mask0 = flag + ~((uint32_t)0), mask1 = ~mask0;
+static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) {
+    uint32_t mask0, mask1;
+    mask0 = flag + ~((uint32_t)0);
+    mask1 = ~mask0;
     r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
     r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
     r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
@@ -1053,13 +1075,36 @@ static void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int fl
     r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
     r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
     r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
-    r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
-    r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
+}
+
+static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t *a) {
 #ifdef VERIFY
-    if (flag) {
-        r->magnitude = a->magnitude;
-        r->normalized = a->normalized;
-    }
+    VERIFY_CHECK(a->normalized);
+#endif
+    r->n[0] = a->n[0] | a->n[1] << 26;
+    r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
+    r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
+    r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
+    r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
+    r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
+    r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
+    r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
+}
+
+static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t *a) {
+    r->n[0] = a->n[0] & 0x3FFFFFFUL;
+    r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
+    r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
+    r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
+    r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
+    r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
+    r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
+    r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
+    r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
+    r->n[9] = a->n[7] >> 10;
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
 #endif
 }
 
diff --git a/src/field_5x52.h b/src/field_5x52.h
index aeb0a6a1e..4513d36f4 100644
--- a/src/field_5x52.h
+++ b/src/field_5x52.h
@@ -18,4 +18,30 @@ typedef struct {
 #endif
 } secp256k1_fe_t;
 
+/* Unpacks a constant into a overlapping multi-limbed FE element. */
+#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \
+    (d0) | ((uint64_t)(d1) & 0xFFFFFUL) << 32, \
+    ((d1) >> 20) | ((uint64_t)(d2)) << 12 | ((uint64_t)(d3) & 0xFFUL) << 44, \
+    ((d3) >> 8) | ((uint64_t)(d4) & 0xFFFFFFFUL) << 24, \
+    ((d4) >> 28) | ((uint64_t)(d5)) << 4 | ((uint64_t)(d6) & 0xFFFFUL) << 36, \
+    ((d6) >> 16) | ((uint64_t)(d7)) << 16 \
+}
+
+#ifdef VERIFY
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0)), 1, 1}
+#else
+#define SECP256K1_FE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {SECP256K1_FE_CONST_INNER((d7), (d6), (d5), (d4), (d3), (d2), (d1), (d0))}
+#endif
+
+typedef struct {
+    uint64_t n[4];
+} secp256k1_fe_storage_t;
+
+#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ \
+    (d0) | ((uint64_t)(d1)) << 32, \
+    (d2) | ((uint64_t)(d3)) << 32, \
+    (d4) | ((uint64_t)(d5)) << 32, \
+    (d6) | ((uint64_t)(d7)) << 32 \
+}}
+
 #endif
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 4db9e6f5f..2f9c8704a 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -30,13 +30,11 @@
  *  output.
  */
 
-static void secp256k1_fe_inner_start(void) {}
-static void secp256k1_fe_inner_stop(void) {}
-
 #ifdef VERIFY
 static void secp256k1_fe_verify(const secp256k1_fe_t *a) {
     const uint64_t *d = a->n;
     int m = a->normalized ? 1 : 2 * a->magnitude, r = 1;
+   /* secp256k1 'p' value defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. */
     r &= (d[0] <= 0xFFFFFFFFFFFFFULL * m);
     r &= (d[1] <= 0xFFFFFFFFFFFFFULL * m);
     r &= (d[2] <= 0xFFFFFFFFFFFFFULL * m);
@@ -62,8 +60,8 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
     uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
 
     /* Reduce t4 at the start so there will be at most a single carry from the first pass */
-    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
     uint64_t m;
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
 
     /* The first pass ensures the magnitude is 1, ... */
     t0 += x * 0x1000003D1ULL;
@@ -129,8 +127,8 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
     uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
 
     /* Reduce t4 at the start so there will be at most a single carry from the first pass */
-    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
     uint64_t m;
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
 
     /* The first pass ensures the magnitude is 1, ... */
     t0 += x * 0x1000003D1ULL;
@@ -172,12 +170,12 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
 static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
     uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
 
-    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
-    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
-
     /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
     uint64_t z0, z1;
 
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
     /* The first pass ensures the magnitude is 1, ... */
     t0 += x * 0x1000003D1ULL;
     t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0  = t0; z1  = t0 ^ 0x1000003D0ULL;
@@ -193,22 +191,31 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
 }
 
 static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) {
-    uint64_t t0 = r->n[0], t4 = r->n[4];
+    uint64_t t0, t1, t2, t3, t4;
+    uint64_t z0, z1;
+    uint64_t x;
+
+    t0 = r->n[0];
+    t4 = r->n[4];
 
     /* Reduce t4 at the start so there will be at most a single carry from the first pass */
-    uint64_t x = t4 >> 48;
+    x = t4 >> 48;
 
     /* The first pass ensures the magnitude is 1, ... */
     t0 += x * 0x1000003D1ULL;
 
     /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
-    uint64_t z0 = t0 & 0xFFFFFFFFFFFFFULL, z1 = z0 ^ 0x1000003D0ULL;
+    z0 = t0 & 0xFFFFFFFFFFFFFULL;
+    z1 = z0 ^ 0x1000003D0ULL;
 
     /* Fast return path should catch the majority of cases */
     if ((z0 != 0ULL) & (z1 != 0xFFFFFFFFFFFFFULL))
         return 0;
 
-    uint64_t t1 = r->n[1], t2 = r->n[2], t3 = r->n[3];
+    t1 = r->n[1];
+    t2 = r->n[2];
+    t3 = r->n[3];
+
     t4 &= 0x0FFFFFFFFFFFFULL;
 
     t1 += (t0 >> 52); t0  = z0;
@@ -234,11 +241,11 @@ SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
 }
 
 SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
+    const uint64_t *t = a->n;
 #ifdef VERIFY
     VERIFY_CHECK(a->normalized);
     secp256k1_fe_verify(a);
 #endif
-    const uint64_t *t = a->n;
     return (t[0] | t[1] | t[2] | t[3] | t[4]) == 0;
 }
 
@@ -251,23 +258,25 @@ SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
 }
 
 SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) {
+    int i;
 #ifdef VERIFY
     a->magnitude = 0;
     a->normalized = 1;
 #endif
-    for (int i=0; i<5; i++) {
+    for (i=0; i<5; i++) {
         a->n[i] = 0;
     }
 }
 
 static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+    int i;
 #ifdef VERIFY
     VERIFY_CHECK(a->normalized);
     VERIFY_CHECK(b->normalized);
     secp256k1_fe_verify(a);
     secp256k1_fe_verify(b);
 #endif
-    for (int i = 4; i >= 0; i--) {
+    for (i = 4; i >= 0; i--) {
         if (a->n[i] > b->n[i]) return 1;
         if (a->n[i] < b->n[i]) return -1;
     }
@@ -275,9 +284,11 @@ static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b
 }
 
 static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
+    int i;
     r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
-    for (int i=0; i<32; i++) {
-        for (int j=0; j<2; j++) {
+    for (i=0; i<32; i++) {
+        int j;
+        for (j=0; j<2; j++) {
             int limb = (8*i+4*j)/52;
             int shift = (8*i+4*j)%52;
             r->n[limb] |= (uint64_t)((a[31-i] >> (4*j)) & 0xF) << shift;
@@ -296,13 +307,15 @@ static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
 
 /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
 static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
+    int i;
 #ifdef VERIFY
     VERIFY_CHECK(a->normalized);
     secp256k1_fe_verify(a);
 #endif
-    for (int i=0; i<32; i++) {
+    for (i=0; i<32; i++) {
+        int j;
         int c = 0;
-        for (int j=0; j<2; j++) {
+        for (j=0; j<2; j++) {
             int limb = (8*i+4*j)/52;
             int shift = (8*i+4*j)%52;
             c |= ((a->n[limb] >> shift) & 0xF) << (4 * j);
@@ -386,18 +399,35 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #endif
 }
 
-static void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) {
-    uint64_t mask0 = flag + ~((uint64_t)0), mask1 = ~mask0;
+static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) {
+    uint64_t mask0, mask1;
+    mask0 = flag + ~((uint64_t)0);
+    mask1 = ~mask0;
     r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
     r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
     r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
     r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
-    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+}
+
+static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t *a) {
 #ifdef VERIFY
-    if (flag) {
-        r->magnitude = a->magnitude;
-        r->normalized = a->normalized;
-    }
+    VERIFY_CHECK(a->normalized);
+#endif
+    r->n[0] = a->n[0] | a->n[1] << 52;
+    r->n[1] = a->n[1] >> 12 | a->n[2] << 40;
+    r->n[2] = a->n[2] >> 24 | a->n[3] << 28;
+    r->n[3] = a->n[3] >> 36 | a->n[4] << 16;
+}
+
+static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t *a) {
+    r->n[0] = a->n[0] & 0xFFFFFFFFFFFFFULL;
+    r->n[1] = a->n[0] >> 52 | ((a->n[1] << 12) & 0xFFFFFFFFFFFFFULL);
+    r->n[2] = a->n[1] >> 40 | ((a->n[2] << 24) & 0xFFFFFFFFFFFFFULL);
+    r->n[3] = a->n[2] >> 28 | ((a->n[3] << 36) & 0xFFFFFFFFFFFFFULL);
+    r->n[4] = a->n[3] >> 16;
+#ifdef VERIFY
+    r->magnitude = 1;
+    r->normalized = 1;
 #endif
 }
 
diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h
index ec631833c..9280bb5ea 100644
--- a/src/field_5x52_int128_impl.h
+++ b/src/field_5x52_int128_impl.h
@@ -16,6 +16,11 @@
 #endif
 
 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
+    uint128_t c, d;
+    uint64_t t3, t4, tx, u0;
+    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
+    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
+
     VERIFY_BITS(a[0], 56);
     VERIFY_BITS(a[1], 56);
     VERIFY_BITS(a[2], 56);
@@ -28,63 +33,58 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
     VERIFY_BITS(b[4], 52);
     VERIFY_CHECK(r != b);
 
-    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
     /*  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
      *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
      *  Note that [x 0 0 0 0 0] = [x*R].
      */
 
-    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
-
-    __int128 c, d;
-
-    d  = (__int128)a0 * b[3]
-       + (__int128)a1 * b[2]
-       + (__int128)a2 * b[1]
-       + (__int128)a3 * b[0];
+    d  = (uint128_t)a0 * b[3]
+       + (uint128_t)a1 * b[2]
+       + (uint128_t)a2 * b[1]
+       + (uint128_t)a3 * b[0];
     VERIFY_BITS(d, 114);
     /* [d 0 0 0] = [p3 0 0 0] */
-    c  = (__int128)a4 * b[4];
+    c  = (uint128_t)a4 * b[4];
     VERIFY_BITS(c, 112);
     /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
     d += (c & M) * R; c >>= 52;
     VERIFY_BITS(d, 115);
     VERIFY_BITS(c, 60);
     /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
-    uint64_t t3 = d & M; d >>= 52;
+    t3 = d & M; d >>= 52;
     VERIFY_BITS(t3, 52);
     VERIFY_BITS(d, 63);
     /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
 
-    d += (__int128)a0 * b[4]
-       + (__int128)a1 * b[3]
-       + (__int128)a2 * b[2]
-       + (__int128)a3 * b[1]
-       + (__int128)a4 * b[0];
+    d += (uint128_t)a0 * b[4]
+       + (uint128_t)a1 * b[3]
+       + (uint128_t)a2 * b[2]
+       + (uint128_t)a3 * b[1]
+       + (uint128_t)a4 * b[0];
     VERIFY_BITS(d, 115);
     /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
     d += c * R;
     VERIFY_BITS(d, 116);
     /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
-    uint64_t t4 = d & M; d >>= 52;
+    t4 = d & M; d >>= 52;
     VERIFY_BITS(t4, 52);
     VERIFY_BITS(d, 64);
     /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
-    uint64_t tx = (t4 >> 48); t4 &= (M >> 4);
+    tx = (t4 >> 48); t4 &= (M >> 4);
     VERIFY_BITS(tx, 4);
     VERIFY_BITS(t4, 48);
     /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
 
-    c  = (__int128)a0 * b[0];
+    c  = (uint128_t)a0 * b[0];
     VERIFY_BITS(c, 112);
     /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
-    d += (__int128)a1 * b[4]
-       + (__int128)a2 * b[3]
-       + (__int128)a3 * b[2]
-       + (__int128)a4 * b[1];
+    d += (uint128_t)a1 * b[4]
+       + (uint128_t)a2 * b[3]
+       + (uint128_t)a3 * b[2]
+       + (uint128_t)a4 * b[1];
     VERIFY_BITS(d, 115);
     /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
-    uint64_t u0 = d & M; d >>= 52;
+    u0 = d & M; d >>= 52;
     VERIFY_BITS(u0, 52);
     VERIFY_BITS(d, 63);
     /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
@@ -92,7 +92,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
     u0 = (u0 << 4) | tx;
     VERIFY_BITS(u0, 56);
     /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
-    c += (__int128)u0 * (R >> 4);
+    c += (uint128_t)u0 * (R >> 4);
     VERIFY_BITS(c, 115);
     /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
     r[0] = c & M; c >>= 52;
@@ -100,13 +100,13 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
     VERIFY_BITS(c, 61);
     /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
 
-    c += (__int128)a0 * b[1]
-       + (__int128)a1 * b[0];
+    c += (uint128_t)a0 * b[1]
+       + (uint128_t)a1 * b[0];
     VERIFY_BITS(c, 114);
     /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
-    d += (__int128)a2 * b[4]
-       + (__int128)a3 * b[3]
-       + (__int128)a4 * b[2];
+    d += (uint128_t)a2 * b[4]
+       + (uint128_t)a3 * b[3]
+       + (uint128_t)a4 * b[2];
     VERIFY_BITS(d, 114);
     /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
     c += (d & M) * R; d >>= 52;
@@ -118,13 +118,13 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
     VERIFY_BITS(c, 63);
     /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
 
-    c += (__int128)a0 * b[2]
-       + (__int128)a1 * b[1]
-       + (__int128)a2 * b[0];
+    c += (uint128_t)a0 * b[2]
+       + (uint128_t)a1 * b[1]
+       + (uint128_t)a2 * b[0];
     VERIFY_BITS(c, 114);
     /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
-    d += (__int128)a3 * b[4]
-       + (__int128)a4 * b[3];
+    d += (uint128_t)a3 * b[4]
+       + (uint128_t)a4 * b[3];
     VERIFY_BITS(d, 114);
     /* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     c += (d & M) * R; d >>= 52;
@@ -153,64 +153,64 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
 }
 
 SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
+    uint128_t c, d;
+    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
+    int64_t t3, t4, tx, u0;
+    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
+
     VERIFY_BITS(a[0], 56);
     VERIFY_BITS(a[1], 56);
     VERIFY_BITS(a[2], 56);
     VERIFY_BITS(a[3], 56);
     VERIFY_BITS(a[4], 52);
 
-    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
     /**  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
      *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
      *  Note that [x 0 0 0 0 0] = [x*R].
      */
 
-    __int128 c, d;
-
-    uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
-
-    d  = (__int128)(a0*2) * a3
-       + (__int128)(a1*2) * a2;
+    d  = (uint128_t)(a0*2) * a3
+       + (uint128_t)(a1*2) * a2;
     VERIFY_BITS(d, 114);
     /* [d 0 0 0] = [p3 0 0 0] */
-    c  = (__int128)a4 * a4;
+    c  = (uint128_t)a4 * a4;
     VERIFY_BITS(c, 112);
     /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
     d += (c & M) * R; c >>= 52;
     VERIFY_BITS(d, 115);
     VERIFY_BITS(c, 60);
     /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
-    uint64_t t3 = d & M; d >>= 52;
+    t3 = d & M; d >>= 52;
     VERIFY_BITS(t3, 52);
     VERIFY_BITS(d, 63);
     /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
 
     a4 *= 2;
-    d += (__int128)a0 * a4
-       + (__int128)(a1*2) * a3
-       + (__int128)a2 * a2;
+    d += (uint128_t)a0 * a4
+       + (uint128_t)(a1*2) * a3
+       + (uint128_t)a2 * a2;
     VERIFY_BITS(d, 115);
     /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
     d += c * R;
     VERIFY_BITS(d, 116);
     /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
-    uint64_t t4 = d & M; d >>= 52;
+    t4 = d & M; d >>= 52;
     VERIFY_BITS(t4, 52);
     VERIFY_BITS(d, 64);
     /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
-    uint64_t tx = (t4 >> 48); t4 &= (M >> 4);
+    tx = (t4 >> 48); t4 &= (M >> 4);
     VERIFY_BITS(tx, 4);
     VERIFY_BITS(t4, 48);
     /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
 
-    c  = (__int128)a0 * a0;
+    c  = (uint128_t)a0 * a0;
     VERIFY_BITS(c, 112);
     /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
-    d += (__int128)a1 * a4
-       + (__int128)(a2*2) * a3;
+    d += (uint128_t)a1 * a4
+       + (uint128_t)(a2*2) * a3;
     VERIFY_BITS(d, 114);
     /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
-    uint64_t u0 = d & M; d >>= 52;
+    u0 = d & M; d >>= 52;
     VERIFY_BITS(u0, 52);
     VERIFY_BITS(d, 62);
     /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
@@ -218,7 +218,7 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
     u0 = (u0 << 4) | tx;
     VERIFY_BITS(u0, 56);
     /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
-    c += (__int128)u0 * (R >> 4);
+    c += (uint128_t)u0 * (R >> 4);
     VERIFY_BITS(c, 113);
     /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
     r[0] = c & M; c >>= 52;
@@ -227,11 +227,11 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
     /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
 
     a0 *= 2;
-    c += (__int128)a0 * a1;
+    c += (uint128_t)a0 * a1;
     VERIFY_BITS(c, 114);
     /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
-    d += (__int128)a2 * a4
-       + (__int128)a3 * a3;
+    d += (uint128_t)a2 * a4
+       + (uint128_t)a3 * a3;
     VERIFY_BITS(d, 114);
     /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
     c += (d & M) * R; d >>= 52;
@@ -243,11 +243,11 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
     VERIFY_BITS(c, 63);
     /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
 
-    c += (__int128)a0 * a2
-       + (__int128)a1 * a1;
+    c += (uint128_t)a0 * a2
+       + (uint128_t)a1 * a1;
     VERIFY_BITS(c, 114);
     /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
-    d += (__int128)a3 * a4;
+    d += (uint128_t)a3 * a4;
     VERIFY_BITS(d, 114);
     /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
     c += (d & M) * R; d >>= 52;
diff --git a/src/field_impl.h b/src/field_impl.h
index 4e2c24aa1..047914cf2 100644
--- a/src/field_impl.h
+++ b/src/field_impl.h
@@ -21,49 +21,6 @@
 #error "Please select field implementation"
 #endif
 
-static void secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a) {
-    if (*rlen < 65) {
-        *rlen = 65;
-        return;
-    }
-    *rlen = 65;
-    unsigned char tmp[32];
-    secp256k1_fe_t b = *a;
-    secp256k1_fe_normalize(&b);
-    secp256k1_fe_get_b32(tmp, &b);
-    for (int i=0; i<32; i++) {
-        static const char *c = "0123456789ABCDEF";
-        r[2*i]   = c[(tmp[i] >> 4) & 0xF];
-        r[2*i+1] = c[(tmp[i]) & 0xF];
-    }
-    r[64] = 0x00;
-}
-
-static int secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) {
-    unsigned char tmp[32] = {};
-    static const int cvt[256] = {0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 1, 2, 3, 4, 5, 6,7,8,9,0,0,0,0,0,0,
-                                 0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0,10,11,12,13,14,15,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
-                                 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0};
-    for (int i=0; i<32; i++) {
-        if (alen > i*2)
-            tmp[32 - alen/2 + i] = (cvt[(unsigned char)a[2*i]] << 4) + cvt[(unsigned char)a[2*i+1]];
-    }
-    return secp256k1_fe_set_b32(r, tmp);
-}
-
 SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
     secp256k1_fe_t na;
     secp256k1_fe_negate(&na, a, 1);
@@ -72,62 +29,62 @@ SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, cons
 }
 
 static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+    secp256k1_fe_t x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
+    int j;
 
     /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in
      *  { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
      *  1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
      */
 
-    secp256k1_fe_t x2;
     secp256k1_fe_sqr(&x2, a);
     secp256k1_fe_mul(&x2, &x2, a);
 
-    secp256k1_fe_t x3;
     secp256k1_fe_sqr(&x3, &x2);
     secp256k1_fe_mul(&x3, &x3, a);
 
-    secp256k1_fe_t x6 = x3;
-    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6);
+    x6 = x3;
+    for (j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6);
     secp256k1_fe_mul(&x6, &x6, &x3);
 
-    secp256k1_fe_t x9 = x6;
-    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9);
+    x9 = x6;
+    for (j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9);
     secp256k1_fe_mul(&x9, &x9, &x3);
 
-    secp256k1_fe_t x11 = x9;
-    for (int j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11);
+    x11 = x9;
+    for (j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11);
     secp256k1_fe_mul(&x11, &x11, &x2);
 
-    secp256k1_fe_t x22 = x11;
-    for (int j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22);
+    x22 = x11;
+    for (j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22);
     secp256k1_fe_mul(&x22, &x22, &x11);
 
-    secp256k1_fe_t x44 = x22;
-    for (int j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44);
+    x44 = x22;
+    for (j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44);
     secp256k1_fe_mul(&x44, &x44, &x22);
 
-    secp256k1_fe_t x88 = x44;
-    for (int j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88);
+    x88 = x44;
+    for (j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88);
     secp256k1_fe_mul(&x88, &x88, &x44);
 
-    secp256k1_fe_t x176 = x88;
-    for (int j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176);
+    x176 = x88;
+    for (j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176);
     secp256k1_fe_mul(&x176, &x176, &x88);
 
-    secp256k1_fe_t x220 = x176;
-    for (int j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220);
+    x220 = x176;
+    for (j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220);
     secp256k1_fe_mul(&x220, &x220, &x44);
 
-    secp256k1_fe_t x223 = x220;
-    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223);
+    x223 = x220;
+    for (j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223);
     secp256k1_fe_mul(&x223, &x223, &x3);
 
     /* The final result is then assembled using a sliding window over the blocks. */
 
-    secp256k1_fe_t t1 = x223;
-    for (int j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1);
+    t1 = x223;
+    for (j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1);
     secp256k1_fe_mul(&t1, &t1, &x22);
-    for (int j=0; j<6; j++) secp256k1_fe_sqr(&t1, &t1);
+    for (j=0; j<6; j++) secp256k1_fe_sqr(&t1, &t1);
     secp256k1_fe_mul(&t1, &t1, &x2);
     secp256k1_fe_sqr(&t1, &t1);
     secp256k1_fe_sqr(r, &t1);
@@ -139,66 +96,66 @@ static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 }
 
 static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+    secp256k1_fe_t x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
+    int j;
 
     /** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
      *  { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
      *  [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
      */
 
-    secp256k1_fe_t x2;
     secp256k1_fe_sqr(&x2, a);
     secp256k1_fe_mul(&x2, &x2, a);
 
-    secp256k1_fe_t x3;
     secp256k1_fe_sqr(&x3, &x2);
     secp256k1_fe_mul(&x3, &x3, a);
 
-    secp256k1_fe_t x6 = x3;
-    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6);
+    x6 = x3;
+    for (j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6);
     secp256k1_fe_mul(&x6, &x6, &x3);
 
-    secp256k1_fe_t x9 = x6;
-    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9);
+    x9 = x6;
+    for (j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9);
     secp256k1_fe_mul(&x9, &x9, &x3);
 
-    secp256k1_fe_t x11 = x9;
-    for (int j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11);
+    x11 = x9;
+    for (j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11);
     secp256k1_fe_mul(&x11, &x11, &x2);
 
-    secp256k1_fe_t x22 = x11;
-    for (int j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22);
+    x22 = x11;
+    for (j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22);
     secp256k1_fe_mul(&x22, &x22, &x11);
 
-    secp256k1_fe_t x44 = x22;
-    for (int j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44);
+    x44 = x22;
+    for (j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44);
     secp256k1_fe_mul(&x44, &x44, &x22);
 
-    secp256k1_fe_t x88 = x44;
-    for (int j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88);
+    x88 = x44;
+    for (j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88);
     secp256k1_fe_mul(&x88, &x88, &x44);
 
-    secp256k1_fe_t x176 = x88;
-    for (int j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176);
+    x176 = x88;
+    for (j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176);
     secp256k1_fe_mul(&x176, &x176, &x88);
 
-    secp256k1_fe_t x220 = x176;
-    for (int j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220);
+    x220 = x176;
+    for (j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220);
     secp256k1_fe_mul(&x220, &x220, &x44);
 
-    secp256k1_fe_t x223 = x220;
-    for (int j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223);
+    x223 = x220;
+    for (j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223);
     secp256k1_fe_mul(&x223, &x223, &x3);
 
     /* The final result is then assembled using a sliding window over the blocks. */
 
-    secp256k1_fe_t t1 = x223;
-    for (int j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1);
+    t1 = x223;
+    for (j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1);
     secp256k1_fe_mul(&t1, &t1, &x22);
-    for (int j=0; j<5; j++) secp256k1_fe_sqr(&t1, &t1);
+    for (j=0; j<5; j++) secp256k1_fe_sqr(&t1, &t1);
     secp256k1_fe_mul(&t1, &t1, a);
-    for (int j=0; j<3; j++) secp256k1_fe_sqr(&t1, &t1);
+    for (j=0; j<3; j++) secp256k1_fe_sqr(&t1, &t1);
     secp256k1_fe_mul(&t1, &t1, &x2);
-    for (int j=0; j<2; j++) secp256k1_fe_sqr(&t1, &t1);
+    for (j=0; j<2; j++) secp256k1_fe_sqr(&t1, &t1);
     secp256k1_fe_mul(r, a, &t1);
 }
 
@@ -206,13 +163,21 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #if defined(USE_FIELD_INV_BUILTIN)
     secp256k1_fe_inv(r, a);
 #elif defined(USE_FIELD_INV_NUM)
+    secp256k1_num_t n, m;
+    /* secp256k1 field prime, value p defined in "Standards for Efficient Cryptography" (SEC2) 2.7.1. */
+    static const unsigned char prime[32] = {
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
+    };
     unsigned char b[32];
     secp256k1_fe_t c = *a;
     secp256k1_fe_normalize_var(&c);
     secp256k1_fe_get_b32(b, &c);
-    secp256k1_num_t n;
     secp256k1_num_set_bin(&n, b, 32);
-    secp256k1_num_mod_inverse(&n, &n, &secp256k1_fe_consts->p);
+    secp256k1_num_set_bin(&m, prime, 32);
+    secp256k1_num_mod_inverse(&n, &n, &m);
     secp256k1_num_get_bin(b, 32, &n);
     VERIFY_CHECK(secp256k1_fe_set_b32(r, b));
 #else
@@ -220,7 +185,9 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #endif
 }
 
-static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]) {
+static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp256k1_fe_t *a) {
+    secp256k1_fe_t u;
+    size_t i;
     if (len < 1)
         return;
 
@@ -228,12 +195,12 @@ static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t r[len], const se
 
     r[0] = a[0];
 
-    size_t i = 0;
+    i = 0;
     while (++i < len) {
         secp256k1_fe_mul(&r[i], &r[i - 1], &a[i]);
     }
 
-    secp256k1_fe_t u; secp256k1_fe_inv_var(&u, &r[--i]);
+    secp256k1_fe_inv_var(&u, &r[--i]);
 
     while (i > 0) {
         int j = i--;
@@ -244,32 +211,4 @@ static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t r[len], const se
     r[0] = u;
 }
 
-static void secp256k1_fe_start(void) {
-#ifndef USE_NUM_NONE
-    static const unsigned char secp256k1_fe_consts_p[] = {
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFC,0x2F
-    };
-#endif
-    if (secp256k1_fe_consts == NULL) {
-        secp256k1_fe_inner_start();
-        secp256k1_fe_consts_t *ret = (secp256k1_fe_consts_t*)checked_malloc(sizeof(secp256k1_fe_consts_t));
-#ifndef USE_NUM_NONE
-        secp256k1_num_set_bin(&ret->p, secp256k1_fe_consts_p, sizeof(secp256k1_fe_consts_p));
-#endif
-        secp256k1_fe_consts = ret;
-    }
-}
-
-static void secp256k1_fe_stop(void) {
-    if (secp256k1_fe_consts != NULL) {
-        secp256k1_fe_consts_t *c = (secp256k1_fe_consts_t*)secp256k1_fe_consts;
-        free((void*)c);
-        secp256k1_fe_consts = NULL;
-        secp256k1_fe_inner_stop();
-    }
-}
-
 #endif
diff --git a/src/group.h b/src/group.h
index 6dea6bb5a..d1e583490 100644
--- a/src/group.h
+++ b/src/group.h
@@ -17,6 +17,9 @@ typedef struct {
     int infinity; /* whether this represents the point at infinity */
 } secp256k1_ge_t;
 
+#define SECP256K1_GE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), 0}
+#define SECP256K1_GE_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1}
+
 /** A group element of the secp256k1 curve, in jacobian coordinates. */
 typedef struct {
     secp256k1_fe_t x; /* actual X: x/z^2 */
@@ -25,23 +28,15 @@ typedef struct {
     int infinity; /* whether this represents the point at infinity */
 } secp256k1_gej_t;
 
-/** Global constants related to the group */
-typedef struct {
-    secp256k1_ge_t g; /* the generator point */
-
-#ifdef USE_ENDOMORPHISM
-    /* constants related to secp256k1's efficiently computable endomorphism */
-    secp256k1_fe_t beta;
-#endif
-} secp256k1_ge_consts_t;
-
-static const secp256k1_ge_consts_t *secp256k1_ge_consts = NULL;
+#define SECP256K1_GEJ_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1), 0}
+#define SECP256K1_GEJ_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1}
 
-/** Initialize the group module. */
-static void secp256k1_ge_start(void);
+typedef struct {
+    secp256k1_fe_storage_t x;
+    secp256k1_fe_storage_t y;
+} secp256k1_ge_storage_t;
 
-/** De-initialize the group module. */
-static void secp256k1_ge_stop(void);
+#define SECP256K1_GE_STORAGE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_STORAGE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_STORAGE_CONST((i),(j),(k),(l),(m),(n),(o),(p))}
 
 /** Set a group element equal to the point at infinity */
 static void secp256k1_ge_set_infinity(secp256k1_ge_t *r);
@@ -61,14 +56,11 @@ static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a);
 
 static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a);
 
-/** Get a hex representation of a point. *rlen will be overwritten with the real length. */
-static void secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a);
-
 /** Set a group element equal to another which is given in jacobian coordinates */
 static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a);
 
 /** Set a batch of group elements equal to the inputs given in jacobian coordinates */
-static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t r[len], const secp256k1_gej_t a[len]);
+static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a);
 
 
 /** Set a group element (jacobian) equal to the point at infinity. */
@@ -103,9 +95,6 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
     guarantee, and b is allowed to be infinity. */
 static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
 
-/** Get a hex representation of a point. *rlen will be overwritten with the real length. */
-static void secp256k1_gej_get_hex(char *r, int *rlen, const secp256k1_gej_t *a);
-
 #ifdef USE_ENDOMORPHISM
 /** Set r to be equal to lambda times a, where lambda is chosen in a way such that this is very fast. */
 static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a);
@@ -117,4 +106,13 @@ static void secp256k1_gej_clear(secp256k1_gej_t *r);
 /** Clear a secp256k1_ge_t to prevent leaking sensitive information. */
 static void secp256k1_ge_clear(secp256k1_ge_t *r);
 
+/** Convert a group element to the storage type. */
+static void secp256k1_ge_to_storage(secp256k1_ge_storage_t *r, const secp256k1_ge_t*);
+
+/** Convert a group element back from the storage type. */
+static void secp256k1_ge_from_storage(secp256k1_ge_t *r, const secp256k1_ge_storage_t*);
+
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
+static void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r, const secp256k1_ge_storage_t *a, int flag);
+
 #endif
diff --git a/src/group_impl.h b/src/group_impl.h
index fef06df28..8d8c359c5 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -13,6 +13,16 @@
 #include "field.h"
 #include "group.h"
 
+/** Generator for secp256k1, value 'g' defined in
+ *  "Standards for Efficient Cryptography" (SEC2) 2.7.1.
+ */
+static const secp256k1_ge_t secp256k1_ge_const_g = SECP256K1_GE_CONST(
+    0x79BE667EUL, 0xF9DCBBACUL, 0x55A06295UL, 0xCE870B07UL,
+    0x029BFCDBUL, 0x2DCE28D9UL, 0x59F2815BUL, 0x16F81798UL,
+    0x483ADA77UL, 0x26A3C465UL, 0x5DA4FBFCUL, 0x0E1108A8UL,
+    0xFD17B448UL, 0xA6855419UL, 0x9C47D08FUL, 0xFB10D4B8UL
+);
+
 static void secp256k1_ge_set_infinity(secp256k1_ge_t *r) {
     r->infinity = 1;
 }
@@ -33,32 +43,12 @@ static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a) {
     secp256k1_fe_negate(&r->y, &r->y, 1);
 }
 
-static void secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a) {
-    char cx[65]; int lx=65;
-    char cy[65]; int ly=65;
-    secp256k1_fe_get_hex(cx, &lx, &a->x);
-    secp256k1_fe_get_hex(cy, &ly, &a->y);
-    lx = strlen(cx);
-    ly = strlen(cy);
-    int len = lx + ly + 3 + 1;
-    if (*rlen < len) {
-        *rlen = len;
-        return;
-    }
-    *rlen = len;
-    r[0] = '(';
-    memcpy(r+1, cx, lx);
-    r[1+lx] = ',';
-    memcpy(r+2+lx, cy, ly);
-    r[2+lx+ly] = ')';
-    r[3+lx+ly] = 0;
-}
-
 static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a) {
+    secp256k1_fe_t z2, z3;
     r->infinity = a->infinity;
     secp256k1_fe_inv(&a->z, &a->z);
-    secp256k1_fe_t z2; secp256k1_fe_sqr(&z2, &a->z);
-    secp256k1_fe_t z3; secp256k1_fe_mul(&z3, &a->z, &z2);
+    secp256k1_fe_sqr(&z2, &a->z);
+    secp256k1_fe_mul(&z3, &a->z, &z2);
     secp256k1_fe_mul(&a->x, &a->x, &z2);
     secp256k1_fe_mul(&a->y, &a->y, &z3);
     secp256k1_fe_set_int(&a->z, 1);
@@ -67,13 +57,14 @@ static void secp256k1_ge_set_gej(secp256k1_ge_t *r, secp256k1_gej_t *a) {
 }
 
 static void secp256k1_ge_set_gej_var(secp256k1_ge_t *r, secp256k1_gej_t *a) {
+    secp256k1_fe_t z2, z3;
     r->infinity = a->infinity;
     if (a->infinity) {
         return;
     }
     secp256k1_fe_inv_var(&a->z, &a->z);
-    secp256k1_fe_t z2; secp256k1_fe_sqr(&z2, &a->z);
-    secp256k1_fe_t z3; secp256k1_fe_mul(&z3, &a->z, &z2);
+    secp256k1_fe_sqr(&z2, &a->z);
+    secp256k1_fe_mul(&z3, &a->z, &z2);
     secp256k1_fe_mul(&a->x, &a->x, &z2);
     secp256k1_fe_mul(&a->y, &a->y, &z3);
     secp256k1_fe_set_int(&a->z, 1);
@@ -81,26 +72,30 @@ static void secp256k1_ge_set_gej_var(secp256k1_ge_t *r, secp256k1_gej_t *a) {
     r->y = a->y;
 }
 
-static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t r[len], const secp256k1_gej_t a[len]) {
+static void secp256k1_ge_set_all_gej_var(size_t len, secp256k1_ge_t *r, const secp256k1_gej_t *a) {
+    secp256k1_fe_t *az;
+    secp256k1_fe_t *azi;
+    size_t i;
     size_t count = 0;
-    secp256k1_fe_t *az = checked_malloc(sizeof(secp256k1_fe_t) * len);
-    for (size_t i=0; i<len; i++) {
+    az = checked_malloc(sizeof(secp256k1_fe_t) * len);
+    for (i = 0; i < len; i++) {
         if (!a[i].infinity) {
             az[count++] = a[i].z;
         }
     }
 
-    secp256k1_fe_t *azi = checked_malloc(sizeof(secp256k1_fe_t) * count);
+    azi = checked_malloc(sizeof(secp256k1_fe_t) * count);
     secp256k1_fe_inv_all_var(count, azi, az);
     free(az);
 
     count = 0;
-    for (size_t i=0; i<len; i++) {
+    for (i = 0; i < len; i++) {
         r[i].infinity = a[i].infinity;
         if (!a[i].infinity) {
+            secp256k1_fe_t zi2, zi3;
             secp256k1_fe_t *zi = &azi[count++];
-            secp256k1_fe_t zi2; secp256k1_fe_sqr(&zi2, zi);
-            secp256k1_fe_t zi3; secp256k1_fe_mul(&zi3, &zi2, zi);
+            secp256k1_fe_sqr(&zi2, zi);
+            secp256k1_fe_mul(&zi3, &zi2, zi);
             secp256k1_fe_mul(&r[i].x, &a[i].x, &zi2);
             secp256k1_fe_mul(&r[i].y, &a[i].y, &zi3);
         }
@@ -136,11 +131,12 @@ static void secp256k1_ge_clear(secp256k1_ge_t *r) {
 }
 
 static int secp256k1_ge_set_xo_var(secp256k1_ge_t *r, const secp256k1_fe_t *x, int odd) {
+    secp256k1_fe_t x2, x3, c;
     r->x = *x;
-    secp256k1_fe_t x2; secp256k1_fe_sqr(&x2, x);
-    secp256k1_fe_t x3; secp256k1_fe_mul(&x3, x, &x2);
+    secp256k1_fe_sqr(&x2, x);
+    secp256k1_fe_mul(&x3, x, &x2);
     r->infinity = 0;
-    secp256k1_fe_t c; secp256k1_fe_set_int(&c, 7);
+    secp256k1_fe_set_int(&c, 7);
     secp256k1_fe_add(&c, &x3);
     if (!secp256k1_fe_sqrt_var(&r->y, &c))
         return 0;
@@ -158,9 +154,10 @@ static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a) {
 }
 
 static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t *a) {
+    secp256k1_fe_t r, r2;
     VERIFY_CHECK(!a->infinity);
-    secp256k1_fe_t r; secp256k1_fe_sqr(&r, &a->z); secp256k1_fe_mul(&r, &r, x);
-    secp256k1_fe_t r2 = a->x; secp256k1_fe_normalize_weak(&r2);
+    secp256k1_fe_sqr(&r, &a->z); secp256k1_fe_mul(&r, &r, x);
+    r2 = a->x; secp256k1_fe_normalize_weak(&r2);
     return secp256k1_fe_equal_var(&r, &r2);
 }
 
@@ -178,6 +175,7 @@ static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a) {
 }
 
 static int secp256k1_gej_is_valid_var(const secp256k1_gej_t *a) {
+    secp256k1_fe_t y2, x3, z2, z6;
     if (a->infinity)
         return 0;
     /** y^2 = x^3 + 7
@@ -185,10 +183,10 @@ static int secp256k1_gej_is_valid_var(const secp256k1_gej_t *a) {
      *  Y^2 / Z^6 = X^3 / Z^6 + 7
      *  Y^2 = X^3 + 7*Z^6
      */
-    secp256k1_fe_t y2; secp256k1_fe_sqr(&y2, &a->y);
-    secp256k1_fe_t x3; secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x);
-    secp256k1_fe_t z2; secp256k1_fe_sqr(&z2, &a->z);
-    secp256k1_fe_t z6; secp256k1_fe_sqr(&z6, &z2); secp256k1_fe_mul(&z6, &z6, &z2);
+    secp256k1_fe_sqr(&y2, &a->y);
+    secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x);
+    secp256k1_fe_sqr(&z2, &a->z);
+    secp256k1_fe_sqr(&z6, &z2); secp256k1_fe_mul(&z6, &z6, &z2);
     secp256k1_fe_mul_int(&z6, 7);
     secp256k1_fe_add(&x3, &z6);
     secp256k1_fe_normalize_weak(&x3);
@@ -196,27 +194,30 @@ static int secp256k1_gej_is_valid_var(const secp256k1_gej_t *a) {
 }
 
 static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a) {
+    secp256k1_fe_t y2, x3, c;
     if (a->infinity)
         return 0;
     /* y^2 = x^3 + 7 */
-    secp256k1_fe_t y2; secp256k1_fe_sqr(&y2, &a->y);
-    secp256k1_fe_t x3; secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x);
-    secp256k1_fe_t c; secp256k1_fe_set_int(&c, 7);
+    secp256k1_fe_sqr(&y2, &a->y);
+    secp256k1_fe_sqr(&x3, &a->x); secp256k1_fe_mul(&x3, &x3, &a->x);
+    secp256k1_fe_set_int(&c, 7);
     secp256k1_fe_add(&x3, &c);
     secp256k1_fe_normalize_weak(&x3);
     return secp256k1_fe_equal_var(&y2, &x3);
 }
 
 static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
-    // For secp256k1, 2Q is infinity if and only if Q is infinity. This is because if 2Q = infinity,
-    // Q must equal -Q, or that Q.y == -(Q.y), or Q.y is 0. For a point on y^2 = x^3 + 7 to have
-    // y=0, x^3 must be -7 mod p. However, -7 has no cube root mod p.
+    /* Operations: 3 mul, 4 sqr, 0 normalize, 12 mul_int/add/negate */
+    secp256k1_fe_t t1,t2,t3,t4;
+    /** For secp256k1, 2Q is infinity if and only if Q is infinity. This is because if 2Q = infinity,
+     *  Q must equal -Q, or that Q.y == -(Q.y), or Q.y is 0. For a point on y^2 = x^3 + 7 to have
+     *  y=0, x^3 must be -7 mod p. However, -7 has no cube root mod p.
+     */
     r->infinity = a->infinity;
     if (r->infinity) {
         return;
     }
 
-    secp256k1_fe_t t1,t2,t3,t4;
     secp256k1_fe_mul(&r->z, &a->z, &a->y);
     secp256k1_fe_mul_int(&r->z, 2);       /* Z' = 2*Y*Z (2) */
     secp256k1_fe_sqr(&t1, &a->x);
@@ -240,6 +241,8 @@ static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *
 }
 
 static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b) {
+    /* Operations: 12 mul, 4 sqr, 2 normalize, 12 mul_int/add/negate */
+    secp256k1_fe_t z22, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
     if (a->infinity) {
         *r = *b;
         return;
@@ -249,14 +252,14 @@ static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a,
         return;
     }
     r->infinity = 0;
-    secp256k1_fe_t z22; secp256k1_fe_sqr(&z22, &b->z);
-    secp256k1_fe_t z12; secp256k1_fe_sqr(&z12, &a->z);
-    secp256k1_fe_t u1; secp256k1_fe_mul(&u1, &a->x, &z22);
-    secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12);
-    secp256k1_fe_t s1; secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z);
-    secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
-    secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
-    secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
+    secp256k1_fe_sqr(&z22, &b->z);
+    secp256k1_fe_sqr(&z12, &a->z);
+    secp256k1_fe_mul(&u1, &a->x, &z22);
+    secp256k1_fe_mul(&u2, &b->x, &z12);
+    secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z);
+    secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
+    secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
+    secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
     if (secp256k1_fe_normalizes_to_zero_var(&h)) {
         if (secp256k1_fe_normalizes_to_zero_var(&i)) {
             secp256k1_gej_double_var(r, a);
@@ -265,11 +268,11 @@ static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a,
         }
         return;
     }
-    secp256k1_fe_t i2; secp256k1_fe_sqr(&i2, &i);
-    secp256k1_fe_t h2; secp256k1_fe_sqr(&h2, &h);
-    secp256k1_fe_t h3; secp256k1_fe_mul(&h3, &h, &h2);
+    secp256k1_fe_sqr(&i2, &i);
+    secp256k1_fe_sqr(&h2, &h);
+    secp256k1_fe_mul(&h3, &h, &h2);
     secp256k1_fe_mul(&r->z, &a->z, &b->z); secp256k1_fe_mul(&r->z, &r->z, &h);
-    secp256k1_fe_t t; secp256k1_fe_mul(&t, &u1, &h2);
+    secp256k1_fe_mul(&t, &u1, &h2);
     r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
     secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
     secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1);
@@ -277,6 +280,8 @@ static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a,
 }
 
 static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
+    /* 8 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */
+    secp256k1_fe_t z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
     if (a->infinity) {
         r->infinity = b->infinity;
         r->x = b->x;
@@ -289,13 +294,13 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *
         return;
     }
     r->infinity = 0;
-    secp256k1_fe_t z12; secp256k1_fe_sqr(&z12, &a->z);
-    secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize_weak(&u1);
-    secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &z12);
-    secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize_weak(&s1);
-    secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
-    secp256k1_fe_t h; secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
-    secp256k1_fe_t i; secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
+    secp256k1_fe_sqr(&z12, &a->z);
+    u1 = a->x; secp256k1_fe_normalize_weak(&u1);
+    secp256k1_fe_mul(&u2, &b->x, &z12);
+    s1 = a->y; secp256k1_fe_normalize_weak(&s1);
+    secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z);
+    secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2);
+    secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
     if (secp256k1_fe_normalizes_to_zero_var(&h)) {
         if (secp256k1_fe_normalizes_to_zero_var(&i)) {
             secp256k1_gej_double_var(r, a);
@@ -304,11 +309,11 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *
         }
         return;
     }
-    secp256k1_fe_t i2; secp256k1_fe_sqr(&i2, &i);
-    secp256k1_fe_t h2; secp256k1_fe_sqr(&h2, &h);
-    secp256k1_fe_t h3; secp256k1_fe_mul(&h3, &h, &h2);
+    secp256k1_fe_sqr(&i2, &i);
+    secp256k1_fe_sqr(&h2, &h);
+    secp256k1_fe_mul(&h3, &h, &h2);
     r->z = a->z; secp256k1_fe_mul(&r->z, &r->z, &h);
-    secp256k1_fe_t t; secp256k1_fe_mul(&t, &u1, &h2);
+    secp256k1_fe_mul(&t, &u1, &h2);
     r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
     secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
     secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1);
@@ -316,6 +321,9 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *
 }
 
 static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
+    /* Operations: 7 mul, 5 sqr, 5 normalize, 19 mul_int/add/negate */
+    secp256k1_fe_t zz, u1, u2, s1, s2, z, t, m, n, q, rr;
+    int infinity;
     VERIFY_CHECK(!b->infinity);
     VERIFY_CHECK(a->infinity == 0 || a->infinity == 1);
 
@@ -341,24 +349,24 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
      *  (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.)
      */
 
-    secp256k1_fe_t zz; secp256k1_fe_sqr(&zz, &a->z);                /* z = Z1^2 */
-    secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize_weak(&u1);     /* u1 = U1 = X1*Z2^2 (1) */
-    secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &zz);           /* u2 = U2 = X2*Z1^2 (1) */
-    secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize_weak(&s1);     /* s1 = S1 = Y1*Z2^3 (1) */
-    secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &zz);           /* s2 = Y2*Z2^2 (1) */
-    secp256k1_fe_mul(&s2, &s2, &a->z);                              /* s2 = S2 = Y2*Z1^3 (1) */
-    secp256k1_fe_t z = a->z;                                        /* z = Z = Z1*Z2 (8) */
-    secp256k1_fe_t t = u1; secp256k1_fe_add(&t, &u2);               /* t = T = U1+U2 (2) */
-    secp256k1_fe_t m = s1; secp256k1_fe_add(&m, &s2);               /* m = M = S1+S2 (2) */
-    secp256k1_fe_t n; secp256k1_fe_sqr(&n, &m);                     /* n = M^2 (1) */
-    secp256k1_fe_t q; secp256k1_fe_mul(&q, &n, &t);                 /* q = Q = T*M^2 (1) */
-    secp256k1_fe_sqr(&n, &n);                                       /* n = M^4 (1) */
-    secp256k1_fe_t rr; secp256k1_fe_sqr(&rr, &t);                   /* rr = T^2 (1) */
+    secp256k1_fe_sqr(&zz, &a->z);                       /* z = Z1^2 */
+    u1 = a->x; secp256k1_fe_normalize_weak(&u1);        /* u1 = U1 = X1*Z2^2 (1) */
+    secp256k1_fe_mul(&u2, &b->x, &zz);                  /* u2 = U2 = X2*Z1^2 (1) */
+    s1 = a->y; secp256k1_fe_normalize_weak(&s1);        /* s1 = S1 = Y1*Z2^3 (1) */
+    secp256k1_fe_mul(&s2, &b->y, &zz);                  /* s2 = Y2*Z2^2 (1) */
+    secp256k1_fe_mul(&s2, &s2, &a->z);                  /* s2 = S2 = Y2*Z1^3 (1) */
+    z = a->z;                                           /* z = Z = Z1*Z2 (8) */
+    t = u1; secp256k1_fe_add(&t, &u2);                  /* t = T = U1+U2 (2) */
+    m = s1; secp256k1_fe_add(&m, &s2);                  /* m = M = S1+S2 (2) */
+    secp256k1_fe_sqr(&n, &m);                           /* n = M^2 (1) */
+    secp256k1_fe_mul(&q, &n, &t);                       /* q = Q = T*M^2 (1) */
+    secp256k1_fe_sqr(&n, &n);                           /* n = M^4 (1) */
+    secp256k1_fe_sqr(&rr, &t);                          /* rr = T^2 (1) */
     secp256k1_fe_mul(&t, &u1, &u2); secp256k1_fe_negate(&t, &t, 1); /* t = -U1*U2 (2) */
     secp256k1_fe_add(&rr, &t);                                      /* rr = R = T^2-U1*U2 (3) */
     secp256k1_fe_sqr(&t, &rr);                                      /* t = R^2 (1) */
     secp256k1_fe_mul(&r->z, &m, &z);                                /* r->z = M*Z (1) */
-    int infinity = secp256k1_fe_normalizes_to_zero(&r->z) * (1 - a->infinity);
+    infinity = secp256k1_fe_normalizes_to_zero(&r->z) * (1 - a->infinity);
     secp256k1_fe_mul_int(&r->z, 2 * (1 - a->infinity)); /* r->z = Z3 = 2*M*Z (2) */
     r->x = t;                                           /* r->x = R^2 (1) */
     secp256k1_fe_negate(&q, &q, 1);                     /* q = -Q (2) */
@@ -386,63 +394,37 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
     r->infinity = infinity;
 }
 
+static void secp256k1_ge_to_storage(secp256k1_ge_storage_t *r, const secp256k1_ge_t *a) {
+    secp256k1_fe_t x, y;
+    VERIFY_CHECK(!a->infinity);
+    x = a->x;
+    secp256k1_fe_normalize(&x);
+    y = a->y;
+    secp256k1_fe_normalize(&y);
+    secp256k1_fe_to_storage(&r->x, &x);
+    secp256k1_fe_to_storage(&r->y, &y);
+}
 
+static void secp256k1_ge_from_storage(secp256k1_ge_t *r, const secp256k1_ge_storage_t *a) {
+    secp256k1_fe_from_storage(&r->x, &a->x);
+    secp256k1_fe_from_storage(&r->y, &a->y);
+    r->infinity = 0;
+}
 
-static void secp256k1_gej_get_hex(char *r, int *rlen, const secp256k1_gej_t *a) {
-    secp256k1_gej_t c = *a;
-    secp256k1_ge_t t; secp256k1_ge_set_gej(&t, &c);
-    secp256k1_ge_get_hex(r, rlen, &t);
+static SECP256K1_INLINE void secp256k1_ge_storage_cmov(secp256k1_ge_storage_t *r, const secp256k1_ge_storage_t *a, int flag) {
+    secp256k1_fe_storage_cmov(&r->x, &a->x, flag);
+    secp256k1_fe_storage_cmov(&r->y, &a->y, flag);
 }
 
 #ifdef USE_ENDOMORPHISM
 static void secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
-    const secp256k1_fe_t *beta = &secp256k1_ge_consts->beta;
+    static const secp256k1_fe_t beta = SECP256K1_FE_CONST(
+        0x7ae96a2bul, 0x657c0710ul, 0x6e64479eul, 0xac3434e9ul,
+        0x9cf04975ul, 0x12f58995ul, 0xc1396c28ul, 0x719501eeul
+    );
     *r = *a;
-    secp256k1_fe_mul(&r->x, &r->x, beta);
+    secp256k1_fe_mul(&r->x, &r->x, &beta);
 }
 #endif
 
-static void secp256k1_ge_start(void) {
-    static const unsigned char secp256k1_ge_consts_g_x[] = {
-        0x79,0xBE,0x66,0x7E,0xF9,0xDC,0xBB,0xAC,
-        0x55,0xA0,0x62,0x95,0xCE,0x87,0x0B,0x07,
-        0x02,0x9B,0xFC,0xDB,0x2D,0xCE,0x28,0xD9,
-        0x59,0xF2,0x81,0x5B,0x16,0xF8,0x17,0x98
-    };
-    static const unsigned char secp256k1_ge_consts_g_y[] = {
-        0x48,0x3A,0xDA,0x77,0x26,0xA3,0xC4,0x65,
-        0x5D,0xA4,0xFB,0xFC,0x0E,0x11,0x08,0xA8,
-        0xFD,0x17,0xB4,0x48,0xA6,0x85,0x54,0x19,
-        0x9C,0x47,0xD0,0x8F,0xFB,0x10,0xD4,0xB8
-    };
-#ifdef USE_ENDOMORPHISM
-    /* properties of secp256k1's efficiently computable endomorphism */
-    static const unsigned char secp256k1_ge_consts_beta[] = {
-        0x7a,0xe9,0x6a,0x2b,0x65,0x7c,0x07,0x10,
-        0x6e,0x64,0x47,0x9e,0xac,0x34,0x34,0xe9,
-        0x9c,0xf0,0x49,0x75,0x12,0xf5,0x89,0x95,
-        0xc1,0x39,0x6c,0x28,0x71,0x95,0x01,0xee
-    };
-#endif
-    if (secp256k1_ge_consts == NULL) {
-        secp256k1_ge_consts_t *ret = (secp256k1_ge_consts_t*)checked_malloc(sizeof(secp256k1_ge_consts_t));
-#ifdef USE_ENDOMORPHISM
-        VERIFY_CHECK(secp256k1_fe_set_b32(&ret->beta, secp256k1_ge_consts_beta));
-#endif
-        secp256k1_fe_t g_x, g_y;
-        VERIFY_CHECK(secp256k1_fe_set_b32(&g_x, secp256k1_ge_consts_g_x));
-        VERIFY_CHECK(secp256k1_fe_set_b32(&g_y, secp256k1_ge_consts_g_y));
-        secp256k1_ge_set_xy(&ret->g, &g_x, &g_y);
-        secp256k1_ge_consts = ret;
-    }
-}
-
-static void secp256k1_ge_stop(void) {
-    if (secp256k1_ge_consts != NULL) {
-        secp256k1_ge_consts_t *c = (secp256k1_ge_consts_t*)secp256k1_ge_consts;
-        free((void*)c);
-        secp256k1_ge_consts = NULL;
-    }
-}
-
 #endif
diff --git a/src/hash.h b/src/hash.h
index d1e65b968..843423d7f 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -12,7 +12,7 @@
 
 typedef struct {
     uint32_t s[32];
-    unsigned char buf[64];
+    uint32_t buf[16]; /* In big endian */
     size_t bytes;
 } secp256k1_sha256_t;
 
@@ -34,7 +34,7 @@ typedef struct {
     int retry;
 } secp256k1_rfc6979_hmac_sha256_t;
 
-static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen, const unsigned char *msg, size_t msglen);
+static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen, const unsigned char *msg, size_t msglen, const unsigned char *rnd, size_t rndlen);
 static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256_t *rng, unsigned char *out, size_t outlen);
 static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256_t *rng);
 
diff --git a/src/hash_impl.h b/src/hash_impl.h
index f35c5f7a8..60fdbf771 100644
--- a/src/hash_impl.h
+++ b/src/hash_impl.h
@@ -11,6 +11,7 @@
 
 #include <stdlib.h>
 #include <stdint.h>
+#include <string.h>
 
 #define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
 #define Maj(x,y,z) (((x) & (y)) | ((z) & ((x) | (y))))
@@ -26,8 +27,11 @@
     (h) = t1 + t2; \
 } while(0)
 
-#define ReadBE32(p) (((uint32_t)((p)[0])) << 24 | ((uint32_t)((p)[1])) << 16 | ((uint32_t)((p)[2])) << 8 | ((uint32_t)((p)[3])))
-#define WriteBE32(p, v) do { (p)[0] = (v) >> 24; (p)[1] = (v) >> 16; (p)[2] = (v) >> 8; (p)[3] = (v); } while(0)
+#ifdef WORDS_BIGENDIAN
+#define BE32(x) (x)
+#else
+#define BE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24))
+#endif
 
 static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash) {
     hash->s[0] = 0x6a09e667ul;
@@ -41,27 +45,27 @@ static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash) {
     hash->bytes = 0;
 }
 
-/** Perform one SHA-256 transformation, processing a 64-byte chunk. */
-static void secp256k1_sha256_transform(uint32_t* s, const unsigned char* chunk) {
+/** Perform one SHA-256 transformation, processing 16 big endian 32-bit words. */
+static void secp256k1_sha256_transform(uint32_t* s, const uint32_t* chunk) {
     uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
     uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
 
-    Round(a, b, c, d, e, f, g, h, 0x428a2f98, w0 = ReadBE32(chunk + 0));
-    Round(h, a, b, c, d, e, f, g, 0x71374491, w1 = ReadBE32(chunk + 4));
-    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w2 = ReadBE32(chunk + 8));
-    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5, w3 = ReadBE32(chunk + 12));
-    Round(e, f, g, h, a, b, c, d, 0x3956c25b, w4 = ReadBE32(chunk + 16));
-    Round(d, e, f, g, h, a, b, c, 0x59f111f1, w5 = ReadBE32(chunk + 20));
-    Round(c, d, e, f, g, h, a, b, 0x923f82a4, w6 = ReadBE32(chunk + 24));
-    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5, w7 = ReadBE32(chunk + 28));
-    Round(a, b, c, d, e, f, g, h, 0xd807aa98, w8 = ReadBE32(chunk + 32));
-    Round(h, a, b, c, d, e, f, g, 0x12835b01, w9 = ReadBE32(chunk + 36));
-    Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = ReadBE32(chunk + 40));
-    Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = ReadBE32(chunk + 44));
-    Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = ReadBE32(chunk + 48));
-    Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = ReadBE32(chunk + 52));
-    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = ReadBE32(chunk + 56));
-    Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = ReadBE32(chunk + 60));
+    Round(a, b, c, d, e, f, g, h, 0x428a2f98, w0 = BE32(chunk[0]));
+    Round(h, a, b, c, d, e, f, g, 0x71374491, w1 = BE32(chunk[1]));
+    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w2 = BE32(chunk[2]));
+    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5, w3 = BE32(chunk[3]));
+    Round(e, f, g, h, a, b, c, d, 0x3956c25b, w4 = BE32(chunk[4]));
+    Round(d, e, f, g, h, a, b, c, 0x59f111f1, w5 = BE32(chunk[5]));
+    Round(c, d, e, f, g, h, a, b, 0x923f82a4, w6 = BE32(chunk[6]));
+    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5, w7 = BE32(chunk[7]));
+    Round(a, b, c, d, e, f, g, h, 0xd807aa98, w8 = BE32(chunk[8]));
+    Round(h, a, b, c, d, e, f, g, 0x12835b01, w9 = BE32(chunk[9]));
+    Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = BE32(chunk[10]));
+    Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = BE32(chunk[11]));
+    Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = BE32(chunk[12]));
+    Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = BE32(chunk[13]));
+    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = BE32(chunk[14]));
+    Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = BE32(chunk[15]));
 
     Round(a, b, c, d, e, f, g, h, 0xe49b69c1, w0 += sigma1(w14) + w9 + sigma0(w1));
     Round(h, a, b, c, d, e, f, g, 0xefbe4786, w1 += sigma1(w15) + w10 + sigma0(w2));
@@ -125,55 +129,40 @@ static void secp256k1_sha256_transform(uint32_t* s, const unsigned char* chunk)
 }
 
 static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char *data, size_t len) {
-    const unsigned char* end = data + len;
-    size_t bufsize = hash->bytes % 64;
-    if (bufsize && bufsize + len >= 64) {
-        // Fill the buffer, and process it.
-        memcpy(hash->buf + bufsize, data, 64 - bufsize);
-        hash->bytes += 64 - bufsize;
+    size_t bufsize = hash->bytes & 0x3F;
+    hash->bytes += len;
+    while (bufsize + len >= 64) {
+        /* Fill the buffer, and process it. */
+        memcpy(((unsigned char*)hash->buf) + bufsize, data, 64 - bufsize);
         data += 64 - bufsize;
+        len -= 64 - bufsize;
         secp256k1_sha256_transform(hash->s, hash->buf);
         bufsize = 0;
     }
-    while (end >= data + 64) {
-        // Process full chunks directly from the source.
-        secp256k1_sha256_transform(hash->s, data);
-        hash->bytes += 64;
-        data += 64;
-    }
-    if (end > data) {
-        // Fill the buffer with what remains.
-        memcpy(hash->buf + bufsize, data, end - data);
-        hash->bytes += end - data;
+    if (len) {
+        /* Fill the buffer with what remains. */
+        memcpy(((unsigned char*)hash->buf) + bufsize, data, len);
     }
 }
 
 static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *out32) {
     static const unsigned char pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-    unsigned char sizedesc[8];
-    WriteBE32(sizedesc, hash->bytes >> 29);
-    WriteBE32(sizedesc + 4, hash->bytes << 3);
+    uint32_t sizedesc[2];
+    uint32_t out[8];
+    int i = 0;
+    sizedesc[0] = BE32(hash->bytes >> 29);
+    sizedesc[1] = BE32(hash->bytes << 3);
     secp256k1_sha256_write(hash, pad, 1 + ((119 - (hash->bytes % 64)) % 64));
-    secp256k1_sha256_write(hash, sizedesc, 8);
-    WriteBE32(out32, hash->s[0]);
-    hash->s[0] = 0;
-    WriteBE32(out32 + 4, hash->s[1]);
-    hash->s[1] = 0;
-    WriteBE32(out32 + 8, hash->s[2]);
-    hash->s[2] = 0;
-    WriteBE32(out32 + 12, hash->s[3]);
-    hash->s[3] = 0;
-    WriteBE32(out32 + 16, hash->s[4]);
-    hash->s[4] = 0;
-    WriteBE32(out32 + 20, hash->s[5]);
-    hash->s[5] = 0;
-    WriteBE32(out32 + 24, hash->s[6]);
-    hash->s[6] = 0;
-    WriteBE32(out32 + 28, hash->s[7]);
-    hash->s[7] = 0;
+    secp256k1_sha256_write(hash, (const unsigned char*)sizedesc, 8);
+    for (i = 0; i < 8; i++) {
+        out[i] = BE32(hash->s[i]);
+        hash->s[i] = 0;
+    }
+    memcpy(out32, (const unsigned char*)out, 32);
 }
 
 static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, const unsigned char *key, size_t keylen) {
+    int n;
     unsigned char rkey[64];
     if (keylen <= 64) {
         memcpy(rkey, key, keylen);
@@ -187,12 +176,12 @@ static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, cons
     }
 
     secp256k1_sha256_initialize(&hash->outer);
-    for (int n = 0; n < 64; n++)
+    for (n = 0; n < 64; n++)
         rkey[n] ^= 0x5c;
     secp256k1_sha256_write(&hash->outer, rkey, 64);
 
     secp256k1_sha256_initialize(&hash->inner);
-    for (int n = 0; n < 64; n++)
+    for (n = 0; n < 64; n++)
         rkey[n] ^= 0x5c ^ 0x36;
     secp256k1_sha256_write(&hash->inner, rkey, 64);
     memset(rkey, 0, 64);
@@ -211,19 +200,22 @@ static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256_t *hash, unsign
 }
 
 
-static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen, const unsigned char *msg, size_t msglen) {
+static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen, const unsigned char *msg, size_t msglen, const unsigned char *rnd, size_t rndlen) {
+    secp256k1_hmac_sha256_t hmac;
     static const unsigned char zero[1] = {0x00};
     static const unsigned char one[1] = {0x01};
 
     memset(rng->v, 0x01, 32);
     memset(rng->k, 0x00, 32);
 
-    secp256k1_hmac_sha256_t hmac;
     secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
     secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
     secp256k1_hmac_sha256_write(&hmac, zero, 1);
     secp256k1_hmac_sha256_write(&hmac, key, keylen);
     secp256k1_hmac_sha256_write(&hmac, msg, msglen);
+    if (rnd && rndlen) {
+        secp256k1_hmac_sha256_write(&hmac, rnd, rndlen);
+    }
     secp256k1_hmac_sha256_finalize(&hmac, rng->k);
     secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
     secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
@@ -234,6 +226,9 @@ static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha2
     secp256k1_hmac_sha256_write(&hmac, one, 1);
     secp256k1_hmac_sha256_write(&hmac, key, keylen);
     secp256k1_hmac_sha256_write(&hmac, msg, msglen);
+    if (rnd && rndlen) {
+        secp256k1_hmac_sha256_write(&hmac, rnd, rndlen);
+    }
     secp256k1_hmac_sha256_finalize(&hmac, rng->k);
     secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
     secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
@@ -256,10 +251,10 @@ static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256
 
     while (outlen > 0) {
         secp256k1_hmac_sha256_t hmac;
+        int now = outlen;
         secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
         secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
         secp256k1_hmac_sha256_finalize(&hmac, rng->v);
-        int now = outlen;
         if (now > 32) {
             now = 32;
         }
diff --git a/src/num_gmp_impl.h b/src/num_gmp_impl.h
index 19d474e59..3e4b92d32 100644
--- a/src/num_gmp_impl.h
+++ b/src/num_gmp_impl.h
@@ -29,10 +29,10 @@ static void secp256k1_num_copy(secp256k1_num_t *r, const secp256k1_num_t *a) {
 static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const secp256k1_num_t *a) {
     unsigned char tmp[65];
     int len = 0;
+    int shift = 0;
     if (a->limbs>1 || a->data[0] != 0) {
         len = mpn_get_str(tmp, 256, (mp_limb_t*)a->data, a->limbs);
     }
-    int shift = 0;
     while (shift < len && tmp[shift] == 0) shift++;
     VERIFY_CHECK(len-shift <= (int)rlen);
     memset(r, 0, rlen - len + shift);
@@ -43,9 +43,10 @@ static void secp256k1_num_get_bin(unsigned char *r, unsigned int rlen, const sec
 }
 
 static void secp256k1_num_set_bin(secp256k1_num_t *r, const unsigned char *a, unsigned int alen) {
+    int len;
     VERIFY_CHECK(alen > 0);
     VERIFY_CHECK(alen <= 64);
-    int len = mpn_set_str(r->data, a, alen, 256);
+    len = mpn_set_str(r->data, a, alen, 256);
     if (len == 0) {
         r->data[0] = 0;
         len = 1;
@@ -91,6 +92,12 @@ static void secp256k1_num_mod(secp256k1_num_t *r, const secp256k1_num_t *m) {
 }
 
 static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *m) {
+    int i;
+    mp_limb_t g[NUM_LIMBS+1];
+    mp_limb_t u[NUM_LIMBS+1];
+    mp_limb_t v[NUM_LIMBS+1];
+    mp_size_t sn;
+    mp_size_t gn;
     secp256k1_num_sanity(a);
     secp256k1_num_sanity(m);
 
@@ -106,15 +113,12 @@ static void secp256k1_num_mod_inverse(secp256k1_num_t *r, const secp256k1_num_t
      */
     VERIFY_CHECK(m->limbs <= NUM_LIMBS);
     VERIFY_CHECK(m->data[m->limbs-1] != 0);
-    mp_limb_t g[NUM_LIMBS+1];
-    mp_limb_t u[NUM_LIMBS+1];
-    mp_limb_t v[NUM_LIMBS+1];
-    for (int i=0; i < m->limbs; i++) {
+    for (i = 0; i < m->limbs; i++) {
         u[i] = (i < a->limbs) ? a->data[i] : 0;
         v[i] = m->data[i];
     }
-    mp_size_t sn = NUM_LIMBS+1;
-    mp_size_t gn = mpn_gcdext(g, r->data, &sn, u, m->limbs, v, m->limbs);
+    sn = NUM_LIMBS+1;
+    gn = mpn_gcdext(g, r->data, &sn, u, m->limbs, v, m->limbs);
     VERIFY_CHECK(gn == 1);
     VERIFY_CHECK(g[0] == 1);
     r->neg = a->neg ^ m->neg;
@@ -183,10 +187,10 @@ static void secp256k1_num_sub(secp256k1_num_t *r, const secp256k1_num_t *a, cons
 }
 
 static void secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, const secp256k1_num_t *b) {
+    mp_limb_t tmp[2*NUM_LIMBS+1];
     secp256k1_num_sanity(a);
     secp256k1_num_sanity(b);
 
-    mp_limb_t tmp[2*NUM_LIMBS+1];
     VERIFY_CHECK(a->limbs + b->limbs <= 2*NUM_LIMBS+1);
     if ((a->limbs==1 && a->data[0]==0) || (b->limbs==1 && b->data[0]==0)) {
         r->limbs = 1;
@@ -207,13 +211,14 @@ static void secp256k1_num_mul(secp256k1_num_t *r, const secp256k1_num_t *a, cons
 }
 
 static void secp256k1_num_shift(secp256k1_num_t *r, int bits) {
+    int i;
     if (bits % GMP_NUMB_BITS) {
-        // Shift within limbs.
+        /* Shift within limbs. */
         mpn_rshift(r->data, r->data, r->limbs, bits % GMP_NUMB_BITS);
     }
     if (bits >= GMP_NUMB_BITS) {
-        // Shift full limbs.
-        for (int i = 0; i < r->limbs; i++) {
+        /* Shift full limbs. */
+        for (i = 0; i < r->limbs; i++) {
             int index = i + (bits / GMP_NUMB_BITS);
             if (index < r->limbs && index < 2*NUM_LIMBS) {
                 r->data[i] = r->data[index];
diff --git a/src/scalar.h b/src/scalar.h
index 2f5ba0d44..f5d09f8d4 100644
--- a/src/scalar.h
+++ b/src/scalar.h
@@ -21,9 +21,6 @@
 #error "Please select scalar implementation"
 #endif
 
-static void secp256k1_scalar_start(void);
-static void secp256k1_scalar_stop(void);
-
 /** Clear a scalar to prevent the leak of sensitive data. */
 static void secp256k1_scalar_clear(secp256k1_scalar_t *r);
 
@@ -83,9 +80,9 @@ static void secp256k1_scalar_order_get_num(secp256k1_num_t *r);
 /** Compare two scalars. */
 static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
 
-static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a);
-
 #ifdef USE_ENDOMORPHISM
+/** Find r1 and r2 such that r1+r2*2^128 = a. */
+static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a);
 /** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (see secp256k1_gej_mul_lambda). */
 static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a);
 #endif
diff --git a/src/scalar_4x64.h b/src/scalar_4x64.h
index 5a751c686..82899aa7b 100644
--- a/src/scalar_4x64.h
+++ b/src/scalar_4x64.h
@@ -14,4 +14,6 @@ typedef struct {
     uint64_t d[4];
 } secp256k1_scalar_t;
 
+#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{((uint64_t)(d1)) << 32 | (d0), ((uint64_t)(d3)) << 32 | (d2), ((uint64_t)(d5)) << 32 | (d4), ((uint64_t)(d7)) << 32 | (d6)}}
+
 #endif
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index d14477522..ff365292f 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -7,8 +7,6 @@
 #ifndef _SECP256K1_SCALAR_REPR_IMPL_H_
 #define _SECP256K1_SCALAR_REPR_IMPL_H_
 
-typedef unsigned __int128 uint128_t;
-
 /* Limbs of the secp256k1 order. */
 #define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL)
 #define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL)
@@ -69,8 +67,9 @@ SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scal
 }
 
 SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, unsigned int overflow) {
+    uint128_t t;
     VERIFY_CHECK(overflow <= 1);
-    uint128_t t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0;
+    t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0;
     r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1;
     r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
@@ -82,6 +81,7 @@ SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, unsig
 }
 
 static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    int overflow;
     uint128_t t = (uint128_t)a->d[0] + b->d[0];
     r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)a->d[1] + b->d[1];
@@ -90,15 +90,16 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t
     r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)a->d[3] + b->d[3];
     r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
-    int overflow = t + secp256k1_scalar_check_overflow(r);
+    overflow = t + secp256k1_scalar_check_overflow(r);
     VERIFY_CHECK(overflow == 0 || overflow == 1);
     secp256k1_scalar_reduce(r, overflow);
     return overflow;
 }
 
 static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
+    uint128_t t;
     VERIFY_CHECK(bit < 256);
-    uint128_t t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
+    t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
     r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
     r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
@@ -113,11 +114,12 @@ static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
 }
 
 static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
+    int over;
     r->d[0] = (uint64_t)b32[31] | (uint64_t)b32[30] << 8 | (uint64_t)b32[29] << 16 | (uint64_t)b32[28] << 24 | (uint64_t)b32[27] << 32 | (uint64_t)b32[26] << 40 | (uint64_t)b32[25] << 48 | (uint64_t)b32[24] << 56;
     r->d[1] = (uint64_t)b32[23] | (uint64_t)b32[22] << 8 | (uint64_t)b32[21] << 16 | (uint64_t)b32[20] << 24 | (uint64_t)b32[19] << 32 | (uint64_t)b32[18] << 40 | (uint64_t)b32[17] << 48 | (uint64_t)b32[16] << 56;
     r->d[2] = (uint64_t)b32[15] | (uint64_t)b32[14] << 8 | (uint64_t)b32[13] << 16 | (uint64_t)b32[12] << 24 | (uint64_t)b32[11] << 32 | (uint64_t)b32[10] << 40 | (uint64_t)b32[9] << 48 | (uint64_t)b32[8] << 56;
     r->d[3] = (uint64_t)b32[7] | (uint64_t)b32[6] << 8 | (uint64_t)b32[5] << 16 | (uint64_t)b32[4] << 24 | (uint64_t)b32[3] << 32 | (uint64_t)b32[2] << 40 | (uint64_t)b32[1] << 48 | (uint64_t)b32[0] << 56;
-    int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
+    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
     if (overflow) {
         *overflow = over;
     }
@@ -195,16 +197,16 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 
 /** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define muladd2(a,b) { \
-    uint64_t tl, th; \
+    uint64_t tl, th, th2, tl2; \
     { \
         uint128_t t = (uint128_t)a * b; \
         th = t >> 64;               /* at most 0xFFFFFFFFFFFFFFFE */ \
         tl = t; \
     } \
-    uint64_t th2 = th + th;         /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
+    th2 = th + th;                  /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
     c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
     VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
-    uint64_t tl2 = tl + tl;         /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
+    tl2 = tl + tl;                  /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
     th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFFFFFFFFFF */ \
     c0 += tl2;                      /* overflow is handled on the next line */ \
     th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
@@ -217,8 +219,9 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 
 /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define sumadd(a) { \
+    unsigned int over; \
     c0 += (a);                  /* overflow is handled on the next line */ \
-    unsigned int over = (c0 < (a)) ? 1 : 0; \
+    over = (c0 < (a)) ? 1 : 0; \
     c1 += over;                 /* overflow is handled on the next line */ \
     c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
 }
@@ -248,63 +251,301 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 }
 
 static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint64_t *l) {
-    uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
+#ifdef USE_ASM_X86_64
+    /* Reduce 512 bits into 385. */
+    uint64_t m0, m1, m2, m3, m4, m5, m6;
+    uint64_t p0, p1, p2, p3, p4;
+    uint64_t c;
+
+    __asm__ __volatile__(
+    /* Preload. */
+    "movq 32(%%rsi), %%r11\n"
+    "movq 40(%%rsi), %%r12\n"
+    "movq 48(%%rsi), %%r13\n"
+    "movq 56(%%rsi), %%r14\n"
+    /* Initialize r8,r9,r10 */
+    "movq 0(%%rsi), %%r8\n"
+    "movq $0, %%r9\n"
+    "movq $0, %%r10\n"
+    /* (r8,r9) += n0 * c0 */
+    "movq %8, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    /* extract m0 */
+    "movq %%r8, %q0\n"
+    "movq $0, %%r8\n"
+    /* (r9,r10) += l1 */
+    "addq 8(%%rsi), %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r9,r10,r8) += n1 * c0 */
+    "movq %8, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += n0 * c1 */
+    "movq %9, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* extract m1 */
+    "movq %%r9, %q1\n"
+    "movq $0, %%r9\n"
+    /* (r10,r8,r9) += l2 */
+    "addq 16(%%rsi), %%r10\n"
+    "adcq $0, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += n2 * c0 */
+    "movq %8, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += n1 * c1 */
+    "movq %9, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += n0 */
+    "addq %%r11, %%r10\n"
+    "adcq $0, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* extract m2 */
+    "movq %%r10, %q2\n"
+    "movq $0, %%r10\n"
+    /* (r8,r9,r10) += l3 */
+    "addq 24(%%rsi), %%r8\n"
+    "adcq $0, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += n3 * c0 */
+    "movq %8, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += n2 * c1 */
+    "movq %9, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += n1 */
+    "addq %%r12, %%r8\n"
+    "adcq $0, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* extract m3 */
+    "movq %%r8, %q3\n"
+    "movq $0, %%r8\n"
+    /* (r9,r10,r8) += n3 * c1 */
+    "movq %9, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += n2 */
+    "addq %%r13, %%r9\n"
+    "adcq $0, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* extract m4 */
+    "movq %%r9, %q4\n"
+    /* (r10,r8) += n3 */
+    "addq %%r14, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* extract m5 */
+    "movq %%r10, %q5\n"
+    /* extract m6 */
+    "movq %%r8, %q6\n"
+    : "=g"(m0), "=g"(m1), "=g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
+    : "S"(l), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc");
 
-    /* 160 bit accumulator. */
-    uint64_t c0, c1;
-    uint32_t c2;
+    /* Reduce 385 bits into 258. */
+    __asm__ __volatile__(
+    /* Preload */
+    "movq %q9, %%r11\n"
+    "movq %q10, %%r12\n"
+    "movq %q11, %%r13\n"
+    /* Initialize (r8,r9,r10) */
+    "movq %q5, %%r8\n"
+    "movq $0, %%r9\n"
+    "movq $0, %%r10\n"
+    /* (r8,r9) += m4 * c0 */
+    "movq %12, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    /* extract p0 */
+    "movq %%r8, %q0\n"
+    "movq $0, %%r8\n"
+    /* (r9,r10) += m1 */
+    "addq %q6, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r9,r10,r8) += m5 * c0 */
+    "movq %12, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += m4 * c1 */
+    "movq %13, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* extract p1 */
+    "movq %%r9, %q1\n"
+    "movq $0, %%r9\n"
+    /* (r10,r8,r9) += m2 */
+    "addq %q7, %%r10\n"
+    "adcq $0, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += m6 * c0 */
+    "movq %12, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += m5 * c1 */
+    "movq %13, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += m4 */
+    "addq %%r11, %%r10\n"
+    "adcq $0, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* extract p2 */
+    "movq %%r10, %q2\n"
+    /* (r8,r9) += m3 */
+    "addq %q8, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r8,r9) += m6 * c1 */
+    "movq %13, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    /* (r8,r9) += m5 */
+    "addq %%r12, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* extract p3 */
+    "movq %%r8, %q3\n"
+    /* (r9) += m6 */
+    "addq %%r13, %%r9\n"
+    /* extract p4 */
+    "movq %%r9, %q4\n"
+    : "=&g"(p0), "=&g"(p1), "=&g"(p2), "=g"(p3), "=g"(p4)
+    : "g"(m0), "g"(m1), "g"(m2), "g"(m3), "g"(m4), "g"(m5), "g"(m6), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "cc");
+
+    /* Reduce 258 bits into 256. */
+    __asm__ __volatile__(
+    /* Preload */
+    "movq %q5, %%r10\n"
+    /* (rax,rdx) = p4 * c0 */
+    "movq %7, %%rax\n"
+    "mulq %%r10\n"
+    /* (rax,rdx) += p0 */
+    "addq %q1, %%rax\n"
+    "adcq $0, %%rdx\n"
+    /* extract r0 */
+    "movq %%rax, 0(%q6)\n"
+    /* Move to (r8,r9) */
+    "movq %%rdx, %%r8\n"
+    "movq $0, %%r9\n"
+    /* (r8,r9) += p1 */
+    "addq %q2, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r8,r9) += p4 * c1 */
+    "movq %8, %%rax\n"
+    "mulq %%r10\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    /* Extract r1 */
+    "movq %%r8, 8(%q6)\n"
+    "movq $0, %%r8\n"
+    /* (r9,r8) += p4 */
+    "addq %%r10, %%r9\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r8) += p2 */
+    "addq %q3, %%r9\n"
+    "adcq $0, %%r8\n"
+    /* Extract r2 */
+    "movq %%r9, 16(%q6)\n"
+    "movq $0, %%r9\n"
+    /* (r8,r9) += p3 */
+    "addq %q4, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* Extract r3 */
+    "movq %%r8, 24(%q6)\n"
+    /* Extract c */
+    "movq %%r9, %q0\n"
+    : "=g"(c)
+    : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
+#else
+    uint128_t c;
+    uint64_t c0, c1, c2;
+    uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
+    uint64_t m0, m1, m2, m3, m4, m5;
+    uint32_t m6;
+    uint64_t p0, p1, p2, p3;
+    uint32_t p4;
 
     /* Reduce 512 bits into 385. */
     /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
     c0 = l[0]; c1 = 0; c2 = 0;
     muladd_fast(n0, SECP256K1_N_C_0);
-    uint64_t m0; extract_fast(m0);
+    extract_fast(m0);
     sumadd_fast(l[1]);
     muladd(n1, SECP256K1_N_C_0);
     muladd(n0, SECP256K1_N_C_1);
-    uint64_t m1; extract(m1);
+    extract(m1);
     sumadd(l[2]);
     muladd(n2, SECP256K1_N_C_0);
     muladd(n1, SECP256K1_N_C_1);
     sumadd(n0);
-    uint64_t m2; extract(m2);
+    extract(m2);
     sumadd(l[3]);
     muladd(n3, SECP256K1_N_C_0);
     muladd(n2, SECP256K1_N_C_1);
     sumadd(n1);
-    uint64_t m3; extract(m3);
+    extract(m3);
     muladd(n3, SECP256K1_N_C_1);
     sumadd(n2);
-    uint64_t m4; extract(m4);
+    extract(m4);
     sumadd_fast(n3);
-    uint64_t m5; extract_fast(m5);
+    extract_fast(m5);
     VERIFY_CHECK(c0 <= 1);
-    uint32_t m6 = c0;
+    m6 = c0;
 
     /* Reduce 385 bits into 258. */
     /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
     c0 = m0; c1 = 0; c2 = 0;
     muladd_fast(m4, SECP256K1_N_C_0);
-    uint64_t p0; extract_fast(p0);
+    extract_fast(p0);
     sumadd_fast(m1);
     muladd(m5, SECP256K1_N_C_0);
     muladd(m4, SECP256K1_N_C_1);
-    uint64_t p1; extract(p1);
+    extract(p1);
     sumadd(m2);
     muladd(m6, SECP256K1_N_C_0);
     muladd(m5, SECP256K1_N_C_1);
     sumadd(m4);
-    uint64_t p2; extract(p2);
+    extract(p2);
     sumadd_fast(m3);
     muladd_fast(m6, SECP256K1_N_C_1);
     sumadd_fast(m5);
-    uint64_t p3; extract_fast(p3);
-    uint32_t p4 = c0 + m6;
+    extract_fast(p3);
+    p4 = c0 + m6;
     VERIFY_CHECK(p4 <= 2);
 
     /* Reduce 258 bits into 256. */
     /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
-    uint128_t c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
+    c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
     r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
     c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
     r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
@@ -312,12 +553,146 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint64_t *l
     r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
     c += p3;
     r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
+#endif
 
     /* Final reduction of r. */
     secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
 }
 
 static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+#ifdef USE_ASM_X86_64
+    const uint64_t *pb = b->d;
+    __asm__ __volatile__(
+    /* Preload */
+    "movq 0(%%rdi), %%r15\n"
+    "movq 8(%%rdi), %%rbx\n"
+    "movq 16(%%rdi), %%rcx\n"
+    "movq 0(%%rdx), %%r11\n"
+    "movq 8(%%rdx), %%r12\n"
+    "movq 16(%%rdx), %%r13\n"
+    "movq 24(%%rdx), %%r14\n"
+    /* (rax,rdx) = a0 * b0 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r11\n"
+    /* Extract l0 */
+    "movq %%rax, 0(%%rsi)\n"
+    /* (r8,r9,r10) = (rdx) */
+    "movq %%rdx, %%r8\n"
+    "xorq %%r9, %%r9\n"
+    "xorq %%r10, %%r10\n"
+    /* (r8,r9,r10) += a0 * b1 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += a1 * b0 */
+    "movq %%rbx, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* Extract l1 */
+    "movq %%r8, 8(%%rsi)\n"
+    "xorq %%r8, %%r8\n"
+    /* (r9,r10,r8) += a0 * b2 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += a1 * b1 */
+    "movq %%rbx, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += a2 * b0 */
+    "movq %%rcx, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* Extract l2 */
+    "movq %%r9, 16(%%rsi)\n"
+    "xorq %%r9, %%r9\n"
+    /* (r10,r8,r9) += a0 * b3 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* Preload a3 */
+    "movq 24(%%rdi), %%r15\n"
+    /* (r10,r8,r9) += a1 * b2 */
+    "movq %%rbx, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += a2 * b1 */
+    "movq %%rcx, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += a3 * b0 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r11\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* Extract l3 */
+    "movq %%r10, 24(%%rsi)\n"
+    "xorq %%r10, %%r10\n"
+    /* (r8,r9,r10) += a1 * b3 */
+    "movq %%rbx, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += a2 * b2 */
+    "movq %%rcx, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += a3 * b1 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* Extract l4 */
+    "movq %%r8, 32(%%rsi)\n"
+    "xorq %%r8, %%r8\n"
+    /* (r9,r10,r8) += a2 * b3 */
+    "movq %%rcx, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += a3 * b2 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* Extract l5 */
+    "movq %%r9, 40(%%rsi)\n"
+    /* (r10,r8) += a3 * b3 */
+    "movq %%r15, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    /* Extract l6 */
+    "movq %%r10, 48(%%rsi)\n"
+    /* Extract l7 */
+    "movq %%r8, 56(%%rsi)\n"
+    : "+d"(pb)
+    : "S"(l), "D"(a->d)
+    : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory");
+#else
     /* 160 bit accumulator. */
     uint64_t c0 = 0, c1 = 0;
     uint32_t c2 = 0;
@@ -348,9 +723,119 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar_t *a,
     extract_fast(l[6]);
     VERIFY_CHECK(c1 <= 0);
     l[7] = c0;
+#endif
 }
 
 static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar_t *a) {
+#ifdef USE_ASM_X86_64
+    __asm__ __volatile__(
+    /* Preload */
+    "movq 0(%%rdi), %%r11\n"
+    "movq 8(%%rdi), %%r12\n"
+    "movq 16(%%rdi), %%r13\n"
+    "movq 24(%%rdi), %%r14\n"
+    /* (rax,rdx) = a0 * a0 */
+    "movq %%r11, %%rax\n"
+    "mulq %%r11\n"
+    /* Extract l0 */
+    "movq %%rax, 0(%%rsi)\n"
+    /* (r8,r9,r10) = (rdx,0) */
+    "movq %%rdx, %%r8\n"
+    "xorq %%r9, %%r9\n"
+    "xorq %%r10, %%r10\n"
+    /* (r8,r9,r10) += 2 * a0 * a1 */
+    "movq %%r11, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* Extract l1 */
+    "movq %%r8, 8(%%rsi)\n"
+    "xorq %%r8, %%r8\n"
+    /* (r9,r10,r8) += 2 * a0 * a2 */
+    "movq %%r11, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* (r9,r10,r8) += a1 * a1 */
+    "movq %%r12, %%rax\n"
+    "mulq %%r12\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* Extract l2 */
+    "movq %%r9, 16(%%rsi)\n"
+    "xorq %%r9, %%r9\n"
+    /* (r10,r8,r9) += 2 * a0 * a3 */
+    "movq %%r11, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* (r10,r8,r9) += 2 * a1 * a2 */
+    "movq %%r12, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    "adcq $0, %%r9\n"
+    /* Extract l3 */
+    "movq %%r10, 24(%%rsi)\n"
+    "xorq %%r10, %%r10\n"
+    /* (r8,r9,r10) += 2 * a1 * a3 */
+    "movq %%r12, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* (r8,r9,r10) += a2 * a2 */
+    "movq %%r13, %%rax\n"
+    "mulq %%r13\n"
+    "addq %%rax, %%r8\n"
+    "adcq %%rdx, %%r9\n"
+    "adcq $0, %%r10\n"
+    /* Extract l4 */
+    "movq %%r8, 32(%%rsi)\n"
+    "xorq %%r8, %%r8\n"
+    /* (r9,r10,r8) += 2 * a2 * a3 */
+    "movq %%r13, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    "addq %%rax, %%r9\n"
+    "adcq %%rdx, %%r10\n"
+    "adcq $0, %%r8\n"
+    /* Extract l5 */
+    "movq %%r9, 40(%%rsi)\n"
+    /* (r10,r8) += a3 * a3 */
+    "movq %%r14, %%rax\n"
+    "mulq %%r14\n"
+    "addq %%rax, %%r10\n"
+    "adcq %%rdx, %%r8\n"
+    /* Extract l6 */
+    "movq %%r10, 48(%%rsi)\n"
+    /* Extract l7 */
+    "movq %%r8, 56(%%rsi)\n"
+    :
+    : "S"(l), "D"(a->d)
+    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory");
+#else
     /* 160 bit accumulator. */
     uint64_t c0 = 0, c1 = 0;
     uint32_t c2 = 0;
@@ -375,6 +860,7 @@ static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar_t *a)
     extract_fast(l[6]);
     VERIFY_CHECK(c1 == 0);
     l[7] = c0;
+#endif
 }
 
 #undef sumadd
@@ -413,12 +899,15 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, con
 }
 
 SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift) {
-    VERIFY_CHECK(shift >= 256);
     uint64_t l[8];
+    unsigned int shiftlimbs;
+    unsigned int shiftlow;
+    unsigned int shifthigh;
+    VERIFY_CHECK(shift >= 256);
     secp256k1_scalar_mul_512(l, a, b);
-    unsigned int shiftlimbs = shift >> 6;
-    unsigned int shiftlow = shift & 0x3F;
-    unsigned int shifthigh = 64 - shiftlow;
+    shiftlimbs = shift >> 6;
+    shiftlow = shift & 0x3F;
+    shifthigh = 64 - shiftlow;
     r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
diff --git a/src/scalar_8x32.h b/src/scalar_8x32.h
index f70328cfc..f17017e24 100644
--- a/src/scalar_8x32.h
+++ b/src/scalar_8x32.h
@@ -14,4 +14,6 @@ typedef struct {
     uint32_t d[8];
 } secp256k1_scalar_t;
 
+#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7)}}
+
 #endif
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 915cbcddb..22b31d411 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -91,8 +91,9 @@ SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scal
 }
 
 SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) {
+    uint64_t t;
     VERIFY_CHECK(overflow <= 1);
-    uint64_t t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
+    t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
     r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
     t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
     r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
@@ -112,6 +113,7 @@ SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint3
 }
 
 static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    int overflow;
     uint64_t t = (uint64_t)a->d[0] + b->d[0];
     r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
     t += (uint64_t)a->d[1] + b->d[1];
@@ -128,15 +130,16 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t
     r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
     t += (uint64_t)a->d[7] + b->d[7];
     r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
-    int overflow = t + secp256k1_scalar_check_overflow(r);
+    overflow = t + secp256k1_scalar_check_overflow(r);
     VERIFY_CHECK(overflow == 0 || overflow == 1);
     secp256k1_scalar_reduce(r, overflow);
     return overflow;
 }
 
 static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
+    uint64_t t;
     VERIFY_CHECK(bit < 256);
-    uint64_t t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
+    t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
     r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
     t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
     r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
@@ -159,6 +162,7 @@ static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
 }
 
 static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
+    int over;
     r->d[0] = (uint32_t)b32[31] | (uint32_t)b32[30] << 8 | (uint32_t)b32[29] << 16 | (uint32_t)b32[28] << 24;
     r->d[1] = (uint32_t)b32[27] | (uint32_t)b32[26] << 8 | (uint32_t)b32[25] << 16 | (uint32_t)b32[24] << 24;
     r->d[2] = (uint32_t)b32[23] | (uint32_t)b32[22] << 8 | (uint32_t)b32[21] << 16 | (uint32_t)b32[20] << 24;
@@ -167,7 +171,7 @@ static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char
     r->d[5] = (uint32_t)b32[11] | (uint32_t)b32[10] << 8 | (uint32_t)b32[9] << 16 | (uint32_t)b32[8] << 24;
     r->d[6] = (uint32_t)b32[7] | (uint32_t)b32[6] << 8 | (uint32_t)b32[5] << 16 | (uint32_t)b32[4] << 24;
     r->d[7] = (uint32_t)b32[3] | (uint32_t)b32[2] << 8 | (uint32_t)b32[1] << 16 | (uint32_t)b32[0] << 24;
-    int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
+    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
     if (overflow) {
         *overflow = over;
     }
@@ -263,16 +267,16 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 
 /** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define muladd2(a,b) { \
-    uint32_t tl, th; \
+    uint32_t tl, th, th2, tl2; \
     { \
         uint64_t t = (uint64_t)a * b; \
         th = t >> 32;               /* at most 0xFFFFFFFE */ \
         tl = t; \
     } \
-    uint32_t th2 = th + th;         /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
+    th2 = th + th;                  /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
     c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
     VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
-    uint32_t tl2 = tl + tl;         /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \
+    tl2 = tl + tl;                  /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \
     th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFF */ \
     c0 += tl2;                      /* overflow is handled on the next line */ \
     th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
@@ -285,8 +289,9 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 
 /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define sumadd(a) { \
+    unsigned int over; \
     c0 += (a);                  /* overflow is handled on the next line */ \
-    unsigned int over = (c0 < (a)) ? 1 : 0; \
+    over = (c0 < (a)) ? 1 : 0; \
     c1 += over;                 /* overflow is handled on the next line */ \
     c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
 }
@@ -316,7 +321,10 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 }
 
 static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) {
+    uint64_t c;
     uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15];
+    uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12;
+    uint32_t p0, p1, p2, p3, p4, p5, p6, p7, p8;
 
     /* 96 bit accumulator. */
     uint32_t c0, c1, c2;
@@ -325,115 +333,115 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l
     /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */
     c0 = l[0]; c1 = 0; c2 = 0;
     muladd_fast(n0, SECP256K1_N_C_0);
-    uint32_t m0; extract_fast(m0);
+    extract_fast(m0);
     sumadd_fast(l[1]);
     muladd(n1, SECP256K1_N_C_0);
     muladd(n0, SECP256K1_N_C_1);
-    uint32_t m1; extract(m1);
+    extract(m1);
     sumadd(l[2]);
     muladd(n2, SECP256K1_N_C_0);
     muladd(n1, SECP256K1_N_C_1);
     muladd(n0, SECP256K1_N_C_2);
-    uint32_t m2; extract(m2);
+    extract(m2);
     sumadd(l[3]);
     muladd(n3, SECP256K1_N_C_0);
     muladd(n2, SECP256K1_N_C_1);
     muladd(n1, SECP256K1_N_C_2);
     muladd(n0, SECP256K1_N_C_3);
-    uint32_t m3; extract(m3);
+    extract(m3);
     sumadd(l[4]);
     muladd(n4, SECP256K1_N_C_0);
     muladd(n3, SECP256K1_N_C_1);
     muladd(n2, SECP256K1_N_C_2);
     muladd(n1, SECP256K1_N_C_3);
     sumadd(n0);
-    uint32_t m4; extract(m4);
+    extract(m4);
     sumadd(l[5]);
     muladd(n5, SECP256K1_N_C_0);
     muladd(n4, SECP256K1_N_C_1);
     muladd(n3, SECP256K1_N_C_2);
     muladd(n2, SECP256K1_N_C_3);
     sumadd(n1);
-    uint32_t m5; extract(m5);
+    extract(m5);
     sumadd(l[6]);
     muladd(n6, SECP256K1_N_C_0);
     muladd(n5, SECP256K1_N_C_1);
     muladd(n4, SECP256K1_N_C_2);
     muladd(n3, SECP256K1_N_C_3);
     sumadd(n2);
-    uint32_t m6; extract(m6);
+    extract(m6);
     sumadd(l[7]);
     muladd(n7, SECP256K1_N_C_0);
     muladd(n6, SECP256K1_N_C_1);
     muladd(n5, SECP256K1_N_C_2);
     muladd(n4, SECP256K1_N_C_3);
     sumadd(n3);
-    uint32_t m7; extract(m7);
+    extract(m7);
     muladd(n7, SECP256K1_N_C_1);
     muladd(n6, SECP256K1_N_C_2);
     muladd(n5, SECP256K1_N_C_3);
     sumadd(n4);
-    uint32_t m8; extract(m8);
+    extract(m8);
     muladd(n7, SECP256K1_N_C_2);
     muladd(n6, SECP256K1_N_C_3);
     sumadd(n5);
-    uint32_t m9; extract(m9);
+    extract(m9);
     muladd(n7, SECP256K1_N_C_3);
     sumadd(n6);
-    uint32_t m10; extract(m10);
+    extract(m10);
     sumadd_fast(n7);
-    uint32_t m11; extract_fast(m11);
+    extract_fast(m11);
     VERIFY_CHECK(c0 <= 1);
-    uint32_t m12 = c0;
+    m12 = c0;
 
     /* Reduce 385 bits into 258. */
     /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */
     c0 = m0; c1 = 0; c2 = 0;
     muladd_fast(m8, SECP256K1_N_C_0);
-    uint32_t p0; extract_fast(p0);
+    extract_fast(p0);
     sumadd_fast(m1);
     muladd(m9, SECP256K1_N_C_0);
     muladd(m8, SECP256K1_N_C_1);
-    uint32_t p1; extract(p1);
+    extract(p1);
     sumadd(m2);
     muladd(m10, SECP256K1_N_C_0);
     muladd(m9, SECP256K1_N_C_1);
     muladd(m8, SECP256K1_N_C_2);
-    uint32_t p2; extract(p2);
+    extract(p2);
     sumadd(m3);
     muladd(m11, SECP256K1_N_C_0);
     muladd(m10, SECP256K1_N_C_1);
     muladd(m9, SECP256K1_N_C_2);
     muladd(m8, SECP256K1_N_C_3);
-    uint32_t p3; extract(p3);
+    extract(p3);
     sumadd(m4);
     muladd(m12, SECP256K1_N_C_0);
     muladd(m11, SECP256K1_N_C_1);
     muladd(m10, SECP256K1_N_C_2);
     muladd(m9, SECP256K1_N_C_3);
     sumadd(m8);
-    uint32_t p4; extract(p4);
+    extract(p4);
     sumadd(m5);
     muladd(m12, SECP256K1_N_C_1);
     muladd(m11, SECP256K1_N_C_2);
     muladd(m10, SECP256K1_N_C_3);
     sumadd(m9);
-    uint32_t p5; extract(p5);
+    extract(p5);
     sumadd(m6);
     muladd(m12, SECP256K1_N_C_2);
     muladd(m11, SECP256K1_N_C_3);
     sumadd(m10);
-    uint32_t p6; extract(p6);
+    extract(p6);
     sumadd_fast(m7);
     muladd_fast(m12, SECP256K1_N_C_3);
     sumadd_fast(m11);
-    uint32_t p7; extract_fast(p7);
-    uint32_t p8 = c0 + m12;
+    extract_fast(p7);
+    p8 = c0 + m12;
     VERIFY_CHECK(p8 <= 2);
 
     /* Reduce 258 bits into 256. */
     /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */
-    uint64_t c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
+    c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
     r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
     c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
     r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
@@ -454,7 +462,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l
     secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
 }
 
-static void secp256k1_scalar_mul_512(uint32_t l[16], const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+static void secp256k1_scalar_mul_512(uint32_t *l, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
     /* 96 bit accumulator. */
     uint32_t c0 = 0, c1 = 0, c2 = 0;
 
@@ -542,7 +550,7 @@ static void secp256k1_scalar_mul_512(uint32_t l[16], const secp256k1_scalar_t *a
     l[15] = c0;
 }
 
-static void secp256k1_scalar_sqr_512(uint32_t l[16], const secp256k1_scalar_t *a) {
+static void secp256k1_scalar_sqr_512(uint32_t *l, const secp256k1_scalar_t *a) {
     /* 96 bit accumulator. */
     uint32_t c0 = 0, c1 = 0, c2 = 0;
 
@@ -622,6 +630,7 @@ static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t
     secp256k1_scalar_reduce_512(r, l);
 }
 
+#ifdef USE_ENDOMORPHISM
 static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) {
     r1->d[0] = a->d[0];
     r1->d[1] = a->d[1];
@@ -640,18 +649,22 @@ static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_
     r2->d[6] = 0;
     r2->d[7] = 0;
 }
+#endif
 
 SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
     return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0;
 }
 
 SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift) {
-    VERIFY_CHECK(shift >= 256);
     uint32_t l[16];
+    unsigned int shiftlimbs;
+    unsigned int shiftlow;
+    unsigned int shifthigh;
+    VERIFY_CHECK(shift >= 256);
     secp256k1_scalar_mul_512(l, a, b);
-    unsigned int shiftlimbs = shift >> 5;
-    unsigned int shiftlow = shift & 0x1F;
-    unsigned int shifthigh = 32 - shiftlow;
+    shiftlimbs = shift >> 5;
+    shiftlow = shift & 0x1F;
+    shifthigh = 32 - shiftlow;
     r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 480 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[1] = shift < 480 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[2] = shift < 448 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 416 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
index 4408cce2d..3acbe264a 100644
--- a/src/scalar_impl.h
+++ b/src/scalar_impl.h
@@ -24,121 +24,6 @@
 #error "Please select scalar implementation"
 #endif
 
-typedef struct {
-#ifndef USE_NUM_NONE
-    secp256k1_num_t order;
-#endif
-#ifdef USE_ENDOMORPHISM
-    secp256k1_scalar_t minus_lambda, minus_b1, minus_b2, g1, g2;
-#endif
-} secp256k1_scalar_consts_t;
-
-static const secp256k1_scalar_consts_t *secp256k1_scalar_consts = NULL;
-
-static void secp256k1_scalar_start(void) {
-    if (secp256k1_scalar_consts != NULL)
-        return;
-
-    /* Allocate. */
-    secp256k1_scalar_consts_t *ret = (secp256k1_scalar_consts_t*)checked_malloc(sizeof(secp256k1_scalar_consts_t));
-
-#ifndef USE_NUM_NONE
-    static const unsigned char secp256k1_scalar_consts_order[] = {
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
-        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
-        0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
-        0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41
-    };
-    secp256k1_num_set_bin(&ret->order, secp256k1_scalar_consts_order, sizeof(secp256k1_scalar_consts_order));
-#endif
-#ifdef USE_ENDOMORPHISM
-    /**
-     * Lambda is a scalar which has the property for secp256k1 that point multiplication by
-     * it is efficiently computable (see secp256k1_gej_mul_lambda). */
-    static const unsigned char secp256k1_scalar_consts_lambda[32] = {
-         0x53,0x63,0xad,0x4c,0xc0,0x5c,0x30,0xe0,
-         0xa5,0x26,0x1c,0x02,0x88,0x12,0x64,0x5a,
-         0x12,0x2e,0x22,0xea,0x20,0x81,0x66,0x78,
-         0xdf,0x02,0x96,0x7c,0x1b,0x23,0xbd,0x72
-    };
-    /**
-     * "Guide to Elliptic Curve Cryptography" (Hankerson, Menezes, Vanstone) gives an algorithm
-     * (algorithm 3.74) to find k1 and k2 given k, such that k1 + k2 * lambda == k mod n, and k1
-     * and k2 have a small size.
-     * It relies on constants a1, b1, a2, b2. These constants for the value of lambda above are:
-     *
-     * - a1 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
-     * - b1 =     -{0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3}
-     * - a2 = {0x01,0x14,0xca,0x50,0xf7,0xa8,0xe2,0xf3,0xf6,0x57,0xc1,0x10,0x8d,0x9d,0x44,0xcf,0xd8}
-     * - b2 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
-     *
-     * The algorithm then computes c1 = round(b1 * k / n) and c2 = round(b2 * k / n), and gives
-     * k1 = k - (c1*a1 + c2*a2) and k2 = -(c1*b1 + c2*b2). Instead, we use modular arithmetic, and
-     * compute k1 as k - k2 * lambda, avoiding the need for constants a1 and a2.
-     *
-     * g1, g2 are precomputed constants used to replace division with a rounded multiplication
-     * when decomposing the scalar for an endomorphism-based point multiplication.
-     *
-     * The possibility of using precomputed estimates is mentioned in "Guide to Elliptic Curve
-     * Cryptography" (Hankerson, Menezes, Vanstone) in section 3.5.
-     *
-     * The derivation is described in the paper "Efficient Software Implementation of Public-Key
-     * Cryptography on Sensor Networks Using the MSP430X Microcontroller" (Gouvea, Oliveira, Lopez),
-     * Section 4.3 (here we use a somewhat higher-precision estimate):
-     * d = a1*b2 - b1*a2
-     * g1 = round((2^272)*b2/d)
-     * g2 = round((2^272)*b1/d)
-     *
-     * (Note that 'd' is also equal to the curve order here because [a1,b1] and [a2,b2] are found
-     * as outputs of the Extended Euclidean Algorithm on inputs 'order' and 'lambda').
-     */
-    static const unsigned char secp256k1_scalar_consts_minus_b1[32] = {
-        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-        0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,
-        0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3
-    };
-    static const unsigned char secp256k1_scalar_consts_b2[32] = {
-        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-        0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,
-        0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15
-    };
-    static const unsigned char secp256k1_scalar_consts_g1[32] = {
-        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-        0x00,0x00,0x00,0x00,0x00,0x00,0x30,0x86,
-        0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,
-        0x90,0xe4,0x92,0x84,0xeb,0x15,0x3d,0xab
-    };
-    static const unsigned char secp256k1_scalar_consts_g2[32] = {
-        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-        0x00,0x00,0x00,0x00,0x00,0x00,0xe4,0x43,
-        0x7e,0xd6,0x01,0x0e,0x88,0x28,0x6f,0x54,
-        0x7f,0xa9,0x0a,0xbf,0xe4,0xc4,0x22,0x12
-    };
-
-    secp256k1_scalar_set_b32(&ret->minus_lambda, secp256k1_scalar_consts_lambda, NULL);
-    secp256k1_scalar_negate(&ret->minus_lambda, &ret->minus_lambda);
-    secp256k1_scalar_set_b32(&ret->minus_b1, secp256k1_scalar_consts_minus_b1, NULL);
-    secp256k1_scalar_set_b32(&ret->minus_b2, secp256k1_scalar_consts_b2, NULL);
-    secp256k1_scalar_negate(&ret->minus_b2, &ret->minus_b2);
-    secp256k1_scalar_set_b32(&ret->g1, secp256k1_scalar_consts_g1, NULL);
-    secp256k1_scalar_set_b32(&ret->g2, secp256k1_scalar_consts_g2, NULL);
-#endif
-
-    /* Set the global pointer. */
-    secp256k1_scalar_consts = ret;
-}
-
-static void secp256k1_scalar_stop(void) {
-    if (secp256k1_scalar_consts == NULL)
-        return;
-
-    secp256k1_scalar_consts_t *c = (secp256k1_scalar_consts_t*)secp256k1_scalar_consts;
-    secp256k1_scalar_consts = NULL;
-    free(c);
-}
-
 #ifndef USE_NUM_NONE
 static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a) {
     unsigned char c[32];
@@ -146,12 +31,21 @@ static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_
     secp256k1_num_set_bin(r, c, 32);
 }
 
+/** secp256k1 curve order, see secp256k1_ecdsa_const_order_as_fe in ecdsa_impl.h */
 static void secp256k1_scalar_order_get_num(secp256k1_num_t *r) {
-    *r = secp256k1_scalar_consts->order;
+    static const unsigned char order[32] = {
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,
+        0xBA,0xAE,0xDC,0xE6,0xAF,0x48,0xA0,0x3B,
+        0xBF,0xD2,0x5E,0x8C,0xD0,0x36,0x41,0x41
+    };
+    secp256k1_num_set_bin(r, order, 32);
 }
 #endif
 
 static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) {
+    secp256k1_scalar_t *t;
+    int i;
     /* First compute x ^ (2^N - 1) for some values of N. */
     secp256k1_scalar_t x2, x3, x4, x6, x7, x8, x15, x30, x60, x120, x127;
 
@@ -175,129 +69,129 @@ static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scal
     secp256k1_scalar_mul(&x8, &x8,  x);
 
     secp256k1_scalar_sqr(&x15, &x8);
-    for (int i=0; i<6; i++)
+    for (i = 0; i < 6; i++)
         secp256k1_scalar_sqr(&x15, &x15);
     secp256k1_scalar_mul(&x15, &x15, &x7);
 
     secp256k1_scalar_sqr(&x30, &x15);
-    for (int i=0; i<14; i++)
+    for (i = 0; i < 14; i++)
         secp256k1_scalar_sqr(&x30, &x30);
     secp256k1_scalar_mul(&x30, &x30, &x15);
 
     secp256k1_scalar_sqr(&x60, &x30);
-    for (int i=0; i<29; i++)
+    for (i = 0; i < 29; i++)
         secp256k1_scalar_sqr(&x60, &x60);
     secp256k1_scalar_mul(&x60, &x60, &x30);
 
     secp256k1_scalar_sqr(&x120, &x60);
-    for (int i=0; i<59; i++)
+    for (i = 0; i < 59; i++)
         secp256k1_scalar_sqr(&x120, &x120);
     secp256k1_scalar_mul(&x120, &x120, &x60);
 
     secp256k1_scalar_sqr(&x127, &x120);
-    for (int i=0; i<6; i++)
+    for (i = 0; i < 6; i++)
         secp256k1_scalar_sqr(&x127, &x127);
     secp256k1_scalar_mul(&x127, &x127, &x7);
 
     /* Then accumulate the final result (t starts at x127). */
-    secp256k1_scalar_t *t = &x127;
-    for (int i=0; i<2; i++) /* 0 */
+    t = &x127;
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<4; i++) /* 0 */
+    for (i = 0; i < 4; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<4; i++) /* 0 */
+    for (i = 0; i < 4; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<3; i++) /* 0 */
+    for (i = 0; i < 3; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<4; i++) /* 0 */
+    for (i = 0; i < 4; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<5; i++) /* 00 */
+    for (i = 0; i < 5; i++) /* 00 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<4; i++) /* 00 */
+    for (i = 0; i < 4; i++) /* 00 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<5; i++) /* 0 */
+    for (i = 0; i < 5; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x4); /* 1111 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<3; i++) /* 00 */
+    for (i = 0; i < 3; i++) /* 00 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<4; i++) /* 000 */
+    for (i = 0; i < 4; i++) /* 000 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<10; i++) /* 0000000 */
+    for (i = 0; i < 10; i++) /* 0000000 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<4; i++) /* 0 */
+    for (i = 0; i < 4; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<9; i++) /* 0 */
+    for (i = 0; i < 9; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x8); /* 11111111 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<3; i++) /* 00 */
+    for (i = 0; i < 3; i++) /* 00 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<3; i++) /* 00 */
+    for (i = 0; i < 3; i++) /* 00 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<5; i++) /* 0 */
+    for (i = 0; i < 5; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x4); /* 1111 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<5; i++) /* 000 */
+    for (i = 0; i < 5; i++) /* 000 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<4; i++) /* 00 */
+    for (i = 0; i < 4; i++) /* 00 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<8; i++) /* 000000 */
+    for (i = 0; i < 8; i++) /* 000000 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<3; i++) /* 0 */
+    for (i = 0; i < 3; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<3; i++) /* 00 */
+    for (i = 0; i < 3; i++) /* 00 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<6; i++) /* 00000 */
+    for (i = 0; i < 6; i++) /* 00000 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<8; i++) /* 00 */
+    for (i = 0; i < 8; i++) /* 00 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(r, t, &x6); /* 111111 */
 }
@@ -307,10 +201,11 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_
     secp256k1_scalar_inverse(r, x);
 #elif defined(USE_SCALAR_INV_NUM)
     unsigned char b[32];
+    secp256k1_num_t n, m;
     secp256k1_scalar_get_b32(b, x);
-    secp256k1_num_t n;
     secp256k1_num_set_bin(&n, b, 32);
-    secp256k1_num_mod_inverse(&n, &n, &secp256k1_scalar_consts->order);
+    secp256k1_scalar_order_get_num(&m);
+    secp256k1_num_mod_inverse(&n, &n, &m);
     secp256k1_num_get_bin(b, 32, &n);
     secp256k1_scalar_set_b32(r, b, NULL);
 #else
@@ -319,16 +214,74 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_
 }
 
 #ifdef USE_ENDOMORPHISM
+/**
+ * The Secp256k1 curve has an endomorphism, where lambda * (x, y) = (beta * x, y), where
+ * lambda is {0x53,0x63,0xad,0x4c,0xc0,0x5c,0x30,0xe0,0xa5,0x26,0x1c,0x02,0x88,0x12,0x64,0x5a,
+ *            0x12,0x2e,0x22,0xea,0x20,0x81,0x66,0x78,0xdf,0x02,0x96,0x7c,0x1b,0x23,0xbd,0x72}
+ *
+ * "Guide to Elliptic Curve Cryptography" (Hankerson, Menezes, Vanstone) gives an algorithm
+ * (algorithm 3.74) to find k1 and k2 given k, such that k1 + k2 * lambda == k mod n, and k1
+ * and k2 have a small size.
+ * It relies on constants a1, b1, a2, b2. These constants for the value of lambda above are:
+ *
+ * - a1 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
+ * - b1 =     -{0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3}
+ * - a2 = {0x01,0x14,0xca,0x50,0xf7,0xa8,0xe2,0xf3,0xf6,0x57,0xc1,0x10,0x8d,0x9d,0x44,0xcf,0xd8}
+ * - b2 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
+ *
+ * The algorithm then computes c1 = round(b1 * k / n) and c2 = round(b2 * k / n), and gives
+ * k1 = k - (c1*a1 + c2*a2) and k2 = -(c1*b1 + c2*b2). Instead, we use modular arithmetic, and
+ * compute k1 as k - k2 * lambda, avoiding the need for constants a1 and a2.
+ *
+ * g1, g2 are precomputed constants used to replace division with a rounded multiplication
+ * when decomposing the scalar for an endomorphism-based point multiplication.
+ *
+ * The possibility of using precomputed estimates is mentioned in "Guide to Elliptic Curve
+ * Cryptography" (Hankerson, Menezes, Vanstone) in section 3.5.
+ *
+ * The derivation is described in the paper "Efficient Software Implementation of Public-Key
+ * Cryptography on Sensor Networks Using the MSP430X Microcontroller" (Gouvea, Oliveira, Lopez),
+ * Section 4.3 (here we use a somewhat higher-precision estimate):
+ * d = a1*b2 - b1*a2
+ * g1 = round((2^272)*b2/d)
+ * g2 = round((2^272)*b1/d)
+ *
+ * (Note that 'd' is also equal to the curve order here because [a1,b1] and [a2,b2] are found
+ * as outputs of the Extended Euclidean Algorithm on inputs 'order' and 'lambda').
+ *
+ * The function below splits a in r1 and r2, such that r1 + lambda * r2 == a (mod order).
+ */
+
 static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) {
+    secp256k1_scalar_t c1, c2;
+    static const secp256k1_scalar_t minus_lambda = SECP256K1_SCALAR_CONST(
+        0xAC9C52B3UL, 0x3FA3CF1FUL, 0x5AD9E3FDUL, 0x77ED9BA4UL,
+        0xA880B9FCUL, 0x8EC739C2UL, 0xE0CFC810UL, 0xB51283CFUL
+    );
+    static const secp256k1_scalar_t minus_b1 = SECP256K1_SCALAR_CONST(
+        0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
+        0xE4437ED6UL, 0x010E8828UL, 0x6F547FA9UL, 0x0ABFE4C3UL
+    );
+    static const secp256k1_scalar_t minus_b2 = SECP256K1_SCALAR_CONST(
+        0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+        0x8A280AC5UL, 0x0774346DUL, 0xD765CDA8UL, 0x3DB1562CUL
+    );
+    static const secp256k1_scalar_t g1 = SECP256K1_SCALAR_CONST(
+        0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00003086UL,
+        0xD221A7D4UL, 0x6BCDE86CUL, 0x90E49284UL, 0xEB153DABUL
+    );
+    static const secp256k1_scalar_t g2 = SECP256K1_SCALAR_CONST(
+        0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0000E443UL,
+        0x7ED6010EUL, 0x88286F54UL, 0x7FA90ABFUL, 0xE4C42212UL
+    );
     VERIFY_CHECK(r1 != a);
     VERIFY_CHECK(r2 != a);
-    secp256k1_scalar_t c1, c2;
-    secp256k1_scalar_mul_shift_var(&c1, a, &secp256k1_scalar_consts->g1, 272);
-    secp256k1_scalar_mul_shift_var(&c2, a, &secp256k1_scalar_consts->g2, 272);
-    secp256k1_scalar_mul(&c1, &c1, &secp256k1_scalar_consts->minus_b1);
-    secp256k1_scalar_mul(&c2, &c2, &secp256k1_scalar_consts->minus_b2);
+    secp256k1_scalar_mul_shift_var(&c1, a, &g1, 272);
+    secp256k1_scalar_mul_shift_var(&c2, a, &g2, 272);
+    secp256k1_scalar_mul(&c1, &c1, &minus_b1);
+    secp256k1_scalar_mul(&c2, &c2, &minus_b2);
     secp256k1_scalar_add(r2, &c1, &c2);
-    secp256k1_scalar_mul(r1, r2, &secp256k1_scalar_consts->minus_lambda);
+    secp256k1_scalar_mul(r1, r2, &minus_lambda);
     secp256k1_scalar_add(r1, r1, a);
 }
 #endif
diff --git a/src/secp256k1.c b/src/secp256k1.c
index 58bcd8d00..8c4eca4b6 100644
--- a/src/secp256k1.c
+++ b/src/secp256k1.c
@@ -20,10 +20,6 @@
 #include "hash_impl.h"
 
 void secp256k1_start(unsigned int flags) {
-    secp256k1_fe_start();
-    secp256k1_ge_start();
-    secp256k1_scalar_start();
-    secp256k1_ecdsa_start();
     if (flags & SECP256K1_START_SIGN) {
         secp256k1_ecmult_gen_start();
     }
@@ -35,46 +31,43 @@ void secp256k1_start(unsigned int flags) {
 void secp256k1_stop(void) {
     secp256k1_ecmult_stop();
     secp256k1_ecmult_gen_stop();
-    secp256k1_ecdsa_stop();
-    secp256k1_scalar_stop();
-    secp256k1_ge_stop();
-    secp256k1_fe_stop();
 }
 
 int secp256k1_ecdsa_verify(const unsigned char *msg32, const unsigned char *sig, int siglen, const unsigned char *pubkey, int pubkeylen) {
+    secp256k1_ge_t q;
+    secp256k1_ecdsa_sig_t s;
+    secp256k1_scalar_t m;
+    int ret = -3;
     DEBUG_CHECK(secp256k1_ecmult_consts != NULL);
     DEBUG_CHECK(msg32 != NULL);
     DEBUG_CHECK(sig != NULL);
     DEBUG_CHECK(pubkey != NULL);
 
-    int ret = -3;
-    secp256k1_scalar_t m;
-    secp256k1_ecdsa_sig_t s;
-    secp256k1_ge_t q;
     secp256k1_scalar_set_b32(&m, msg32, NULL);
 
-    if (!secp256k1_eckey_pubkey_parse(&q, pubkey, pubkeylen)) {
+    if (secp256k1_eckey_pubkey_parse(&q, pubkey, pubkeylen)) {
+        if (secp256k1_ecdsa_sig_parse(&s, sig, siglen)) {
+            if (secp256k1_ecdsa_sig_verify(&s, &q, &m)) {
+                /* success is 1, all other values are fail */
+                ret = 1;
+            } else {
+                ret = 0;
+            }
+        } else {
+            ret = -2;
+        }
+    } else {
         ret = -1;
-        goto end;
-    }
-    if (!secp256k1_ecdsa_sig_parse(&s, sig, siglen)) {
-        ret = -2;
-        goto end;
     }
-    if (!secp256k1_ecdsa_sig_verify(&s, &q, &m)) {
-        ret = 0;
-        goto end;
-    }
-    ret = 1;
-end:
+
     return ret;
 }
 
 static int nonce_function_rfc6979(unsigned char *nonce32, const unsigned char *msg32, const unsigned char *key32, unsigned int counter, const void *data) {
-   (void)data;
    secp256k1_rfc6979_hmac_sha256_t rng;
-   secp256k1_rfc6979_hmac_sha256_initialize(&rng, key32, 32, msg32, 32);
-   for (unsigned int i = 0; i <= counter; i++) {
+   unsigned int i;
+   secp256k1_rfc6979_hmac_sha256_initialize(&rng, key32, 32, msg32, 32, data, data != NULL ? 32 : 0);
+   for (i = 0; i <= counter; i++) {
        secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
    }
    secp256k1_rfc6979_hmac_sha256_finalize(&rng);
@@ -85,6 +78,11 @@ const secp256k1_nonce_function_t secp256k1_nonce_function_rfc6979 = nonce_functi
 const secp256k1_nonce_function_t secp256k1_nonce_function_default = nonce_function_rfc6979;
 
 int secp256k1_ecdsa_sign(const unsigned char *msg32, unsigned char *signature, int *signaturelen, const unsigned char *seckey, secp256k1_nonce_function_t noncefp, const void* noncedata) {
+    secp256k1_ecdsa_sig_t sig;
+    secp256k1_scalar_t sec, non, msg;
+    int ret = 0;
+    int overflow = 0;
+    unsigned int count = 0;
     DEBUG_CHECK(secp256k1_ecmult_gen_consts != NULL);
     DEBUG_CHECK(msg32 != NULL);
     DEBUG_CHECK(signature != NULL);
@@ -94,38 +92,44 @@ int secp256k1_ecdsa_sign(const unsigned char *msg32, unsigned char *signature, i
         noncefp = secp256k1_nonce_function_default;
     }
 
-    secp256k1_scalar_t sec, non, msg;
-    secp256k1_scalar_set_b32(&sec, seckey, NULL);
-    secp256k1_scalar_set_b32(&msg, msg32, NULL);
-    int overflow = 0;
-    int ret = 0;
-    unsigned int count = 0;
-    secp256k1_ecdsa_sig_t sig;
-    while (1) {
-        unsigned char nonce32[32];
-        ret = noncefp(nonce32, msg32, seckey, count, noncedata);
-        if (!ret) {
-            break;
-        }
-        secp256k1_scalar_set_b32(&non, nonce32, &overflow);
-        memset(nonce32, 0, 32);
-        if (!secp256k1_scalar_is_zero(&non) && !overflow) {
-            if (secp256k1_ecdsa_sig_sign(&sig, &sec, &msg, &non, NULL)) {
+    secp256k1_scalar_set_b32(&sec, seckey, &overflow);
+    /* Fail if the secret key is invalid. */
+    if (!overflow && !secp256k1_scalar_is_zero(&sec)) {
+        secp256k1_scalar_set_b32(&msg, msg32, NULL);
+        while (1) {
+            unsigned char nonce32[32];
+            ret = noncefp(nonce32, msg32, seckey, count, noncedata);
+            if (!ret) {
                 break;
             }
+            secp256k1_scalar_set_b32(&non, nonce32, &overflow);
+            memset(nonce32, 0, 32);
+            if (!secp256k1_scalar_is_zero(&non) && !overflow) {
+                if (secp256k1_ecdsa_sig_sign(&sig, &sec, &msg, &non, NULL)) {
+                    break;
+                }
+            }
+            count++;
+        }
+        if (ret) {
+            ret = secp256k1_ecdsa_sig_serialize(signature, signaturelen, &sig);
         }
-        count++;
+        secp256k1_scalar_clear(&msg);
+        secp256k1_scalar_clear(&non);
+        secp256k1_scalar_clear(&sec);
     }
-    if (ret) {
-        ret = secp256k1_ecdsa_sig_serialize(signature, signaturelen, &sig);
+    if (!ret) {
+        *signaturelen = 0;
     }
-    secp256k1_scalar_clear(&msg);
-    secp256k1_scalar_clear(&non);
-    secp256k1_scalar_clear(&sec);
     return ret;
 }
 
 int secp256k1_ecdsa_sign_compact(const unsigned char *msg32, unsigned char *sig64, const unsigned char *seckey, secp256k1_nonce_function_t noncefp, const void* noncedata, int *recid) {
+    secp256k1_ecdsa_sig_t sig;
+    secp256k1_scalar_t sec, non, msg;
+    int ret = 0;
+    int overflow = 0;
+    unsigned int count = 0;
     DEBUG_CHECK(secp256k1_ecmult_gen_consts != NULL);
     DEBUG_CHECK(msg32 != NULL);
     DEBUG_CHECK(sig64 != NULL);
@@ -134,39 +138,45 @@ int secp256k1_ecdsa_sign_compact(const unsigned char *msg32, unsigned char *sig6
         noncefp = secp256k1_nonce_function_default;
     }
 
-    secp256k1_scalar_t sec, non, msg;
-    secp256k1_scalar_set_b32(&sec, seckey, NULL);
-    secp256k1_scalar_set_b32(&msg, msg32, NULL);
-    int overflow = 0;
-    int ret = 0;
-    unsigned int count = 0;
-    secp256k1_ecdsa_sig_t sig;
-    while (1) {
-        unsigned char nonce32[32];
-        ret = noncefp(nonce32, msg32, seckey, count, noncedata);
-        if (!ret) {
-            break;
-        }
-        secp256k1_scalar_set_b32(&non, nonce32, &overflow);
-        memset(nonce32, 0, 32);
-        if (!secp256k1_scalar_is_zero(&non) && !overflow) {
-            if (secp256k1_ecdsa_sig_sign(&sig, &sec, &msg, &non, recid)) {
+    secp256k1_scalar_set_b32(&sec, seckey, &overflow);
+    /* Fail if the secret key is invalid. */
+    if (!overflow && !secp256k1_scalar_is_zero(&sec)) {
+        secp256k1_scalar_set_b32(&msg, msg32, NULL);
+        while (1) {
+            unsigned char nonce32[32];
+            ret = noncefp(nonce32, msg32, seckey, count, noncedata);
+            if (!ret) {
                 break;
             }
+            secp256k1_scalar_set_b32(&non, nonce32, &overflow);
+            memset(nonce32, 0, 32);
+            if (!secp256k1_scalar_is_zero(&non) && !overflow) {
+                if (secp256k1_ecdsa_sig_sign(&sig, &sec, &msg, &non, recid)) {
+                    break;
+                }
+            }
+            count++;
+        }
+        if (ret) {
+            secp256k1_scalar_get_b32(sig64, &sig.r);
+            secp256k1_scalar_get_b32(sig64 + 32, &sig.s);
         }
-        count++;
+        secp256k1_scalar_clear(&msg);
+        secp256k1_scalar_clear(&non);
+        secp256k1_scalar_clear(&sec);
     }
-    if (ret) {
-        secp256k1_scalar_get_b32(sig64, &sig.r);
-        secp256k1_scalar_get_b32(sig64 + 32, &sig.s);
+    if (!ret) {
+        memset(sig64, 0, 64);
     }
-    secp256k1_scalar_clear(&msg);
-    secp256k1_scalar_clear(&non);
-    secp256k1_scalar_clear(&sec);
     return ret;
 }
 
 int secp256k1_ecdsa_recover_compact(const unsigned char *msg32, const unsigned char *sig64, unsigned char *pubkey, int *pubkeylen, int compressed, int recid) {
+    secp256k1_ge_t q;
+    secp256k1_ecdsa_sig_t sig;
+    secp256k1_scalar_t m;
+    int ret = 0;
+    int overflow = 0;
     DEBUG_CHECK(secp256k1_ecmult_consts != NULL);
     DEBUG_CHECK(msg32 != NULL);
     DEBUG_CHECK(sig64 != NULL);
@@ -174,82 +184,87 @@ int secp256k1_ecdsa_recover_compact(const unsigned char *msg32, const unsigned c
     DEBUG_CHECK(pubkeylen != NULL);
     DEBUG_CHECK(recid >= 0 && recid <= 3);
 
-    int ret = 0;
-    secp256k1_scalar_t m;
-    secp256k1_ecdsa_sig_t sig;
-    int overflow = 0;
     secp256k1_scalar_set_b32(&sig.r, sig64, &overflow);
-    if (overflow) {
-        return 0;
-    }
-    secp256k1_scalar_set_b32(&sig.s, sig64 + 32, &overflow);
-    if (overflow) {
-        return 0;
-    }
-    secp256k1_scalar_set_b32(&m, msg32, NULL);
+    if (!overflow) {
+        secp256k1_scalar_set_b32(&sig.s, sig64 + 32, &overflow);
+        if (!overflow) {
+            secp256k1_scalar_set_b32(&m, msg32, NULL);
 
-    secp256k1_ge_t q;
-    if (secp256k1_ecdsa_sig_recover(&sig, &q, &m, recid)) {
-        ret = secp256k1_eckey_pubkey_serialize(&q, pubkey, pubkeylen, compressed);
+            if (secp256k1_ecdsa_sig_recover(&sig, &q, &m, recid)) {
+                ret = secp256k1_eckey_pubkey_serialize(&q, pubkey, pubkeylen, compressed);
+            }
+        }
     }
     return ret;
 }
 
 int secp256k1_ec_seckey_verify(const unsigned char *seckey) {
-    DEBUG_CHECK(seckey != NULL);
-
     secp256k1_scalar_t sec;
+    int ret;
     int overflow;
+    DEBUG_CHECK(seckey != NULL);
+
     secp256k1_scalar_set_b32(&sec, seckey, &overflow);
-    int ret = !secp256k1_scalar_is_zero(&sec) && !overflow;
+    ret = !secp256k1_scalar_is_zero(&sec) && !overflow;
     secp256k1_scalar_clear(&sec);
     return ret;
 }
 
 int secp256k1_ec_pubkey_verify(const unsigned char *pubkey, int pubkeylen) {
+    secp256k1_ge_t q;
     DEBUG_CHECK(pubkey != NULL);
 
-    secp256k1_ge_t q;
     return secp256k1_eckey_pubkey_parse(&q, pubkey, pubkeylen);
 }
 
 int secp256k1_ec_pubkey_create(unsigned char *pubkey, int *pubkeylen, const unsigned char *seckey, int compressed) {
+    secp256k1_gej_t pj;
+    secp256k1_ge_t p;
+    secp256k1_scalar_t sec;
+    int overflow;
+    int ret = 0;
     DEBUG_CHECK(secp256k1_ecmult_gen_consts != NULL);
     DEBUG_CHECK(pubkey != NULL);
     DEBUG_CHECK(pubkeylen != NULL);
     DEBUG_CHECK(seckey != NULL);
 
-    secp256k1_scalar_t sec;
-    secp256k1_scalar_set_b32(&sec, seckey, NULL);
-    secp256k1_gej_t pj;
-    secp256k1_ecmult_gen(&pj, &sec);
-    secp256k1_scalar_clear(&sec);
-    secp256k1_ge_t p;
-    secp256k1_ge_set_gej(&p, &pj);
-    return secp256k1_eckey_pubkey_serialize(&p, pubkey, pubkeylen, compressed);
+    secp256k1_scalar_set_b32(&sec, seckey, &overflow);
+    if (!overflow) {
+        secp256k1_ecmult_gen(&pj, &sec);
+        secp256k1_scalar_clear(&sec);
+        secp256k1_ge_set_gej(&p, &pj);
+        ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, pubkeylen, compressed);
+    }
+    if (!ret) {
+        *pubkeylen = 0;
+    }
+    return ret;
 }
 
 int secp256k1_ec_pubkey_decompress(unsigned char *pubkey, int *pubkeylen) {
+    secp256k1_ge_t p;
+    int ret = 0;
     DEBUG_CHECK(pubkey != NULL);
     DEBUG_CHECK(pubkeylen != NULL);
 
-    secp256k1_ge_t p;
-    if (!secp256k1_eckey_pubkey_parse(&p, pubkey, *pubkeylen))
-        return 0;
-    return secp256k1_eckey_pubkey_serialize(&p, pubkey, pubkeylen, 0);
+    if (secp256k1_eckey_pubkey_parse(&p, pubkey, *pubkeylen)) {
+        ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, pubkeylen, 0);
+    }
+    return ret;
 }
 
 int secp256k1_ec_privkey_tweak_add(unsigned char *seckey, const unsigned char *tweak) {
+    secp256k1_scalar_t term;
+    secp256k1_scalar_t sec;
+    int ret = 0;
+    int overflow = 0;
     DEBUG_CHECK(seckey != NULL);
     DEBUG_CHECK(tweak != NULL);
 
-    secp256k1_scalar_t term;
-    int overflow = 0;
     secp256k1_scalar_set_b32(&term, tweak, &overflow);
-    secp256k1_scalar_t sec;
     secp256k1_scalar_set_b32(&sec, seckey, NULL);
 
-    int ret = secp256k1_eckey_privkey_tweak_add(&sec, &term) && !overflow;
+    ret = secp256k1_eckey_privkey_tweak_add(&sec, &term) && !overflow;
     if (ret) {
         secp256k1_scalar_get_b32(seckey, &sec);
     }
@@ -260,40 +275,41 @@ int secp256k1_ec_privkey_tweak_add(unsigned char *seckey, const unsigned char *t
 }
 
 int secp256k1_ec_pubkey_tweak_add(unsigned char *pubkey, int pubkeylen, const unsigned char *tweak) {
+    secp256k1_ge_t p;
+    secp256k1_scalar_t term;
+    int ret = 0;
+    int overflow = 0;
     DEBUG_CHECK(secp256k1_ecmult_consts != NULL);
     DEBUG_CHECK(pubkey != NULL);
     DEBUG_CHECK(tweak != NULL);
 
-    secp256k1_scalar_t term;
-    int overflow = 0;
     secp256k1_scalar_set_b32(&term, tweak, &overflow);
-    if (overflow) {
-        return 0;
-    }
-    secp256k1_ge_t p;
-    int ret = secp256k1_eckey_pubkey_parse(&p, pubkey, pubkeylen);
-    if (ret) {
-        ret = secp256k1_eckey_pubkey_tweak_add(&p, &term);
-    }
-    if (ret) {
-        int oldlen = pubkeylen;
-        ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, &pubkeylen, oldlen <= 33);
-        VERIFY_CHECK(pubkeylen == oldlen);
+    if (!overflow) {
+        ret = secp256k1_eckey_pubkey_parse(&p, pubkey, pubkeylen);
+        if (ret) {
+            ret = secp256k1_eckey_pubkey_tweak_add(&p, &term);
+        }
+        if (ret) {
+            int oldlen = pubkeylen;
+            ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, &pubkeylen, oldlen <= 33);
+            VERIFY_CHECK(pubkeylen == oldlen);
+        }
     }
 
     return ret;
 }
 
 int secp256k1_ec_privkey_tweak_mul(unsigned char *seckey, const unsigned char *tweak) {
+    secp256k1_scalar_t factor;
+    secp256k1_scalar_t sec;
+    int ret = 0;
+    int overflow = 0;
     DEBUG_CHECK(seckey != NULL);
     DEBUG_CHECK(tweak != NULL);
 
-    secp256k1_scalar_t factor;
-    int overflow = 0;
     secp256k1_scalar_set_b32(&factor, tweak, &overflow);
-    secp256k1_scalar_t sec;
     secp256k1_scalar_set_b32(&sec, seckey, NULL);
-    int ret = secp256k1_eckey_privkey_tweak_mul(&sec, &factor) && !overflow;
+    ret = secp256k1_eckey_privkey_tweak_mul(&sec, &factor) && !overflow;
     if (ret) {
         secp256k1_scalar_get_b32(seckey, &sec);
     }
@@ -304,50 +320,53 @@ int secp256k1_ec_privkey_tweak_mul(unsigned char *seckey, const unsigned char *t
 }
 
 int secp256k1_ec_pubkey_tweak_mul(unsigned char *pubkey, int pubkeylen, const unsigned char *tweak) {
+    secp256k1_ge_t p;
+    secp256k1_scalar_t factor;
+    int ret = 0;
+    int overflow = 0;
     DEBUG_CHECK(secp256k1_ecmult_consts != NULL);
     DEBUG_CHECK(pubkey != NULL);
     DEBUG_CHECK(tweak != NULL);
 
-    secp256k1_scalar_t factor;
-    int overflow = 0;
     secp256k1_scalar_set_b32(&factor, tweak, &overflow);
-    if (overflow) {
-        return 0;
-    }
-    secp256k1_ge_t p;
-    int ret = secp256k1_eckey_pubkey_parse(&p, pubkey, pubkeylen);
-    if (ret) {
-        ret = secp256k1_eckey_pubkey_tweak_mul(&p, &factor);
-    }
-    if (ret) {
-        int oldlen = pubkeylen;
-        ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, &pubkeylen, oldlen <= 33);
-        VERIFY_CHECK(pubkeylen == oldlen);
+    if (!overflow) {
+        ret = secp256k1_eckey_pubkey_parse(&p, pubkey, pubkeylen);
+        if (ret) {
+            ret = secp256k1_eckey_pubkey_tweak_mul(&p, &factor);
+        }
+        if (ret) {
+            int oldlen = pubkeylen;
+            ret = secp256k1_eckey_pubkey_serialize(&p, pubkey, &pubkeylen, oldlen <= 33);
+            VERIFY_CHECK(pubkeylen == oldlen);
+        }
     }
 
     return ret;
 }
 
 int secp256k1_ec_privkey_export(const unsigned char *seckey, unsigned char *privkey, int *privkeylen, int compressed) {
+    secp256k1_scalar_t key;
+    int ret = 0;
     DEBUG_CHECK(seckey != NULL);
     DEBUG_CHECK(privkey != NULL);
     DEBUG_CHECK(privkeylen != NULL);
 
-    secp256k1_scalar_t key;
     secp256k1_scalar_set_b32(&key, seckey, NULL);
-    int ret = secp256k1_eckey_privkey_serialize(privkey, privkeylen, &key, compressed);
+    ret = secp256k1_eckey_privkey_serialize(privkey, privkeylen, &key, compressed);
     secp256k1_scalar_clear(&key);
     return ret;
 }
 
 int secp256k1_ec_privkey_import(unsigned char *seckey, const unsigned char *privkey, int privkeylen) {
+    secp256k1_scalar_t key;
+    int ret = 0;
     DEBUG_CHECK(seckey != NULL);
     DEBUG_CHECK(privkey != NULL);
 
-    secp256k1_scalar_t key;
-    int ret = secp256k1_eckey_privkey_parse(&key, privkey, privkeylen);
-    if (ret)
+    ret = secp256k1_eckey_privkey_parse(&key, privkey, privkeylen);
+    if (ret) {
         secp256k1_scalar_get_b32(seckey, &key);
+    }
     secp256k1_scalar_clear(&key);
     return ret;
 }
diff --git a/src/testrand.h b/src/testrand.h
index 018b65cd5..041bb92c4 100644
--- a/src/testrand.h
+++ b/src/testrand.h
@@ -11,8 +11,10 @@
 #include "libsecp256k1-config.h"
 #endif
 
-/** Seed the pseudorandom number generator. */
-SECP256K1_INLINE static void secp256k1_rand_seed(uint64_t v);
+/* A non-cryptographic RNG used only for test infrastructure. */
+
+/** Seed the pseudorandom number generator for testing. */
+SECP256K1_INLINE static void secp256k1_rand_seed(const unsigned char *seed16);
 
 /** Generate a pseudorandom 32-bit number. */
 static uint32_t secp256k1_rand32(void);
diff --git a/src/testrand_impl.h b/src/testrand_impl.h
index 677c4b9a0..21c69f1c5 100644
--- a/src/testrand_impl.h
+++ b/src/testrand_impl.h
@@ -11,44 +11,44 @@
 #include <string.h>
 
 #include "testrand.h"
+#include "hash.h"
 
-static uint32_t secp256k1_Rz = 11, secp256k1_Rw = 11;
+static secp256k1_rfc6979_hmac_sha256_t secp256k1_test_rng;
+static uint32_t secp256k1_test_rng_precomputed[8];
+static int secp256k1_test_rng_precomputed_used = 8;
 
-SECP256K1_INLINE static void secp256k1_rand_seed(uint64_t v) {
-    secp256k1_Rz = v >> 32;
-    secp256k1_Rw = v;
-
-    if (secp256k1_Rz == 0 || secp256k1_Rz == 0x9068ffffU) {
-        secp256k1_Rz = 111;
-    }
-    if (secp256k1_Rw == 0 || secp256k1_Rw == 0x464fffffU) {
-        secp256k1_Rw = 111;
-    }
+SECP256K1_INLINE static void secp256k1_rand_seed(const unsigned char *seed16) {
+    secp256k1_rfc6979_hmac_sha256_initialize(&secp256k1_test_rng, (const unsigned char*)"TestRNG", 7, seed16, 16, NULL, 0);
 }
 
 SECP256K1_INLINE static uint32_t secp256k1_rand32(void) {
-    secp256k1_Rz = 36969 * (secp256k1_Rz & 0xFFFF) + (secp256k1_Rz >> 16);
-    secp256k1_Rw = 18000 * (secp256k1_Rw & 0xFFFF) + (secp256k1_Rw >> 16);
-    return (secp256k1_Rw << 16) + (secp256k1_Rw >> 16) + secp256k1_Rz;
+    if (secp256k1_test_rng_precomputed_used == 8) {
+        secp256k1_rfc6979_hmac_sha256_generate(&secp256k1_test_rng, (unsigned char*)(&secp256k1_test_rng_precomputed[0]), sizeof(secp256k1_test_rng_precomputed));
+        secp256k1_test_rng_precomputed_used = 0;
+    }
+    return secp256k1_test_rng_precomputed[secp256k1_test_rng_precomputed_used++];
 }
 
 static void secp256k1_rand256(unsigned char *b32) {
-    for (int i=0; i<8; i++) {
-        uint32_t r = secp256k1_rand32();
-        b32[i*4 + 0] = (r >>  0) & 0xFF;
-        b32[i*4 + 1] = (r >>  8) & 0xFF;
-        b32[i*4 + 2] = (r >> 16) & 0xFF;
-        b32[i*4 + 3] = (r >> 24) & 0xFF;
-    }
+    secp256k1_rfc6979_hmac_sha256_generate(&secp256k1_test_rng, b32, 32);
 }
 
 static void secp256k1_rand256_test(unsigned char *b32) {
     int bits=0;
+    uint64_t ent = 0;
+    int entleft = 0;
     memset(b32, 0, 32);
     while (bits < 256) {
-        uint32_t ent = secp256k1_rand32();
-        int now = 1 + ((ent % 64)*((ent >> 6) % 32)+16)/31;
-        uint32_t val = 1 & (ent >> 11);
+        int now;
+        uint32_t val;
+        if (entleft < 12) {
+            ent |= ((uint64_t)secp256k1_rand32()) << entleft;
+            entleft += 32;
+        }
+        now = 1 + ((ent % 64)*((ent >> 6) % 32)+16)/31;
+        val = 1 & (ent >> 11);
+        ent >>= 12;
+        entleft -= 12;
         while (now > 0 && bits < 256) {
             b32[bits / 8] |= val << (bits % 8);
             now--;
diff --git a/src/tests.c b/src/tests.c
index cff32f1d0..f7f1acac6 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -36,12 +36,12 @@ void random_field_element_test(secp256k1_fe_t *fe) {
 }
 
 void random_field_element_magnitude(secp256k1_fe_t *fe) {
+    secp256k1_fe_t zero;
     int n = secp256k1_rand32() % 9;
     secp256k1_fe_normalize(fe);
     if (n == 0) {
         return;
     }
-    secp256k1_fe_t zero;
     secp256k1_fe_clear(&zero);
     secp256k1_fe_negate(&zero, &zero, 0);
     secp256k1_fe_mul_int(&zero, n - 1);
@@ -61,14 +61,15 @@ void random_group_element_test(secp256k1_ge_t *ge) {
 }
 
 void random_group_element_jacobian_test(secp256k1_gej_t *gej, const secp256k1_ge_t *ge) {
+    secp256k1_fe_t z2, z3;
     do {
         random_field_element_test(&gej->z);
         if (!secp256k1_fe_is_zero(&gej->z)) {
             break;
         }
     } while(1);
-    secp256k1_fe_t z2; secp256k1_fe_sqr(&z2, &gej->z);
-    secp256k1_fe_t z3; secp256k1_fe_mul(&z3, &z2, &gej->z);
+    secp256k1_fe_sqr(&z2, &gej->z);
+    secp256k1_fe_mul(&z3, &z2, &gej->z);
     secp256k1_fe_mul(&gej->x, &ge->x, &z2);
     secp256k1_fe_mul(&gej->y, &ge->y, &z3);
     gej->infinity = ge->infinity;
@@ -77,8 +78,8 @@ void random_group_element_jacobian_test(secp256k1_gej_t *gej, const secp256k1_ge
 void random_scalar_order_test(secp256k1_scalar_t *num) {
     do {
         unsigned char b32[32];
-        secp256k1_rand256_test(b32);
         int overflow = 0;
+        secp256k1_rand256_test(b32);
         secp256k1_scalar_set_b32(num, b32, &overflow);
         if (overflow || secp256k1_scalar_is_zero(num))
             continue;
@@ -89,8 +90,8 @@ void random_scalar_order_test(secp256k1_scalar_t *num) {
 void random_scalar_order(secp256k1_scalar_t *num) {
     do {
         unsigned char b32[32];
-        secp256k1_rand256(b32);
         int overflow = 0;
+        secp256k1_rand256(b32);
         secp256k1_scalar_set_b32(num, b32, &overflow);
         if (overflow || secp256k1_scalar_is_zero(num))
             continue;
@@ -117,16 +118,17 @@ void run_sha256_tests(void) {
         {0xf0, 0x8a, 0x78, 0xcb, 0xba, 0xee, 0x08, 0x2b, 0x05, 0x2a, 0xe0, 0x70, 0x8f, 0x32, 0xfa, 0x1e, 0x50, 0xc5, 0xc4, 0x21, 0xaa, 0x77, 0x2b, 0xa5, 0xdb, 0xb4, 0x06, 0xa2, 0xea, 0x6b, 0xe3, 0x42},
         {0xab, 0x64, 0xef, 0xf7, 0xe8, 0x8e, 0x2e, 0x46, 0x16, 0x5e, 0x29, 0xf2, 0xbc, 0xe4, 0x18, 0x26, 0xbd, 0x4c, 0x7b, 0x35, 0x52, 0xf6, 0xb3, 0x82, 0xa9, 0xe7, 0xd3, 0xaf, 0x47, 0xc2, 0x45, 0xf8}
     };
-    for (int i = 0; i < 8; i++) {
+    int i;
+    for (i = 0; i < 8; i++) {
+        unsigned char out[32];
         secp256k1_sha256_t hasher;
         secp256k1_sha256_initialize(&hasher);
         secp256k1_sha256_write(&hasher, (const unsigned char*)(inputs[i]), strlen(inputs[i]));
-        unsigned char out[32];
         secp256k1_sha256_finalize(&hasher, out);
         CHECK(memcmp(out, outputs[i], 32) == 0);
         if (strlen(inputs[i]) > 0) {
-            secp256k1_sha256_initialize(&hasher);
             int split = secp256k1_rand32() % strlen(inputs[i]);
+            secp256k1_sha256_initialize(&hasher);
             secp256k1_sha256_write(&hasher, (const unsigned char*)(inputs[i]), split);
             secp256k1_sha256_write(&hasher, (const unsigned char*)(inputs[i] + split), strlen(inputs[i]) - split);
             secp256k1_sha256_finalize(&hasher, out);
@@ -160,16 +162,17 @@ void run_hmac_sha256_tests(void) {
         {0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f, 0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f, 0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14, 0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54},
         {0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb, 0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44, 0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93, 0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2}
     };
-    for (int i = 0; i < 6; i++) {
+    int i;
+    for (i = 0; i < 6; i++) {
         secp256k1_hmac_sha256_t hasher;
+        unsigned char out[32];
         secp256k1_hmac_sha256_initialize(&hasher, (const unsigned char*)(keys[i]), strlen(keys[i]));
         secp256k1_hmac_sha256_write(&hasher, (const unsigned char*)(inputs[i]), strlen(inputs[i]));
-        unsigned char out[32];
         secp256k1_hmac_sha256_finalize(&hasher, out);
         CHECK(memcmp(out, outputs[i], 32) == 0);
         if (strlen(inputs[i]) > 0) {
-            secp256k1_hmac_sha256_initialize(&hasher, (const unsigned char*)(keys[i]), strlen(keys[i]));
             int split = secp256k1_rand32() % strlen(inputs[i]);
+            secp256k1_hmac_sha256_initialize(&hasher, (const unsigned char*)(keys[i]), strlen(keys[i]));
             secp256k1_hmac_sha256_write(&hasher, (const unsigned char*)(inputs[i]), split);
             secp256k1_hmac_sha256_write(&hasher, (const unsigned char*)(inputs[i] + split), strlen(inputs[i]) - split);
             secp256k1_hmac_sha256_finalize(&hasher, out);
@@ -197,16 +200,25 @@ void run_rfc6979_hmac_sha256_tests(void) {
 
     secp256k1_rfc6979_hmac_sha256_t rng;
     unsigned char out[32];
+    unsigned char zero[1] = {0};
+    int i;
 
-    secp256k1_rfc6979_hmac_sha256_initialize(&rng, key1, 32, msg1, 32);
-    for (int i = 0; i < 3; i++) {
+    secp256k1_rfc6979_hmac_sha256_initialize(&rng, key1, 32, msg1, 32, NULL, 1);
+    for (i = 0; i < 3; i++) {
         secp256k1_rfc6979_hmac_sha256_generate(&rng, out, 32);
         CHECK(memcmp(out, out1[i], 32) == 0);
     }
     secp256k1_rfc6979_hmac_sha256_finalize(&rng);
 
-    secp256k1_rfc6979_hmac_sha256_initialize(&rng, key2, 32, msg2, 32);
-    for (int i = 0; i < 3; i++) {
+    secp256k1_rfc6979_hmac_sha256_initialize(&rng, key1, 32, msg1, 32, zero, 1);
+    for (i = 0; i < 3; i++) {
+        secp256k1_rfc6979_hmac_sha256_generate(&rng, out, 32);
+        CHECK(memcmp(out, out1[i], 32) != 0);
+    }
+    secp256k1_rfc6979_hmac_sha256_finalize(&rng);
+
+    secp256k1_rfc6979_hmac_sha256_initialize(&rng, key2, 32, msg2, 32, zero, 0);
+    for (i = 0; i < 3; i++) {
         secp256k1_rfc6979_hmac_sha256_generate(&rng, out, 32);
         CHECK(memcmp(out, out2[i], 32) == 0);
     }
@@ -254,9 +266,10 @@ void test_num_negate(void) {
 }
 
 void test_num_add_sub(void) {
-    int r = secp256k1_rand32();
     secp256k1_num_t n1;
     secp256k1_num_t n2;
+    secp256k1_num_t n1p2, n2p1, n1m2, n2m1;
+    int r = secp256k1_rand32();
     random_num_order_test(&n1); /* n1 = R1 */
     if (r & 1) {
         random_num_negate(&n1);
@@ -265,7 +278,6 @@ void test_num_add_sub(void) {
     if (r & 2) {
         random_num_negate(&n2);
     }
-    secp256k1_num_t n1p2, n2p1, n1m2, n2m1;
     secp256k1_num_add(&n1p2, &n1, &n2); /* n1p2 = R1 + R2 */
     secp256k1_num_add(&n2p1, &n2, &n1); /* n2p1 = R2 + R1 */
     secp256k1_num_sub(&n1m2, &n1, &n2); /* n1m2 = R1 - R2 */
@@ -283,7 +295,8 @@ void test_num_add_sub(void) {
 }
 
 void run_num_smalltests(void) {
-    for (int i=0; i<100*count; i++) {
+    int i;
+    for (i = 0; i < 100*count; i++) {
         test_num_negate();
         test_num_add_sub();
     }
@@ -293,41 +306,45 @@ void run_num_smalltests(void) {
 /***** SCALAR TESTS *****/
 
 void scalar_test(void) {
+    secp256k1_scalar_t s;
+    secp256k1_scalar_t s1;
+    secp256k1_scalar_t s2;
+#ifndef USE_NUM_NONE
+    secp256k1_num_t snum, s1num, s2num;
+    secp256k1_num_t order, half_order;
+#endif
     unsigned char c[32];
 
     /* Set 's' to a random scalar, with value 'snum'. */
-    secp256k1_scalar_t s;
     random_scalar_order_test(&s);
 
     /* Set 's1' to a random scalar, with value 's1num'. */
-    secp256k1_scalar_t s1;
     random_scalar_order_test(&s1);
 
     /* Set 's2' to a random scalar, with value 'snum2', and byte array representation 'c'. */
-    secp256k1_scalar_t s2;
     random_scalar_order_test(&s2);
     secp256k1_scalar_get_b32(c, &s2);
 
 #ifndef USE_NUM_NONE
-    secp256k1_num_t snum, s1num, s2num;
     secp256k1_scalar_get_num(&snum, &s);
     secp256k1_scalar_get_num(&s1num, &s1);
     secp256k1_scalar_get_num(&s2num, &s2);
 
-    secp256k1_num_t order;
     secp256k1_scalar_order_get_num(&order);
-    secp256k1_num_t half_order = order;
+    half_order = order;
     secp256k1_num_shift(&half_order, 1);
 #endif
 
     {
+        int i;
         /* Test that fetching groups of 4 bits from a scalar and recursing n(i)=16*n(i-1)+p(i) reconstructs it. */
         secp256k1_scalar_t n;
         secp256k1_scalar_set_int(&n, 0);
-        for (int i = 0; i < 256; i += 4) {
+        for (i = 0; i < 256; i += 4) {
             secp256k1_scalar_t t;
+            int j;
             secp256k1_scalar_set_int(&t, secp256k1_scalar_get_bits(&s, 256 - 4 - i, 4));
-            for (int j = 0; j < 4; j++) {
+            for (j = 0; j < 4; j++) {
                 secp256k1_scalar_add(&n, &n, &n);
             }
             secp256k1_scalar_add(&n, &n, &t);
@@ -338,16 +355,17 @@ void scalar_test(void) {
     {
         /* Test that fetching groups of randomly-sized bits from a scalar and recursing n(i)=b*n(i-1)+p(i) reconstructs it. */
         secp256k1_scalar_t n;
-        secp256k1_scalar_set_int(&n, 0);
         int i = 0;
+        secp256k1_scalar_set_int(&n, 0);
         while (i < 256) {
+            secp256k1_scalar_t t;
+            int j;
             int now = (secp256k1_rand32() % 15) + 1;
             if (now + i > 256) {
                 now = 256 - i;
             }
-            secp256k1_scalar_t t;
             secp256k1_scalar_set_int(&t, secp256k1_scalar_get_bits_var(&s, 256 - now - i, now));
-            for (int j = 0; j < now; j++) {
+            for (j = 0; j < now; j++) {
                 secp256k1_scalar_add(&n, &n, &n);
             }
             secp256k1_scalar_add(&n, &n, &t);
@@ -360,23 +378,23 @@ void scalar_test(void) {
     {
         /* Test that adding the scalars together is equal to adding their numbers together modulo the order. */
         secp256k1_num_t rnum;
+        secp256k1_num_t r2num;
+        secp256k1_scalar_t r;
         secp256k1_num_add(&rnum, &snum, &s2num);
         secp256k1_num_mod(&rnum, &order);
-        secp256k1_scalar_t r;
         secp256k1_scalar_add(&r, &s, &s2);
-        secp256k1_num_t r2num;
         secp256k1_scalar_get_num(&r2num, &r);
         CHECK(secp256k1_num_eq(&rnum, &r2num));
     }
 
     {
         /* Test that multipying the scalars is equal to multiplying their numbers modulo the order. */
+        secp256k1_scalar_t r;
+        secp256k1_num_t r2num;
         secp256k1_num_t rnum;
         secp256k1_num_mul(&rnum, &snum, &s2num);
         secp256k1_num_mod(&rnum, &order);
-        secp256k1_scalar_t r;
         secp256k1_scalar_mul(&r, &s, &s2);
-        secp256k1_num_t r2num;
         secp256k1_scalar_get_num(&r2num, &r);
         CHECK(secp256k1_num_eq(&rnum, &r2num));
         /* The result can only be zero if at least one of the factors was zero. */
@@ -387,20 +405,20 @@ void scalar_test(void) {
     }
 
     {
+        secp256k1_scalar_t neg;
+        secp256k1_num_t negnum;
+        secp256k1_num_t negnum2;
         /* Check that comparison with zero matches comparison with zero on the number. */
         CHECK(secp256k1_num_is_zero(&snum) == secp256k1_scalar_is_zero(&s));
         /* Check that comparison with the half order is equal to testing for high scalar. */
         CHECK(secp256k1_scalar_is_high(&s) == (secp256k1_num_cmp(&snum, &half_order) > 0));
-        secp256k1_scalar_t neg;
         secp256k1_scalar_negate(&neg, &s);
-        secp256k1_num_t negnum;
         secp256k1_num_sub(&negnum, &order, &snum);
         secp256k1_num_mod(&negnum, &order);
         /* Check that comparison with the half order is equal to testing for high scalar after negation. */
         CHECK(secp256k1_scalar_is_high(&neg) == (secp256k1_num_cmp(&negnum, &half_order) > 0));
         /* Negating should change the high property, unless the value was already zero. */
         CHECK((secp256k1_scalar_is_high(&s) == secp256k1_scalar_is_high(&neg)) == secp256k1_scalar_is_zero(&s));
-        secp256k1_num_t negnum2;
         secp256k1_scalar_get_num(&negnum2, &neg);
         /* Negating a scalar should be equal to (order - n) mod order on the number. */
         CHECK(secp256k1_num_eq(&negnum, &negnum2));
@@ -415,17 +433,17 @@ void scalar_test(void) {
     {
         /* Test secp256k1_scalar_mul_shift_var. */
         secp256k1_scalar_t r;
+        secp256k1_num_t one;
+        secp256k1_num_t rnum;
+        secp256k1_num_t rnum2;
+        unsigned char cone[1] = {0x01};
         unsigned int shift = 256 + (secp256k1_rand32() % 257);
         secp256k1_scalar_mul_shift_var(&r, &s1, &s2, shift);
-        secp256k1_num_t rnum;
         secp256k1_num_mul(&rnum, &s1num, &s2num);
         secp256k1_num_shift(&rnum, shift - 1);
-        secp256k1_num_t one;
-        unsigned char cone[1] = {0x01};
         secp256k1_num_set_bin(&one, cone, 1);
         secp256k1_num_add(&rnum, &rnum, &one);
         secp256k1_num_shift(&rnum, 1);
-        secp256k1_num_t rnum2;
         secp256k1_scalar_get_num(&rnum2, &r);
         CHECK(secp256k1_num_eq(&rnum, &rnum2));
     }
@@ -435,11 +453,13 @@ void scalar_test(void) {
         /* Test that scalar inverses are equal to the inverse of their number modulo the order. */
         if (!secp256k1_scalar_is_zero(&s)) {
             secp256k1_scalar_t inv;
-            secp256k1_scalar_inverse(&inv, &s);
 #ifndef USE_NUM_NONE
             secp256k1_num_t invnum;
-            secp256k1_num_mod_inverse(&invnum, &snum, &order);
             secp256k1_num_t invnum2;
+#endif
+            secp256k1_scalar_inverse(&inv, &s);
+#ifndef USE_NUM_NONE
+            secp256k1_num_mod_inverse(&invnum, &snum, &order);
             secp256k1_scalar_get_num(&invnum2, &inv);
             CHECK(secp256k1_num_eq(&invnum, &invnum2));
 #endif
@@ -461,15 +481,18 @@ void scalar_test(void) {
     }
 
     {
+        secp256k1_scalar_t r1, r2;
+        secp256k1_scalar_t b;
+        int i;
         /* Test add_bit. */
         int bit = secp256k1_rand32() % 256;
-        secp256k1_scalar_t b;
         secp256k1_scalar_set_int(&b, 1);
         CHECK(secp256k1_scalar_is_one(&b));
-        for (int i = 0; i < bit; i++) {
+        for (i = 0; i < bit; i++) {
             secp256k1_scalar_add(&b, &b, &b);
         }
-        secp256k1_scalar_t r1 = s1, r2 = s1;
+        r1 = s1;
+        r2 = s1;
         if (!secp256k1_scalar_add(&r1, &r1, &b)) {
             /* No overflow happened. */
             secp256k1_scalar_add_bit(&r2, bit);
@@ -551,7 +574,8 @@ void scalar_test(void) {
 }
 
 void run_scalar_tests(void) {
-    for (int i = 0; i < 128 * count; i++) {
+    int i;
+    for (i = 0; i < 128 * count; i++) {
         scalar_test();
     }
 
@@ -571,11 +595,11 @@ void run_scalar_tests(void) {
     {
         /* A scalar with value of the curve order should be 0. */
         secp256k1_num_t order;
-        secp256k1_scalar_order_get_num(&order);
-        unsigned char bin[32];
-        secp256k1_num_get_bin(bin, 32, &order);
         secp256k1_scalar_t zero;
+        unsigned char bin[32];
         int overflow = 0;
+        secp256k1_scalar_order_get_num(&order);
+        secp256k1_num_get_bin(bin, 32, &order);
         secp256k1_scalar_set_b32(&zero, bin, &overflow);
         CHECK(overflow == 1);
         CHECK(secp256k1_scalar_is_zero(&zero));
@@ -608,39 +632,67 @@ void random_fe_non_zero(secp256k1_fe_t *nz) {
 }
 
 void random_fe_non_square(secp256k1_fe_t *ns) {
-    random_fe_non_zero(ns);
     secp256k1_fe_t r;
+    random_fe_non_zero(ns);
     if (secp256k1_fe_sqrt_var(&r, ns)) {
         secp256k1_fe_negate(ns, ns, 1);
     }
 }
 
 int check_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-    secp256k1_fe_t an = *a; secp256k1_fe_normalize_weak(&an);
-    secp256k1_fe_t bn = *b; secp256k1_fe_normalize_var(&bn);
+    secp256k1_fe_t an = *a;
+    secp256k1_fe_t bn = *b;
+    secp256k1_fe_normalize_weak(&an);
+    secp256k1_fe_normalize_var(&bn);
     return secp256k1_fe_equal_var(&an, &bn);
 }
 
 int check_fe_inverse(const secp256k1_fe_t *a, const secp256k1_fe_t *ai) {
-    secp256k1_fe_t x; secp256k1_fe_mul(&x, a, ai);
-    secp256k1_fe_t one; secp256k1_fe_set_int(&one, 1);
+    secp256k1_fe_t x;
+    secp256k1_fe_t one = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
+    secp256k1_fe_mul(&x, a, ai);
     return check_fe_equal(&x, &one);
 }
 
-void run_field_misc(void) {
-    const unsigned char f32_5[32] = {
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
+void run_field_convert(void) {
+    static const unsigned char b32[32] = {
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+        0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
+        0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x40
     };
+    static const secp256k1_fe_storage_t fes = SECP256K1_FE_STORAGE_CONST(
+        0x00010203UL, 0x04050607UL, 0x11121314UL, 0x15161718UL,
+        0x22232425UL, 0x26272829UL, 0x33343536UL, 0x37383940UL
+    );
+    static const secp256k1_fe_t fe = SECP256K1_FE_CONST(
+        0x00010203UL, 0x04050607UL, 0x11121314UL, 0x15161718UL,
+        0x22232425UL, 0x26272829UL, 0x33343536UL, 0x37383940UL
+    );
+    secp256k1_fe_t fe2;
+    unsigned char b322[32];
+    secp256k1_fe_storage_t fes2;
+    /* Check conversions to fe. */
+    CHECK(secp256k1_fe_set_b32(&fe2, b32));
+    CHECK(secp256k1_fe_equal_var(&fe, &fe2));
+    secp256k1_fe_from_storage(&fe2, &fes);
+    CHECK(secp256k1_fe_equal_var(&fe, &fe2));
+    /* Check conversion from fe. */
+    secp256k1_fe_get_b32(b322, &fe);
+    CHECK(memcmp(b322, b32, 32) == 0);
+    secp256k1_fe_to_storage(&fes2, &fe);
+    CHECK(memcmp(&fes2, &fes, sizeof(fes)) == 0);
+}
+
+void run_field_misc(void) {
     secp256k1_fe_t x;
     secp256k1_fe_t y;
     secp256k1_fe_t z;
     secp256k1_fe_t q;
-    secp256k1_fe_t fe5;
-    CHECK(secp256k1_fe_set_b32(&fe5, f32_5));
-    for (int i=0; i<5*count; i++) {
+    secp256k1_fe_t fe5 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 5);
+    int i;
+    for (i = 0; i < 5*count; i++) {
+        secp256k1_fe_storage_t xs, ys, zs;
         random_fe(&x);
         random_fe_non_zero(&y);
         /* Test the fe equality and comparison operations. */
@@ -649,12 +701,17 @@ void run_field_misc(void) {
         z = x;
         secp256k1_fe_add(&z,&y);
         secp256k1_fe_normalize(&z);
-        /* Test the conditional move. */
-        secp256k1_fe_cmov(&z, &x, 0);
-        CHECK(secp256k1_fe_equal_var(&x, &z) == 0);
-        CHECK(secp256k1_fe_cmp_var(&x, &z) != 0);
-        secp256k1_fe_cmov(&y, &x, 1);
-        CHECK(secp256k1_fe_equal_var(&x, &y));
+        /* Test storage conversion and conditional moves. */
+        secp256k1_fe_to_storage(&xs, &x);
+        secp256k1_fe_to_storage(&ys, &y);
+        secp256k1_fe_to_storage(&zs, &z);
+        secp256k1_fe_storage_cmov(&zs, &xs, 0);
+        CHECK(memcmp(&xs, &zs, sizeof(xs)) != 0);
+        secp256k1_fe_storage_cmov(&ys, &xs, 1);
+        CHECK(memcmp(&xs, &ys, sizeof(xs)) == 0);
+        secp256k1_fe_from_storage(&x, &xs);
+        secp256k1_fe_from_storage(&y, &ys);
+        secp256k1_fe_from_storage(&z, &zs);
         /* Test that mul_int, mul, and add agree. */
         secp256k1_fe_add(&y, &x);
         secp256k1_fe_add(&y, &x);
@@ -678,7 +735,8 @@ void run_field_misc(void) {
 
 void run_field_inv(void) {
     secp256k1_fe_t x, xi, xii;
-    for (int i=0; i<10*count; i++) {
+    int i;
+    for (i = 0; i < 10*count; i++) {
         random_fe_non_zero(&x);
         secp256k1_fe_inv(&xi, &x);
         CHECK(check_fe_inverse(&x, &xi));
@@ -689,7 +747,8 @@ void run_field_inv(void) {
 
 void run_field_inv_var(void) {
     secp256k1_fe_t x, xi, xii;
-    for (int i=0; i<10*count; i++) {
+    int i;
+    for (i = 0; i < 10*count; i++) {
         random_fe_non_zero(&x);
         secp256k1_fe_inv_var(&xi, &x);
         CHECK(check_fe_inverse(&x, &xi));
@@ -700,17 +759,19 @@ void run_field_inv_var(void) {
 
 void run_field_inv_all_var(void) {
     secp256k1_fe_t x[16], xi[16], xii[16];
+    int i;
     /* Check it's safe to call for 0 elements */
     secp256k1_fe_inv_all_var(0, xi, x);
-    for (int i=0; i<count; i++) {
+    for (i = 0; i < count; i++) {
+        size_t j;
         size_t len = (secp256k1_rand32() & 15) + 1;
-        for (size_t j=0; j<len; j++)
+        for (j = 0; j < len; j++)
             random_fe_non_zero(&x[j]);
         secp256k1_fe_inv_all_var(len, xi, x);
-        for (size_t j=0; j<len; j++)
+        for (j = 0; j < len; j++)
             CHECK(check_fe_inverse(&x[j], &xi[j]));
         secp256k1_fe_inv_all_var(len, xii, xi);
-        for (size_t j=0; j<len; j++)
+        for (j = 0; j < len; j++)
             CHECK(check_fe_equal(&x[j], &xii[j]));
     }
 }
@@ -719,10 +780,11 @@ void run_sqr(void) {
     secp256k1_fe_t x, s;
 
     {
+        int i;
         secp256k1_fe_set_int(&x, 1);
         secp256k1_fe_negate(&x, &x, 1);
 
-        for (int i=1; i<=512; ++i) {
+        for (i = 1; i <= 512; ++i) {
             secp256k1_fe_mul_int(&x, 2);
             secp256k1_fe_normalize(&x);
             secp256k1_fe_sqr(&s, &x);
@@ -746,6 +808,7 @@ void test_sqrt(const secp256k1_fe_t *a, const secp256k1_fe_t *k) {
 
 void run_sqrt(void) {
     secp256k1_fe_t ns, x, s, t;
+    int i;
 
     /* Check sqrt(0) is 0 */
     secp256k1_fe_set_int(&x, 0);
@@ -753,7 +816,7 @@ void run_sqrt(void) {
     test_sqrt(&s, &x);
 
     /* Check sqrt of small squares (and their negatives) */
-    for (int i=1; i<=100; i++) {
+    for (i = 1; i <= 100; i++) {
         secp256k1_fe_set_int(&x, i);
         secp256k1_fe_sqr(&s, &x);
         test_sqrt(&s, &x);
@@ -762,9 +825,10 @@ void run_sqrt(void) {
     }
 
     /* Consistency checks for large random values */
-    for (int i=0; i<10; i++) {
+    for (i = 0; i < 10; i++) {
+        int j;
         random_fe_non_square(&ns);
-        for (int j=0; j<count; j++) {
+        for (j = 0; j < count; j++) {
             random_fe(&x);
             secp256k1_fe_sqr(&s, &x);
             test_sqrt(&s, &x);
@@ -787,13 +851,13 @@ void ge_equals_ge(const secp256k1_ge_t *a, const secp256k1_ge_t *b) {
 }
 
 void ge_equals_gej(const secp256k1_ge_t *a, const secp256k1_gej_t *b) {
+    secp256k1_fe_t z2s;
+    secp256k1_fe_t u1, u2, s1, s2;
     CHECK(a->infinity == b->infinity);
     if (a->infinity)
         return;
     /* Check a.x * b.z^2 == b.x && a.y * b.z^3 == b.y, to avoid inverses. */
-    secp256k1_fe_t z2s;
     secp256k1_fe_sqr(&z2s, &b->z);
-    secp256k1_fe_t u1, u2, s1, s2;
     secp256k1_fe_mul(&u1, &a->x, &z2s);
     u2 = b->x; secp256k1_fe_normalize_weak(&u2);
     secp256k1_fe_mul(&s1, &a->y, &z2s); secp256k1_fe_mul(&s1, &s1, &b->z);
@@ -803,6 +867,7 @@ void ge_equals_gej(const secp256k1_ge_t *a, const secp256k1_gej_t *b) {
 }
 
 void test_ge(void) {
+    int i, i1;
     int runs = 4;
     /* Points: (infinity, p1, p1, -p1, -p1, p2, p2, -p2, -p2, p3, p3, -p3, -p3, p4, p4, -p4, -p4).
      * The second in each pair of identical points uses a random Z coordinate in the Jacobian form.
@@ -814,7 +879,8 @@ void test_ge(void) {
     secp256k1_gej_set_infinity(&gej[0]);
     secp256k1_ge_clear(&ge[0]);
     secp256k1_ge_set_gej_var(&ge[0], &gej[0]);
-    for (int i = 0; i < runs; i++) {
+    for (i = 0; i < runs; i++) {
+        int j;
         secp256k1_ge_t g;
         random_group_element_test(&g);
         ge[1 + 4 * i] = g;
@@ -825,7 +891,7 @@ void test_ge(void) {
         random_group_element_jacobian_test(&gej[2 + 4 * i], &ge[2 + 4 * i]);
         secp256k1_gej_set_ge(&gej[3 + 4 * i], &ge[3 + 4 * i]);
         random_group_element_jacobian_test(&gej[4 + 4 * i], &ge[4 + 4 * i]);
-        for (int j = 0; j < 4; j++) {
+        for (j = 0; j < 4; j++) {
             random_field_element_magnitude(&ge[1 + j + 4 * i].x);
             random_field_element_magnitude(&ge[1 + j + 4 * i].y);
             random_field_element_magnitude(&gej[1 + j + 4 * i].x);
@@ -834,8 +900,9 @@ void test_ge(void) {
         }
     }
 
-    for (int i1 = 0; i1 < 1 + 4 * runs; i1++) {
-        for (int i2 = 0; i2 < 1 + 4 * runs; i2++) {
+    for (i1 = 0; i1 < 1 + 4 * runs; i1++) {
+        int i2;
+        for (i2 = 0; i2 < 1 + 4 * runs; i2++) {
             /* Compute reference result using gej + gej (var). */
             secp256k1_gej_t refj, resj;
             secp256k1_ge_t ref;
@@ -883,11 +950,12 @@ void test_ge(void) {
 
     /* Test adding all points together in random order equals infinity. */
     {
+        secp256k1_gej_t sum = SECP256K1_GEJ_CONST_INFINITY;
         secp256k1_gej_t *gej_shuffled = malloc((4 * runs + 1) * sizeof(secp256k1_gej_t));
-        for (int i = 0; i < 4 * runs + 1; i++) {
+        for (i = 0; i < 4 * runs + 1; i++) {
             gej_shuffled[i] = gej[i];
         }
-        for (int i = 0; i < 4 * runs + 1; i++) {
+        for (i = 0; i < 4 * runs + 1; i++) {
             int swap = i + secp256k1_rand32() % (4 * runs + 1 - i);
             if (swap != i) {
                 secp256k1_gej_t t = gej_shuffled[i];
@@ -895,9 +963,7 @@ void test_ge(void) {
                 gej_shuffled[swap] = t;
             }
         }
-        secp256k1_gej_t sum;
-        secp256k1_gej_set_infinity(&sum);
-        for (int i = 0; i < 4 * runs + 1; i++) {
+        for (i = 0; i < 4 * runs + 1; i++) {
             secp256k1_gej_add_var(&sum, &sum, &gej_shuffled[i]);
         }
         CHECK(secp256k1_gej_is_infinity(&sum));
@@ -908,7 +974,7 @@ void test_ge(void) {
     {
         secp256k1_ge_t *ge_set_all = malloc((4 * runs + 1) * sizeof(secp256k1_ge_t));
         secp256k1_ge_set_all_gej_var(4 * runs + 1, ge_set_all, gej);
-        for (int i = 0; i < 4 * runs + 1; i++) {
+        for (i = 0; i < 4 * runs + 1; i++) {
             ge_equals_gej(&ge_set_all[i], &gej[i]);
         }
         free(ge_set_all);
@@ -919,7 +985,8 @@ void test_ge(void) {
 }
 
 void run_ge(void) {
-    for (int i = 0; i < count * 32; i++) {
+    int i;
+    for (i = 0; i < count * 32; i++) {
         test_ge();
     }
 }
@@ -928,41 +995,35 @@ void run_ge(void) {
 
 void run_ecmult_chain(void) {
     /* random starting point A (on the curve) */
-    secp256k1_fe_t ax; VERIFY_CHECK(secp256k1_fe_set_hex(&ax, "8b30bbe9ae2a990696b22f670709dff3727fd8bc04d3362c6c7bf458e2846004", 64));
-    secp256k1_fe_t ay; VERIFY_CHECK(secp256k1_fe_set_hex(&ay, "a357ae915c4a65281309edf20504740f0eb3343990216b4f81063cb65f2f7e0f", 64));
-    secp256k1_gej_t a; secp256k1_gej_set_xy(&a, &ax, &ay);
+    secp256k1_gej_t a = SECP256K1_GEJ_CONST(
+        0x8b30bbe9, 0xae2a9906, 0x96b22f67, 0x0709dff3,
+        0x727fd8bc, 0x04d3362c, 0x6c7bf458, 0xe2846004,
+        0xa357ae91, 0x5c4a6528, 0x1309edf2, 0x0504740f,
+        0x0eb33439, 0x90216b4f, 0x81063cb6, 0x5f2f7e0f
+    );
     /* two random initial factors xn and gn */
-    static const unsigned char xni[32] = {
-        0x84, 0xcc, 0x54, 0x52, 0xf7, 0xfd, 0xe1, 0xed,
-        0xb4, 0xd3, 0x8a, 0x8c, 0xe9, 0xb1, 0xb8, 0x4c,
-        0xce, 0xf3, 0x1f, 0x14, 0x6e, 0x56, 0x9b, 0xe9,
-        0x70, 0x5d, 0x35, 0x7a, 0x42, 0x98, 0x54, 0x07
-    };
-    secp256k1_scalar_t xn;
-    secp256k1_scalar_set_b32(&xn, xni, NULL);
-    static const unsigned char gni[32] = {
-        0xa1, 0xe5, 0x8d, 0x22, 0x55, 0x3d, 0xcd, 0x42,
-        0xb2, 0x39, 0x80, 0x62, 0x5d, 0x4c, 0x57, 0xa9,
-        0x6e, 0x93, 0x23, 0xd4, 0x2b, 0x31, 0x52, 0xe5,
-        0xca, 0x2c, 0x39, 0x90, 0xed, 0xc7, 0xc9, 0xde
-    };
-    secp256k1_scalar_t gn;
-    secp256k1_scalar_set_b32(&gn, gni, NULL);
+    secp256k1_scalar_t xn = SECP256K1_SCALAR_CONST(
+        0x84cc5452, 0xf7fde1ed, 0xb4d38a8c, 0xe9b1b84c,
+        0xcef31f14, 0x6e569be9, 0x705d357a, 0x42985407
+    );
+    secp256k1_scalar_t gn = SECP256K1_SCALAR_CONST(
+        0xa1e58d22, 0x553dcd42, 0xb2398062, 0x5d4c57a9,
+        0x6e9323d4, 0x2b3152e5, 0xca2c3990, 0xedc7c9de
+    );
     /* two small multipliers to be applied to xn and gn in every iteration: */
-    static const unsigned char xfi[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x13,0x37};
-    secp256k1_scalar_t xf;
-    secp256k1_scalar_set_b32(&xf, xfi, NULL);
-    static const unsigned char gfi[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x71,0x13};
-    secp256k1_scalar_t gf;
-    secp256k1_scalar_set_b32(&gf, gfi, NULL);
+    static const secp256k1_scalar_t xf = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0x1337);
+    static const secp256k1_scalar_t gf = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0x7113);
     /* accumulators with the resulting coefficients to A and G */
-    secp256k1_scalar_t ae;
-    secp256k1_scalar_set_int(&ae, 1);
-    secp256k1_scalar_t ge;
-    secp256k1_scalar_set_int(&ge, 0);
-    /* the point being computed */
+    secp256k1_scalar_t ae = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1);
+    secp256k1_scalar_t ge = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
+    /* actual points */
     secp256k1_gej_t x = a;
-    for (int i=0; i<200*count; i++) {
+    secp256k1_gej_t x2;
+    int i;
+
+    /* the point being computed */
+    x = a;
+    for (i = 0; i < 200*count; i++) {
         /* in each iteration, compute X = xn*X + gn*G; */
         secp256k1_ecmult(&x, &x, &xn, &gn);
         /* also compute ae and ge: the actual accumulated factors for A and G */
@@ -976,78 +1037,84 @@ void run_ecmult_chain(void) {
 
         /* verify */
         if (i == 19999) {
-            char res[132]; int resl = 132;
-            secp256k1_gej_get_hex(res, &resl, &x);
-            CHECK(strcmp(res, "(D6E96687F9B10D092A6F35439D86CEBEA4535D0D409F53586440BD74B933E830,B95CBCA2C77DA786539BE8FD53354D2D3B4F566AE658045407ED6015EE1B2A88)") == 0);
+            /* expected result after 19999 iterations */
+            secp256k1_gej_t rp = SECP256K1_GEJ_CONST(
+                0xD6E96687, 0xF9B10D09, 0x2A6F3543, 0x9D86CEBE,
+                0xA4535D0D, 0x409F5358, 0x6440BD74, 0xB933E830,
+                0xB95CBCA2, 0xC77DA786, 0x539BE8FD, 0x53354D2D,
+                0x3B4F566A, 0xE6580454, 0x07ED6015, 0xEE1B2A88
+            );
+
+            secp256k1_gej_neg(&rp, &rp);
+            secp256k1_gej_add_var(&rp, &rp, &x);
+            CHECK(secp256k1_gej_is_infinity(&rp));
         }
     }
     /* redo the computation, but directly with the resulting ae and ge coefficients: */
-    secp256k1_gej_t x2; secp256k1_ecmult(&x2, &a, &ae, &ge);
-    char res[132]; int resl = 132;
-    char res2[132]; int resl2 = 132;
-    secp256k1_gej_get_hex(res, &resl, &x);
-    secp256k1_gej_get_hex(res2, &resl2, &x2);
-    CHECK(strcmp(res, res2) == 0);
-    CHECK(strlen(res) == 131);
+    secp256k1_ecmult(&x2, &a, &ae, &ge);
+    secp256k1_gej_neg(&x2, &x2);
+    secp256k1_gej_add_var(&x2, &x2, &x);
+    CHECK(secp256k1_gej_is_infinity(&x2));
 }
 
 void test_point_times_order(const secp256k1_gej_t *point) {
-    unsigned char pub[65];
     /* X * (point + G) + (order-X) * (pointer + G) = 0 */
     secp256k1_scalar_t x;
-    random_scalar_order_test(&x);
     secp256k1_scalar_t nx;
-    secp256k1_scalar_negate(&nx, &x);
     secp256k1_gej_t res1, res2;
+    secp256k1_ge_t res3;
+    unsigned char pub[65];
+    int psize = 65;
+    random_scalar_order_test(&x);
+    secp256k1_scalar_negate(&nx, &x);
     secp256k1_ecmult(&res1, point, &x, &x); /* calc res1 = x * point + x * G; */
     secp256k1_ecmult(&res2, point, &nx, &nx); /* calc res2 = (order - x) * point + (order - x) * G; */
     secp256k1_gej_add_var(&res1, &res1, &res2);
     CHECK(secp256k1_gej_is_infinity(&res1));
     CHECK(secp256k1_gej_is_valid_var(&res1) == 0);
-    secp256k1_ge_t res3;
     secp256k1_ge_set_gej(&res3, &res1);
     CHECK(secp256k1_ge_is_infinity(&res3));
     CHECK(secp256k1_ge_is_valid_var(&res3) == 0);
-    int psize = 65;
     CHECK(secp256k1_eckey_pubkey_serialize(&res3, pub, &psize, 0) == 0);
     psize = 65;
     CHECK(secp256k1_eckey_pubkey_serialize(&res3, pub, &psize, 1) == 0);
 }
 
 void run_point_times_order(void) {
-    secp256k1_fe_t x; VERIFY_CHECK(secp256k1_fe_set_hex(&x, "02", 2));
-    for (int i=0; i<500; i++) {
+    int i;
+    secp256k1_fe_t x = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 2);
+    static const secp256k1_fe_t xr = SECP256K1_FE_CONST(
+        0x7603CB59, 0xB0EF6C63, 0xFE608479, 0x2A0C378C,
+        0xDB3233A8, 0x0F8A9A09, 0xA877DEAD, 0x31B38C45
+    );
+    for (i = 0; i < 500; i++) {
         secp256k1_ge_t p;
         if (secp256k1_ge_set_xo_var(&p, &x, 1)) {
-            CHECK(secp256k1_ge_is_valid_var(&p));
             secp256k1_gej_t j;
+            CHECK(secp256k1_ge_is_valid_var(&p));
             secp256k1_gej_set_ge(&j, &p);
             CHECK(secp256k1_gej_is_valid_var(&j));
             test_point_times_order(&j);
         }
         secp256k1_fe_sqr(&x, &x);
     }
-    char c[65];
-    int cl = 1;
-    c[1] = 123;
-    secp256k1_fe_get_hex(c, &cl, &x); /* Check that fe_get_hex handles a too short input. */
-    CHECK(c[1] == 123);
-    cl = 65;
-    secp256k1_fe_get_hex(c, &cl, &x);
-    CHECK(strcmp(c, "7603CB59B0EF6C63FE6084792A0C378CDB3233A80F8A9A09A877DEAD31B38C45") == 0);
+    secp256k1_fe_normalize_var(&x);
+    CHECK(secp256k1_fe_equal_var(&x, &xr));
 }
 
 void test_wnaf(const secp256k1_scalar_t *number, int w) {
     secp256k1_scalar_t x, two, t;
+    int wnaf[256];
+    int zeroes = -1;
+    int i;
+    int bits;
     secp256k1_scalar_set_int(&x, 0);
     secp256k1_scalar_set_int(&two, 2);
-    int wnaf[256];
-    int bits = secp256k1_ecmult_wnaf(wnaf, number, w);
+    bits = secp256k1_ecmult_wnaf(wnaf, number, w);
     CHECK(bits <= 256);
-    int zeroes = -1;
-    for (int i=bits-1; i>=0; i--) {
-        secp256k1_scalar_mul(&x, &x, &two);
+    for (i = bits-1; i >= 0; i--) {
         int v = wnaf[i];
+        secp256k1_scalar_mul(&x, &x, &two);
         if (v) {
             CHECK(zeroes == -1 || zeroes >= w-1); /* check that distance between non-zero elements is at least w-1 */
             zeroes=0;
@@ -1070,8 +1137,9 @@ void test_wnaf(const secp256k1_scalar_t *number, int w) {
 }
 
 void run_wnaf(void) {
+    int i;
     secp256k1_scalar_t n;
-    for (int i=0; i<count; i++) {
+    for (i = 0; i < count; i++) {
         random_scalar_order(&n);
         if (i % 1)
             secp256k1_scalar_negate(&n, &n);
@@ -1087,26 +1155,29 @@ void random_sign(secp256k1_ecdsa_sig_t *sig, const secp256k1_scalar_t *key, cons
 }
 
 void test_ecdsa_sign_verify(void) {
+    secp256k1_gej_t pubj;
+    secp256k1_ge_t pub;
+    secp256k1_scalar_t one;
+    secp256k1_scalar_t msg, key;
+    secp256k1_ecdsa_sig_t sig;
     int recid;
     int getrec;
-    secp256k1_scalar_t msg, key;
     random_scalar_order_test(&msg);
     random_scalar_order_test(&key);
-    secp256k1_gej_t pubj; secp256k1_ecmult_gen(&pubj, &key);
-    secp256k1_ge_t pub; secp256k1_ge_set_gej(&pub, &pubj);
-    secp256k1_ecdsa_sig_t sig;
+    secp256k1_ecmult_gen(&pubj, &key);
+    secp256k1_ge_set_gej(&pub, &pubj);
     getrec = secp256k1_rand32()&1;
     random_sign(&sig, &key, &msg, getrec?&recid:NULL);
     if (getrec) CHECK(recid >= 0 && recid < 4);
     CHECK(secp256k1_ecdsa_sig_verify(&sig, &pub, &msg));
-    secp256k1_scalar_t one;
     secp256k1_scalar_set_int(&one, 1);
     secp256k1_scalar_add(&msg, &msg, &one);
     CHECK(!secp256k1_ecdsa_sig_verify(&sig, &pub, &msg));
 }
 
 void run_ecdsa_sign_verify(void) {
-    for (int i=0; i<10*count; i++) {
+    int i;
+    for (i = 0; i < 10*count; i++) {
         test_ecdsa_sign_verify();
     }
 }
@@ -1149,9 +1220,32 @@ static int nonce_function_test_retry(unsigned char *nonce32, const unsigned char
    return nonce_function_rfc6979(nonce32, msg32, key32, counter - 5, data);
 }
 
+int is_empty_compact_signature(const unsigned char *sig64) {
+    static const unsigned char res[64] = {0};
+    return memcmp(sig64, res, 64) == 0;
+}
+
 void test_ecdsa_end_to_end(void) {
+    unsigned char extra[32] = {0x00};
     unsigned char privkey[32];
     unsigned char message[32];
+    unsigned char privkey2[32];
+    unsigned char csignature[64];
+    unsigned char signature[72];
+    unsigned char signature2[72];
+    unsigned char signature3[72];
+    unsigned char signature4[72];
+    unsigned char pubkey[65];
+    unsigned char recpubkey[65];
+    unsigned char seckey[300];
+    int signaturelen = 72;
+    int signaturelen2 = 72;
+    int signaturelen3 = 72;
+    int signaturelen4 = 72;
+    int recid = 0;
+    int recpubkeylen = 0;
+    int pubkeylen = 65;
+    int seckeylen = 300;
 
     /* Generate a random key and message. */
     {
@@ -1164,7 +1258,6 @@ void test_ecdsa_end_to_end(void) {
 
     /* Construct and verify corresponding public key. */
     CHECK(secp256k1_ec_seckey_verify(privkey) == 1);
-    unsigned char pubkey[65]; int pubkeylen = 65;
     CHECK(secp256k1_ec_pubkey_create(pubkey, &pubkeylen, privkey, (secp256k1_rand32() & 3) != 0) == 1);
     if (secp256k1_rand32() & 1) {
         CHECK(secp256k1_ec_pubkey_decompress(pubkey, &pubkeylen));
@@ -1172,52 +1265,73 @@ void test_ecdsa_end_to_end(void) {
     CHECK(secp256k1_ec_pubkey_verify(pubkey, pubkeylen));
 
     /* Verify private key import and export. */
-    unsigned char seckey[300]; int seckeylen = 300;
     CHECK(secp256k1_ec_privkey_export(privkey, seckey, &seckeylen, secp256k1_rand32() % 2) == 1);
-    unsigned char privkey2[32];
     CHECK(secp256k1_ec_privkey_import(privkey2, seckey, seckeylen) == 1);
     CHECK(memcmp(privkey, privkey2, 32) == 0);
 
     /* Optionally tweak the keys using addition. */
     if (secp256k1_rand32() % 3 == 0) {
+        int ret1;
+        int ret2;
         unsigned char rnd[32];
+        unsigned char pubkey2[65];
+        int pubkeylen2 = 65;
         secp256k1_rand256_test(rnd);
-        int ret1 = secp256k1_ec_privkey_tweak_add(privkey, rnd);
-        int ret2 = secp256k1_ec_pubkey_tweak_add(pubkey, pubkeylen, rnd);
+        ret1 = secp256k1_ec_privkey_tweak_add(privkey, rnd);
+        ret2 = secp256k1_ec_pubkey_tweak_add(pubkey, pubkeylen, rnd);
         CHECK(ret1 == ret2);
         if (ret1 == 0) return;
-        unsigned char pubkey2[65]; int pubkeylen2 = 65;
         CHECK(secp256k1_ec_pubkey_create(pubkey2, &pubkeylen2, privkey, pubkeylen == 33) == 1);
         CHECK(memcmp(pubkey, pubkey2, pubkeylen) == 0);
     }
 
     /* Optionally tweak the keys using multiplication. */
     if (secp256k1_rand32() % 3 == 0) {
+        int ret1;
+        int ret2;
         unsigned char rnd[32];
+        unsigned char pubkey2[65];
+        int pubkeylen2 = 65;
         secp256k1_rand256_test(rnd);
-        int ret1 = secp256k1_ec_privkey_tweak_mul(privkey, rnd);
-        int ret2 = secp256k1_ec_pubkey_tweak_mul(pubkey, pubkeylen, rnd);
+        ret1 = secp256k1_ec_privkey_tweak_mul(privkey, rnd);
+        ret2 = secp256k1_ec_pubkey_tweak_mul(pubkey, pubkeylen, rnd);
         CHECK(ret1 == ret2);
         if (ret1 == 0) return;
-        unsigned char pubkey2[65]; int pubkeylen2 = 65;
         CHECK(secp256k1_ec_pubkey_create(pubkey2, &pubkeylen2, privkey, pubkeylen == 33) == 1);
         CHECK(memcmp(pubkey, pubkey2, pubkeylen) == 0);
     }
 
     /* Sign. */
-    unsigned char signature[72]; int signaturelen = 72;
     CHECK(secp256k1_ecdsa_sign(message, signature, &signaturelen, privkey, NULL, NULL) == 1);
+    CHECK(signaturelen > 0);
+    CHECK(secp256k1_ecdsa_sign(message, signature2, &signaturelen2, privkey, NULL, extra) == 1);
+    CHECK(signaturelen2 > 0);
+    extra[31] = 1;
+    CHECK(secp256k1_ecdsa_sign(message, signature3, &signaturelen3, privkey, NULL, extra) == 1);
+    CHECK(signaturelen3 > 0);
+    extra[31] = 0;
+    extra[0] = 1;
+    CHECK(secp256k1_ecdsa_sign(message, signature4, &signaturelen4, privkey, NULL, extra) == 1);
+    CHECK(signaturelen3 > 0);
+    CHECK((signaturelen != signaturelen2) || (memcmp(signature, signature2, signaturelen) != 0));
+    CHECK((signaturelen != signaturelen3) || (memcmp(signature, signature3, signaturelen) != 0));
+    CHECK((signaturelen3 != signaturelen2) || (memcmp(signature3, signature2, signaturelen3) != 0));
+    CHECK((signaturelen4 != signaturelen3) || (memcmp(signature4, signature3, signaturelen4) != 0));
+    CHECK((signaturelen4 != signaturelen2) || (memcmp(signature4, signature2, signaturelen4) != 0));
+    CHECK((signaturelen4 != signaturelen) || (memcmp(signature4, signature, signaturelen4) != 0));
     /* Verify. */
     CHECK(secp256k1_ecdsa_verify(message, signature, signaturelen, pubkey, pubkeylen) == 1);
+    CHECK(secp256k1_ecdsa_verify(message, signature2, signaturelen2, pubkey, pubkeylen) == 1);
+    CHECK(secp256k1_ecdsa_verify(message, signature3, signaturelen3, pubkey, pubkeylen) == 1);
+    CHECK(secp256k1_ecdsa_verify(message, signature4, signaturelen4, pubkey, pubkeylen) == 1);
     /* Destroy signature and verify again. */
     signature[signaturelen - 1 - secp256k1_rand32() % 20] += 1 + (secp256k1_rand32() % 255);
     CHECK(secp256k1_ecdsa_verify(message, signature, signaturelen, pubkey, pubkeylen) != 1);
 
     /* Compact sign. */
-    unsigned char csignature[64]; int recid = 0;
     CHECK(secp256k1_ecdsa_sign_compact(message, csignature, privkey, NULL, NULL, &recid) == 1);
+    CHECK(!is_empty_compact_signature(csignature));
     /* Recover. */
-    unsigned char recpubkey[65]; int recpubkeylen = 0;
     CHECK(secp256k1_ecdsa_recover_compact(message, csignature, recpubkey, &recpubkeylen, pubkeylen == 33, recid) == 1);
     CHECK(recpubkeylen == pubkeylen);
     CHECK(memcmp(pubkey, recpubkey, pubkeylen) == 0);
@@ -1230,6 +1344,8 @@ void test_ecdsa_end_to_end(void) {
 }
 
 void test_random_pubkeys(void) {
+    secp256k1_ge_t elem;
+    secp256k1_ge_t elem2;
     unsigned char in[65];
     /* Generate some randomly sized pubkeys. */
     uint32_t r = secp256k1_rand32();
@@ -1247,8 +1363,6 @@ void test_random_pubkeys(void) {
     r>>=11;
     if (len > 1) secp256k1_rand256(&in[1]);
     if (len > 33) secp256k1_rand256(&in[33]);
-    secp256k1_ge_t elem;
-    secp256k1_ge_t elem2;
     if (secp256k1_eckey_pubkey_parse(&elem, in, len)) {
         unsigned char out[65];
         unsigned char firstb;
@@ -1282,13 +1396,15 @@ void test_random_pubkeys(void) {
 }
 
 void run_random_pubkeys(void) {
-    for (int i=0; i<10*count; i++) {
+    int i;
+    for (i = 0; i < 10*count; i++) {
         test_random_pubkeys();
     }
 }
 
 void run_ecdsa_end_to_end(void) {
-    for (int i=0; i<64*count; i++) {
+    int i;
+    for (i = 0; i < 64*count; i++) {
         test_ecdsa_end_to_end();
     }
 }
@@ -1314,12 +1430,8 @@ void test_ecdsa_edge_cases(void) {
         0x6E, 0x1B, 0xE8, 0xEC, 0xC7, 0xDD, 0x95, 0x57
     };
     unsigned char pubkey[65];
+    int t;
     int pubkeylen = 65;
-    CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 0));
-    CHECK(secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 1));
-    CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 2));
-    CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 3));
-
     /* signature (r,s) = (4,4), which can be recovered with all 4 recids. */
     const unsigned char sigb64[64] = {
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -1333,7 +1445,16 @@ void test_ecdsa_edge_cases(void) {
     };
     unsigned char pubkeyb[33];
     int pubkeyblen = 33;
-    for (int recid = 0; recid < 4; recid++) {
+    int recid;
+
+    CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 0));
+    CHECK(secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 1));
+    CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 2));
+    CHECK(!secp256k1_ecdsa_recover_compact(msg32, sig64, pubkey, &pubkeylen, 0, 3));
+
+    for (recid = 0; recid < 4; recid++) {
+        int i;
+        int recid2;
         /* (4,4) encoded in DER. */
         unsigned char sigbder[8] = {0x30, 0x06, 0x02, 0x01, 0x04, 0x02, 0x01, 0x04};
         unsigned char sigcder_zr[7] = {0x30, 0x05, 0x02, 0x00, 0x02, 0x01, 0x01};
@@ -1376,7 +1497,7 @@ void test_ecdsa_edge_cases(void) {
         };
         CHECK(secp256k1_ecdsa_recover_compact(msg32, sigb64, pubkeyb, &pubkeyblen, 1, recid));
         CHECK(secp256k1_ecdsa_verify(msg32, sigbder, sizeof(sigbder), pubkeyb, pubkeyblen) == 1);
-        for (int recid2 = 0; recid2 < 4; recid2++) {
+        for (recid2 = 0; recid2 < 4; recid2++) {
             unsigned char pubkey2b[33];
             int pubkey2blen = 33;
             CHECK(secp256k1_ecdsa_recover_compact(msg32, sigb64, pubkey2b, &pubkey2blen, 1, recid2));
@@ -1402,10 +1523,11 @@ void test_ecdsa_edge_cases(void) {
         sigbder[7]--;
         CHECK(secp256k1_ecdsa_verify(msg32, sigbder, 6, pubkeyb, pubkeyblen) == -2);
         CHECK(secp256k1_ecdsa_verify(msg32, sigbder, sizeof(sigbder)-1, pubkeyb, pubkeyblen) == -2);
-        for(int i = 0; i<8; i++) {
+        for(i = 0; i < 8; i++) {
+            int c;
             unsigned char orig = sigbder[i];
             /*Try every single-byte change.*/
-            for (int c=0; c<256; c++) {
+            for (c = 0; c < 256; c++) {
                 if (c == orig ) continue;
                 sigbder[i] = c;
                 CHECK(secp256k1_ecdsa_verify(msg32, sigbder, sizeof(sigbder), pubkeyb, pubkeyblen) ==
@@ -1417,16 +1539,17 @@ void test_ecdsa_edge_cases(void) {
 
     /* Test the case where ECDSA recomputes a point that is infinity. */
     {
+        secp256k1_gej_t keyj;
+        secp256k1_ge_t key;
+        secp256k1_scalar_t msg;
         secp256k1_ecdsa_sig_t sig;
         secp256k1_scalar_set_int(&sig.s, 1);
         secp256k1_scalar_negate(&sig.s, &sig.s);
         secp256k1_scalar_inverse(&sig.s, &sig.s);
         secp256k1_scalar_set_int(&sig.r, 1);
-        secp256k1_gej_t keyj;
         secp256k1_ecmult_gen(&keyj, &sig.r);
-        secp256k1_ge_t key;
         secp256k1_ge_set_gej(&key, &keyj);
-        secp256k1_scalar_t msg = sig.s;
+        msg = sig.s;
         CHECK(secp256k1_ecdsa_sig_verify(&sig, &key, &msg) == 0);
     }
 
@@ -1489,69 +1612,97 @@ void test_ecdsa_edge_cases(void) {
         unsigned char sig[72];
         int siglen = 72;
         CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, precomputed_nonce_function, nonce) == 0);
+        CHECK(siglen == 0);
         CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, precomputed_nonce_function, nonce2) == 0);
+        CHECK(siglen == 0);
         msg[31] = 0xaa;
         siglen = 72;
         CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, precomputed_nonce_function, nonce) == 1);
+        CHECK(siglen > 0);
         CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, precomputed_nonce_function, nonce2) == 1);
+        CHECK(siglen > 0);
         siglen = 10;
         CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, precomputed_nonce_function, nonce) != 1);
+        CHECK(siglen == 0);
     }
 
     /* Nonce function corner cases. */
-    {
+    for (t = 0; t < 2; t++) {
+        static const unsigned char zero[32] = {0x00};
+        int i;
         unsigned char key[32];
         unsigned char msg[32];
         unsigned char sig[72];
-        memset(key, 0, 32);
+        unsigned char sig2[72];
+        secp256k1_ecdsa_sig_t s[512];
+        int siglen = 72;
+        int siglen2 = 72;
+        int recid2;
+        const unsigned char *extra;
+        extra = t == 0 ? NULL : zero;
         memset(msg, 0, 32);
-        key[31] = 1;
         msg[31] = 1;
-        int siglen = 72;
-        int recid;
+        /* High key results in signature failure. */
+        memset(key, 0xFF, 32);
+        CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, NULL, extra) == 0);
+        CHECK(siglen == 0);
+        /* Zero key results in signature failure. */
+        memset(key, 0, 32);
+        CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, NULL, extra) == 0);
+        CHECK(siglen == 0);
         /* Nonce function failure results in signature failure. */
-        CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, nonce_function_test_fail, NULL) == 0);
-        CHECK(secp256k1_ecdsa_sign_compact(msg, sig, key, nonce_function_test_fail, NULL, &recid) == 0);
+        key[31] = 1;
+        CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, nonce_function_test_fail, extra) == 0);
+        CHECK(siglen == 0);
+        CHECK(secp256k1_ecdsa_sign_compact(msg, sig, key, nonce_function_test_fail, extra, &recid) == 0);
+        CHECK(is_empty_compact_signature(sig));
         /* The retry loop successfully makes its way to the first good value. */
-        unsigned char sig2[72];
-        int siglen2 = 72;
         siglen = 72;
-        CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, nonce_function_test_retry, NULL) == 1);
-        CHECK(secp256k1_ecdsa_sign(msg, sig2, &siglen2, key, nonce_function_rfc6979, NULL) == 1);
+        CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, nonce_function_test_retry, extra) == 1);
+        CHECK(siglen > 0);
+        CHECK(secp256k1_ecdsa_sign(msg, sig2, &siglen2, key, nonce_function_rfc6979, extra) == 1);
+        CHECK(siglen > 0);
         CHECK((siglen == siglen2) && (memcmp(sig, sig2, siglen) == 0));
-        int recid2;
-        CHECK(secp256k1_ecdsa_sign_compact(msg, sig, key, nonce_function_test_retry, NULL, &recid) == 1);
-        CHECK(secp256k1_ecdsa_sign_compact(msg, sig2, key, nonce_function_rfc6979, NULL, &recid2) == 1);
+        CHECK(secp256k1_ecdsa_sign_compact(msg, sig, key, nonce_function_test_retry, extra, &recid) == 1);
+        CHECK(!is_empty_compact_signature(sig));
+        CHECK(secp256k1_ecdsa_sign_compact(msg, sig2, key, nonce_function_rfc6979, extra, &recid2) == 1);
+        CHECK(!is_empty_compact_signature(sig2));
         CHECK((recid == recid2) && (memcmp(sig, sig2, 64) == 0));
         /* The default nonce function is determinstic. */
         siglen = 72;
         siglen2 = 72;
-        CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, NULL, NULL) == 1);
-        CHECK(secp256k1_ecdsa_sign(msg, sig2, &siglen2, key, NULL, NULL) == 1);
+        CHECK(secp256k1_ecdsa_sign(msg, sig, &siglen, key, NULL, extra) == 1);
+        CHECK(siglen > 0);
+        CHECK(secp256k1_ecdsa_sign(msg, sig2, &siglen2, key, NULL, extra) == 1);
+        CHECK(siglen2 > 0);
         CHECK((siglen == siglen2) && (memcmp(sig, sig2, siglen) == 0));
-        CHECK(secp256k1_ecdsa_sign_compact(msg, sig, key, NULL, NULL, &recid) == 1);
-        CHECK(secp256k1_ecdsa_sign_compact(msg, sig2, key, NULL, NULL, &recid2) == 1);
+        CHECK(secp256k1_ecdsa_sign_compact(msg, sig, key, NULL, extra, &recid) == 1);
+        CHECK(!is_empty_compact_signature(sig));
+        CHECK(secp256k1_ecdsa_sign_compact(msg, sig2, key, NULL, extra, &recid2) == 1);
+        CHECK(!is_empty_compact_signature(sig));
         CHECK((recid == recid2) && (memcmp(sig, sig2, 64) == 0));
         /* The default nonce function changes output with different messages. */
-        secp256k1_ecdsa_sig_t s[512];
-        for(int i=0; i<256; i++) {
+        for(i = 0; i < 256; i++) {
+            int j;
             siglen2 = 72;
             msg[0] = i;
-            CHECK(secp256k1_ecdsa_sign(msg, sig2, &siglen2, key, NULL, NULL) == 1);
+            CHECK(secp256k1_ecdsa_sign(msg, sig2, &siglen2, key, NULL, extra) == 1);
+            CHECK(!is_empty_compact_signature(sig));
             CHECK(secp256k1_ecdsa_sig_parse(&s[i], sig2, siglen2));
-            for (int j=0; j<i; j++) {
+            for (j = 0; j < i; j++) {
                 CHECK(!secp256k1_scalar_eq(&s[i].r, &s[j].r));
             }
         }
         msg[0] = 0;
         msg[31] = 2;
         /* The default nonce function changes output with different keys. */
-        for(int i=256; i<512; i++) {
+        for(i = 256; i < 512; i++) {
+            int j;
             siglen2 = 72;
             key[0] = i - 256;
-            CHECK(secp256k1_ecdsa_sign(msg, sig2, &siglen2, key, NULL, NULL) == 1);
+            CHECK(secp256k1_ecdsa_sign(msg, sig2, &siglen2, key, NULL, extra) == 1);
             CHECK(secp256k1_ecdsa_sig_parse(&s[i], sig2, siglen2));
-            for (int j=0; j<i; j++) {
+            for (j = 0; j < i; j++) {
                 CHECK(!secp256k1_scalar_eq(&s[i].r, &s[j].r));
             }
         }
@@ -1581,8 +1732,8 @@ void run_ecdsa_edge_cases(void) {
 EC_KEY *get_openssl_key(const secp256k1_scalar_t *key) {
     unsigned char privkey[300];
     int privkeylen;
-    int compr = secp256k1_rand32() & 1;
     const unsigned char* pbegin = privkey;
+    int compr = secp256k1_rand32() & 1;
     EC_KEY *ec_key = EC_KEY_new_by_curve_name(NID_secp256k1);
     CHECK(secp256k1_eckey_privkey_serialize(privkey, &privkeylen, key, compr));
     CHECK(d2i_ECPrivateKey(&ec_key, &pbegin, privkeylen));
@@ -1591,31 +1742,32 @@ EC_KEY *get_openssl_key(const secp256k1_scalar_t *key) {
 }
 
 void test_ecdsa_openssl(void) {
+    secp256k1_gej_t qj;
+    secp256k1_ge_t q;
+    secp256k1_ecdsa_sig_t sig;
+    secp256k1_scalar_t one;
+    secp256k1_scalar_t msg2;
     secp256k1_scalar_t key, msg;
+    EC_KEY *ec_key;
+    unsigned int sigsize = 80;
+    int secp_sigsize = 80;
     unsigned char message[32];
+    unsigned char signature[80];
     secp256k1_rand256_test(message);
     secp256k1_scalar_set_b32(&msg, message, NULL);
     random_scalar_order_test(&key);
-    secp256k1_gej_t qj;
     secp256k1_ecmult_gen(&qj, &key);
-    secp256k1_ge_t q;
     secp256k1_ge_set_gej(&q, &qj);
-    EC_KEY *ec_key = get_openssl_key(&key);
+    ec_key = get_openssl_key(&key);
     CHECK(ec_key);
-    unsigned char signature[80];
-    unsigned int sigsize = 80;
     CHECK(ECDSA_sign(0, message, sizeof(message), signature, &sigsize, ec_key));
-    secp256k1_ecdsa_sig_t sig;
     CHECK(secp256k1_ecdsa_sig_parse(&sig, signature, sigsize));
     CHECK(secp256k1_ecdsa_sig_verify(&sig, &q, &msg));
-    secp256k1_scalar_t one;
     secp256k1_scalar_set_int(&one, 1);
-    secp256k1_scalar_t msg2;
     secp256k1_scalar_add(&msg2, &msg, &one);
     CHECK(!secp256k1_ecdsa_sig_verify(&sig, &q, &msg2));
 
     random_sign(&sig, &key, &msg, NULL);
-    int secp_sigsize = 80;
     CHECK(secp256k1_ecdsa_sig_serialize(signature, &secp_sigsize, &sig));
     CHECK(ECDSA_verify(0, message, sizeof(message), signature, secp_sigsize, ec_key) == 1);
 
@@ -1623,33 +1775,54 @@ void test_ecdsa_openssl(void) {
 }
 
 void run_ecdsa_openssl(void) {
-    for (int i=0; i<10*count; i++) {
+    int i;
+    for (i = 0; i < 10*count; i++) {
         test_ecdsa_openssl();
     }
 }
 #endif
 
 int main(int argc, char **argv) {
+    unsigned char seed16[16] = {0};
+    unsigned char run32[32] = {0};
     /* find iteration count */
     if (argc > 1) {
         count = strtol(argv[1], NULL, 0);
     }
 
     /* find random seed */
-    uint64_t seed;
     if (argc > 2) {
-        seed = strtoull(argv[2], NULL, 0);
+        int pos = 0;
+        const char* ch = argv[2];
+        while (pos < 16 && ch[0] != 0 && ch[1] != 0) {
+            unsigned short sh;
+            if (sscanf(ch, "%2hx", &sh)) {
+                seed16[pos] = sh;
+            } else {
+                break;
+            }
+            ch += 2;
+            pos++;
+        }
     } else {
         FILE *frand = fopen("/dev/urandom", "r");
-        if (!frand || !fread(&seed, sizeof(seed), 1, frand)) {
-            seed = time(NULL) * 1337;
+        if (!frand || !fread(&seed16, sizeof(seed16), 1, frand)) {
+            uint64_t t = time(NULL) * (uint64_t)1337;
+            seed16[0] ^= t;
+            seed16[1] ^= t >> 8;
+            seed16[2] ^= t >> 16;
+            seed16[3] ^= t >> 24;
+            seed16[4] ^= t >> 32;
+            seed16[5] ^= t >> 40;
+            seed16[6] ^= t >> 48;
+            seed16[7] ^= t >> 56;
         }
         fclose(frand);
     }
-    secp256k1_rand_seed(seed);
+    secp256k1_rand_seed(seed16);
 
     printf("test count = %i\n", count);
-    printf("random seed = %llu\n", (unsigned long long)seed);
+    printf("random seed = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", seed16[0], seed16[1], seed16[2], seed16[3], seed16[4], seed16[5], seed16[6], seed16[7], seed16[8], seed16[9], seed16[10], seed16[11], seed16[12], seed16[13], seed16[14], seed16[15]);
 
     /* initialize */
     secp256k1_start(SECP256K1_START_SIGN | SECP256K1_START_VERIFY);
@@ -1657,12 +1830,6 @@ int main(int argc, char **argv) {
     /* initializing a second time shouldn't cause any harm or memory leaks. */
     secp256k1_start(SECP256K1_START_SIGN | SECP256K1_START_VERIFY);
 
-    /* Likewise, re-running the internal init functions should be harmless. */
-    secp256k1_fe_start();
-    secp256k1_ge_start();
-    secp256k1_scalar_start();
-    secp256k1_ecdsa_start();
-
     run_sha256_tests();
     run_hmac_sha256_tests();
     run_rfc6979_hmac_sha256_tests();
@@ -1680,6 +1847,7 @@ int main(int argc, char **argv) {
     run_field_inv_var();
     run_field_inv_all_var();
     run_field_misc();
+    run_field_convert();
     run_sqr();
     run_sqrt();
 
@@ -1700,18 +1868,13 @@ int main(int argc, char **argv) {
     run_ecdsa_openssl();
 #endif
 
-    printf("random run = %llu\n", (unsigned long long)secp256k1_rand32() + ((unsigned long long)secp256k1_rand32() << 32));
+    secp256k1_rand256(run32);
+    printf("random run = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", run32[0], run32[1], run32[2], run32[3], run32[4], run32[5], run32[6], run32[7], run32[8], run32[9], run32[10], run32[11], run32[12], run32[13], run32[14], run32[15]);
 
     /* shutdown */
     secp256k1_stop();
 
     /* shutting down twice shouldn't cause any double frees. */
     secp256k1_stop();
-
-    /* Same for the internal shutdown functions. */
-    secp256k1_fe_stop();
-    secp256k1_ge_stop();
-    secp256k1_scalar_stop();
-    secp256k1_ecdsa_stop();
     return 0;
 }
diff --git a/src/util.h b/src/util.h
index c3a8f3a42..ae98639f7 100644
--- a/src/util.h
+++ b/src/util.h
@@ -27,7 +27,7 @@
 } while(0)
 #endif
 
-#ifndef HAVE_BUILTIN_EXPECT
+#ifdef HAVE_BUILTIN_EXPECT
 #define EXPECT(x,c) __builtin_expect((x),(c))
 #else
 #define EXPECT(x,c) (x)
@@ -61,7 +61,7 @@
 #define VERIFY_CHECK(cond) do { (void)(cond); } while(0)
 #endif
 
-static inline void *checked_malloc(size_t size) {
+static SECP256K1_INLINE void *checked_malloc(size_t size) {
     void *ret = malloc(size);
     CHECK(ret != NULL);
     return ret;
@@ -84,4 +84,21 @@ static inline void *checked_malloc(size_t size) {
 # endif
 #endif
 
+#if defined(_WIN32)
+# define I64FORMAT "I64d"
+# define I64uFORMAT "I64u"
+#else
+# define I64FORMAT "lld"
+# define I64uFORMAT "llu"
+#endif
+
+#if defined(HAVE___INT128)
+# if defined(__GNUC__)
+#  define SECP256K1_GNUC_EXT __extension__
+# else
+#  define SECP256K1_GNUC_EXT
+# endif
+SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t;
+#endif
+
 #endif