Implementation
/// Squares the five-limb field element [a] and stores the result in [r].
///
/// Every limb of [a] is copied into a local before any limb of [r] is
/// assigned, and [a] is not read again after the initial bound checks.
///
/// Bounds checked on entry: `a[0]`..`a[3]` against 56 bits, `a[4]` against
/// 52 bits. Bounds checked on exit: `r[0]`..`r[3]` against 52 bits, `r[4]`
/// against 49 bits.
///
/// Notation used by the step comments in the body:
///  * `[... x2 x1 x0]` is shorthand for `... + (x2 << 104) + (x1 << 52) + x0`,
///    i.e. a number written in limbs of 52 bits each.
///  * `px` is the sum of `a[i] * a[x - i]` over all valid `i`; for example
///    `p3 = 2*a0*a3 + 2*a1*a2` and `p8 = a4*a4`.
///  * A value sitting five limb positions up is folded down by multiplying it
///    with `R`, i.e. `[x 0 0 0 0 0] = [x*R]`. Presumably `R` is 2^260 reduced
///    modulo the secp256k1 field prime (0x1000003D10 in the reference C
///    implementation) -- TODO confirm against `Secp256k1Const.bit33Mask`.
///
/// NOTE(review): the `secp256k1U128*` helpers are defined elsewhere; from
/// their use here they appear to be: `Mul(x, a, b)` sets `x = a * b`,
/// `AccumMul(x, a, b)` adds `a * b` to `x`, `AccumU64(x, a)` adds `a` to `x`,
/// `ToU64(x)` returns the low 64 bits of `x`, and `Rshift(x, n)` shifts `x`
/// right by `n` bits in place -- confirm against their definitions.
static void secp256k1FeSqr(Secp256k1Fe r, Secp256k1Fe a) {
/// Local wrappers that forward to the shared checkers, passing this
/// function's name as the third argument (presumably a label for failure
/// reports -- confirm in `_verifyBits` / `_verifyBits128`).
void verifyBits(BigInt x, int n) => _verifyBits(x, n, "secp256k1FeSqr");
void verifyBits128(Secp256k1Uint128 x, int n) =>
_verifyBits128(x, n, "secp256k1FeSqr");
/// `c` and `d` are the two wide accumulators used throughout.
Secp256k1Uint128 c = Secp256k1Uint128(), d = Secp256k1Uint128();
/// Snapshot the input limbs; `a4` and `a0` are doubled in place further down.
BigInt a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
BigInt t3, t4, tx, u0;
/// `M` masks one 52-bit limb; `R` is the reduction multiplier (see header).
final M = Secp256k1Const.mask52, R = Secp256k1Const.bit33Mask;
/// Input bound checks: the top limb is held to 52 bits, the others to 56.
verifyBits(a[0], 56);
verifyBits(a[1], 56);
verifyBits(a[2], 56);
verifyBits(a[3], 56);
verifyBits(a[4], 52);
/// d = p3 = 2*a0*a3 + 2*a1*a2.
secp256k1U128Mul(d, a0 * BigInt.two, a3);
secp256k1U128AccumMul(d, a1 * BigInt.two, a2);
verifyBits128(d, 114);
/// [d 0 0 0] = [p3 0 0 0]
/// c = p8 = a4*a4.
secp256k1U128Mul(c, a4, a4);
verifyBits128(c, 112);
/// [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0]
/// Fold the low 64 bits of p8 (five limbs above d) into d via R; the
/// remaining high bits stay in c for the next position.
secp256k1U128AccumMul(d, R, secp256k1U128ToU64(c));
secp256k1U128Rshift(c, 64);
verifyBits128(d, 115);
verifyBits128(c, 48);
/// [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0]
/// Peel the low 52 bits of d off as t3 and carry the rest upward.
/// NOTE(review): `toUnsigned64` is not visible here; presumably it
/// truncates to an unsigned 64-bit value -- confirm.
t3 = (secp256k1U128ToU64(d) & M).toUnsigned64;
secp256k1U128Rshift(d, 52);
verifyBits(t3, 52);
verifyBits128(d, 63);
/// [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0]
/// From here on `a4` holds 2*a[4], so a0*a4 below contributes 2*a0*a[4].
a4 = (a4 * BigInt.two).toUnsigned64;
/// d += p4 = 2*a0*a[4] + 2*a1*a3 + a2*a2.
secp256k1U128AccumMul(d, a0, a4);
secp256k1U128AccumMul(d, a1 * BigInt.two, a3);
secp256k1U128AccumMul(d, a2, a2);
verifyBits128(d, 115);
/// [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0]
/// Fold the remaining high part of p8 into d; the extra `<< 12` accounts for
/// c having been shifted by 64 bits rather than by one 52-bit limb.
secp256k1U128AccumMul(d, R << 12, secp256k1U128ToU64(c));
verifyBits128(d, 116);
/// [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0]
/// Peel the low 52 bits of d off as t4 and carry the rest upward.
t4 = (secp256k1U128ToU64(d) & M).toUnsigned64;
secp256k1U128Rshift(d, 52);
verifyBits(t4, 52);
verifyBits128(d, 64);
/// [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0]
/// Split t4 into its top 4 bits (tx) and low 48 bits; the top bits are
/// folded back into limb 0 below.
tx = (t4 >> 48).toUnsigned64;
t4 = (t4 & (M >> 4)).toUnsigned64;
verifyBits(tx, 4);
verifyBits(t4, 48);
/// [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0]
/// c = p0 = a0*a0 (`a0` has not been doubled yet at this point).
secp256k1U128Mul(c, a0, a0);
verifyBits128(c, 112);
/// [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0]
/// d += p5 = 2*a1*a[4] + 2*a2*a3 (`a4` already holds 2*a[4]).
secp256k1U128AccumMul(d, a1, a4);
secp256k1U128AccumMul(d, a2 * BigInt.two, a3);
verifyBits128(d, 114);
/// [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0]
/// Peel the low 52 bits of d off as u0 and carry the rest upward.
u0 = (secp256k1U128ToU64(d) & M).toUnsigned64;
secp256k1U128Rshift(d, 52);
verifyBits(u0, 52);
verifyBits128(d, 62);
/// [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0]
/// [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0]
/// Merge u0 with the 4 spilled bits of t4 so both are reduced in one multiply.
u0 = ((u0 << 4) | tx).toUnsigned64;
verifyBits(u0, 56);
/// [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0]
/// u0 sits at bit 48 of limb 4, i.e. 4 bits below limb 5, hence `R >> 4`.
secp256k1U128AccumMul(c, u0, R >> 4);
verifyBits128(c, 113);
/// [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0]
/// Emit result limb 0 and carry the rest of c upward.
r[0] = secp256k1U128ToU64(c) & M;
secp256k1U128Rshift(c, 52);
verifyBits(r[0], 52);
verifyBits128(c, 61);
/// [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0]
/// From here on `a0` holds 2*a[0].
a0 = (a0 * BigInt.two).toUnsigned64;
/// c += p1 = 2*a[0]*a1.
secp256k1U128AccumMul(c, a0, a1);
verifyBits128(c, 114);
/// [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0]
/// d += p6 = 2*a2*a[4] + a3*a3.
secp256k1U128AccumMul(d, a2, a4);
secp256k1U128AccumMul(d, a3, a3);
verifyBits128(d, 114);
/// [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0]
/// Fold the low 52 bits of d (limb 6) into c (limb 1) via R.
secp256k1U128AccumMul(c, secp256k1U128ToU64(d) & M, R);
secp256k1U128Rshift(d, 52);
verifyBits128(c, 115);
verifyBits128(d, 62);
/// [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0]
/// Emit result limb 1 and carry the rest of c upward.
r[1] = secp256k1U128ToU64(c) & M;
secp256k1U128Rshift(c, 52);
verifyBits(r[1], 52);
verifyBits128(c, 63);
/// [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0]
/// c += p2 = 2*a[0]*a2 + a1*a1 (`a0` already holds 2*a[0]).
secp256k1U128AccumMul(c, a0, a2);
secp256k1U128AccumMul(c, a1, a1);
verifyBits128(c, 114);
/// [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0]
/// d += p7 = 2*a3*a[4].
secp256k1U128AccumMul(d, a3, a4);
verifyBits128(d, 114);
/// [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0]
/// Fold the low 64 bits of d (limb 7) into c (limb 2) via R; the high bits
/// stay in d for the next position.
secp256k1U128AccumMul(c, R, secp256k1U128ToU64(d));
secp256k1U128Rshift(d, 64);
verifyBits128(c, 115);
verifyBits128(d, 50);
/// [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0]
/// Emit result limb 2 and carry the rest of c upward.
r[2] = secp256k1U128ToU64(c) & M;
secp256k1U128Rshift(c, 52);
verifyBits(r[2], 52);
verifyBits128(c, 63);
/// [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0]
/// Fold the remaining high part of d into c (again `<< 12` for the 64-bit
/// shift) and add the t3 limb saved earlier.
secp256k1U128AccumMul(c, R << 12, secp256k1U128ToU64(d));
secp256k1U128AccumU64(c, t3);
verifyBits128(c, 100);
/// [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0]
/// Emit result limb 3 and carry the rest of c upward.
r[3] = secp256k1U128ToU64(c) & M;
secp256k1U128Rshift(c, 52);
verifyBits(r[3], 52);
verifyBits128(c, 48);
/// [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0]
/// The top limb is the final carry plus the 48-bit t4; no masking is applied.
r[4] = secp256k1U128ToU64(c) + t4;
verifyBits(r[4], 49);
/// [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0]
}