Skip to main content

binius_field/arch/portable/arithmetic/
ghash.rs

1// Copyright 2023-2025 Irreducible Inc.
2// Copyright 2026 The Binius Developers
3
4//! Portable (software) implementation of GHASH field multiplication.
5
6use std::{
7	iter::Sum,
8	ops::{Add, AddAssign, Sub, SubAssign},
9};
10
11use bytemuck::TransparentWrapper;
12
13use super::super::univariate_mul_utils_128::{Underlier64bLanes, Underlier128bLanes, bmul64};
14use crate::{BinaryField128bGhash as GhashB128, WideMul, arch::PackedPrimitiveType};
15
16/// Multiply two GHASH field elements using software implementation.
17///
18/// Method described at:
19/// * <https://www.bearssl.org/constanttime.html#ghash-for-gcm>
20/// * <https://crypto.stackexchange.com/questions/66448/how-does-bearssls-gcm-modular-reduction-work/66462#66462>
21///
22/// This code does not conform to the bit-endianness requirements of the GCM specification, but is
23/// a valid GHASH field multiplication with the modified representation.
24#[inline]
25pub fn ghash_mul<U: Underlier128bLanes>(x: U, y: U) -> U {
26	ghash_wide_mul(x, y).reduce()
27}
28
29/// Widening multiply: the schoolbook polynomial product of two GHASH field elements, without the
30/// modular reduction. The unreduced result can be accumulated by XOR and reduced once at the end
31/// via [`WideGhashProduct::reduce`].
32#[inline]
33pub fn ghash_wide_mul<U: Underlier128bLanes>(x: U, y: U) -> WideGhashProduct<U> {
34	// Convert to U64x2 representation
35	let (x1, x0) = U::split_hi_lo_64(x);
36	let (y1, y0) = U::split_hi_lo_64(y);
37
38	// Perform multiplication
39	let x0r = x0.reverse_bits_64();
40	let x1r = x1.reverse_bits_64();
41	let x2 = x0 ^ x1;
42	let x2r = x0r ^ x1r;
43
44	let y0r = y0.reverse_bits_64();
45	let y1r = y1.reverse_bits_64();
46	let y2 = y0 ^ y1;
47	let y2r = y0r ^ y1r;
48
49	let z0 = bmul64(y0, x0);
50	let z1 = bmul64(y1, x1);
51	let mut z2 = bmul64(y2, x2);
52
53	let mut z0h = bmul64(y0r, x0r);
54	let mut z1h = bmul64(y1r, x1r);
55	let mut z2h = bmul64(y2r, x2r);
56
57	z2 ^= z0 ^ z1;
58	z2h ^= z0h ^ z1h;
59	z0h = z0h.reverse_bits_64().shr_64(1);
60	z1h = z1h.reverse_bits_64().shr_64(1);
61	z2h = z2h.reverse_bits_64().shr_64(1);
62
63	WideGhashProduct {
64		v0: z0,
65		v1: z0h ^ z2,
66		v2: z1 ^ z2h,
67		v3: z1h,
68	}
69}
70
71#[inline]
72pub fn ghash_square<U: Underlier128bLanes>(x: U) -> U {
73	// Squared value in the polynomial basis is just a value with bits interleaved with zeroes.
74	let (hi, lo) = x.spread_bits_128();
75
76	let (v3, v2) = hi.split_hi_lo_64();
77	let (v1, v0) = lo.split_hi_lo_64();
78
79	reduce_64(v0, v1, v2, v3)
80}
81
82/// Reduce a 256-bit value represented as four 64-bit values by the GHASH polynomial.
83#[inline]
84fn reduce_64<U: Underlier128bLanes>(
85	mut v0: U::U64,
86	mut v1: U::U64,
87	mut v2: U::U64,
88	v3: U::U64,
89) -> U {
90	// Reduce modulo X^64 + X^7 + X^2 + X + 1.
91	v1 ^= v3 ^ v3.shl_64(1) ^ v3.shl_64(2) ^ v3.shl_64(7);
92	v2 ^= v3.shr_64(63) ^ v3.shr_64(62) ^ v3.shr_64(57);
93	v0 ^= v2 ^ v2.shl_64(1) ^ v2.shl_64(2) ^ v2.shl_64(7);
94	v1 ^= v2.shr_64(63) ^ v2.shr_64(62) ^ v2.shr_64(57);
95
96	// Convert back to 128-bit lanes
97	U::join_u64s(v1, v0)
98}
99
100/// An unreduced GHASH product, stored as the four 64-bit limbs `(v0, v1, v2, v3)` of the 256-bit
101/// schoolbook product. Values of this type can be summed by XOR and reduced once at the end via
102/// [`reduce`](WideGhashProduct::reduce).
103#[derive(Clone, Copy, Default, Debug)]
104pub struct WideGhashProduct<U: Underlier128bLanes> {
105	v0: U::U64,
106	v1: U::U64,
107	v2: U::U64,
108	v3: U::U64,
109}
110
111impl<U: Underlier128bLanes> WideGhashProduct<U> {
112	/// Reduce the accumulated wide product to a single GF(2^128) element.
113	#[inline]
114	pub fn reduce(self) -> U {
115		reduce_64(self.v0, self.v1, self.v2, self.v3)
116	}
117}
118
119impl<U: Underlier128bLanes> Add for WideGhashProduct<U> {
120	type Output = Self;
121
122	#[inline]
123	fn add(self, rhs: Self) -> Self {
124		Self {
125			v0: self.v0 ^ rhs.v0,
126			v1: self.v1 ^ rhs.v1,
127			v2: self.v2 ^ rhs.v2,
128			v3: self.v3 ^ rhs.v3,
129		}
130	}
131}
132
133impl<U: Underlier128bLanes> AddAssign for WideGhashProduct<U> {
134	#[inline]
135	fn add_assign(&mut self, rhs: Self) {
136		self.v0 ^= rhs.v0;
137		self.v1 ^= rhs.v1;
138		self.v2 ^= rhs.v2;
139		self.v3 ^= rhs.v3;
140	}
141}
142
143impl<U: Underlier128bLanes> Sum for WideGhashProduct<U> {
144	#[inline]
145	fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
146		iter.fold(Self::default(), |acc, x| acc + x)
147	}
148}
149
150// In characteristic 2, subtraction is identical to addition (XOR).
151impl<U: Underlier128bLanes> Sub for WideGhashProduct<U> {
152	type Output = Self;
153
154	#[inline]
155	fn sub(self, rhs: Self) -> Self {
156		Self {
157			v0: self.v0 ^ rhs.v0,
158			v1: self.v1 ^ rhs.v1,
159			v2: self.v2 ^ rhs.v2,
160			v3: self.v3 ^ rhs.v3,
161		}
162	}
163}
164
165impl<U: Underlier128bLanes> SubAssign for WideGhashProduct<U> {
166	#[inline]
167	fn sub_assign(&mut self, rhs: Self) {
168		self.v0 ^= rhs.v0;
169		self.v1 ^= rhs.v1;
170		self.v2 ^= rhs.v2;
171		self.v3 ^= rhs.v3;
172	}
173}
174
175/// Widening-multiply wrapper for the portable GHASH packing.
176///
177/// [`wide_mul`](WideMul::wide_mul) computes the unreduced schoolbook product via
178/// [`ghash_wide_mul`], and [`reduce`](WideMul::reduce) performs the GHASH modular reduction. This
179/// defers the reduction so a sum of products is reduced only once.
180#[repr(transparent)]
181#[derive(bytemuck::TransparentWrapper)]
182pub struct GhashWideMul<T>(T);
183
184impl<U: Underlier128bLanes> WideMul for GhashWideMul<PackedPrimitiveType<U, GhashB128>> {
185	type Output = WideGhashProduct<U>;
186
187	#[inline]
188	fn wide_mul(a: Self, b: Self) -> Self::Output {
189		let a = PackedPrimitiveType::peel(Self::peel(a));
190		let b = PackedPrimitiveType::peel(Self::peel(b));
191		ghash_wide_mul(a, b)
192	}
193
194	#[inline]
195	fn reduce(wide: Self::Output) -> Self {
196		Self::wrap(PackedPrimitiveType::wrap(wide.reduce()))
197	}
198}
199
200#[cfg(test)]
201mod tests {
202	use proptest::{prelude::any, proptest};
203
204	use super::{super::super::m128::M128, ghash_mul, ghash_wide_mul};
205
206	// Exercises the deferred wide-mul building blocks (`ghash_wide_mul` + `WideGhashProduct`) that
207	// `GhashWideMul` wraps, directly on the portable `M128`. This runs on every host, whereas the
208	// portable `PackedBinaryGhash1x128b` is only a usable `PackedField` on targets where it is the
209	// re-exported b128 type (covered there by the proptests in `packed_ghash.rs`).
210	proptest! {
211		// The split must agree with the fused multiply: wide-multiply then reduce == ghash_mul.
212		#[test]
213		fn wide_mul_then_reduce_matches_ghash_mul(a in any::<u128>(), b in any::<u128>()) {
214			let (a, b) = (M128::from(a), M128::from(b));
215			assert_eq!(ghash_wide_mul(a, b).reduce(), ghash_mul(a, b));
216		}
217
218		// Accumulate two unreduced products and reduce once.
219		#[test]
220		fn wide_mul_deferred_accumulation(
221			a1 in any::<u128>(), b1 in any::<u128>(),
222			a2 in any::<u128>(), b2 in any::<u128>(),
223		) {
224			let (a1, b1) = (M128::from(a1), M128::from(b1));
225			let (a2, b2) = (M128::from(a2), M128::from(b2));
226			let acc = ghash_wide_mul(a1, b1) + ghash_wide_mul(a2, b2);
227			assert_eq!(acc.reduce(), ghash_mul(a1, b1) ^ ghash_mul(a2, b2));
228		}
229	}
230}