binius_field/arch/portable/
packed_arithmetic.rs

1// Copyright 2024-2025 Irreducible Inc.
2
3use crate::{
4	arch::PackedStrategy,
5	arithmetic_traits::{
6		MulAlpha, TaggedInvertOrZero, TaggedMul, TaggedMulAlpha, TaggedPackedTransformationFactory,
7		TaggedSquare,
8	},
9	binary_field::{BinaryField, TowerExtensionField},
10	linear_transformation::{FieldLinearTransformation, Transformation},
11	packed::PackedBinaryField,
12	underlier::{UnderlierType, UnderlierWithBitOps, WithUnderlier},
13	PackedExtension, PackedField, TowerField,
14};
15
16pub trait UnderlierWithBitConstants: UnderlierWithBitOps
17where
18	Self: 'static,
19{
20	const INTERLEAVE_EVEN_MASK: &'static [Self];
21	const INTERLEAVE_ODD_MASK: &'static [Self];
22
23	/// Interleave with the given bit size
24	fn interleave(self, other: Self, log_block_len: usize) -> (Self, Self) {
25		// There are 2^7 = 128 bits in a u128
26		assert!(log_block_len < Self::INTERLEAVE_EVEN_MASK.len());
27
28		let block_len = 1 << log_block_len;
29
30		// See Hacker's Delight, Section 7-3.
31		// https://dl.acm.org/doi/10.5555/2462741
32		let t = ((self >> block_len) ^ other) & Self::INTERLEAVE_EVEN_MASK[log_block_len];
33		let c = self ^ t << block_len;
34		let d = other ^ t;
35
36		(c, d)
37	}
38
39	/// Transpose with the given bit size
40	fn transpose(mut self, mut other: Self, log_block_len: usize) -> (Self, Self) {
41		// There are 2^7 = 128 bits in a u128
42		assert!(log_block_len < Self::INTERLEAVE_EVEN_MASK.len());
43
44		for log_block_len in (log_block_len..Self::LOG_BITS).rev() {
45			(self, other) = self.interleave(other, log_block_len);
46		}
47
48		(self, other)
49	}
50}
51
52/// Abstraction for a packed tower field of height greater than 0.
53///
54/// Helper trait
55pub(crate) trait PackedTowerField: PackedField + WithUnderlier {
56	/// A scalar of a lower height
57	type DirectSubfield: TowerField;
58	/// Packed type with the same underlier with a lower height
59	type PackedDirectSubfield: PackedField<Scalar = Self::DirectSubfield>
60		+ WithUnderlier<Underlier = Self::Underlier>;
61
62	/// Reinterpret value as a packed field over a lower height
63	fn as_packed_subfield(self) -> Self::PackedDirectSubfield;
64
65	/// Reinterpret packed subfield as a current type
66	fn from_packed_subfield(value: Self::PackedDirectSubfield) -> Self;
67}
68
69impl<F, P, U: UnderlierType> PackedTowerField for P
70where
71	F: TowerExtensionField,
72	P: PackedField<Scalar = F> + PackedExtension<F::DirectSubfield> + WithUnderlier<Underlier = U>,
73	P::PackedSubfield: WithUnderlier<Underlier = U>,
74{
75	type DirectSubfield = F::DirectSubfield;
76	type PackedDirectSubfield = <Self as PackedExtension<F::DirectSubfield>>::PackedSubfield;
77
78	#[inline]
79	fn as_packed_subfield(self) -> Self::PackedDirectSubfield {
80		P::cast_base(self)
81	}
82
83	#[inline]
84	fn from_packed_subfield(value: Self::PackedDirectSubfield) -> Self {
85		P::cast_ext(value)
86	}
87}
88
89/// Compile-time known constants needed for packed multiply implementation.
90pub(crate) trait TowerConstants<U> {
91	/// Alpha values in odd positions, zeroes in even.
92	const ALPHAS_ODD: U;
93}
94
95macro_rules! impl_tower_constants {
96	($tower_field:ty, $underlier:ty, $value:tt) => {
97		impl $crate::arch::portable::packed_arithmetic::TowerConstants<$underlier>
98			for $tower_field
99		{
100			const ALPHAS_ODD: $underlier = $value;
101		}
102	};
103}
104
105pub(crate) use impl_tower_constants;
106
107impl<PT> TaggedMul<PackedStrategy> for PT
108where
109	PT: PackedTowerField,
110	PT::Underlier: UnderlierWithBitConstants,
111	PT::DirectSubfield: TowerConstants<PT::Underlier>,
112{
113	/// Optimized packed field multiplication algorithm
114	#[inline]
115	fn mul(self, b: Self) -> Self {
116		let a = self;
117
118		// a and b can be interpreted as packed subfield elements:
119		// a = <a_lo_0, a_hi_0, a_lo_1, a_hi_1, ...>
120		// b = <b_lo_0, b_hi_0, b_lo_1, b_hi_1, ...>//
121		// ab is the product of a * b as packed subfield elements
122		// ab = <a_lo_0 * b_lo_0, a_hi_0 * b_hi_0, a_lo_1 * b_lo_1, a_hi_1 * b_hi_1, ...>
123		let repacked_a = a.as_packed_subfield();
124		let repacked_b = b.as_packed_subfield();
125		let z0_even_z2_odd = repacked_a * repacked_b;
126
127		// lo = <a_lo_0, b_lo_0, a_lo_1, b_lo_1, ...>
128		// hi = <a_hi_0, b_hi_0, a_hi_1, b_hi_1, ...>
129		let (lo, hi) =
130			interleave::<PT::Underlier, PT::DirectSubfield>(a.to_underlier(), b.to_underlier());
131
132		// <a_lo_0 + a_hi_0, b_lo_0 + b_hi_0, a_lo_1 + a_hi_1, b_lo_1 + b_hi_1, ...>
133		let lo_plus_hi_a_even_b_odd = lo ^ hi;
134
135		let odd_mask = <PT::Underlier as UnderlierWithBitConstants>::INTERLEAVE_ODD_MASK
136			[PT::DirectSubfield::TOWER_LEVEL];
137
138		let alphas = PT::DirectSubfield::ALPHAS_ODD;
139
140		// <α, z2_0, α, z2_1, ...>
141		let alpha_even_z2_odd = alphas ^ (z0_even_z2_odd.to_underlier() & odd_mask);
142
143		// a_lo_plus_hi_even_z2_odd    = <a_lo_0 + a_hi_0, z2_0, a_lo_1 + a_hi_1, z2_1, ...>
144		// b_lo_plus_hi_even_alpha_odd = <b_lo_0 + b_hi_0,    α, a_lo_1 + a_hi_1,   αz, ...>
145		let (a_lo_plus_hi_even_alpha_odd, b_lo_plus_hi_even_z2_odd) =
146			interleave::<PT::Underlier, PT::DirectSubfield>(
147				lo_plus_hi_a_even_b_odd,
148				alpha_even_z2_odd,
149			);
150
151		// <z1_0 + z0_0 + z2_0, z2a_0, z1_1 + z0_1 + z2_1, z2a_1, ...>
152		let z1_plus_z0_plus_z2_even_z2a_odd =
153			PT::PackedDirectSubfield::from_underlier(a_lo_plus_hi_even_alpha_odd)
154				* PT::PackedDirectSubfield::from_underlier(b_lo_plus_hi_even_z2_odd);
155
156		// <0, z1_0 + z2a_0 + z0_0 + z2_0, 0, z1_1 + z2a_1 + z0_1 + z2_1, ...>
157		let zero_even_z1_plus_z2a_plus_z0_plus_z2_odd = (z1_plus_z0_plus_z2_even_z2a_odd
158			.to_underlier()
159			^ (z1_plus_z0_plus_z2_even_z2a_odd.to_underlier() << PT::DirectSubfield::N_BITS))
160			& odd_mask;
161
162		// <z0_0 + z2_0, z0_0 + z2_0, z0_1 + z2_1, z0_1 + z2_1, ...>
163		let z0_plus_z2_dup =
164			xor_adjacent::<PT::Underlier, PT::DirectSubfield>(z0_even_z2_odd.to_underlier());
165
166		// <z0_0 + z2_0, z1_0 + z2a_0, z0_1 + z2_1, z1_1 + z2a_1, ...>
167		Self::from_underlier(z0_plus_z2_dup ^ zero_even_z1_plus_z2a_plus_z0_plus_z2_odd)
168	}
169}
170
171/// Generate the mask with alphas in the odd packed element positions and zeros in even
172macro_rules! alphas {
173	($underlier:ty, $tower_level:literal) => {{
174		let mut alphas: $underlier = if $tower_level == 0 {
175			1
176		} else {
177			1 << (1 << ($tower_level - 1))
178		};
179
180		let log_width = <$underlier as $crate::underlier::UnderlierType>::LOG_BITS - $tower_level;
181		let mut i = 1;
182		while i < log_width {
183			alphas |= alphas << (1 << ($tower_level + i));
184			i += 1;
185		}
186
187		alphas
188	}};
189}
190
191pub(crate) use alphas;
192
193/// Generate the mask with ones in the odd packed element positions and zeros in even
194macro_rules! interleave_mask_even {
195	($underlier:ty, $tower_level:literal) => {{
196		let scalar_bits = 1 << $tower_level;
197
198		let mut mask: $underlier = (1 << scalar_bits) - 1;
199		let log_width = <$underlier>::LOG_BITS - $tower_level;
200		let mut i = 1;
201		while i < log_width {
202			mask |= mask << (scalar_bits << i);
203			i += 1;
204		}
205
206		mask
207	}};
208}
209
210pub(crate) use interleave_mask_even;
211
212/// Generate the mask with ones in the even packed element positions and zeros in odd
213macro_rules! interleave_mask_odd {
214	($underlier:ty, $tower_level:literal) => {
215		interleave_mask_even!($underlier, $tower_level) << (1 << $tower_level)
216	};
217}
218
219pub(crate) use interleave_mask_odd;
220
221/// View the inputs as vectors of packed binary tower elements and transpose as 2x2 square matrices.
222/// Given vectors <a_0, a_1, a_2, a_3, ...> and <b_0, b_1, b_2, b_3, ...>, returns a tuple with
223/// <a0, b0, a2, b2, ...> and <a1, b1, a3, b3>.
224fn interleave<U: UnderlierWithBitConstants, F: TowerField>(a: U, b: U) -> (U, U) {
225	let mask = U::INTERLEAVE_EVEN_MASK[F::TOWER_LEVEL];
226
227	let block_len = F::N_BITS;
228	let t = ((a >> block_len) ^ b) & mask;
229	let c = a ^ (t << block_len);
230	let d = b ^ t;
231
232	(c, d)
233}
234
235/// View the input as a vector of packed binary tower elements and add the adjacent ones.
236/// Given a vector <a_0, a_1, a_2, a_3, ...>, returns <a0 + a1, a0 + a1, a2 + a3, a2 + a3, ...>.
237fn xor_adjacent<U: UnderlierWithBitConstants, F: TowerField>(a: U) -> U {
238	let mask = U::INTERLEAVE_EVEN_MASK[F::TOWER_LEVEL];
239
240	let block_len = F::N_BITS;
241	let t = ((a >> block_len) ^ a) & mask;
242
243	t ^ (t << block_len)
244}
245
246impl<PT> TaggedMulAlpha<PackedStrategy> for PT
247where
248	PT: PackedTowerField,
249	PT::PackedDirectSubfield: MulAlpha,
250	PT::Underlier: UnderlierWithBitConstants,
251{
252	#[inline]
253	fn mul_alpha(self) -> Self {
254		let block_len = PT::DirectSubfield::N_BITS;
255		let even_mask = <PT::Underlier as UnderlierWithBitConstants>::INTERLEAVE_EVEN_MASK
256			[PT::DirectSubfield::TOWER_LEVEL];
257		let odd_mask = <PT::Underlier as UnderlierWithBitConstants>::INTERLEAVE_ODD_MASK
258			[PT::DirectSubfield::TOWER_LEVEL];
259
260		let a = self.to_underlier();
261		let a0 = a & even_mask;
262		let a1 = a & odd_mask;
263		let z1 = PT::PackedDirectSubfield::from_underlier(a1)
264			.mul_alpha()
265			.to_underlier();
266
267		Self::from_underlier((a1 >> block_len) | ((a0 << block_len) ^ z1))
268	}
269}
270
271impl<PT> TaggedSquare<PackedStrategy> for PT
272where
273	PT: PackedTowerField,
274	PT::PackedDirectSubfield: MulAlpha,
275	PT::Underlier: UnderlierWithBitConstants,
276{
277	#[inline]
278	fn square(self) -> Self {
279		let block_len = PT::DirectSubfield::N_BITS;
280		let even_mask = <PT::Underlier as UnderlierWithBitConstants>::INTERLEAVE_EVEN_MASK
281			[PT::DirectSubfield::TOWER_LEVEL];
282		let odd_mask = <PT::Underlier as UnderlierWithBitConstants>::INTERLEAVE_ODD_MASK
283			[PT::DirectSubfield::TOWER_LEVEL];
284
285		let z_02 = PackedField::square(self.as_packed_subfield());
286		let z_2a = z_02.mul_alpha().to_underlier() & odd_mask;
287
288		let z_0_xor_z_2 = (z_02.to_underlier() ^ (z_02.to_underlier() >> block_len)) & even_mask;
289
290		Self::from_underlier(z_0_xor_z_2 | z_2a)
291	}
292}
293
294impl<PT> TaggedInvertOrZero<PackedStrategy> for PT
295where
296	PT: PackedTowerField,
297	PT::PackedDirectSubfield: MulAlpha,
298	PT::Underlier: UnderlierWithBitConstants,
299{
300	#[inline]
301	fn invert_or_zero(self) -> Self {
302		let block_len = PT::DirectSubfield::N_BITS;
303		let even_mask = <PT::Underlier as UnderlierWithBitConstants>::INTERLEAVE_EVEN_MASK
304			[PT::DirectSubfield::TOWER_LEVEL];
305		let odd_mask = <PT::Underlier as UnderlierWithBitConstants>::INTERLEAVE_ODD_MASK
306			[PT::DirectSubfield::TOWER_LEVEL];
307
308		// has meaningful values in even positions
309		let a_1_even = PT::PackedDirectSubfield::from_underlier(self.to_underlier() >> block_len);
310		let intermediate = self.as_packed_subfield() + a_1_even.mul_alpha();
311		let delta = self.as_packed_subfield() * intermediate
312			+ <PT::PackedDirectSubfield as PackedField>::square(a_1_even);
313		let delta_inv = PackedField::invert_or_zero(delta);
314
315		// set values from even positions to odd as well
316		let mut delta_inv_delta_inv = delta_inv.to_underlier() & even_mask;
317		delta_inv_delta_inv |= delta_inv_delta_inv << block_len;
318
319		let intermediate_a1 =
320			(self.to_underlier() & odd_mask) | (intermediate.to_underlier() & even_mask);
321		let result = PT::PackedDirectSubfield::from_underlier(delta_inv_delta_inv)
322			* PT::PackedDirectSubfield::from_underlier(intermediate_a1);
323		Self::from_underlier(result.to_underlier())
324	}
325}
326
327/// Packed transformation implementation.
328/// Stores bases in a form of:
329/// [
330///     [<base vec 1> ... <base vec 1>],
331///     ...
332///     [<base vec N> ... <base vec N>]
333/// ]
334/// Transformation complexity is `N*log(N)` where `N` is `OP::Scalar::DEGREE`.
335pub struct PackedTransformation<OP> {
336	bases: Vec<OP>,
337}
338
339impl<OP> PackedTransformation<OP>
340where
341	OP: PackedBinaryField,
342{
343	pub fn new<Data: AsRef<[OP::Scalar]> + Sync>(
344		transformation: &FieldLinearTransformation<OP::Scalar, Data>,
345	) -> Self {
346		Self {
347			bases: transformation
348				.bases()
349				.iter()
350				.map(|base| OP::broadcast(*base))
351				.collect(),
352		}
353	}
354}
355
356/// Broadcast lowest field for each element, e.g. [<0001><0000>] -> [<1111><0000>]
357fn broadcast_lowest_bit<U: UnderlierWithBitOps>(mut data: U, log_packed_bits: usize) -> U {
358	for i in 0..log_packed_bits {
359		data |= data << (1 << i)
360	}
361
362	data
363}
364
365impl<U, IP, OP, IF, OF> Transformation<IP, OP> for PackedTransformation<OP>
366where
367	IP: PackedField<Scalar = IF> + WithUnderlier<Underlier = U>,
368	OP: PackedField<Scalar = OF> + WithUnderlier<Underlier = U>,
369	IF: BinaryField,
370	OF: BinaryField,
371	U: UnderlierWithBitOps,
372{
373	fn transform(&self, input: &IP) -> OP {
374		let mut result = OP::zero();
375		let ones = OP::one().to_underlier();
376		let mut input = input.to_underlier();
377
378		for base in &self.bases {
379			let base_component = input & ones;
380			// contains ones at positions which correspond to non-zero components
381			let mask = broadcast_lowest_bit(base_component, OF::LOG_DEGREE);
382			result += OP::from_underlier(mask & base.to_underlier());
383			input = input >> 1;
384		}
385
386		result
387	}
388}
389
390impl<IP, OP> TaggedPackedTransformationFactory<PackedStrategy, OP> for IP
391where
392	IP: PackedBinaryField + WithUnderlier<Underlier: UnderlierWithBitOps>,
393	OP: PackedBinaryField + WithUnderlier<Underlier = IP::Underlier>,
394{
395	type PackedTransformation<Data: AsRef<[OP::Scalar]> + Sync> = PackedTransformation<OP>;
396
397	fn make_packed_transformation<Data: AsRef<[OP::Scalar]> + Sync>(
398		transformation: FieldLinearTransformation<OP::Scalar, Data>,
399	) -> Self::PackedTransformation<Data> {
400		PackedTransformation::new(&transformation)
401	}
402}
403
404#[cfg(test)]
405mod tests {
406	use std::fmt::Debug;
407
408	use rand::thread_rng;
409
410	use super::*;
411	use crate::{
412		arch::portable::packed_128::{
413			PackedBinaryField16x8b, PackedBinaryField1x128b, PackedBinaryField2x64b,
414			PackedBinaryField32x4b, PackedBinaryField4x32b, PackedBinaryField64x2b,
415			PackedBinaryField8x16b,
416		},
417		test_utils::{
418			define_invert_tests, define_mul_alpha_tests, define_multiply_tests,
419			define_square_tests, define_transformation_tests,
420		},
421		BinaryField16b, BinaryField1b, BinaryField2b, BinaryField32b, BinaryField4b,
422		BinaryField64b, BinaryField8b,
423	};
424
425	const NUM_TESTS: u64 = 100;
426
427	fn check_interleave<F: TowerField>(a: u128, b: u128, c: u128, d: u128) {
428		assert_eq!(interleave::<u128, F>(a, b), (c, d));
429		assert_eq!(interleave::<u128, F>(c, d), (a, b));
430	}
431
432	#[test]
433	fn test_interleave() {
434		check_interleave::<BinaryField1b>(
435			0x0000000000000000FFFFFFFFFFFFFFFF,
436			0xFFFFFFFFFFFFFFFF0000000000000000,
437			0xAAAAAAAAAAAAAAAA5555555555555555,
438			0xAAAAAAAAAAAAAAAA5555555555555555,
439		);
440
441		check_interleave::<BinaryField2b>(
442			0x0000000000000000FFFFFFFFFFFFFFFF,
443			0xFFFFFFFFFFFFFFFF0000000000000000,
444			0xCCCCCCCCCCCCCCCC3333333333333333,
445			0xCCCCCCCCCCCCCCCC3333333333333333,
446		);
447
448		check_interleave::<BinaryField4b>(
449			0x0000000000000000FFFFFFFFFFFFFFFF,
450			0xFFFFFFFFFFFFFFFF0000000000000000,
451			0xF0F0F0F0F0F0F0F00F0F0F0F0F0F0F0F,
452			0xF0F0F0F0F0F0F0F00F0F0F0F0F0F0F0F,
453		);
454
455		check_interleave::<BinaryField8b>(
456			0x0F0E0D0C0B0A09080706050403020100,
457			0x1F1E1D1C1B1A19181716151413121110,
458			0x1E0E1C0C1A0A18081606140412021000,
459			0x1F0F1D0D1B0B19091707150513031101,
460		);
461
462		check_interleave::<BinaryField16b>(
463			0x0F0E0D0C0B0A09080706050403020100,
464			0x1F1E1D1C1B1A19181716151413121110,
465			0x1D1C0D0C191809081514050411100100,
466			0x1F1E0F0E1B1A0B0A1716070613120302,
467		);
468
469		check_interleave::<BinaryField32b>(
470			0x0F0E0D0C0B0A09080706050403020100,
471			0x1F1E1D1C1B1A19181716151413121110,
472			0x1B1A19180B0A09081312111003020100,
473			0x1F1E1D1C0F0E0D0C1716151407060504,
474		);
475
476		check_interleave::<BinaryField64b>(
477			0x0F0E0D0C0B0A09080706050403020100,
478			0x1F1E1D1C1B1A19181716151413121110,
479			0x17161514131211100706050403020100,
480			0x1F1E1D1C1B1A19180F0E0D0C0B0A0908,
481		);
482	}
483
484	fn test_packed_multiply_alpha<P>()
485	where
486		P: PackedField + MulAlpha + Debug,
487		P::Scalar: MulAlpha,
488	{
489		let mut rng = thread_rng();
490
491		for _ in 0..NUM_TESTS {
492			let a = P::random(&mut rng);
493
494			let result = a.mul_alpha();
495			for i in 0..P::WIDTH {
496				assert_eq!(result.get(i), MulAlpha::mul_alpha(a.get(i)));
497			}
498		}
499	}
500
501	#[test]
502	fn test_multiply_alpha() {
503		test_packed_multiply_alpha::<PackedBinaryField64x2b>();
504		test_packed_multiply_alpha::<PackedBinaryField32x4b>();
505		test_packed_multiply_alpha::<PackedBinaryField16x8b>();
506		test_packed_multiply_alpha::<PackedBinaryField8x16b>();
507		test_packed_multiply_alpha::<PackedBinaryField4x32b>();
508		test_packed_multiply_alpha::<PackedBinaryField2x64b>();
509		test_packed_multiply_alpha::<PackedBinaryField1x128b>();
510	}
511
512	define_multiply_tests!(TaggedMul<PackedStrategy>::mul, TaggedMul<PackedStrategy>);
513
514	define_square_tests!(TaggedSquare<PackedStrategy>::square, TaggedSquare<PackedStrategy>);
515
516	define_invert_tests!(
517		TaggedInvertOrZero<PackedStrategy>::invert_or_zero,
518		TaggedInvertOrZero<PackedStrategy>
519	);
520
521	define_mul_alpha_tests!(
522		TaggedMulAlpha<PackedStrategy>::mul_alpha,
523		TaggedMulAlpha<PackedStrategy>
524	);
525
526	#[allow(unused)]
527	trait SelfPackedTransformationFactory:
528		TaggedPackedTransformationFactory<PackedStrategy, Self>
529	{
530	}
531
532	impl<T: TaggedPackedTransformationFactory<PackedStrategy, Self>> SelfPackedTransformationFactory
533		for T
534	{
535	}
536
537	define_transformation_tests!(SelfPackedTransformationFactory);
538}