// binius_field/arch/portable/packed.rs

1// Copyright 2024-2025 Irreducible Inc.
2
3// This is because derive(bytemuck::TransparentWrapper) adds some type constraints to
// PackedPrimitiveType in addition to the type constraints we define. Even more annoying, the
5// allow attribute has to be added to the module, it doesn't work to add it to the struct
6// definition.
7#![allow(clippy::multiple_bound_locations)]
8
9use std::{
10	fmt::Debug,
11	iter::{Product, Sum},
12	marker::PhantomData,
13	ops::{Add, AddAssign, Mul, MulAssign, Sub, SubAssign},
14};
15
16use binius_utils::{checked_arithmetics::checked_int_div, iter::IterExtensions};
17use bytemuck::{Pod, TransparentWrapper, Zeroable};
18use rand::RngCore;
19use subtle::{Choice, ConstantTimeEq};
20
21use super::packed_arithmetic::UnderlierWithBitConstants;
22use crate::{
23	BinaryField, PackedField,
24	arithmetic_traits::{Broadcast, InvertOrZero, MulAlpha, Square},
25	underlier::{
26		IterationMethods, IterationStrategy, NumCast, U1, U2, U4, UnderlierType,
27		UnderlierWithBitOps, WithUnderlier,
28	},
29};
30
/// A packed field value backed by a primitive underlier `U`, holding
/// `U::BITS / Scalar::N_BITS` scalars of the binary field `Scalar` in the bits
/// of a single `U` (see the inherent `WIDTH` constant below).
///
/// `#[repr(transparent)]` guarantees the same layout as `U`; the derived
/// `bytemuck::TransparentWrapper` impl is what makes the reference/slice casts
/// in the `WithUnderlier` impl below sound. The `PhantomData` only records the
/// scalar type and occupies no space.
#[derive(PartialEq, Eq, Clone, Copy, Default, bytemuck::TransparentWrapper)]
#[repr(transparent)]
#[transparent(U)]
pub struct PackedPrimitiveType<U: UnderlierType, Scalar: BinaryField>(
	pub U,
	pub PhantomData<Scalar>,
);
38
39impl<U: UnderlierType, Scalar: BinaryField> PackedPrimitiveType<U, Scalar> {
40	pub const WIDTH: usize = {
41		assert!(U::BITS % Scalar::N_BITS == 0);
42
43		U::BITS / Scalar::N_BITS
44	};
45
46	pub const LOG_WIDTH: usize = {
47		let result = Self::WIDTH.ilog2();
48
49		assert!(2usize.pow(result) == Self::WIDTH);
50
51		result as usize
52	};
53
54	#[inline]
55	pub const fn from_underlier(val: U) -> Self {
56		Self(val, PhantomData)
57	}
58
59	#[inline]
60	pub const fn to_underlier(self) -> U {
61		self.0
62	}
63}
64
65unsafe impl<U: UnderlierType, Scalar: BinaryField> WithUnderlier
66	for PackedPrimitiveType<U, Scalar>
67{
68	type Underlier = U;
69
70	#[inline(always)]
71	fn to_underlier(self) -> Self::Underlier {
72		TransparentWrapper::peel(self)
73	}
74
75	#[inline(always)]
76	fn to_underlier_ref(&self) -> &Self::Underlier {
77		TransparentWrapper::peel_ref(self)
78	}
79
80	#[inline(always)]
81	fn to_underlier_ref_mut(&mut self) -> &mut Self::Underlier {
82		TransparentWrapper::peel_mut(self)
83	}
84
85	#[inline(always)]
86	fn to_underliers_ref(val: &[Self]) -> &[Self::Underlier] {
87		TransparentWrapper::peel_slice(val)
88	}
89
90	#[inline(always)]
91	fn to_underliers_ref_mut(val: &mut [Self]) -> &mut [Self::Underlier] {
92		TransparentWrapper::peel_slice_mut(val)
93	}
94
95	#[inline(always)]
96	fn from_underlier(val: Self::Underlier) -> Self {
97		TransparentWrapper::wrap(val)
98	}
99
100	#[inline(always)]
101	fn from_underlier_ref(val: &Self::Underlier) -> &Self {
102		TransparentWrapper::wrap_ref(val)
103	}
104
105	#[inline(always)]
106	fn from_underlier_ref_mut(val: &mut Self::Underlier) -> &mut Self {
107		TransparentWrapper::wrap_mut(val)
108	}
109
110	#[inline(always)]
111	fn from_underliers_ref(val: &[Self::Underlier]) -> &[Self] {
112		TransparentWrapper::wrap_slice(val)
113	}
114
115	#[inline(always)]
116	fn from_underliers_ref_mut(val: &mut [Self::Underlier]) -> &mut [Self] {
117		TransparentWrapper::wrap_slice_mut(val)
118	}
119}
120
121impl<U: UnderlierWithBitOps, Scalar: BinaryField> Debug for PackedPrimitiveType<U, Scalar>
122where
123	Self: PackedField,
124{
125	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
126		let width = checked_int_div(U::BITS, Scalar::N_BITS);
127		let values_str = self
128			.iter()
129			.map(|value| format!("{value}"))
130			.collect::<Vec<_>>()
131			.join(",");
132
133		write!(f, "Packed{}x{}([{}])", width, Scalar::N_BITS, values_str)
134	}
135}
136
137impl<U: UnderlierType, Scalar: BinaryField> From<U> for PackedPrimitiveType<U, Scalar> {
138	#[inline]
139	fn from(val: U) -> Self {
140		Self(val, PhantomData)
141	}
142}
143
144impl<U: UnderlierType, Scalar: BinaryField> ConstantTimeEq for PackedPrimitiveType<U, Scalar> {
145	fn ct_eq(&self, other: &Self) -> Choice {
146		self.0.ct_eq(&other.0)
147	}
148}
149
150impl<U: UnderlierWithBitOps, Scalar: BinaryField> Add for PackedPrimitiveType<U, Scalar> {
151	type Output = Self;
152
153	#[inline]
154	#[allow(clippy::suspicious_arithmetic_impl)]
155	fn add(self, rhs: Self) -> Self::Output {
156		(self.0 ^ rhs.0).into()
157	}
158}
159
160impl<U: UnderlierWithBitOps, Scalar: BinaryField> Sub for PackedPrimitiveType<U, Scalar> {
161	type Output = Self;
162
163	#[inline]
164	#[allow(clippy::suspicious_arithmetic_impl)]
165	fn sub(self, rhs: Self) -> Self::Output {
166		(self.0 ^ rhs.0).into()
167	}
168}
169
170impl<U: UnderlierType, Scalar: BinaryField> AddAssign for PackedPrimitiveType<U, Scalar>
171where
172	Self: Add<Output = Self>,
173{
174	fn add_assign(&mut self, rhs: Self) {
175		*self = *self + rhs;
176	}
177}
178
179impl<U: UnderlierType, Scalar: BinaryField> SubAssign for PackedPrimitiveType<U, Scalar>
180where
181	Self: Sub<Output = Self>,
182{
183	fn sub_assign(&mut self, rhs: Self) {
184		*self = *self - rhs;
185	}
186}
187
188impl<U: UnderlierType, Scalar: BinaryField> MulAssign for PackedPrimitiveType<U, Scalar>
189where
190	Self: Mul<Output = Self>,
191{
192	fn mul_assign(&mut self, rhs: Self) {
193		*self = *self * rhs;
194	}
195}
196
197impl<U: UnderlierType, Scalar: BinaryField> Add<Scalar> for PackedPrimitiveType<U, Scalar>
198where
199	Self: Broadcast<Scalar> + Add<Output = Self>,
200{
201	type Output = Self;
202
203	fn add(self, rhs: Scalar) -> Self::Output {
204		self + Self::broadcast(rhs)
205	}
206}
207
208impl<U: UnderlierType, Scalar: BinaryField> Sub<Scalar> for PackedPrimitiveType<U, Scalar>
209where
210	Self: Broadcast<Scalar> + Sub<Output = Self>,
211{
212	type Output = Self;
213
214	fn sub(self, rhs: Scalar) -> Self::Output {
215		self - Self::broadcast(rhs)
216	}
217}
218
219impl<U: UnderlierType, Scalar: BinaryField> Mul<Scalar> for PackedPrimitiveType<U, Scalar>
220where
221	Self: Broadcast<Scalar> + Mul<Output = Self>,
222{
223	type Output = Self;
224
225	fn mul(self, rhs: Scalar) -> Self::Output {
226		self * Self::broadcast(rhs)
227	}
228}
229
230impl<U: UnderlierType, Scalar: BinaryField> AddAssign<Scalar> for PackedPrimitiveType<U, Scalar>
231where
232	Self: Broadcast<Scalar> + AddAssign<Self>,
233{
234	fn add_assign(&mut self, rhs: Scalar) {
235		*self += Self::broadcast(rhs);
236	}
237}
238
239impl<U: UnderlierType, Scalar: BinaryField> SubAssign<Scalar> for PackedPrimitiveType<U, Scalar>
240where
241	Self: Broadcast<Scalar> + SubAssign<Self>,
242{
243	fn sub_assign(&mut self, rhs: Scalar) {
244		*self -= Self::broadcast(rhs);
245	}
246}
247
248impl<U: UnderlierType, Scalar: BinaryField> MulAssign<Scalar> for PackedPrimitiveType<U, Scalar>
249where
250	Self: Broadcast<Scalar> + MulAssign<Self>,
251{
252	fn mul_assign(&mut self, rhs: Scalar) {
253		*self *= Self::broadcast(rhs);
254	}
255}
256
257impl<U: UnderlierType, Scalar: BinaryField> Sum for PackedPrimitiveType<U, Scalar>
258where
259	Self: Add<Output = Self>,
260{
261	fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
262		iter.fold(Self::from(U::default()), |result, next| result + next)
263	}
264}
265
266impl<U: UnderlierType, Scalar: BinaryField> Product for PackedPrimitiveType<U, Scalar>
267where
268	Self: Broadcast<Scalar> + Mul<Output = Self>,
269{
270	fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
271		iter.fold(Self::broadcast(Scalar::ONE), |result, next| result * next)
272	}
273}
274
// SAFETY: the wrapper is `#[repr(transparent)]` over `U` and `PhantomData` is
// zero-sized, so `Self` is validly all-zero whenever `U: Zeroable`.
unsafe impl<U: UnderlierType + Zeroable, Scalar: BinaryField> Zeroable
	for PackedPrimitiveType<U, Scalar>
{
}
279
// SAFETY: `#[repr(transparent)]` over `U` plus a zero-sized `PhantomData`
// gives `Self` the same layout as `U`, so it is plain-old-data when `U: Pod`.
unsafe impl<U: UnderlierType + Pod, Scalar: BinaryField> Pod for PackedPrimitiveType<U, Scalar> {}
281
impl<U: UnderlierWithBitOps, Scalar> PackedField for PackedPrimitiveType<U, Scalar>
where
	Self: Broadcast<Scalar> + Square + InvertOrZero + Mul<Output = Self>,
	U: UnderlierWithBitConstants + From<Scalar::Underlier> + Send + Sync + 'static,
	Scalar: BinaryField + WithUnderlier<Underlier: UnderlierWithBitOps>,
	Scalar::Underlier: NumCast<U>,
	IterationMethods<Scalar::Underlier, U>: IterationStrategy<Scalar::Underlier, U>,
{
	type Scalar = Scalar;

	// NOTE(review): `ilog2` rounds down; unlike the inherent `Self::LOG_WIDTH`
	// above, this constant does not assert that the width is an exact power of
	// two — confirm all instantiations satisfy that.
	const LOG_WIDTH: usize = (U::BITS / Scalar::N_BITS).ilog2() as usize;

	// Reads the `i`-th packed scalar out of the underlier's bits.
	//
	// SAFETY: caller must uphold `i < Self::WIDTH` — presumably the contract
	// of `get_subvalue`, matching the `_unchecked` naming.
	#[inline]
	unsafe fn get_unchecked(&self, i: usize) -> Self::Scalar {
		Scalar::from_underlier(unsafe { self.0.get_subvalue(i) })
	}

	// Writes the `i`-th packed scalar into the underlier's bits.
	//
	// SAFETY: caller must uphold `i < Self::WIDTH`, as for `get_unchecked`.
	#[inline]
	unsafe fn set_unchecked(&mut self, i: usize, scalar: Scalar) {
		unsafe {
			self.0.set_subvalue(i, scalar.to_underlier());
		}
	}

	// All-zero bits is the packed additive identity.
	#[inline]
	fn zero() -> Self {
		Self::from_underlier(U::ZERO)
	}

	// Fills the underlier from the RNG; every bit pattern is a valid packed value.
	fn random(rng: impl RngCore) -> Self {
		U::random(rng).into()
	}

	// Iterates the packed scalars by reference, using the iteration strategy
	// selected for this (scalar underlier, packed underlier) pair.
	#[inline]
	fn iter(&self) -> impl Iterator<Item = Self::Scalar> + Send + Clone + '_ {
		IterationMethods::<Scalar::Underlier, U>::ref_iter(&self.0)
			.map(|underlier| Scalar::from_underlier(underlier))
	}

	// Consuming variant of `iter`.
	#[inline]
	fn into_iter(self) -> impl Iterator<Item = Self::Scalar> + Send + Clone {
		IterationMethods::<Scalar::Underlier, U>::value_iter(self.0)
			.map(|underlier| Scalar::from_underlier(underlier))
	}

	// Iterates all scalars across a slice of packed values; `map_skippable`
	// comes from `IterExtensions`.
	#[inline]
	fn iter_slice(slice: &[Self]) -> impl Iterator<Item = Self::Scalar> + Send + Clone + '_ {
		IterationMethods::<Scalar::Underlier, U>::slice_iter(Self::to_underliers_ref(slice))
			.map_skippable(|underlier| Scalar::from_underlier(underlier))
	}

	// Interleaves blocks of `1 << log_block_len` scalars between `self` and
	// `other`. The block length is given in scalars; adding `log_bit_len`
	// converts it to the bit-level block length the underlier op expects.
	#[inline]
	fn interleave(self, other: Self, log_block_len: usize) -> (Self, Self) {
		assert!(log_block_len < Self::LOG_WIDTH);
		let log_bit_len = Self::Scalar::N_BITS.ilog2() as usize;
		let (c, d) = self.0.interleave(other.0, log_block_len + log_bit_len);
		(c.into(), d.into())
	}

	// Inverse-style block transpose of `interleave`, with the same
	// scalar-to-bit block-length conversion.
	#[inline]
	fn unzip(self, other: Self, log_block_len: usize) -> (Self, Self) {
		assert!(log_block_len < Self::LOG_WIDTH);
		let log_bit_len = Self::Scalar::N_BITS.ilog2() as usize;
		let (c, d) = self.0.transpose(other.0, log_block_len + log_bit_len);
		(c.into(), d.into())
	}

	// Spreads one block of scalars across the whole packed value.
	//
	// SAFETY: caller must uphold the preconditions spelled out by the debug
	// assertions: `log_block_len <= Self::LOG_WIDTH` and `block_idx` within
	// the number of blocks.
	#[inline]
	unsafe fn spread_unchecked(self, log_block_len: usize, block_idx: usize) -> Self {
		debug_assert!(log_block_len <= Self::LOG_WIDTH, "{} <= {}", log_block_len, Self::LOG_WIDTH);
		debug_assert!(
			block_idx < 1 << (Self::LOG_WIDTH - log_block_len),
			"{} < {}",
			block_idx,
			1 << (Self::LOG_WIDTH - log_block_len)
		);

		unsafe {
			self.0
				.spread::<<Self::Scalar as WithUnderlier>::Underlier>(log_block_len, block_idx)
				.into()
		}
	}

	// Delegates to the `Broadcast` bound rather than re-deriving the value.
	#[inline]
	fn broadcast(scalar: Self::Scalar) -> Self {
		<Self as Broadcast<Self::Scalar>>::broadcast(scalar)
	}

	// Builds the packed value lane by lane from a scalar-producing closure.
	#[inline]
	fn from_fn(mut f: impl FnMut(usize) -> Self::Scalar) -> Self {
		U::from_fn(move |i| f(i).to_underlier()).into()
	}

	// Delegates to the `Square` bound.
	#[inline]
	fn square(self) -> Self {
		<Self as Square>::square(self)
	}

	// Delegates to the `InvertOrZero` bound.
	#[inline]
	fn invert_or_zero(self) -> Self {
		<Self as InvertOrZero>::invert_or_zero(self)
	}
}
386
387/// Multiply `PT1` values by upcasting to wider `PT2` type with the same scalar.
388/// This is useful for the cases when SIMD multiplication is faster.
389#[allow(dead_code)]
390pub fn mul_as_bigger_type<PT1, PT2>(lhs: PT1, rhs: PT1) -> PT1
391where
392	PT1: PackedField + WithUnderlier,
393	PT2: PackedField<Scalar = PT1::Scalar> + WithUnderlier,
394	PT2::Underlier: From<PT1::Underlier>,
395	PT1::Underlier: NumCast<PT2::Underlier>,
396{
397	let bigger_lhs = PT2::from_underlier(lhs.to_underlier().into());
398	let bigger_rhs = PT2::from_underlier(rhs.to_underlier().into());
399
400	let bigger_result = bigger_lhs * bigger_rhs;
401
402	PT1::from_underlier(PT1::Underlier::num_cast_from(bigger_result.to_underlier()))
403}
404
405/// Square `PT1` values by upcasting to wider `PT2` type with the same scalar.
406/// This is useful for the cases when SIMD square is faster.
407#[allow(dead_code)]
408pub fn square_as_bigger_type<PT1, PT2>(val: PT1) -> PT1
409where
410	PT1: PackedField + WithUnderlier,
411	PT2: PackedField<Scalar = PT1::Scalar> + WithUnderlier,
412	PT2::Underlier: From<PT1::Underlier>,
413	PT1::Underlier: NumCast<PT2::Underlier>,
414{
415	let bigger_val = PT2::from_underlier(val.to_underlier().into());
416
417	let bigger_result = bigger_val.square();
418
419	PT1::from_underlier(PT1::Underlier::num_cast_from(bigger_result.to_underlier()))
420}
421
422/// Invert `PT1` values by upcasting to wider `PT2` type with the same scalar.
423/// This is useful for the cases when SIMD invert is faster.
424#[allow(dead_code)]
425pub fn invert_as_bigger_type<PT1, PT2>(val: PT1) -> PT1
426where
427	PT1: PackedField + WithUnderlier,
428	PT2: PackedField<Scalar = PT1::Scalar> + WithUnderlier,
429	PT2::Underlier: From<PT1::Underlier>,
430	PT1::Underlier: NumCast<PT2::Underlier>,
431{
432	let bigger_val = PT2::from_underlier(val.to_underlier().into());
433
434	let bigger_result = bigger_val.invert_or_zero();
435
436	PT1::from_underlier(PT1::Underlier::num_cast_from(bigger_result.to_underlier()))
437}
438
439/// Multiply by alpha `PT1` values by upcasting to wider `PT2` type with the same scalar.
440/// This is useful for the cases when SIMD multiply by alpha is faster.
441#[allow(dead_code)]
442pub fn mul_alpha_as_bigger_type<PT1, PT2>(val: PT1) -> PT1
443where
444	PT1: PackedField + WithUnderlier,
445	PT2: PackedField<Scalar = PT1::Scalar> + WithUnderlier + MulAlpha,
446	PT2::Underlier: From<PT1::Underlier>,
447	PT1::Underlier: NumCast<PT2::Underlier>,
448{
449	let bigger_val = PT2::from_underlier(val.to_underlier().into());
450
451	let bigger_result = bigger_val.mul_alpha();
452
453	PT1::from_underlier(PT1::Underlier::num_cast_from(bigger_result.to_underlier()))
454}
455
/// Implements `PackScalar<F>` for an underlier type: every binary field `F`
/// whose `PackedPrimitiveType` over this underlier is a valid packed field
/// gets `PackedPrimitiveType<$underlier, F>` as its packed representation.
macro_rules! impl_pack_scalar {
	($underlier:ty) => {
		impl<F> $crate::as_packed_field::PackScalar<F> for $underlier
		where
			F: BinaryField,
			PackedPrimitiveType<$underlier, F>:
				$crate::packed::PackedField<Scalar = F> + WithUnderlier<Underlier = $underlier>,
		{
			type Packed = PackedPrimitiveType<$underlier, F>;
		}
	};
}
468
pub(crate) use impl_pack_scalar;

// Register all portable underlier widths, from the sub-byte types (U1/U2/U4)
// up through the primitive integers to u128.
impl_pack_scalar!(U1);
impl_pack_scalar!(U2);
impl_pack_scalar!(U4);
impl_pack_scalar!(u8);
impl_pack_scalar!(u16);
impl_pack_scalar!(u32);
impl_pack_scalar!(u64);
impl_pack_scalar!(u128);