binius_field/arch/
arch_optimal.rs

1// Copyright 2024-2025 Irreducible Inc.
2
3use cfg_if::cfg_if;
4
5use crate::{
6	as_packed_field::{PackScalar, PackedType},
7	underlier::WithUnderlier,
8	ByteSlicedUnderlier, Field, PackedField,
9};
10
11/// A trait to retrieve the optimal throughput `ordinal` packed field for a given architecture.
12pub trait ArchOptimal: Field {
13	type OptimalThroughputPacked: PackedField<Scalar = Self>
14		+ WithUnderlier<Underlier = OptimalUnderlier>;
15}
16
17impl<F> ArchOptimal for F
18where
19	F: Field,
20	OptimalUnderlier: PackScalar<F>,
21{
22	type OptimalThroughputPacked = PackedType<OptimalUnderlier, F>;
23}
24
25/// A trait to retrieve the optimal throughput packed byte-sliced field for a given architecture
26pub trait ArchOptimalByteSliced: Field {
27	type OptimalByteSliced: PackedField<Scalar = Self>
28		+ WithUnderlier<Underlier = OptimalUnderlierByteSliced>;
29}
30
31impl<F> ArchOptimalByteSliced for F
32where
33	F: Field,
34	OptimalUnderlierByteSliced: PackScalar<F>,
35{
36	type OptimalByteSliced = PackedType<OptimalUnderlierByteSliced, F>;
37}
38
39cfg_if! {
40	if #[cfg(all(feature = "nightly_features", target_arch = "x86_64", target_feature = "avx512f"))] {
41		pub const OPTIMAL_ALIGNMENT: usize = 512;
42
43		pub type OptimalUnderlier128b = crate::arch::x86_64::m128::M128;
44		pub type OptimalUnderlier256b = crate::arch::x86_64::m256::M256;
45		pub type OptimalUnderlier512b = crate::arch::x86_64::m512::M512;
46		pub type OptimalUnderlier = OptimalUnderlier512b;
47	} else if #[cfg(all(feature = "nightly_features", target_arch = "x86_64", target_feature = "avx2"))] {
48		use crate::underlier::ScaledUnderlier;
49
50		pub const OPTIMAL_ALIGNMENT: usize = 256;
51
52		pub type OptimalUnderlier128b = crate::arch::x86_64::m128::M128;
53		pub type OptimalUnderlier256b = crate::arch::x86_64::m256::M256;
54		pub type OptimalUnderlier512b = ScaledUnderlier<OptimalUnderlier256b, 2>;
55		pub type OptimalUnderlier = OptimalUnderlier256b;
56	} else if #[cfg(all(feature = "nightly_features", target_arch = "x86_64", target_feature = "sse2"))] {
57		use crate::underlier::ScaledUnderlier;
58
59		pub const OPTIMAL_ALIGNMENT: usize = 128;
60
61		pub type OptimalUnderlier128b = crate::arch::x86_64::m128::M128;
62		pub type OptimalUnderlier256b = ScaledUnderlier<OptimalUnderlier128b, 2>;
63		pub type OptimalUnderlier512b = ScaledUnderlier<OptimalUnderlier256b, 2>;
64		pub type OptimalUnderlier = OptimalUnderlier128b;
65	} else if #[cfg(all(target_arch = "aarch64", target_feature = "neon", target_feature = "aes"))] {
66		use crate::underlier::ScaledUnderlier;
67
68		pub const OPTIMAL_ALIGNMENT: usize = 128;
69
70		pub type OptimalUnderlier128b = crate::arch::aarch64::m128::M128;
71		pub type OptimalUnderlier256b = ScaledUnderlier<OptimalUnderlier128b, 2>;
72		pub type OptimalUnderlier512b = ScaledUnderlier<OptimalUnderlier256b, 2>;
73		pub type OptimalUnderlier = OptimalUnderlier128b;
74	} else {
75		use crate::underlier::ScaledUnderlier;
76
77		pub const OPTIMAL_ALIGNMENT: usize = 128;
78
79		pub type OptimalUnderlier128b = u128;
80		pub type OptimalUnderlier256b = ScaledUnderlier<OptimalUnderlier128b, 2>;
81		pub type OptimalUnderlier512b = ScaledUnderlier<OptimalUnderlier256b, 2>;
82		pub type OptimalUnderlier = OptimalUnderlier128b;
83	}
84}
85
86/// Optimal underlier for byte-sliced packed field for the current architecture.
87/// This underlier can pack up to 128b scalars.
88pub type OptimalUnderlierByteSliced = ByteSlicedUnderlier<OptimalUnderlier, 16>;