binius_field/arch/x86_64/simd/
m128.rs

1// Copyright 2024-2025 Irreducible Inc.
2
3use core::arch::x86_64::*;
4
5use super::simd_arithmetic::TowerSimdType;
6use crate::{arch::x86_64::m128::M128, BinaryField};
7
8impl TowerSimdType for M128 {
9	#[inline(always)]
10	fn xor(a: Self, b: Self) -> Self {
11		unsafe { _mm_xor_si128(a.0, b.0) }.into()
12	}
13
14	#[inline(always)]
15	fn shuffle_epi8(a: Self, b: Self) -> Self {
16		unsafe { _mm_shuffle_epi8(a.0, b.0) }.into()
17	}
18
19	#[inline(always)]
20	fn blend_odd_even<Scalar: BinaryField>(a: Self, b: Self) -> Self {
21		let mask = Self::even_mask::<Scalar>();
22		unsafe { _mm_blendv_epi8(a.0, b.0, mask.0) }.into()
23	}
24
25	#[inline(always)]
26	fn set_alpha_even<Scalar: BinaryField>(self) -> Self {
27		unsafe {
28			let alpha = Self::alpha::<Scalar>();
29			let mask = Self::even_mask::<Scalar>();
30			// NOTE: There appears to be a bug in _mm_blendv_epi8 where the mask bit selects b, not a
31			_mm_blendv_epi8(self.0, alpha.0, mask.0)
32		}
33		.into()
34	}
35
36	#[inline(always)]
37	fn set1_epi128(val: __m128i) -> Self {
38		val.into()
39	}
40
41	#[inline(always)]
42	fn set_epi_64(val: i64) -> Self {
43		unsafe { _mm_set1_epi64x(val) }.into()
44	}
45
46	#[inline(always)]
47	fn bslli_epi128<const IMM8: i32>(self) -> Self {
48		unsafe { _mm_bslli_si128::<IMM8>(self.0) }.into()
49	}
50
51	#[inline(always)]
52	fn bsrli_epi128<const IMM8: i32>(self) -> Self {
53		unsafe { _mm_bsrli_si128::<IMM8>(self.0) }.into()
54	}
55
56	#[inline(always)]
57	fn apply_mask<Scalar: BinaryField>(mut mask: Self, a: Self) -> Self {
58		let tower_level = Scalar::N_BITS.ilog2();
59		match tower_level {
60			0..=2 => {
61				for i in 0..tower_level {
62					mask |= mask >> (1 << i);
63				}
64
65				unsafe { _mm_and_si128(a.0, mask.0) }
66			}
67			3 => unsafe { _mm_blendv_epi8(_mm_setzero_si128(), a.0, mask.0) },
68			4..=7 => {
69				let shuffle = Self::make_epi8_mask_shuffle::<Scalar>();
70				unsafe {
71					let mask = _mm_shuffle_epi8(mask.0, shuffle.0);
72					_mm_blendv_epi8(_mm_setzero_si128(), a.0, mask)
73				}
74			}
75			_ => panic!("unsupported bit count"),
76		}
77		.into()
78	}
79}