binius_field/arch/x86_64/simd/
m128.rs

1// Copyright 2024-2025 Irreducible Inc.
2
3use core::arch::x86_64::*;
4
5use super::simd_arithmetic::TowerSimdType;
6use crate::{BinaryField, arch::x86_64::m128::M128};
7
8impl TowerSimdType for M128 {
9	#[inline(always)]
10	fn xor(a: Self, b: Self) -> Self {
11		unsafe { _mm_xor_si128(a.0, b.0) }.into()
12	}
13
14	#[inline(always)]
15	fn shuffle_epi8(a: Self, b: Self) -> Self {
16		unsafe { _mm_shuffle_epi8(a.0, b.0) }.into()
17	}
18
19	#[inline(always)]
20	fn blend_odd_even<Scalar: BinaryField>(a: Self, b: Self) -> Self {
21		let mask = Self::even_mask::<Scalar>();
22		unsafe { _mm_blendv_epi8(a.0, b.0, mask.0) }.into()
23	}
24
25	#[inline(always)]
26	fn set_alpha_even<Scalar: BinaryField>(self) -> Self {
27		unsafe {
28			let alpha = Self::alpha::<Scalar>();
29			let mask = Self::even_mask::<Scalar>();
30			// NOTE: There appears to be a bug in _mm_blendv_epi8 where the mask bit selects b, not
31			// a
32			_mm_blendv_epi8(self.0, alpha.0, mask.0)
33		}
34		.into()
35	}
36
37	#[inline(always)]
38	fn set1_epi128(val: __m128i) -> Self {
39		val.into()
40	}
41
42	#[inline(always)]
43	fn set_epi_64(val: i64) -> Self {
44		unsafe { _mm_set1_epi64x(val) }.into()
45	}
46
47	#[inline(always)]
48	fn bslli_epi128<const IMM8: i32>(self) -> Self {
49		unsafe { _mm_bslli_si128::<IMM8>(self.0) }.into()
50	}
51
52	#[inline(always)]
53	fn bsrli_epi128<const IMM8: i32>(self) -> Self {
54		unsafe { _mm_bsrli_si128::<IMM8>(self.0) }.into()
55	}
56
57	#[inline(always)]
58	fn apply_mask<Scalar: BinaryField>(mut mask: Self, a: Self) -> Self {
59		let tower_level = Scalar::N_BITS.ilog2();
60		match tower_level {
61			0..=2 => {
62				for i in 0..tower_level {
63					mask |= mask >> (1 << i);
64				}
65
66				unsafe { _mm_and_si128(a.0, mask.0) }
67			}
68			3 => unsafe { _mm_blendv_epi8(_mm_setzero_si128(), a.0, mask.0) },
69			4..=7 => {
70				let shuffle = Self::make_epi8_mask_shuffle::<Scalar>();
71				unsafe {
72					let mask = _mm_shuffle_epi8(mask.0, shuffle.0);
73					_mm_blendv_epi8(_mm_setzero_si128(), a.0, mask)
74				}
75			}
76			_ => panic!("unsupported bit count"),
77		}
78		.into()
79	}
80}