1 /**
2 * Lightweight wrappers for SSE2 intrinsics for 32-bit operations
3 * 
4 * Copyright:
5 * (C) 2009 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *
8 * License:
9 * Botan is released under the Simplified BSD License (see LICENSE.md)
10 */
11 module botan.simd.simd_sse2;
12 
13 import botan.constants;
14 static if (BOTAN_HAS_SIMD_SSE2):
15 
16 import botan.utils.cpuid;
17 import botan.utils.simd.emmintrin;
18 
/**
 * Thin value-type wrapper around a single `__m128i` register, exposing the
 * 32-bit-lane operations (add, subtract, xor, or, and, not, shifts, rotates,
 * byte swap, 4x4 transpose) through D operators and small helper methods.
 *
 * Every operation forwards directly to the corresponding `_mm_*` wrapper
 * imported from `botan.utils.simd.emmintrin`.
 */
struct SIMDSSE2
{
public:
    /// Returns the result of `CPUID.hasSse2()`.
    static bool enabled() { return CPUID.hasSse2(); }

    /// Loads the four words of `B` with an unaligned 128-bit load.
    this(in uint[4] B)
    {
        m_reg = _mm_loadu_si128(cast(const(__m128i*))(B.ptr));
    }

    /**
     * Builds the register from four scalars, forwarded to `_mm_set_epi32`
     * in the order (B0, B1, B2, B3).
     *
     * NOTE(review): with Intel's `_mm_set_epi32` the first argument lands in
     * the highest lane, so the lane order here would be the reverse of the
     * array constructor's memory order - confirm against callers.
     */
    this(uint B0, uint B1, uint B2, uint B3)
    {
        m_reg = _mm_set_epi32(B0, B1, B2, B3);
    }

    /// Broadcasts `B` through `_mm_set1_epi32`.
    this(uint B)
    {
        m_reg = _mm_set1_epi32(B);
    }

    /// Reads one register's worth of data from `input` as-is (unaligned load).
    static SIMDSSE2 loadLittleEndian(in void* input)
    {
        SIMDSSE2 simd;
        simd.m_reg = _mm_loadu_si128(cast(const(__m128i*))(input));
        return simd;
    }

    /// Same as `loadLittleEndian`, followed by `bswap()`.
    static SIMDSSE2 loadBigEndian(in void* input)
    {
        return loadLittleEndian(input).bswap();
    }

    /// Writes the register to `output` as-is (unaligned store).
    void storeLittleEndian(ubyte* output)
    {
        _mm_storeu_si128(cast(__m128i*)(output), m_reg);
    }

    /// Writes the `bswap()`-ed register to `output`; `this` is not modified.
    void storeBigEndian(ubyte* output)
    {
        bswap().storeLittleEndian(output);
    }

    /**
     * In-place rotate of every 32-bit lane by the compile-time count `ROT`:
     * the lanes shifted left by `ROT` are OR-ed with the lanes shifted right
     * by `32 - ROT`.
     */
    void rotateLeft(int ROT)()
    {
        m_reg = _mm_or_si128(_mm_slli_epi32!ROT(m_reg),
                             _mm_srli_epi32!(32-ROT)(m_reg));
    }

    /// In-place right rotate, expressed as a left rotate by `32 - rot`.
    void rotateRight(int rot)()
    {
        rotateLeft!(32 - rot)();
    }

    /// `this += other` on 32-bit lanes (`_mm_add_epi32`).
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "+")
    {
        m_reg = _mm_add_epi32(m_reg, other.m_reg);
    }

    /// `this + other` on 32-bit lanes (`_mm_add_epi32`).
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "+")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_add_epi32(m_reg, other.m_reg);
        return ret;
    }

    /// `this -= other` on 32-bit lanes (`_mm_sub_epi32`).
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "-")
    {
        m_reg = _mm_sub_epi32(m_reg, other.m_reg);
    }

    /// `this - other` on 32-bit lanes (`_mm_sub_epi32`).
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "-")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_sub_epi32(m_reg, other.m_reg);
        return ret;
    }

    /// `this ^= other` over the whole register (`_mm_xor_si128`).
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "^")
    {
        m_reg = _mm_xor_si128(m_reg, other.m_reg);
    }

    /// `this ^ other` over the whole register (`_mm_xor_si128`).
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "^")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_xor_si128(m_reg, other.m_reg);
        return ret;
    }

    /// `this |= other` over the whole register (`_mm_or_si128`).
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "|")
    {
        m_reg = _mm_or_si128(m_reg, other.m_reg);
    }

    /// `this | other` over the whole register (`_mm_or_si128`).
    /// Provided so that `|` has the same binary/assign pair as the other
    /// operators of this struct.
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "|")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_or_si128(m_reg, other.m_reg);
        return ret;
    }

    /// `this & other` over the whole register (`_mm_and_si128`).
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "&")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_and_si128(m_reg, other.m_reg);
        return ret;
    }

    /// `this &= other` over the whole register (`_mm_and_si128`).
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "&")
    {
        m_reg = _mm_and_si128(m_reg, other.m_reg);
    }

    /// Returns a copy with every 32-bit lane shifted left by the
    /// compile-time count `shift`; `this` is not modified.
    SIMDSSE2 lshift(size_t shift)()
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_slli_epi32!shift(m_reg);
        return ret;
    }

    /// Returns a copy with every 32-bit lane shifted right (`_mm_srli_epi32`)
    /// by the compile-time count `shift`; `this` is not modified.
    SIMDSSE2 rshift(size_t shift)()
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_srli_epi32!shift(m_reg);
        return ret;
    }

    /// Bitwise complement, computed as XOR with an all-ones register.
    SIMDSSE2 opUnary(string op)()
        if (op == "~")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_xor_si128(m_reg, _mm_set1_epi32!(0xFFFFFFFF)());
        return ret;
    }

    /// Returns `(~this) & other` using `_mm_andnot_si128`.
    SIMDSSE2 andc(in SIMDSSE2 other)
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_andnot_si128(m_reg, other.m_reg);
        return ret;
    }

    /**
     * Returns a copy with the bytes of each 32-bit lane reversed; `this` is
     * not modified.
     *
     * Done in two steps: the 16-bit shuffles exchange the two halves of each
     * 32-bit lane, then OR-ing the 16-bit lanes shifted right and left by 8
     * exchanges the two bytes inside every 16-bit half.
     */
    SIMDSSE2 bswap()
    {
        __m128i T = m_reg;

        // Manifest constant: the selector is only ever used as a template
        // argument, so it must be a compile-time value.
        enum SHUF = _MM_SHUFFLE(2, 3, 0, 1);
        T = _mm_shufflehi_epi16!SHUF(T);
        T = _mm_shufflelo_epi16!SHUF(T);

        SIMDSSE2 ret;
        ret.m_reg = _mm_or_si128(_mm_srli_epi16!8(T),
                                 _mm_slli_epi16!8(T));
        return ret;
    }

    /**
     * In-place transpose of the 4x4 matrix of 32-bit words whose rows are
     * `B0`..`B3`.
     *
     * The 32-bit unpacks interleave rows pairwise (B0 with B1, B2 with B3);
     * the 64-bit unpacks then combine the matching halves into the final
     * rows. All temporaries are computed before any output is written.
     */
    static void transpose(ref SIMDSSE2 B0, ref SIMDSSE2 B1,
                          ref SIMDSSE2 B2, ref SIMDSSE2 B3)
    {
        __m128i T0 = _mm_unpacklo_epi32(B0.m_reg, B1.m_reg);
        __m128i T1 = _mm_unpacklo_epi32(B2.m_reg, B3.m_reg);
        __m128i T2 = _mm_unpackhi_epi32(B0.m_reg, B1.m_reg);
        __m128i T3 = _mm_unpackhi_epi32(B2.m_reg, B3.m_reg);
        B0.m_reg = _mm_unpacklo_epi64(T0, T1);
        B1.m_reg = _mm_unpackhi_epi64(T0, T1);
        B2.m_reg = _mm_unpacklo_epi64(T2, T3);
        B3.m_reg = _mm_unpackhi_epi64(T2, T3);
    }

private:
    /// Wraps an already-built raw register.
    this(__m128i input) { m_reg = input; }

    /// The wrapped 128-bit register.
    __m128i m_reg;
}