/**
* Lightweight wrappers for SSE2 intrinsics for 32-bit operations
*
* Copyright:
* (C) 2009 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan.simd.simd_sse2;

import botan.constants;
static if (BOTAN_HAS_SIMD_SSE2):

import botan.utils.cpuid;
import botan.utils.simd.emmintrin;

/**
* Four 32-bit lanes held in a single 128-bit SSE2 register.
*
* Lane 0 is the least significant 32 bits of the register, i.e. the first
* word in memory when loaded with loadLittleEndian().
*/
struct SIMDSSE2
{
public:
    /// Returns: true when CPUID reports SSE2 support on the running CPU.
    static bool enabled() { return CPUID.hasSse2(); }

    /**
    * Loads four words from an array with an unaligned load;
    * B[0] ends up in lane 0.
    */
    this(in uint[4] B)
    {
        m_reg = _mm_loadu_si128(cast(const(__m128i*))(B.ptr));
    }

    /**
    * Builds a vector from four individual words; B0 ends up in lane 0,
    * matching the array constructor above.
    */
    this(uint B0, uint B1, uint B2, uint B3)
    {
        // _mm_set_epi32 takes its arguments from the highest lane down to
        // the lowest, so the words are passed in reverse to keep B0 in lane 0.
        m_reg = _mm_set_epi32(B3, B2, B1, B0);
    }

    /// Broadcasts B into all four lanes.
    this(uint B)
    {
        m_reg = _mm_set1_epi32(B);
    }

    /**
    * Reads 16 bytes from input (no alignment requirement) as four
    * little-endian words.
    */
    static SIMDSSE2 loadLittleEndian(in void* input)
    {
        SIMDSSE2 simd;
        simd.m_reg = _mm_loadu_si128(cast(const(__m128i*))(input));
        return simd;
    }

    /**
    * Reads 16 bytes from input as four big-endian words: a little-endian
    * load followed by a per-lane byte swap.
    */
    static SIMDSSE2 loadBigEndian(in void* input)
    {
        return loadLittleEndian(input).bswap();
    }

    /// Writes the register to output (16 bytes, unaligned store).
    void storeLittleEndian(ubyte* output)
    {
        _mm_storeu_si128(cast(__m128i*)(output), m_reg);
    }

    /// Writes the per-lane byte-swapped register to output (16 bytes).
    void storeBigEndian(ubyte* output)
    {
        bswap().storeLittleEndian(output);
    }

    /// Rotates every lane left by ROT bits, in place.
    void rotateLeft(int ROT)()
    {
        m_reg = _mm_or_si128(_mm_slli_epi32!ROT(m_reg),
                             _mm_srli_epi32!(32-ROT)(m_reg));
    }

    /// Rotates every lane right by rot bits, in place.
    void rotateRight(int rot)()
    {
        rotateLeft!(32 - rot)();
    }

    /// Lane-wise 32-bit addition, in place.
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "+")
    {
        m_reg = _mm_add_epi32(m_reg, other.m_reg);
    }

    /// Lane-wise 32-bit addition.
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "+")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_add_epi32(m_reg, other.m_reg);
        return ret;
    }

    /// Lane-wise 32-bit subtraction, in place.
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "-")
    {
        m_reg = _mm_sub_epi32(m_reg, other.m_reg);
    }

    /// Lane-wise 32-bit subtraction.
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "-")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_sub_epi32(m_reg, other.m_reg);
        return ret;
    }

    /// Bitwise XOR, in place.
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "^")
    {
        m_reg = _mm_xor_si128(m_reg, other.m_reg);
    }

    /// Bitwise XOR.
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "^")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_xor_si128(m_reg, other.m_reg);
        return ret;
    }

    /// Bitwise OR, in place.
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "|")
    {
        m_reg = _mm_or_si128(m_reg, other.m_reg);
    }

    /// Bitwise OR.
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "|")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_or_si128(m_reg, other.m_reg);
        return ret;
    }

    /// Bitwise AND.
    SIMDSSE2 opBinary(string op)(in SIMDSSE2 other)
        if (op == "&")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_and_si128(m_reg, other.m_reg);
        return ret;
    }

    /// Bitwise AND, in place.
    void opOpAssign(string op)(in SIMDSSE2 other)
        if (op == "&")
    {
        m_reg = _mm_and_si128(m_reg, other.m_reg);
    }

    /// Returns: a copy with every lane shifted left by the compile-time count.
    SIMDSSE2 lshift(size_t shift)()
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_slli_epi32!shift(m_reg);
        return ret;
    }

    /// Returns: a copy with every lane logically shifted right by the compile-time count.
    SIMDSSE2 rshift(size_t shift)()
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_srli_epi32!shift(m_reg);
        return ret;
    }

    /// Bitwise complement, computed as XOR against an all-ones register.
    SIMDSSE2 opUnary(string op)()
        if (op == "~")
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_xor_si128(m_reg, _mm_set1_epi32!(0xFFFFFFFF)());
        return ret;
    }

    /// Returns: (~this) & other
    SIMDSSE2 andc(in SIMDSSE2 other)
    {
        SIMDSSE2 ret;
        ret.m_reg = _mm_andnot_si128(m_reg, other.m_reg);
        return ret;
    }

    /// Returns: a copy with the byte order of every 32-bit lane reversed.
    SIMDSSE2 bswap()
    {
        __m128i T = m_reg;

        // Step 1: exchange the two 16-bit halves of every 32-bit lane.
        // The selector is a manifest constant because it is used as a
        // template argument.
        enum SHUF = _MM_SHUFFLE(2, 3, 0, 1);
        T = _mm_shufflehi_epi16!SHUF(T);
        T = _mm_shufflelo_epi16!SHUF(T);

        // Step 2: exchange the two bytes inside every 16-bit word.
        SIMDSSE2 ret;
        ret.m_reg = _mm_or_si128(_mm_srli_epi16!8(T),
                                 _mm_slli_epi16!8(T));
        return ret;
    }

    /**
    * Transposes, in place, the 4x4 matrix of 32-bit words whose rows are
    * B0, B1, B2 and B3.
    */
    static void transpose(ref SIMDSSE2 B0, ref SIMDSSE2 B1,
                          ref SIMDSSE2 B2, ref SIMDSSE2 B3)
    {
        // Interleave 32-bit words of row pairs...
        __m128i T0 = _mm_unpacklo_epi32(B0.m_reg, B1.m_reg);
        __m128i T1 = _mm_unpacklo_epi32(B2.m_reg, B3.m_reg);
        __m128i T2 = _mm_unpackhi_epi32(B0.m_reg, B1.m_reg);
        __m128i T3 = _mm_unpackhi_epi32(B2.m_reg, B3.m_reg);
        // ...then combine 64-bit halves to form the columns.
        B0.m_reg = _mm_unpacklo_epi64(T0, T1);
        B1.m_reg = _mm_unpackhi_epi64(T0, T1);
        B2.m_reg = _mm_unpacklo_epi64(T2, T3);
        B3.m_reg = _mm_unpackhi_epi64(T2, T3);
    }

private:
    /// Wraps an existing raw register value.
    this(__m128i input) { m_reg = input; }

    /// The underlying 128-bit register.
    __m128i m_reg;
}