1 /**
2 * wmmintrin.h style functions
3 * 
4 * Copyright:
5 * (C) 2014-2015 Etienne Cimon
6 *
7 * License:
8 * Released under the MIT license
9 */
10 module botan.utils.simd.wmmintrin;
11 /*
* LDC, GDC, DMD intrinsics for AES-NI and PCLMULQDQ
13 * (C) 2014-. Etienne Cimon
14 *
15 * Distributed under the terms of the MIT License.
16 */
17 
18 import botan.constants;
19 import std.conv : to;
20 static if (BOTAN_HAS_SIMD_SSE2):
21 
22 public import botan.utils.simd.emmintrin;
23 pure:
24 // _mm_aesenc_si128
25 // _mm_aesenclast_si128
26 // _mm_aesdec_si128
27 // _mm_aesdeclast_si128
28 // _mm_aesimc_si128
29 // _mm_aeskeygenassist_si128
30 
version(GDC) {
@inline:
    /// Forwards to GDC's `__builtin_ia32_aesenc128` (AESENC) with `a` and `b`
    /// reinterpreted as `long2`; the result is reinterpreted back to `__m128i`.
    __m128i _mm_aesenc_si128()(auto ref __m128i a, auto const ref __m128i b) {
        auto state = cast(long2) a;
        auto roundKey = cast(long2) b;
        return cast(__m128i) __builtin_ia32_aesenc128(state, roundKey);
    }

    /// Forwards to GDC's `__builtin_ia32_aesenclast128` (AESENCLAST).
    __m128i _mm_aesenclast_si128()(auto ref __m128i a, auto const ref __m128i b) {
        auto state = cast(long2) a;
        auto roundKey = cast(long2) b;
        return cast(__m128i) __builtin_ia32_aesenclast128(state, roundKey);
    }

    /// Forwards to GDC's `__builtin_ia32_aesdec128` (AESDEC).
    __m128i _mm_aesdec_si128()(auto ref __m128i a, auto const ref __m128i b) {
        auto state = cast(long2) a;
        auto roundKey = cast(long2) b;
        return cast(__m128i) __builtin_ia32_aesdec128(state, roundKey);
    }

    /// Forwards to GDC's `__builtin_ia32_aesdeclast128` (AESDECLAST).
    __m128i _mm_aesdeclast_si128()(auto ref __m128i a, auto const ref __m128i b) {
        auto state = cast(long2) a;
        auto roundKey = cast(long2) b;
        return cast(__m128i) __builtin_ia32_aesdeclast128(state, roundKey);
    }

    /// Forwards to GDC's `__builtin_ia32_aesimc128` (AESIMC) on `a`.
    __m128i _mm_aesimc_si128(__m128i a) {
        auto key = cast(long2) a;
        return cast(__m128i) __builtin_ia32_aesimc128(key);
    }

    /// Forwards to GDC's `__builtin_ia32_aeskeygenassist128` (AESKEYGENASSIST).
    /// The template argument `b` is passed through as the builtin's second operand.
    __m128i _mm_aeskeygenassist_si128(int b)(__m128i a) {
        auto key = cast(long2) a;
        return cast(__m128i) __builtin_ia32_aeskeygenassist128(key, b);
    }

    /// Forwards to GDC's `__builtin_ia32_pclmulqdq128` (PCLMULQDQ).
    /// The template argument `c` is passed through as the builtin's third operand.
    __m128i _mm_clmulepi64_si128(int c)(auto ref __m128i a, auto ref __m128i b) {
        auto lhs = cast(long2) a;
        auto rhs = cast(long2) b;
        return cast(__m128i) __builtin_ia32_pclmulqdq128(lhs, rhs, c);
    }
}
62 
// Disabled: `version(none)` is never satisfied, so the declarations below are
// parsed but never compiled. They mirror the builtin-based wrappers of the
// GDC block in this file.
version(none) {
    /// AESENC via `__builtin_ia32_aesenc128`.
    __m128i _mm_aesenc_si128()(auto ref __m128i a, auto const ref __m128i b) {
        auto result = __builtin_ia32_aesenc128(cast(long2) a, cast(long2) b);
        return cast(__m128i) result;
    }

    /// AESENCLAST via `__builtin_ia32_aesenclast128`.
    __m128i _mm_aesenclast_si128()(auto ref __m128i a, auto const ref __m128i b) {
        auto result = __builtin_ia32_aesenclast128(cast(long2) a, cast(long2) b);
        return cast(__m128i) result;
    }

    /// AESDEC via `__builtin_ia32_aesdec128`.
    __m128i _mm_aesdec_si128()(auto ref __m128i a, auto const ref __m128i b) {
        auto result = __builtin_ia32_aesdec128(cast(long2) a, cast(long2) b);
        return cast(__m128i) result;
    }

    /// AESDECLAST via `__builtin_ia32_aesdeclast128`.
    __m128i _mm_aesdeclast_si128()(auto ref __m128i a, auto const ref __m128i b) {
        auto result = __builtin_ia32_aesdeclast128(cast(long2) a, cast(long2) b);
        return cast(__m128i) result;
    }

    /// AESIMC via `__builtin_ia32_aesimc128`.
    __m128i _mm_aesimc_si128(__m128i a) {
        auto result = __builtin_ia32_aesimc128(cast(long2) a);
        return cast(__m128i) result;
    }

    /// AESKEYGENASSIST via `__builtin_ia32_aeskeygenassist128`; `b` is the second operand.
    __m128i _mm_aeskeygenassist_si128(int b)(__m128i a) {
        auto result = __builtin_ia32_aeskeygenassist128(cast(long2) a, b);
        return cast(__m128i) result;
    }

    /// PCLMULQDQ via `__builtin_ia32_pclmulqdq128`; `c` is the third operand.
    __m128i _mm_clmulepi64_si128(int c)(auto ref __m128i a, auto ref __m128i b) {
        auto result = __builtin_ia32_pclmulqdq128(cast(long2) a, cast(long2) b, c);
        return cast(__m128i) result;
    }
}
93 
94 version(D_InlineAsm_X86_64) {
95     __m128i _mm_aesenc_si128()(auto ref __m128i a, auto const ref __m128i b) {
96         __m128i* _a = &a;
97         const(__m128i)* _b = &b;
98         
99         asm pure nothrow {
100             mov RAX, _a;
101             mov RBX, _b;
102             movdqu XMM1, [RAX];
103             movdqu XMM2, [RBX];
104             aesenc XMM1, XMM2;
105             movdqu [RAX], XMM1;
106         }
107         
108         return a;
109     }
110 
111     __m128i _mm_aesenclast_si128()(auto ref __m128i a, auto const ref __m128i b) {
112         __m128i* _a = &a;
113         const(__m128i)* _b = &b;
114         
115         asm pure nothrow {
116             mov RAX, _a;
117             mov RBX, _b;
118             movdqu XMM1, [RAX];
119             movdqu XMM2, [RBX];
120             aesenclast XMM1, XMM2;
121             movdqu [RAX], XMM1;
122         }
123         
124         return a;
125     }
126     
127     __m128i _mm_aesdec_si128()(auto ref __m128i a, auto const ref __m128i b) {
128         __m128i* _a = &a;
129         const(__m128i)* _b = &b;
130         
131         asm pure nothrow {
132             mov RAX, _a;
133             mov RBX, _b;
134             movdqu XMM1, [RAX];
135             movdqu XMM2, [RBX];
136             aesdec XMM1, XMM2;
137             movdqu [RAX], XMM1;
138         }
139         
140         return a;
141     }
142 
143 
144     __m128i _mm_aesdeclast_si128()(auto ref __m128i a, auto const ref __m128i b) {
145         __m128i* _a = &a;
146         const(__m128i)* _b = &b;
147         
148         asm pure nothrow {
149             mov RAX, _a;
150             mov RBX, _b;
151             movdqu XMM1, [RAX];
152             movdqu XMM2, [RBX];
153             aesdeclast XMM1, XMM2;
154             movdqu [RAX], XMM1;
155         }
156         
157         return a;
158     }
159 
160     __m128i _mm_aesimc_si128(__m128i a) {
161         __m128i* _a = &a;
162         
163         asm pure nothrow {
164             mov RAX, _a;
165             movdqu XMM2, [RAX];
166             aesimc XMM1, XMM2;
167             movdqu [RAX], XMM1;
168         }
169         
170         return a;
171     }
172 
173     __m128i _mm_aeskeygenassist_si128(int b)(__m128i a) {
174         __m128i* _a = &a;
175         
176         mixin(`asm pure nothrow {
177             mov RAX, _a;
178             movdqu XMM1, [RAX];
179             aeskeygenassist XMM2, XMM1, ` ~ b.to!string ~ `;
180             movdqu [RAX], XMM2;
181         }`);
182         
183         return a;
184     }
185 
186     __m128i _mm_clmulepi64_si128(string imm)(auto ref __m128i a, auto ref __m128i b) {
187         /// todo: Enable this after adding PCLMULQDQ in dmd
188         __m128i* _a = &a;
189         __m128i* _b = &b;
190         __m128i c;
191         __m128i* _c = &c;
192         
193         mixin(`asm pure nothrow {
194             mov RAX, _a;
195             mov RBX, _b;
196             mov RCX, _c;
197             movdqu XMM1, [RAX];
198             movdqu XMM2, [RBX];
199             db 0x66, 0x0F, 0x3A, 0x44, 0xCA, ` ~ imm ~ `; // PCLMULQDQ
200             movdqu [RCX], XMM1;
201         }`);
202         
203         return c;
204     }
205 }