1 /**
2 * GCM Mode
3 *
4 * Copyright:
5 * (C) 2013 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *
8 * License:
9 * Botan is released under the Simplified BSD License (see LICENSE.md)
10 */
11 module botan.modes.aead.gcm;
12
13 import botan.constants;
14
15 static if (BOTAN_HAS_AEAD_GCM):
16
17 import botan.modes.aead.aead;
18 import botan.block.block_cipher;
19 import botan.stream.stream_cipher;
20 import botan.stream.ctr;
21 import botan.utils.xor_buf;
22 import botan.utils.loadstor;
23 import botan.utils.mem_ops;
24
25 import botan.utils.simd.immintrin;
26 import botan.utils.simd.wmmintrin;
27
28 import botan.utils.types;
29
30 import std.conv : to;
31 import std.algorithm : min;
32
33 static if (BOTAN_HAS_GCM_CLMUL) {
34 import botan.utils.simd.wmmintrin;
35 import botan.utils.cpuid;
36 }
37
38 /**
39 * GCM Mode
40 */
abstract class GCMMode : AEADMode, Transformation
{
public:
    // Destruction order is load-bearing: the CTR object is torn down before GHASH.
    ~this() { destroy(m_ctr); destroy(m_ghash); } // TODO: for some reason CTR needs to be destroyed before ghash

    /**
    * Begin processing a message with the given nonce.
    *
    * Derives the initial counter block (J0) from the nonce, primes the CTR
    * stream with it, and hands E(K, J0) to GHASH so the final tag can be
    * masked with it.
    *
    * Params:
    *  nonce = the per-message nonce
    *  nonce_len = length of nonce in bytes (any length is accepted)
    *
    * Returns: an empty vector (GCM emits no header bytes at start)
    */
    override SecureVector!ubyte startRaw(const(ubyte)* nonce, size_t nonce_len)
    {
        if (!validNonceLength(nonce_len))
            throw new InvalidIVLength(name, nonce_len);

        SecureVector!ubyte y0 = SecureVector!ubyte(BS);

        if (nonce_len == 12)
        {
            // 96-bit nonce fast path: J0 = nonce || 0^31 || 1
            copyMem(y0.ptr, nonce, nonce_len);
            y0[15] = 1;
        }
        else
        {
            // Other lengths: J0 = GHASH(nonce padded, with length block)
            y0 = m_ghash.nonceHash(nonce, nonce_len);
        }

        m_ctr.setIv(y0.ptr, y0.length);

        // E(K, J0): the first keystream block, used only to mask the tag.
        SecureVector!ubyte m_enc_y0 = SecureVector!ubyte(BS);
        m_ctr.encipher(m_enc_y0);

        m_ghash.start(m_enc_y0.ptr, m_enc_y0.length);

        return SecureVector!ubyte();
    }

    /**
    * Set the associated data (AAD) that is authenticated but not encrypted.
    * Must be called before processing message data.
    */
    override void setAssociatedData(const(ubyte)* ad, size_t ad_len)
    {
        m_ghash.setAssociatedData(ad, ad_len);
    }

    /// Returns: algorithm name, e.g. "AES-128/GCM"
    override @property string name() const
    {
        return (m_cipher_name ~ "/GCM");
    }

    override size_t updateGranularity() const
    {
        return 4096; // CTR-BE's internal block size
    }

    /// Key lengths are whatever the underlying CTR/block cipher accepts.
    override KeyLengthSpecification keySpec() const
    {
        return m_ctr.keySpec();
    }

    // GCM supports arbitrary nonce lengths
    override bool validNonceLength(size_t) const { return true; }

    override size_t tagSize() const { return m_tag_size; }

    /// Reset all keyed state in the CTR stream and GHASH.
    override void clear()
    {
        m_ctr.clear();
        m_ghash.clear();

    }

    override size_t defaultNonceLength() const { return super.defaultNonceLength(); }

protected:
    /**
    * Key both halves of GCM: the CTR stream gets the cipher key, and
    * GHASH gets H = E(K, 0^128), computed by enciphering an all-zero
    * block at an all-zero counter.
    */
    override void keySchedule(const(ubyte)* key, size_t length)
    {
        m_ctr.setKey(key, length);

        const Vector!ubyte zeros = Vector!ubyte(BS);
        m_ctr.setIv(zeros.ptr, zeros.length);

        SecureVector!ubyte H = SecureVector!ubyte(BS);
        m_ctr.encipher(H);
        m_ghash.setKey(H);
    }

    /*
    * GCMMode Constructor
    */
    this(BlockCipher cipher, size_t tag_size)
    {
        m_tag_size = tag_size;
        m_cipher_name = cipher.name;
        if (cipher.blockSize() != BS)
            throw new InvalidArgument("GCM requires a 128 bit cipher so cannot be used with " ~ cipher.name);

        m_ghash = new GHASH;

        m_ctr = new CTRBE(cipher); // CTR_BE takes ownership of cipher

        // Only full (16) and half (8) length tags are accepted here.
        if (m_tag_size != 8 && m_tag_size != 16)
            throw new InvalidArgument(name ~ ": Bad tag size " ~ to!string(m_tag_size));
    }

    // GCM block size: fixed at 128 bits.
    __gshared immutable size_t BS = 16;

    const size_t m_tag_size;
    const string m_cipher_name;

    Unique!StreamCipher m_ctr;   // CTR-BE stream built over the block cipher
    Unique!GHASH m_ghash;        // universal hash producing the auth tag
}
146
147 /**
148 * GCM Encryption
149 */
final class GCMEncryption : GCMMode, Transformation
{
public:
    /**
    * Params:
    *  cipher = the 128 bit block cipher to use
    *  tag_size = is how big the auth tag will be
    */
    this(BlockCipher cipher, size_t tag_size = 16)
    {
        super(cipher, tag_size);
    }

    /// Ciphertext length is the plaintext length plus the appended tag.
    override size_t outputLength(size_t input_length) const
    { return input_length + tagSize(); }

    /// Encryption can finish on an empty final chunk.
    override size_t minimumFinalSize() const { return 0; }

    /**
    * Encrypt buffer[offset .. $] in place and absorb the resulting
    * ciphertext into GHASH (encrypt-then-MAC ordering).
    */
    override void update(ref SecureVector!ubyte buffer, size_t offset = 0)
    {
        assert(buffer.length >= offset, "Offset is sane");
        const size_t sz = buffer.length - offset;
        ubyte* buf = buffer.ptr + offset;

        m_ctr.cipher(buf, buf, sz);
        m_ghash.update(buf, sz);
    }

    /**
    * Encrypt the final chunk and append the (possibly truncated)
    * authentication tag to the buffer.
    */
    override void finish(ref SecureVector!ubyte buffer, size_t offset = 0)
    {
        // NOTE(review): dropped an unused local `import std.algorithm : max;`
        update(buffer, offset);
        auto mac = m_ghash.finished();
        buffer ~= mac.ptr[0 .. tagSize()];
    }

    // Interface fallthrough
    override string provider() const { return "core"; }
    override SecureVector!ubyte startRaw(const(ubyte)* nonce, size_t nonce_len) { return super.startRaw(nonce, nonce_len); }
    override size_t updateGranularity() const { return super.updateGranularity(); }
    override size_t defaultNonceLength() const { return super.defaultNonceLength(); }
    override bool validNonceLength(size_t nonce_len) const { return super.validNonceLength(nonce_len); }
    override @property string name() const { return super.name; }
    override void clear() { return super.clear(); }
}
195
196 /**
197 * GCM Decryption
198 */
final class GCMDecryption : GCMMode, Transformation
{
public:
    /**
    * Params:
    *  cipher = the 128 bit block cipher to use
    *  tag_size = is how big the auth tag will be
    */
    this(BlockCipher cipher, size_t tag_size = 16)
    {
        super(cipher, tag_size);
    }

    /// Plaintext length is the ciphertext length minus the trailing tag.
    override size_t outputLength(size_t input_length) const
    {
        assert(input_length > tagSize(), "Sufficient input");
        return input_length - tagSize();
    }

    /// The final chunk must at least contain the full tag.
    override size_t minimumFinalSize() const { return tagSize(); }

    /**
    * Absorb buffer[offset .. $] into GHASH, then decrypt it in place.
    * Note the order is the mirror image of encryption: the ciphertext
    * is hashed before it is turned back into plaintext.
    */
    override void update(ref SecureVector!ubyte buffer, size_t offset = 0)
    {
        assert(buffer.length >= offset, "Offset is sane");
        const size_t sz = buffer.length - offset;
        ubyte* buf = buffer.ptr + offset;

        m_ghash.update(buf, sz);
        m_ctr.cipher(buf, buf, sz);
    }

    /**
    * Process the final chunk (which carries the tag as its suffix),
    * verify the tag, and shrink the buffer to just the plaintext.
    *
    * Throws: IntegrityFailure if the computed tag does not match.
    */
    override void finish(ref SecureVector!ubyte buffer, size_t offset = 0) // default added for parity with update()/GCMEncryption.finish
    {
        assert(buffer.length >= offset, "Offset is sane");
        const size_t sz = buffer.length - offset;

        ubyte* buf = buffer.ptr + offset;

        assert(sz >= tagSize(), "Have the tag as part of final input");

        const size_t remaining = sz - tagSize();

        // handle any final input before the tag
        if (remaining)
        {
            m_ghash.update(buf, remaining);

            m_ctr.cipher(buf, buf, remaining);
        }

        auto mac = m_ghash.finished();

        const(ubyte)* included_tag = &buffer[remaining];

        // Constant-time comparison to avoid leaking tag bytes via timing.
        if (!sameMem(mac.ptr, included_tag, tagSize()))
            throw new IntegrityFailure("GCM tag check failed");

        buffer.resize(offset + remaining);
    }

    // Interface fallthrough
    override string provider() const { return "core"; }
    override SecureVector!ubyte startRaw(const(ubyte)* nonce, size_t nonce_len) { return super.startRaw(nonce, nonce_len); }
    override size_t updateGranularity() const { return super.updateGranularity(); }
    override size_t defaultNonceLength() const { return super.defaultNonceLength(); }
    override bool validNonceLength(size_t nonce_len) const { return super.validNonceLength(nonce_len); }
    override @property string name() const { return super.name; }
    override void clear() { return super.clear(); }
}
268
269 /**
270 * GCM's GHASH
271 * Maybe a Transform?
272 */
final class GHASH : SymmetricAlgorithm
{
public:
    /**
    * Hash the associated data into its own accumulator (m_H_ad),
    * so it can be re-used as the starting state for each message.
    */
    void setAssociatedData(const(ubyte)* input, size_t length)
    {
        zeroise(m_H_ad);
        ghashUpdate(m_H_ad, input, length);
        m_ad_len = length;
    }

    /**
    * Derive the initial counter block from a nonce that is not 96 bits:
    * GHASH over the nonce followed by a length block (ad_len = 0).
    */
    SecureVector!ubyte nonceHash(const(ubyte)* nonce, size_t nonce_len)
    {
        assert(m_ghash.length == 0, "nonceHash called during wrong time");
        SecureVector!ubyte y0 = SecureVector!ubyte(16);

        ghashUpdate(y0, nonce, nonce_len);
        addFinalBlock(y0, 0, nonce_len);

        return y0.move;
    }

    /**
    * Begin a message: stash E(K, J0) (used to mask the final tag) and
    * seed the running hash with the pre-hashed associated data.
    */
    void start(const(ubyte)* nonce, size_t len)
    {
        m_nonce[] = nonce[0 .. len];
        m_ghash = m_H_ad.dup;
    }

    /*
    * Assumes input len is multiple of 16
    */
    void update(const(ubyte)* input, size_t length)
    {
        assert(m_ghash.length == 16, "Key was set");

        m_text_len += length;

        ghashUpdate(m_ghash, input, length);
    }

    /**
    * Finish the message: fold in the length block, mask with E(K, J0),
    * and hand back the tag. Resets m_text_len for the next message.
    */
    SecureVector!ubyte finished()
    {
        addFinalBlock(m_ghash, m_ad_len, m_text_len);
        m_ghash ^= m_nonce;
        m_text_len = 0;
        return m_ghash.move;
    }

    KeyLengthSpecification keySpec() const { return KeyLengthSpecification(16); }

    /// Wipe keyed state. NOTE(review): m_nonce is not zeroised here — confirm intended.
    override void clear()
    {
        zeroise(m_H);
        zeroise(m_H_ad);
        m_ghash.clear();
        m_text_len = m_ad_len = 0;
    }

    @property string name() const { return "GHASH"; }

    /**
    * Install the hash key H = E(K, 0^128) and reset per-message state.
    */
    override void keySchedule(const(ubyte)* key, size_t length)
    {
        m_H[] = key[0 .. length];
        m_H_ad.resize(16);
        m_ad_len = 0;
        m_text_len = 0;
    }

private:
    /**
    * x <- x * H in GF(2^128) with the GCM reduction polynomial.
    * Uses the CLMUL instruction when available, else a bit-serial
    * shift-and-add multiply (constant number of iterations).
    */
    void gcmMultiply(ref SecureVector!ubyte x)
    {
        // NOTE(review): dropped an unused local `import std.algorithm : max;`
        static if (BOTAN_HAS_GCM_CLMUL) {
            if (CPUID.hasClmul()) {
                return gcmMultiplyClmul(*cast(ubyte[16]*) x.ptr, *cast(ubyte[16]*) m_H.ptr);
            }
        }

        // R is the reduction constant for GCM's polynomial, bit-reflected.
        __gshared immutable ulong R = 0xE100000000000000;

        ulong[2] H = [ loadBigEndian!ulong(m_H.ptr, 0), loadBigEndian!ulong(m_H.ptr, 1) ];
        ulong[2] Z = [ 0, 0 ];

        // SSE2 might be useful here
        foreach (size_t i; 0 .. 2)
        {
            const ulong X = loadBigEndian!ulong(x.ptr, i);

            foreach (size_t j; 0 .. 64)
            {
                // Conditionally accumulate H for each set bit of X (MSB first)
                if ((X >> (63-j)) & 1)
                {
                    Z[0] ^= H[0];
                    Z[1] ^= H[1];
                }

                // Shift H right one bit and reduce if a bit fell off the end
                const ulong r = (H[1] & 1) ? R : 0;

                H[1] = (H[0] << 63) | (H[1] >> 1);
                H[0] = (H[0] >> 1) ^ r;
            }
        }

        storeBigEndian!ulong(x.ptr, Z[0], Z[1]);
    }

    /**
    * Absorb input into ghash, 16 bytes at a time: XOR then multiply by H.
    */
    void ghashUpdate(ref SecureVector!ubyte ghash, const(ubyte)* input, size_t length)
    {
        __gshared immutable size_t BS = 16;

        /*
        This assumes if less than block size input then we're just on the
        final block and should pad with zeros
        */
        while (length)
        {
            const size_t to_proc = min(length, BS);

            xorBuf(ghash.ptr, input, to_proc);
            gcmMultiply(ghash);

            input += to_proc;
            length -= to_proc;
        }
    }

    /**
    * Fold the GCM length block (bit lengths of AD and text) into hash.
    */
    void addFinalBlock(ref SecureVector!ubyte hash,
                       size_t ad_len, size_t text_len)
    {
        SecureVector!ubyte final_block = SecureVector!ubyte(16);
        storeBigEndian!ulong(final_block.ptr, 8*ad_len, 8*text_len);
        ghashUpdate(hash, final_block.ptr, final_block.length);
    }

    SecureVector!ubyte m_H;       // hash key H = E(K, 0^128)
    SecureVector!ubyte m_H_ad;    // GHASH state after absorbing only the AD
    SecureVector!ubyte m_nonce;   // E(K, J0), XORed into the final tag
    SecureVector!ubyte m_ghash;   // running hash state for the current message
    size_t m_ad_len = 0, m_text_len = 0;
}
412
413
414
/**
* x <- x * H in GF(2^128) using the PCLMULQDQ instruction.
*
* Follows Algorithms 1 and 5 of Intel's CLMUL white paper: four 64x64
* carry-less multiplies, recombination, a left shift by one bit, then
* reduction modulo the GCM polynomial. On DMD/x86-64 a hand-written asm
* version is used (PCLMULQDQ is emitted as raw `db` bytes since DMD's
* inline assembler lacks the mnemonic); other compilers use intrinsics.
*
* Params:
*  x = in/out: the 16-byte hash state, replaced by the product
*  H = the 16-byte hash subkey
*/
static if (BOTAN_HAS_GCM_CLMUL)
void gcmMultiplyClmul(ref ubyte[16] x, in ubyte[16] H)
{
    // Shuffle mask for _mm_shuffle_epi8; presumably yields the 15..0
    // byte-reversal needed to treat the state big-endian — the exact byte
    // order depends on the project's _mm_set1_epi8 template (TODO confirm).
    __gshared immutable(__m128i) BSWAP_MASK = _mm_set1_epi8!([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])();
    version(D_InlineAsm_X86_64) {
        version(DMD) {
            enum USE_ASM = true;
        } else enum USE_ASM = false;
    } else enum USE_ASM = false;

    static if (USE_ASM) {
        __m128i* a = cast(__m128i*) x.ptr;
        __m128i* b = cast(__m128i*) H.ptr;
        __m128i* c = cast(__m128i*) &BSWAP_MASK;

        // Register map: XMM13=a, XMM14=b, XMM15=mask, XMM0..5=T0..T5, XMM6=scratch.
        asm pure nothrow {
            mov RAX, a;
            mov RBX, b;
            mov RCX, c;
            movdqu XMM13, [RAX]; // __m128i a = _mm_loadu_si128(cast(const(__m128i*)) x.ptr);
            movdqu XMM14, [RBX]; // __m128i b = _mm_loadu_si128(cast(const(__m128i*)) H.ptr);
            movdqu XMM15, [RCX];
            pshufb XMM13, XMM15; // a = _mm_shuffle_epi8(a, BSWAP_MASK);
            pshufb XMM14, XMM15; // b = _mm_shuffle_epi8(b, BSWAP_MASK);
            movdqa XMM0, XMM13; // XMM0 => T0
            movdqa XMM1, XMM13; // XMM1 => T1
            movdqa XMM2, XMM13; // XMM2 => T2
            movdqa XMM3, XMM13; // XMM3 => T3

            // Raw PCLMULQDQ encodings (66 REX 0F 3A 44 /r imm8), dest XMM0..3, src XMM14:
            db 0x66, 0x41, 0x0F, 0x3A, 0x44, 0xC6, 0x00; // T0 = _mm_clmulepi64_si128!"0x00"(a, b);
            db 0x66, 0x41, 0x0F, 0x3A, 0x44, 0xCE, 0x01; // T1 = _mm_clmulepi64_si128!"0x01"(a, b);
            db 0x66, 0x41, 0x0F, 0x3A, 0x44, 0xD6, 0x10; // T2 = _mm_clmulepi64_si128!"0x10"(a, b);
            db 0x66, 0x41, 0x0F, 0x3A, 0x44, 0xDE, 0x11; // T3 = _mm_clmulepi64_si128!"0x11"(a, b);
            pxor XMM1, XMM2; // T1 = _mm_xor_si128(T1, T2);
            movdqa XMM6, XMM1;
            pslldq XMM6, 8; // T2 = _mm_slli_si128!8(T1);
            movdqa XMM2, XMM6;
            psrldq XMM1, 8; // T1 = _mm_srli_si128!8(T1);
            pxor XMM0, XMM2; // T0 = _mm_xor_si128(T0, T2);
            pxor XMM3, XMM1; // T3 = _mm_xor_si128(T3, T1);
            movdqa XMM6, XMM0;
            psrld XMM6, 31; // T4 = _mm_srli_epi32!31(T0)
            movdqa XMM4, XMM6;
            pslld XMM0, 1; // T0 = _mm_slli_epi32!1(T0);
            movdqa XMM6, XMM3;
            psrld XMM6, 31;
            movdqa XMM5, XMM6; // T5 = _mm_srli_epi32!31(T3);
            pslld XMM3, 1; // T3 = _mm_slli_epi32!1(T3);
            movdqa XMM6, XMM4;
            psrldq XMM6, 12; // T2 = _mm_srli_si128!12(T4);
            movdqa XMM2, XMM6;
            pslldq XMM5, 4; // T5 = _mm_slli_si128!4(T5);
            pslldq XMM4, 4; // T4 = _mm_slli_si128!4(T4);
            por XMM0, XMM4; // T0 = _mm_or_si128(T0, T4);
            por XMM3, XMM5; // T3 = _mm_or_si128(T3, T5);
            por XMM3, XMM2; // T3 = _mm_or_si128(T3, T2);
            movdqa XMM6, XMM0;
            pslld XMM6, 31; // T4 = _mm_slli_epi32!31(T0);
            movdqa XMM4, XMM6;
            movdqa XMM6, XMM0;
            pslld XMM6, 30; // T5 = _mm_slli_epi32!30(T0);
            movdqa XMM5, XMM6;
            movdqa XMM6, XMM0;
            pslld XMM6, 25; // T2 = _mm_slli_epi32!25(T0);
            movdqa XMM2, XMM6;
            pxor XMM4, XMM5; // T4 = _mm_xor_si128(T4, T5);
            pxor XMM4, XMM2; // T4 = _mm_xor_si128(T4, T2);
            movdqa XMM6, XMM4;
            psrldq XMM6, 4; // T5 = _mm_srli_si128!4(T4);
            movdqa XMM5, XMM6;
            pxor XMM3, XMM5; // T3 = _mm_xor_si128(T3, T5);
            pslldq XMM4, 12; // T4 = _mm_slli_si128!12(T4);
            pxor XMM0, XMM4; // T0 = _mm_xor_si128(T0, T4);
            pxor XMM3, XMM0; // T3 = _mm_xor_si128(T3, T0);
            movdqa XMM6, XMM0;
            psrld XMM6, 1; // T4 = _mm_srli_epi32!1(T0);
            movdqa XMM4, XMM6;
            movdqa XMM6, XMM0;
            psrld XMM6, 2; // T1 = _mm_srli_epi32!2(T0);
            movdqa XMM1, XMM6;
            movdqa XMM6, XMM0;
            psrld XMM6, 7; // T2 = _mm_srli_epi32!7(T0);
            movdqa XMM2, XMM6;
            pxor XMM3, XMM1; // T3 = _mm_xor_si128(T3, T1);
            pxor XMM3, XMM2; // T3 = _mm_xor_si128(T3, T2);
            pxor XMM3, XMM4; // T3 = _mm_xor_si128(T3, T4);
            mov RCX, c;
            movdqu XMM15, [RCX];
            pshufb XMM3, XMM15; // T3 = _mm_shuffle_epi8(T3, BSWAP_MASK);
            mov RAX, a;
            movdqu [RAX], XMM3; // _mm_storeu_si128(cast(__m128i*) x.ptr, T3);
        }
    }
    else {
        /*
        * Algorithms 1 and 5 from Intel's CLMUL guide
        */
        __m128i a = _mm_loadu_si128(cast(const(__m128i*)) x.ptr);
        __m128i b = _mm_loadu_si128(cast(const(__m128i*)) H.ptr);

        a = _mm_shuffle_epi8(a, BSWAP_MASK);
        b = _mm_shuffle_epi8(b, BSWAP_MASK);

        __m128i T0, T1, T2, T3, T4, T5;

        // Schoolbook 128x128 carry-less multiply from four 64x64 products
        T0 = _mm_clmulepi64_si128!"0x00"(a, b);
        T1 = _mm_clmulepi64_si128!"0x01"(a, b);
        T2 = _mm_clmulepi64_si128!"0x10"(a, b);
        T3 = _mm_clmulepi64_si128!"0x11"(a, b);

        // Recombine the middle terms into the 256-bit product (T3:T0)
        T1 = _mm_xor_si128(T1, T2);
        T2 = _mm_slli_si128!8(T1);
        T1 = _mm_srli_si128!8(T1);
        T0 = _mm_xor_si128(T0, T2);
        T3 = _mm_xor_si128(T3, T1);

        // Shift the whole 256-bit product left by one bit
        T4 = _mm_srli_epi32!31(T0);
        T0 = _mm_slli_epi32!1(T0);

        T5 = _mm_srli_epi32!31(T3);
        T3 = _mm_slli_epi32!1(T3);

        T2 = _mm_srli_si128!12(T4);
        T5 = _mm_slli_si128!4(T5);
        T4 = _mm_slli_si128!4(T4);
        T0 = _mm_or_si128(T0, T4);
        T3 = _mm_or_si128(T3, T5);
        T3 = _mm_or_si128(T3, T2);

        // Reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1
        T4 = _mm_slli_epi32!31(T0);
        T5 = _mm_slli_epi32!30(T0);
        T2 = _mm_slli_epi32!25(T0);

        T4 = _mm_xor_si128(T4, T5);
        T4 = _mm_xor_si128(T4, T2);
        T5 = _mm_srli_si128!4(T4);
        T3 = _mm_xor_si128(T3, T5);
        T4 = _mm_slli_si128!12(T4);
        T0 = _mm_xor_si128(T0, T4);
        T3 = _mm_xor_si128(T3, T0);

        T4 = _mm_srli_epi32!1(T0);
        T1 = _mm_srli_epi32!2(T0);
        T2 = _mm_srli_epi32!7(T0);
        T3 = _mm_xor_si128(T3, T1);
        T3 = _mm_xor_si128(T3, T2);
        T3 = _mm_xor_si128(T3, T4);

        T3 = _mm_shuffle_epi8(T3, BSWAP_MASK);

        _mm_storeu_si128(cast(__m128i*) x.ptr, T3);
    }
}