/**
* GCM Mode
*
* Copyright:
* (C) 2013 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan.modes.aead.gcm;

import botan.constants;

static if (BOTAN_HAS_AEAD_GCM):

import botan.modes.aead.aead;
import botan.block.block_cipher;
import botan.stream.stream_cipher;
import botan.stream.ctr;
import botan.utils.xor_buf;
import botan.utils.loadstor;
import botan.utils.mem_ops;

import botan.utils.simd.immintrin;
import botan.utils.simd.wmmintrin;

import botan.utils.types;

import std.conv : to;
import std.algorithm : min;

static if (BOTAN_HAS_GCM_CLMUL) {
    // NOTE(review): wmmintrin is already imported unconditionally above;
    // this conditional import is redundant (harmless in D, but could be removed).
    import botan.utils.simd.wmmintrin;
    import botan.utils.cpuid;
}

/**
* GCM Mode (Galois/Counter Mode, NIST SP 800-38D).
*
* Combines a CTR-mode keystream (confidentiality) with GHASH over the
* ciphertext and associated data (authenticity). Requires a 128-bit
* block cipher; the tag is 8 or 16 bytes (enforced in the constructor).
*/
abstract class GCMMode : AEADMode, Transformation
{
public:
    ~this() { destroy(m_ctr); destroy(m_ghash); } // TODO: for some reason CTR needs to be destroyed before ghash

    /**
    * Begin processing a message: derive the pre-counter block y0 from the
    * nonce, seed the CTR keystream with it, and prime GHASH with E_K(y0)
    * (the value later XORed into the tag by GHASH.finished()).
    *
    * Params:
    *  nonce = the per-message nonce
    *  nonce_len = length of nonce in bytes (any length accepted)
    * Returns: an empty vector (GCM emits no header bytes at start)
    * Throws: InvalidIVLength if validNonceLength rejects nonce_len
    */
    override SecureVector!ubyte startRaw(const(ubyte)* nonce, size_t nonce_len)
    {
        if (!validNonceLength(nonce_len))
            throw new InvalidIVLength(name, nonce_len);

        SecureVector!ubyte y0 = SecureVector!ubyte(BS);

        if (nonce_len == 12)
        {
            // 96-bit nonce fast path: y0 = nonce || 0^31 || 1 (SP 800-38D)
            copyMem(y0.ptr, nonce, nonce_len);
            y0[15] = 1;
        }
        else
        {
            // Any other length: y0 = GHASH(nonce padded, with length block)
            y0 = m_ghash.nonceHash(nonce, nonce_len);
        }

        m_ctr.setIv(y0.ptr, y0.length);

        // Encrypt the all-zero block at counter y0; this E_K(y0) is folded
        // into the final tag by GHASH. (Local despite the m_ prefix.)
        SecureVector!ubyte m_enc_y0 = SecureVector!ubyte(BS);
        m_ctr.encipher(m_enc_y0);

        m_ghash.start(m_enc_y0.ptr, m_enc_y0.length);

        return SecureVector!ubyte();
    }

    /// Feed associated (authenticated-only) data into GHASH; call before update().
    override void setAssociatedData(const(ubyte)* ad, size_t ad_len)
    {
        m_ghash.setAssociatedData(ad, ad_len);
    }

    /// E.g. "AES-128/GCM"
    override @property string name() const
    {
        return (m_cipher_name ~ "/GCM");
    }

    override size_t updateGranularity() const
    {
        return 4096; // CTR-BE's internal block size
    }

    /// Key sizes are those of the underlying block cipher (via CTR wrapper).
    override KeyLengthSpecification keySpec() const
    {
        return m_ctr.keySpec();
    }

    // GCM supports arbitrary nonce lengths
    override bool validNonceLength(size_t) const { return true; }

    override size_t tagSize() const { return m_tag_size; }

    /// Reset keyed state in both the keystream and the authenticator.
    override void clear()
    {
        m_ctr.clear();
        m_ghash.clear();

    }

    override size_t defaultNonceLength() const { return super.defaultNonceLength(); }

protected:
    /**
    * Key both halves of GCM: the CTR keystream gets the cipher key, and
    * GHASH gets the hash subkey H = E_K(0^128), produced by enciphering
    * a zero block under a zero IV.
    */
    override void keySchedule(const(ubyte)* key, size_t length)
    {
        m_ctr.setKey(key, length);

        const Vector!ubyte zeros = Vector!ubyte(BS);
        m_ctr.setIv(zeros.ptr, zeros.length);

        SecureVector!ubyte H = SecureVector!ubyte(BS);
        m_ctr.encipher(H);
        m_ghash.setKey(H);
    }

    /*
    * GCMMode Constructor
    */
    this(BlockCipher cipher, size_t tag_size)
    {
        m_tag_size = tag_size;
        m_cipher_name = cipher.name;
        if (cipher.blockSize() != BS)
            throw new InvalidArgument("GCM requires a 128 bit cipher so cannot be used with " ~ cipher.name);

        m_ghash = new GHASH;

        m_ctr = new CTRBE(cipher); // CTR_BE takes ownership of cipher

        // Only full (16) and truncated-to-8 tags are permitted here.
        if (m_tag_size != 8 && m_tag_size != 16)
            throw new InvalidArgument(name ~ ": Bad tag size " ~ to!string(m_tag_size));
    }

    // GCM block size: fixed at 128 bits.
    __gshared immutable size_t BS = 16;

    const size_t m_tag_size;     // tag length in bytes (8 or 16)
    const string m_cipher_name;  // underlying cipher name, for name()

    Unique!StreamCipher m_ctr;   // CTR-BE keystream over the block cipher
    Unique!GHASH m_ghash;        // GF(2^128) universal-hash authenticator
}

/**
* GCM Encryption
*/
final class GCMEncryption : GCMMode, Transformation
{
public:
    /**
    * Params:
    *  cipher = the 128 bit block cipher to use
    *  tag_size = is how big the auth tag will be
    */
    this(BlockCipher cipher, size_t tag_size = 16)
    {
        super(cipher, tag_size);
    }

    /// Ciphertext length = plaintext length + appended tag.
    override size_t outputLength(size_t input_length) const
    { return input_length + tagSize(); }

    override size_t minimumFinalSize() const { return 0; }

    /**
    * Encrypt buffer[offset .. $] in place. Order matters: encrypt first,
    * then GHASH the resulting ciphertext (GCM authenticates ciphertext).
    */
    override void update(ref SecureVector!ubyte buffer, size_t offset = 0)
    {
        assert(buffer.length >= offset, "Offset is sane");
        const size_t sz = buffer.length - offset;
        ubyte* buf = buffer.ptr + offset;

        m_ctr.cipher(buf, buf, sz);
        m_ghash.update(buf, sz);
    }

    /// Encrypt any remaining input, then append the (possibly truncated) tag.
    override void finish(ref SecureVector!ubyte buffer, size_t offset = 0)
    {
        import std.algorithm : max; // NOTE(review): unused import
        update(buffer, offset);
        auto mac = m_ghash.finished();
        buffer ~= mac.ptr[0 .. tagSize()];
    }

    // Interface fallthrough
    override string provider() const { return "core"; }
    override SecureVector!ubyte startRaw(const(ubyte)* nonce, size_t nonce_len) { return super.startRaw(nonce, nonce_len); }
    override size_t updateGranularity() const { return super.updateGranularity(); }
    override size_t defaultNonceLength() const { return super.defaultNonceLength(); }
    override bool validNonceLength(size_t nonce_len) const { return super.validNonceLength(nonce_len); }
    override @property string name() const { return super.name; }
    override void clear() { return super.clear(); }
}

/**
* GCM Decryption
*/
final class GCMDecryption : GCMMode, Transformation
{
public:
    /**
    * Params:
    *  cipher = the 128 bit block cipher to use
    *  tag_size = is how big the auth tag will be
    */
    this(BlockCipher cipher, size_t tag_size = 16)
    {
        super(cipher, tag_size);
    }

    /// Plaintext length = ciphertext length minus the trailing tag.
    override size_t outputLength(size_t input_length) const
    {
        assert(input_length > tagSize(), "Sufficient input");
        return input_length - tagSize();
    }

    /// finish() must receive at least the tag bytes.
    override size_t minimumFinalSize() const { return tagSize(); }

    /**
    * Decrypt buffer[offset .. $] in place. Mirror image of encryption:
    * GHASH the ciphertext first, then decrypt it.
    */
    override void update(ref SecureVector!ubyte buffer, size_t offset = 0)
    {
        assert(buffer.length >= offset, "Offset is sane");
        const size_t sz = buffer.length - offset;
        ubyte* buf = buffer.ptr + offset;

        m_ghash.update(buf, sz);
        m_ctr.cipher(buf, buf, sz);
    }

    /**
    * Process the final chunk (ciphertext remainder || tag), verify the tag
    * in constant time, and shrink the buffer to the plaintext.
    * Throws: IntegrityFailure if the tag does not match.
    */
    override void finish(ref SecureVector!ubyte buffer, size_t offset)
    {
        assert(buffer.length >= offset, "Offset is sane");
        const size_t sz = buffer.length - offset;

        ubyte* buf = buffer.ptr + offset;

        assert(sz >= tagSize(), "Have the tag as part of final input");

        const size_t remaining = sz - tagSize();

        // handle any final input before the tag
        if (remaining)
        {
            m_ghash.update(buf, remaining);

            m_ctr.cipher(buf, buf, remaining);
        }

        auto mac = m_ghash.finished();

        const(ubyte)* included_tag = &buffer[remaining];

        // sameMem is a constant-time comparison; only the first tagSize()
        // bytes of the computed MAC are checked (tag may be truncated to 8).
        if (!sameMem(mac.ptr, included_tag, tagSize()))
            throw new IntegrityFailure("GCM tag check failed");

        // Drop the tag, leaving offset + plaintext in the buffer.
        buffer.resize(offset + remaining);
    }

    // Interface fallthrough
    override string provider() const { return "core"; }
    override SecureVector!ubyte startRaw(const(ubyte)* nonce, size_t nonce_len) { return super.startRaw(nonce, nonce_len); }
    override size_t updateGranularity() const { return super.updateGranularity(); }
    override size_t defaultNonceLength() const { return super.defaultNonceLength(); }
    override bool validNonceLength(size_t nonce_len) const { return super.validNonceLength(nonce_len); }
    override @property string name() const { return super.name; }
    override void clear() { return super.clear(); }
}

/**
* GCM's GHASH
* Maybe a Transform?
*
* Universal hash over GF(2^128) keyed with H = E_K(0^128). State layout:
* m_H = subkey, m_H_ad = hash of associated data only, m_ghash = running
* hash of AD + ciphertext, m_nonce = E_K(y0) captured in start().
*/
final class GHASH : SymmetricAlgorithm
{
public:
    /// Hash the associated data into m_H_ad; must be set before start().
    void setAssociatedData(const(ubyte)* input, size_t length)
    {
        zeroise(m_H_ad);
        ghashUpdate(m_H_ad, input, length);
        m_ad_len = length;
    }

    /**
    * Derive the pre-counter block y0 from a non-96-bit nonce:
    * GHASH over the padded nonce followed by a length block with
    * ad_len = 0 and text_len = nonce_len (see addFinalBlock).
    */
    SecureVector!ubyte nonceHash(const(ubyte)* nonce, size_t nonce_len)
    {
        assert(m_ghash.length == 0, "nonceHash called during wrong time");
        SecureVector!ubyte y0 = SecureVector!ubyte(16);

        ghashUpdate(y0, nonce, nonce_len);
        addFinalBlock(y0, 0, nonce_len);

        return y0.move;
    }

    /**
    * Begin a message: stash E_K(y0) (XORed into the tag at the end) and
    * seed the running hash with the associated-data hash.
    */
    void start(const(ubyte)* nonce, size_t len)
    {
        // NOTE(review): slice assignment assumes m_nonce.length == len (16);
        // presumably SecureVector's opSliceAssign resizes or this is always
        // called with len == 16 from GCMMode.startRaw — confirm.
        m_nonce[] = nonce[0 .. len];
        m_ghash = m_H_ad.dup;
    }

    /*
    * Assumes input len is multiple of 16
    */
    void update(const(ubyte)* input, size_t length)
    {
        assert(m_ghash.length == 16, "Key was set");

        m_text_len += length;

        ghashUpdate(m_ghash, input, length);
    }

    /**
    * Close the message: absorb the length block, XOR in E_K(y0), and
    * return (and reset) the tag. NOTE: m_ad_len is intentionally kept so
    * the same AD can span multiple messages; m_text_len is reset.
    */
    SecureVector!ubyte finished()
    {
        addFinalBlock(m_ghash, m_ad_len, m_text_len);
        m_ghash ^= m_nonce;
        m_text_len = 0;
        return m_ghash.move;
    }

    KeyLengthSpecification keySpec() const { return KeyLengthSpecification(16); }

    /// Zeroise keyed material and counters.
    /// NOTE(review): m_nonce (holds E_K(y0)) is not zeroised here — confirm intended.
    override void clear()
    {
        zeroise(m_H);
        zeroise(m_H_ad);
        m_ghash.clear();
        m_text_len = m_ad_len = 0;
    }

    @property string name() const { return "GHASH"; }

    /// Install the 16-byte hash subkey H and reset per-message state.
    override void keySchedule(const(ubyte)* key, size_t length)
    {
        m_H[] = key[0 .. length];
        m_H_ad.resize(16);
        m_ad_len = 0;
        m_text_len = 0;
    }

private:
    /**
    * x <- x * H in GF(2^128) with GCM's bit-reflected representation and
    * reduction polynomial (R = 0xE1 << 56, per SP 800-38D). Uses the CLMUL
    * code path when the CPU supports it, else a shift-and-add bitwise loop.
    */
    void gcmMultiply(ref SecureVector!ubyte x)
    {
        import std.algorithm : max; // NOTE(review): unused import
        static if (BOTAN_HAS_GCM_CLMUL) {
            if (CPUID.hasClmul()) {
                return gcmMultiplyClmul(*cast(ubyte[16]*) x.ptr, *cast(ubyte[16]*) m_H.ptr);
            }
        }

        // Top byte of GCM's reduction polynomial x^128 + x^7 + x^2 + x + 1.
        __gshared immutable ulong R = 0xE100000000000000;

        ulong[2] H = [ loadBigEndian!ulong(m_H.ptr, 0), loadBigEndian!ulong(m_H.ptr, 1) ];
        ulong[2] Z = [ 0, 0 ];

        // SSE2 might be useful here
        foreach (size_t i; 0 .. 2)
        {
            const ulong X = loadBigEndian!ulong(x.ptr, i);

            // Classic bit-serial multiply: for each set bit of X (MSB first),
            // accumulate H into Z, then shift H right one bit with reduction.
            // Data-dependent branch on X bits; not constant-time w.r.t. x.
            foreach (size_t j; 0 .. 64)
            {
                if ((X >> (63-j)) & 1)
                {
                    Z[0] ^= H[0];
                    Z[1] ^= H[1];
                }

                // If the bit shifted out of H is set, fold in R (reduction).
                const ulong r = (H[1] & 1) ? R : 0;

                H[1] = (H[0] << 63) | (H[1] >> 1);
                H[0] = (H[0] >> 1) ^ r;
            }
        }

        storeBigEndian!ulong(x.ptr, Z[0], Z[1]);
    }

    /// Absorb `length` bytes into ghash: XOR each 16-byte chunk then multiply by H.
    void ghashUpdate(ref SecureVector!ubyte ghash, const(ubyte)* input, size_t length)
    {
        __gshared immutable size_t BS = 16;

        /*
        This assumes if less than block size input then we're just on the
        final block and should pad with zeros
        */
        while (length)
        {
            const size_t to_proc = min(length, BS);

            xorBuf(ghash.ptr, input, to_proc);
            gcmMultiply(ghash);

            input += to_proc;
            length -= to_proc;
        }
    }

    /// Absorb the GCM length block: [len(AD) in bits || len(text) in bits].
    void addFinalBlock(ref SecureVector!ubyte hash,
                       size_t ad_len, size_t text_len)
    {
        SecureVector!ubyte final_block = SecureVector!ubyte(16);
        storeBigEndian!ulong(final_block.ptr, 8*ad_len, 8*text_len);
        ghashUpdate(hash, final_block.ptr, final_block.length);
    }

    SecureVector!ubyte m_H;      // hash subkey H = E_K(0^128)
    SecureVector!ubyte m_H_ad;   // GHASH of associated data alone
    SecureVector!ubyte m_nonce;  // E_K(y0), XORed into the final tag
    SecureVector!ubyte m_ghash;  // running hash state for the current message
    size_t m_ad_len = 0, m_text_len = 0; // byte counts for the length block
}



/**
* Carry-less multiply in GF(2^128) via PCLMULQDQ: x <- x * H.
* Implements Algorithms 1 and 5 from Intel's CLMUL white paper
* (schoolbook 64x64 carry-less multiply + shift, then reduction).
* Two paths: hand-written x86-64 asm for DMD (which lacks the intrinsics),
* otherwise compiler intrinsics. Both byte-swap in/out since GCM state is
* big-endian while the arithmetic is done little-endian.
*/
static if (BOTAN_HAS_GCM_CLMUL)
void gcmMultiplyClmul(ref ubyte[16] x, in ubyte[16] H)
{
    __gshared immutable(__m128i) BSWAP_MASK = _mm_set1_epi8!([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])();
    version(D_InlineAsm_X86_64) {
        version(DMD) {
            enum USE_ASM = true;
        } else enum USE_ASM = false;
    } else enum USE_ASM = false;

    static if (USE_ASM) {
        __m128i* a = cast(__m128i*) x.ptr;
        __m128i* b = cast(__m128i*) H.ptr;
        __m128i* c = cast(__m128i*) &BSWAP_MASK;

        // Register map: XMM13 = a, XMM14 = b, XMM0-XMM5 = T0-T5,
        // XMM6 = scratch, XMM15 = byte-swap mask. The raw `db` sequences
        // encode PCLMULQDQ (66 0F 3A 44), which DMD's asm does not name.
        asm pure nothrow {
            mov RAX, a;
            mov RBX, b;
            mov RCX, c;
            movdqu XMM13, [RAX]; // __m128i a = _mm_loadu_si128(cast(const(__m128i*)) x.ptr);
            movdqu XMM14, [RBX]; // __m128i b = _mm_loadu_si128(cast(const(__m128i*)) H.ptr);
            movdqu XMM15, [RCX];
            pshufb XMM13, XMM15; // a = _mm_shuffle_epi8(a, BSWAP_MASK);
            pshufb XMM14, XMM15; // b = _mm_shuffle_epi8(b, BSWAP_MASK);
            movdqa XMM0, XMM13; // XMM0 => T0
            movdqa XMM1, XMM13; // XMM1 => T1
            movdqa XMM2, XMM13; // XMM2 => T2
            movdqa XMM3, XMM13; // XMM3 => T3

            db 0x66, 0x41, 0x0F, 0x3A, 0x44, 0xC6, 0x00; // T0 = _mm_clmulepi64_si128!"0x00"(a, b);
            db 0x66, 0x41, 0x0F, 0x3A, 0x44, 0xCE, 0x01; // T1 = _mm_clmulepi64_si128!"0x01"(a, b);
            db 0x66, 0x41, 0x0F, 0x3A, 0x44, 0xD6, 0x10; // T2 = _mm_clmulepi64_si128!"0x10"(a, b);
            db 0x66, 0x41, 0x0F, 0x3A, 0x44, 0xDE, 0x11; // T3 = _mm_clmulepi64_si128!"0x11"(a, b);
            pxor XMM1, XMM2; // T1 = _mm_xor_si128(T1, T2);
            movdqa XMM6, XMM1;
            pslldq XMM6, 8; // T2 = _mm_slli_si128!8(T1);
            movdqa XMM2, XMM6;
            psrldq XMM1, 8; // T1 = _mm_srli_si128!8(T1);
            pxor XMM0, XMM2; // T0 = _mm_xor_si128(T0, T2);
            pxor XMM3, XMM1; // T3 = _mm_xor_si128(T3, T1);
            movdqa XMM6, XMM0;
            psrld XMM6, 31; // T4 = _mm_srli_epi32!31(T0)
            movdqa XMM4, XMM6;
            pslld XMM0, 1; // T0 = _mm_slli_epi32!1(T0);
            movdqa XMM6, XMM3;
            psrld XMM6, 31;
            movdqa XMM5, XMM6; // T5 = _mm_srli_epi32!31(T3);
            pslld XMM3, 1; // T3 = _mm_slli_epi32!1(T3);
            movdqa XMM6, XMM4;
            psrldq XMM6, 12; // T2 = _mm_srli_si128!12(T4);
            movdqa XMM2, XMM6;
            pslldq XMM5, 4; // T5 = _mm_slli_si128!4(T5);
            pslldq XMM4, 4; // T4 = _mm_slli_si128!4(T4);
            por XMM0, XMM4; // T0 = _mm_or_si128(T0, T4);
            por XMM3, XMM5; // T3 = _mm_or_si128(T3, T5);
            por XMM3, XMM2; // T3 = _mm_or_si128(T3, T2);
            movdqa XMM6, XMM0;
            pslld XMM6, 31; // T4 = _mm_slli_epi32!31(T0);
            movdqa XMM4, XMM6;
            movdqa XMM6, XMM0;
            pslld XMM6, 30; // T5 = _mm_slli_epi32!30(T0);
            movdqa XMM5, XMM6;
            movdqa XMM6, XMM0;
            pslld XMM6, 25; // T2 = _mm_slli_epi32!25(T0);
            movdqa XMM2, XMM6;
            pxor XMM4, XMM5; // T4 = _mm_xor_si128(T4, T5);
            pxor XMM4, XMM2; // T4 = _mm_xor_si128(T4, T2);
            movdqa XMM6, XMM4;
            psrldq XMM6, 4; // T5 = _mm_srli_si128!4(T4);
            movdqa XMM5, XMM6;
            pxor XMM3, XMM5; // T3 = _mm_xor_si128(T3, T5);
            pslldq XMM4, 12; // T4 = _mm_slli_si128!12(T4);
            pxor XMM0, XMM4; // T0 = _mm_xor_si128(T0, T4);
            pxor XMM3, XMM0; // T3 = _mm_xor_si128(T3, T0);
            movdqa XMM6, XMM0;
            psrld XMM6, 1; // T4 = _mm_srli_epi32!1(T0);
            movdqa XMM4, XMM6;
            movdqa XMM6, XMM0;
            psrld XMM6, 2; // T1 = _mm_srli_epi32!2(T0);
            movdqa XMM1, XMM6;
            movdqa XMM6, XMM0;
            psrld XMM6, 7; // T2 = _mm_srli_epi32!7(T0);
            movdqa XMM2, XMM6;
            pxor XMM3, XMM1; // T3 = _mm_xor_si128(T3, T1);
            pxor XMM3, XMM2; // T3 = _mm_xor_si128(T3, T2);
            pxor XMM3, XMM4; // T3 = _mm_xor_si128(T3, T4);
            mov RCX, c;
            movdqu XMM15, [RCX];
            pshufb XMM3, XMM15; // T3 = _mm_shuffle_epi8(T3, BSWAP_MASK);
            mov RAX, a;
            movdqu [RAX], XMM3; // _mm_storeu_si128(cast(__m128i*) x.ptr, T3);
        }
    }
    else {
        /*
        * Algorithms 1 and 5 from Intel's CLMUL guide
        */
        __m128i a = _mm_loadu_si128(cast(const(__m128i*)) x.ptr);
        __m128i b = _mm_loadu_si128(cast(const(__m128i*)) H.ptr);

        a = _mm_shuffle_epi8(a, BSWAP_MASK);
        b = _mm_shuffle_epi8(b, BSWAP_MASK);

        __m128i T0, T1, T2, T3, T4, T5;

        // 128x128 carry-less multiply as four 64x64 products (Karatsuba-free
        // schoolbook): T0 = lo*lo, T1/T2 = cross terms, T3 = hi*hi.
        T0 = _mm_clmulepi64_si128!"0x00"(a, b);
        T1 = _mm_clmulepi64_si128!"0x01"(a, b);
        T2 = _mm_clmulepi64_si128!"0x10"(a, b);
        T3 = _mm_clmulepi64_si128!"0x11"(a, b);

        // Fold the two cross terms into the 256-bit product (T3:T0).
        T1 = _mm_xor_si128(T1, T2);
        T2 = _mm_slli_si128!8(T1);
        T1 = _mm_srli_si128!8(T1);
        T0 = _mm_xor_si128(T0, T2);
        T3 = _mm_xor_si128(T3, T1);

        // Shift the whole 256-bit product left by one bit (bit-reflection fix),
        // carrying across the 32-bit lanes via the 31-bit right shifts.
        T4 = _mm_srli_epi32!31(T0);
        T0 = _mm_slli_epi32!1(T0);

        T5 = _mm_srli_epi32!31(T3);
        T3 = _mm_slli_epi32!1(T3);

        T2 = _mm_srli_si128!12(T4);
        T5 = _mm_slli_si128!4(T5);
        T4 = _mm_slli_si128!4(T4);
        T0 = _mm_or_si128(T0, T4);
        T3 = _mm_or_si128(T3, T5);
        T3 = _mm_or_si128(T3, T2);

        // Reduction modulo x^128 + x^7 + x^2 + x + 1 (shifts by 31/30/25
        // then 1/2/7 implement the polynomial's terms).
        T4 = _mm_slli_epi32!31(T0);
        T5 = _mm_slli_epi32!30(T0);
        T2 = _mm_slli_epi32!25(T0);

        T4 = _mm_xor_si128(T4, T5);
        T4 = _mm_xor_si128(T4, T2);
        T5 = _mm_srli_si128!4(T4);
        T3 = _mm_xor_si128(T3, T5);
        T4 = _mm_slli_si128!12(T4);
        T0 = _mm_xor_si128(T0, T4);
        T3 = _mm_xor_si128(T3, T0);

        T4 = _mm_srli_epi32!1(T0);
        T1 = _mm_srli_epi32!2(T0);
        T2 = _mm_srli_epi32!7(T0);
        T3 = _mm_xor_si128(T3, T1);
        T3 = _mm_xor_si128(T3, T2);
        T3 = _mm_xor_si128(T3, T4);

        T3 = _mm_shuffle_epi8(T3, BSWAP_MASK);

        _mm_storeu_si128(cast(__m128i*) x.ptr, T3);
    }
}