1 /** 2 * XTEA in SIMD 3 * 4 * Copyright: 5 * (C) 2009 Jack Lloyd 6 * (C) 2014-2015 Etienne Cimon 7 * 8 * License: 9 * Botan is released under the Simplified BSD License (see LICENSE.md) 10 */ 11 module botan.block.xtea_simd; 12 13 import botan.constants; 14 static if (BOTAN_HAS_XTEA_SIMD): 15 16 17 import botan.block.xtea; 18 import botan.utils.loadstor; 19 import botan.simd.simd_32; 20 import std.range : iota; 21 import botan.block.block_cipher; 22 import botan.utils.mem_ops; 23 24 /** 25 * XTEA implemented using SIMD operations 26 */ 27 final class XTEASIMD : XTEA 28 { 29 public: 30 override @property size_t parallelism() const { return 8; } 31 32 /* 33 * XTEA Encryption 34 */ 35 override void encryptN(const(ubyte)* input, ubyte* output, size_t blocks) 36 { 37 const uint* KS = this.getEK().ptr; 38 39 while (blocks >= 8) 40 { 41 xtea_encrypt_8(*cast(ubyte[64]*) input, *cast(ubyte[64]*) output, *cast(uint[64]*) KS); 42 input += 8 * BLOCK_SIZE; 43 output += 8 * BLOCK_SIZE; 44 blocks -= 8; 45 } 46 47 if (blocks) 48 super.encryptN(input, output, blocks); 49 } 50 51 /* 52 * XTEA Decryption 53 */ 54 override void decryptN(const(ubyte)* input, ubyte* output, size_t blocks) 55 { 56 const uint* KS = this.getEK().ptr; 57 58 while (blocks >= 8) 59 { 60 xtea_decrypt_8(*cast(ubyte[64]*) input, *cast(ubyte[64]*) output, *cast(uint[64]*) KS); 61 input += 8 * BLOCK_SIZE; 62 output += 8 * BLOCK_SIZE; 63 blocks -= 8; 64 } 65 66 if (blocks) 67 super.decryptN(input, output, blocks); 68 } 69 70 override BlockCipher clone() const { return new XTEASIMD; } 71 } 72 73 package: 74 75 void xtea_encrypt_8(in ubyte[64] input, ref ubyte[64] output, in uint[64] EK) 76 { 77 SIMD32 L0 = SIMD32.loadBigEndian(input.ptr ); 78 SIMD32 R0 = SIMD32.loadBigEndian(input.ptr + 16); 79 SIMD32 L1 = SIMD32.loadBigEndian(input.ptr + 32); 80 SIMD32 R1 = SIMD32.loadBigEndian(input.ptr + 48); 81 82 SIMD32.transpose(L0, R0, L1, R1); 83 84 foreach (size_t i; iota(0, 32, 2)) 85 { 86 SIMD32 K0 = SIMD32(EK[2*i ]); 87 SIMD32 K1 = SIMD32(EK[2*i+1]); 88 SIMD32 K2 = SIMD32(EK[2*i+2]); 89 SIMD32 K3 = SIMD32(EK[2*i+3]); 90 91 L0 += (((R0.lshift!4()) ^ (R0.rshift!5())) + R0) ^ K0; 92 L1 += (((R1.lshift!4()) ^ (R1.rshift!5())) + R1) ^ K0; 93 94 R0 += (((L0.lshift!4()) ^ (L0.rshift!5())) + L0) ^ K1; 95 R1 += (((L1.lshift!4()) ^ (L1.rshift!5())) + L1) ^ K1; 96 97 L0 += (((R0.lshift!4()) ^ (R0.rshift!5())) + R0) ^ K2; 98 L1 += (((R1.lshift!4()) ^ (R1.rshift!5())) + R1) ^ K2; 99 100 R0 += (((L0.lshift!4()) ^ (L0.rshift!5())) + L0) ^ K3; 101 R1 += (((L1.lshift!4()) ^ (L1.rshift!5())) + L1) ^ K3; 102 } 103 104 SIMD32.transpose(L0, R0, L1, R1); 105 106 L0.storeBigEndian(output.ptr); 107 R0.storeBigEndian(output.ptr + 16); 108 L1.storeBigEndian(output.ptr + 32); 109 R1.storeBigEndian(output.ptr + 48); 110 } 111 112 void xtea_decrypt_8(in ubyte[64] input, ref ubyte[64] output, in uint[64] EK) 113 { 114 SIMD32 L0 = SIMD32.loadBigEndian(input.ptr ); 115 SIMD32 R0 = SIMD32.loadBigEndian(input.ptr + 16); 116 SIMD32 L1 = SIMD32.loadBigEndian(input.ptr + 32); 117 SIMD32 R1 = SIMD32.loadBigEndian(input.ptr + 48); 118 119 SIMD32.transpose(L0, R0, L1, R1); 120 121 foreach (size_t i; iota(0, 32, 2)) 122 { 123 SIMD32 K0 = SIMD32(EK[63 - 2*i]); 124 SIMD32 K1 = SIMD32(EK[62 - 2*i]); 125 SIMD32 K2 = SIMD32(EK[61 - 2*i]); 126 SIMD32 K3 = SIMD32(EK[60 - 2*i]); 127 128 R0 -= (((L0.lshift!4()) ^ (L0.rshift!5())) + L0) ^ K0; 129 R1 -= (((L1.lshift!4()) ^ (L1.rshift!5())) + L1) ^ K0; 130 131 L0 -= (((R0.lshift!4()) ^ (R0.rshift!5())) + R0) ^ K1; 132 L1 -= (((R1.lshift!4()) ^ (R1.rshift!5())) + R1) ^ K1; 133 134 R0 -= (((L0.lshift!4()) ^ (L0.rshift!5())) + L0) ^ K2; 135 R1 -= (((L1.lshift!4()) ^ (L1.rshift!5())) + L1) ^ K2; 136 137 L0 -= (((R0.lshift!4()) ^ (R0.rshift!5())) + R0) ^ K3; 138 L1 -= (((R1.lshift!4()) ^ (R1.rshift!5())) + R1) ^ K3; 139 } 140 141 SIMD32.transpose(L0, R0, L1, R1); 142 143 L0.storeBigEndian(output.ptr); 144 R0.storeBigEndian(output.ptr + 16); 145 L1.storeBigEndian(output.ptr + 32); 146 R1.storeBigEndian(output.ptr + 48); 147 }