1 /**
2 * XTEA in SIMD
3 * 
4 * Copyright:
5 * (C) 2009 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *
8 * License:
9 * Botan is released under the Simplified BSD License (see LICENSE.md)
10 */
11 module botan.block.xtea_simd;
12 
13 import botan.constants;
14 static if (BOTAN_HAS_XTEA_SIMD):
15 
16 
17 import botan.block.xtea;
18 import botan.utils.loadstor;
19 import botan.simd.simd_32;
20 import std.range : iota;
21 import botan.block.block_cipher;
22 import botan.utils.mem_ops;
23 
24 /**
25 * XTEA implemented using SIMD operations
26 */
final class XTEASIMD : XTEA
{
public:
    /// Reports 8, which matches the eight-block batches consumed by encryptN/decryptN.
    override @property size_t parallelism() const { return 8; }

    /**
    * Encrypts `blocks` consecutive blocks read from `input` into `output`.
    *
    * Full batches of eight blocks are handed to xtea_encrypt_8; any remainder
    * (fewer than eight blocks) is delegated to the base class implementation.
    */
    override void encryptN(const(ubyte)* input, ubyte* output, size_t blocks)
    {
        const uint* round_keys = this.getEK().ptr;

        for (; blocks >= 8; blocks -= 8)
        {
            // The kernel takes fixed-size views: 64 data bytes in, 64 data bytes out, 64 key words.
            xtea_encrypt_8(*cast(ubyte[64]*) input, *cast(ubyte[64]*) output, *cast(uint[64]*) round_keys);
            input += 8 * BLOCK_SIZE;
            output += 8 * BLOCK_SIZE;
        }

        // Leftover blocks take the non-batched path of the parent class.
        if (blocks != 0)
            super.encryptN(input, output, blocks);
    }

    /**
    * Decrypts `blocks` consecutive blocks read from `input` into `output`.
    *
    * Full batches of eight blocks are handed to xtea_decrypt_8; any remainder
    * (fewer than eight blocks) is delegated to the base class implementation.
    */
    override void decryptN(const(ubyte)* input, ubyte* output, size_t blocks)
    {
        // The same key table as for encryption is passed on; xtea_decrypt_8 reads it back to front.
        const uint* round_keys = this.getEK().ptr;

        for (; blocks >= 8; blocks -= 8)
        {
            xtea_decrypt_8(*cast(ubyte[64]*) input, *cast(ubyte[64]*) output, *cast(uint[64]*) round_keys);
            input += 8 * BLOCK_SIZE;
            output += 8 * BLOCK_SIZE;
        }

        // Leftover blocks take the non-batched path of the parent class.
        if (blocks != 0)
            super.decryptN(input, output, blocks);
    }

    /// Returns a newly constructed XTEASIMD instance.
    override BlockCipher clone() const
    {
        return new XTEASIMD;
    }
}
72 
73 package:
74 
/**
* Encrypts 64 bytes at once using SIMD32 vectors.
*
* The input is read as four 16-byte big-endian loads and transposed, then
* run through 16 loop passes of four add/xor steps each, transposed back
* and stored big-endian.
*
* Params:
*  input = 64 bytes of data to encrypt
*  output = receives the 64 encrypted bytes
*  EK = 64 round-key words, consumed in ascending order, four per loop pass
*/
void xtea_encrypt_8(in ubyte[64] input, ref ubyte[64] output, in uint[64] EK)
{
    SIMD32 left_a  = SIMD32.loadBigEndian(input.ptr);
    SIMD32 right_a = SIMD32.loadBigEndian(input.ptr + 16);
    SIMD32 left_b  = SIMD32.loadBigEndian(input.ptr + 32);
    SIMD32 right_b = SIMD32.loadBigEndian(input.ptr + 48);

    SIMD32.transpose(left_a, right_a, left_b, right_b);

    // Walk the key table four words at a time: offsets 0, 4, ..., 60.
    for (size_t k = 0; k < 64; k += 4)
    {
        // Each key word is broadcast into its own vector.
        SIMD32 key_a = SIMD32(EK[k]);
        SIMD32 key_b = SIMD32(EK[k + 1]);
        SIMD32 key_c = SIMD32(EK[k + 2]);
        SIMD32 key_d = SIMD32(EK[k + 3]);

        left_a += ((right_a.lshift!4() ^ right_a.rshift!5()) + right_a) ^ key_a;
        left_b += ((right_b.lshift!4() ^ right_b.rshift!5()) + right_b) ^ key_a;

        right_a += ((left_a.lshift!4() ^ left_a.rshift!5()) + left_a) ^ key_b;
        right_b += ((left_b.lshift!4() ^ left_b.rshift!5()) + left_b) ^ key_b;

        left_a += ((right_a.lshift!4() ^ right_a.rshift!5()) + right_a) ^ key_c;
        left_b += ((right_b.lshift!4() ^ right_b.rshift!5()) + right_b) ^ key_c;

        right_a += ((left_a.lshift!4() ^ left_a.rshift!5()) + left_a) ^ key_d;
        right_b += ((left_b.lshift!4() ^ left_b.rshift!5()) + left_b) ^ key_d;
    }

    // Undo the initial transpose before writing the result out.
    SIMD32.transpose(left_a, right_a, left_b, right_b);

    left_a.storeBigEndian(output.ptr);
    right_a.storeBigEndian(output.ptr + 16);
    left_b.storeBigEndian(output.ptr + 32);
    right_b.storeBigEndian(output.ptr + 48);
}
111 
/**
* Decrypts 64 bytes at once using SIMD32 vectors.
*
* Mirrors xtea_encrypt_8: the same load/transpose framing is used, but each
* step subtracts instead of adds, the right halves are updated before the
* left halves, and the key table is read from its last word down to its first.
*
* Params:
*  input = 64 bytes of data to decrypt
*  output = receives the 64 decrypted bytes
*  EK = 64 round-key words, consumed in descending order, four per loop pass
*/
void xtea_decrypt_8(in ubyte[64] input, ref ubyte[64] output, in uint[64] EK)
{
    SIMD32 left_a  = SIMD32.loadBigEndian(input.ptr);
    SIMD32 right_a = SIMD32.loadBigEndian(input.ptr + 16);
    SIMD32 left_b  = SIMD32.loadBigEndian(input.ptr + 32);
    SIMD32 right_b = SIMD32.loadBigEndian(input.ptr + 48);

    SIMD32.transpose(left_a, right_a, left_b, right_b);

    // `top` is one past the highest key word of this pass: 64, 60, ..., 4.
    for (size_t top = 64; top != 0; top -= 4)
    {
        // Each key word is broadcast into its own vector.
        SIMD32 key_a = SIMD32(EK[top - 1]);
        SIMD32 key_b = SIMD32(EK[top - 2]);
        SIMD32 key_c = SIMD32(EK[top - 3]);
        SIMD32 key_d = SIMD32(EK[top - 4]);

        right_a -= ((left_a.lshift!4() ^ left_a.rshift!5()) + left_a) ^ key_a;
        right_b -= ((left_b.lshift!4() ^ left_b.rshift!5()) + left_b) ^ key_a;

        left_a -= ((right_a.lshift!4() ^ right_a.rshift!5()) + right_a) ^ key_b;
        left_b -= ((right_b.lshift!4() ^ right_b.rshift!5()) + right_b) ^ key_b;

        right_a -= ((left_a.lshift!4() ^ left_a.rshift!5()) + left_a) ^ key_c;
        right_b -= ((left_b.lshift!4() ^ left_b.rshift!5()) + left_b) ^ key_c;

        left_a -= ((right_a.lshift!4() ^ right_a.rshift!5()) + right_a) ^ key_d;
        left_b -= ((right_b.lshift!4() ^ right_b.rshift!5()) + right_b) ^ key_d;
    }

    // Undo the initial transpose before writing the result out.
    SIMD32.transpose(left_a, right_a, left_b, right_b);

    left_a.storeBigEndian(output.ptr);
    right_a.storeBigEndian(output.ptr + 16);
    left_b.storeBigEndian(output.ptr + 32);
    right_b.storeBigEndian(output.ptr + 48);
}