1 /**
2 * Serpent (SIMD)
3 * 
4 * Copyright:
5 * (C) 2009 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *
8 * License:
9 * Botan is released under the Simplified BSD License (see LICENSE.md)
10 */
11 module botan.block.serp_simd;
12 
13 import botan.constants;
14 static if (BOTAN_HAS_SERPENT_SIMD):
15 
16 import botan.simd.simd_32;
17 import botan.utils.loadstor;
18 import botan.block.serpent;
19 import botan.block.block_cipher;
20 import botan.utils.mem_ops;
21 
/**
* Serpent implementation using SIMD.
*
* Groups of four blocks are processed by serpent_encrypt_4 / serpent_decrypt_4;
* any remaining blocks (fewer than four) are forwarded to the base Serpent
* implementation.
*/
final class SerpentSIMD : Serpent
{
public:
    /// Number of blocks consumed by one SIMD pass.
    override @property size_t parallelism() const { return 4; }

    /**
    * Serpent Encryption
    *
    * Params:
    *  input = pointer to `blocks` consecutive plaintext blocks
    *  output = pointer receiving `blocks` consecutive ciphertext blocks
    *  blocks = number of blocks to process
    */
    override void encryptN(const(ubyte)* input, ubyte* output, size_t blocks)
    {
        const(uint)* KS = this.getRoundKeys().ptr;
        // Reinterpret the round keys as a fixed-size array; keep them const.
        const(uint[132])* roundKeys = cast(const(uint[132])*) KS;

        for (; blocks >= 4; blocks -= 4)
        {
            // The input view stays const; only the output view is mutable.
            serpent_encrypt_4(*cast(const(ubyte[64])*) input, *cast(ubyte[64]*) output, *roundKeys);
            input += 4 * BLOCK_SIZE;
            output += 4 * BLOCK_SIZE;
        }

        // Fewer than four blocks left: fall back to the base implementation.
        if (blocks)
            super.encryptN(input, output, blocks);
    }

    /**
    * Serpent Decryption
    *
    * Params:
    *  input = pointer to `blocks` consecutive ciphertext blocks
    *  output = pointer receiving `blocks` consecutive plaintext blocks
    *  blocks = number of blocks to process
    */
    override void decryptN(const(ubyte)* input, ubyte* output, size_t blocks)
    {
        const(uint)* KS = this.getRoundKeys().ptr;
        // Reinterpret the round keys as a fixed-size array; keep them const.
        const(uint[132])* roundKeys = cast(const(uint[132])*) KS;

        for (; blocks >= 4; blocks -= 4)
        {
            // The input view stays const; only the output view is mutable.
            serpent_decrypt_4(*cast(const(ubyte[64])*) input, *cast(ubyte[64]*) output, *roundKeys);
            input += 4 * BLOCK_SIZE;
            output += 4 * BLOCK_SIZE;
        }

        // Fewer than four blocks left: fall back to the base implementation.
        if (blocks)
            super.decryptN(input, output, blocks);
    }

    /// Returns a new, unkeyed SerpentSIMD instance.
    override BlockCipher clone() const { return new SerpentSIMD; }
}
70 
71 package:
72 
/*
* SIMD Serpent Encryption of 4 blocks in parallel
*
* Params:
*  input = 64 bytes, read as four 16-byte little-endian loads
*  output = 64 bytes, written as four 16-byte little-endian stores
*  keys = 132 round-key words; round r uses keys[4*r .. 4*r+4]
*
* The arrays are taken by reference so that neither the input nor the
* round-key schedule is copied onto the stack for every call. All loads from
* `input` complete before the first store to `output`, so the two may refer
* to the same memory.
*/
void serpent_encrypt_4(ref const(ubyte[64]) input, ref ubyte[64] output, ref const(uint[132]) keys)
{
    SIMD32 B0 = SIMD32.loadLittleEndian(input.ptr);
    SIMD32 B1 = SIMD32.loadLittleEndian(input.ptr + 16);
    SIMD32 B2 = SIMD32.loadLittleEndian(input.ptr + 32);
    SIMD32 B3 = SIMD32.loadLittleEndian(input.ptr + 48);

    // NOTE(review): presumably regroups the data so each register holds one
    // word position of all four blocks - confirm against SIMD32.transpose.
    SIMD32.transpose(B0, B1, B2, B3);

    // Rounds 0-7: round-key XOR, S-box, linear transform
    mixin(key_xor!( 0) ~  SBoxE1!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 1) ~  SBoxE2!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 2) ~  SBoxE3!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 3) ~  SBoxE4!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 4) ~  SBoxE5!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 5) ~  SBoxE6!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 6) ~  SBoxE7!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 7) ~  SBoxE8!("B0", "B1", "B2", "B3") ~ transform);

    // Rounds 8-15
    mixin(key_xor!( 8) ~  SBoxE1!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 9) ~  SBoxE2!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(10) ~  SBoxE3!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(11) ~  SBoxE4!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(12) ~  SBoxE5!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(13) ~  SBoxE6!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(14) ~  SBoxE7!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(15) ~  SBoxE8!("B0", "B1", "B2", "B3") ~ transform);

    // Rounds 16-23
    mixin(key_xor!(16) ~  SBoxE1!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(17) ~  SBoxE2!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(18) ~  SBoxE3!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(19) ~  SBoxE4!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(20) ~  SBoxE5!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(21) ~  SBoxE6!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(22) ~  SBoxE7!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(23) ~  SBoxE8!("B0", "B1", "B2", "B3") ~ transform);

    // Rounds 24-31; the final round applies round key 32 in place of the
    // linear transform
    mixin(key_xor!(24) ~  SBoxE1!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(25) ~  SBoxE2!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(26) ~  SBoxE3!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(27) ~  SBoxE4!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(28) ~  SBoxE5!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(29) ~  SBoxE6!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(30) ~  SBoxE7!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(31) ~  SBoxE8!("B0", "B1", "B2", "B3") ~ key_xor!(32));

    // Undo the initial transpose before writing the result
    SIMD32.transpose(B0, B1, B2, B3);

    B0.storeLittleEndian(output.ptr);
    B1.storeLittleEndian(output.ptr + 16);
    B2.storeLittleEndian(output.ptr + 32);
    B3.storeLittleEndian(output.ptr + 48);
}
128 
/*
* SIMD Serpent Decryption of 4 blocks in parallel
*
* Params:
*  input = 64 bytes, read as four 16-byte little-endian loads
*  output = 64 bytes, written as four 16-byte little-endian stores
*  keys = 132 round-key words; round r uses keys[4*r .. 4*r+4]
*
* The arrays are taken by reference so that neither the input nor the
* round-key schedule is copied onto the stack for every call. All loads from
* `input` complete before the first store to `output`, so the two may refer
* to the same memory.
*/
void serpent_decrypt_4(ref const(ubyte[64]) input, ref ubyte[64] output, ref const(uint[132]) keys)
{
    SIMD32 B0 = SIMD32.loadLittleEndian(input.ptr);
    SIMD32 B1 = SIMD32.loadLittleEndian(input.ptr + 16);
    SIMD32 B2 = SIMD32.loadLittleEndian(input.ptr + 32);
    SIMD32 B3 = SIMD32.loadLittleEndian(input.ptr + 48);

    // NOTE(review): presumably regroups the data so each register holds one
    // word position of all four blocks - confirm against SIMD32.transpose.
    SIMD32.transpose(B0, B1, B2, B3);

    // Rounds 31-24; the first step removes round key 32, which encryption
    // applied in place of the last linear transform
    mixin(key_xor!(32));  mixin(SBoxD8); mixin(key_xor!(31));
    mixin(i_transform); mixin(SBoxD7); mixin(key_xor!(30));
    mixin(i_transform); mixin(SBoxD6); mixin(key_xor!(29));
    mixin(i_transform); mixin(SBoxD5); mixin(key_xor!(28));
    mixin(i_transform); mixin(SBoxD4); mixin(key_xor!(27));
    mixin(i_transform); mixin(SBoxD3); mixin(key_xor!(26));
    mixin(i_transform); mixin(SBoxD2); mixin(key_xor!(25));
    mixin(i_transform); mixin(SBoxD1); mixin(key_xor!(24));

    // Rounds 23-16: inverse linear transform, inverse S-box, round-key XOR
    mixin(i_transform); mixin(SBoxD8); mixin(key_xor!(23));
    mixin(i_transform); mixin(SBoxD7); mixin(key_xor!(22));
    mixin(i_transform); mixin(SBoxD6); mixin(key_xor!(21));
    mixin(i_transform); mixin(SBoxD5); mixin(key_xor!(20));
    mixin(i_transform); mixin(SBoxD4); mixin(key_xor!(19));
    mixin(i_transform); mixin(SBoxD3); mixin(key_xor!(18));
    mixin(i_transform); mixin(SBoxD2); mixin(key_xor!(17));
    mixin(i_transform); mixin(SBoxD1); mixin(key_xor!(16));

    // Rounds 15-8
    mixin(i_transform); mixin(SBoxD8); mixin(key_xor!(15));
    mixin(i_transform); mixin(SBoxD7); mixin(key_xor!(14));
    mixin(i_transform); mixin(SBoxD6); mixin(key_xor!(13));
    mixin(i_transform); mixin(SBoxD5); mixin(key_xor!(12));
    mixin(i_transform); mixin(SBoxD4); mixin(key_xor!(11));
    mixin(i_transform); mixin(SBoxD3); mixin(key_xor!(10));
    mixin(i_transform); mixin(SBoxD2); mixin(key_xor!( 9));
    mixin(i_transform); mixin(SBoxD1); mixin(key_xor!( 8));

    // Rounds 7-0
    mixin(i_transform); mixin(SBoxD8); mixin(key_xor!( 7));
    mixin(i_transform); mixin(SBoxD7); mixin(key_xor!( 6));
    mixin(i_transform); mixin(SBoxD6); mixin(key_xor!( 5));
    mixin(i_transform); mixin(SBoxD5); mixin(key_xor!( 4));
    mixin(i_transform); mixin(SBoxD4); mixin(key_xor!( 3));
    mixin(i_transform); mixin(SBoxD3); mixin(key_xor!( 2));
    mixin(i_transform); mixin(SBoxD2); mixin(key_xor!( 1));
    mixin(i_transform); mixin(SBoxD1); mixin(key_xor!( 0));

    // Undo the initial transpose before writing the result
    SIMD32.transpose(B0, B1, B2, B3);

    B0.storeLittleEndian(output.ptr);
    B1.storeLittleEndian(output.ptr + 16);
    B2.storeLittleEndian(output.ptr + 32);
    B3.storeLittleEndian(output.ptr + 48);
}
184 
185 private:
186 
/*
* Serpent's linear transformations
*
* Both constants are source fragments intended for `mixin(...)` inside a scope
* that declares the four SIMD32 state words B0, B1, B2 and B3. They are
* written as token strings so the compiler lexes the embedded statements.
* The statement order is significant: each step reads words already updated
* by the preceding steps.
*/

/// Forward linear transformation, applied after the S-box in encryption rounds.
enum string transform = q{
    B0.rotateLeft!13();
    B2.rotateLeft!3();
    B1 ^= B0 ^ B2;
    B3 ^= B2 ^ (B0.lshift!3());
    B1.rotateLeft!1();
    B3.rotateLeft!7();
    B0 ^= B1 ^ B3;
    B2 ^= B3 ^ (B1.lshift!7());
    B0.rotateLeft!5();
    B2.rotateLeft!22();
};

/// Inverse linear transformation: the steps of `transform` undone in reverse order.
enum string i_transform = q{
    B2.rotateRight!22();
    B0.rotateRight!5();
    B2 ^= B3 ^ (B1.lshift!7());
    B0 ^= B1 ^ B3;
    B3.rotateRight!7();
    B1.rotateRight!1();
    B3 ^= B2 ^ (B0.lshift!3());
    B1 ^= B0 ^ B2;
    B2.rotateRight!3();
    B0.rotateRight!13();
};
213 
/*
* Source fragment for `mixin(...)`: XORs the four round-key words of round
* `round` (keys[4*round] .. keys[4*round+3]) into B0..B3, constructing a
* SIMD32 from each key word. The enclosing scope must provide `keys`, B0, B1,
* B2 and B3.
*/
template key_xor(uint round)
{
    // Textual form of the round number spliced into each index expression
    enum string idx = round.stringof;

    enum string key_xor =
          "B0 ^= SIMD32(keys[4*" ~ idx ~ "  ]);\n"
        ~ "    B1 ^= SIMD32(keys[4*" ~ idx ~ "+1]);\n"
        ~ "    B2 ^= SIMD32(keys[4*" ~ idx ~ "+2]);\n"
        ~ "    B3 ^= SIMD32(keys[4*" ~ idx ~ "+3]);";
}