/**
* Serpent (SIMD)
*
* Copyright:
* (C) 2009 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan.block.serp_simd;

import botan.constants;
static if (BOTAN_HAS_SERPENT_SIMD):

import botan.simd.simd_32;
import botan.utils.loadstor;
import botan.block.serpent;
import botan.block.block_cipher;
import botan.utils.mem_ops;

/**
* Serpent implementation using SIMD
*
* Groups of four blocks are handed to serpent_encrypt_4 / serpent_decrypt_4;
* any remaining blocks (fewer than four) are delegated to the base Serpent
* implementation.
*/
final class SerpentSIMD : Serpent
{
public:
    /**
    * Returns: 4, the number of blocks consumed by one pass of the SIMD routines.
    */
    override @property size_t parallelism() const { return 4; }

    /**
    * Serpent Encryption
    *
    * Params:
    *  input = source buffer holding `blocks` consecutive blocks
    *  output = destination buffer, advanced in step with `input`
    *  blocks = number of blocks to process
    *
    * NOTE(review): each group of four blocks is reinterpreted as 64 bytes while
    * the pointers advance by 4 * BLOCK_SIZE, so this assumes BLOCK_SIZE == 16
    * (BLOCK_SIZE is declared outside this file). The round-key storage is
    * reinterpreted as 132 words and is assumed to be fully populated -- confirm
    * that the key schedule has run before this is called.
    */
    override void encryptN(const(ubyte)* input, ubyte* output, size_t blocks)
    {
        const uint* KS = this.getRoundKeys().ptr;

        while (blocks >= 4)
        {
            // const-qualified casts: the input and key views are read-only (`in`) in the callee
            serpent_encrypt_4(*cast(const(ubyte)[64]*) input, *cast(ubyte[64]*) output, *cast(const(uint)[132]*) KS);
            input += 4 * BLOCK_SIZE;
            output += 4 * BLOCK_SIZE;
            blocks -= 4;
        }

        // fewer than four blocks left: use the base class implementation
        if (blocks)
            super.encryptN(input, output, blocks);
    }

    /**
    * Serpent Decryption
    *
    * Params:
    *  input = source buffer holding `blocks` consecutive blocks
    *  output = destination buffer, advanced in step with `input`
    *  blocks = number of blocks to process
    *
    * NOTE(review): same layout assumptions as encryptN (BLOCK_SIZE == 16 and
    * 132 populated round-key words) -- confirm against the base class.
    */
    override void decryptN(const(ubyte)* input, ubyte* output, size_t blocks)
    {
        const uint* KS = this.getRoundKeys().ptr;

        while (blocks >= 4)
        {
            // const-qualified casts: the input and key views are read-only (`in`) in the callee
            serpent_decrypt_4(*cast(const(ubyte)[64]*) input, *cast(ubyte[64]*) output, *cast(const(uint)[132]*) KS);
            input += 4 * BLOCK_SIZE;
            output += 4 * BLOCK_SIZE;
            blocks -= 4;
        }

        // fewer than four blocks left: use the base class implementation
        if (blocks)
            super.decryptN(input, output, blocks);
    }

    /// Returns: a newly constructed SerpentSIMD; no key material is copied here.
    override BlockCipher clone() const { return new SerpentSIMD; }
}

package:

/*
* SIMD Serpent Encryption of 4 blocks in parallel
*
* Loads the 64 input bytes as four little-endian SIMD32 values, transposes
* them, runs rounds 0..31 (key XOR, S-box, linear transform; the final round
* replaces the linear transform with the XOR of round key 32), transposes
* back and stores the result little-endian into `output`.
*
* The mixed-in code strings refer to the locals B0..B3 and to the parameter
* `keys` by name, so those identifiers must not be renamed. The SBoxE*
* templates are defined outside this file.
*/
void serpent_encrypt_4(in ubyte[64] input, ref ubyte[64] output, in uint[132] keys)
{
    SIMD32 B0 = SIMD32.loadLittleEndian(input.ptr);
    SIMD32 B1 = SIMD32.loadLittleEndian(input.ptr + 16);
    SIMD32 B2 = SIMD32.loadLittleEndian(input.ptr + 32);
    SIMD32 B3 = SIMD32.loadLittleEndian(input.ptr + 48);

    SIMD32.transpose(B0, B1, B2, B3);

    mixin(key_xor!( 0) ~ SBoxE1!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 1) ~ SBoxE2!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 2) ~ SBoxE3!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 3) ~ SBoxE4!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 4) ~ SBoxE5!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 5) ~ SBoxE6!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 6) ~ SBoxE7!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 7) ~ SBoxE8!("B0", "B1", "B2", "B3") ~ transform);

    mixin(key_xor!( 8) ~ SBoxE1!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!( 9) ~ SBoxE2!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(10) ~ SBoxE3!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(11) ~ SBoxE4!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(12) ~ SBoxE5!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(13) ~ SBoxE6!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(14) ~ SBoxE7!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(15) ~ SBoxE8!("B0", "B1", "B2", "B3") ~ transform);

    mixin(key_xor!(16) ~ SBoxE1!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(17) ~ SBoxE2!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(18) ~ SBoxE3!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(19) ~ SBoxE4!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(20) ~ SBoxE5!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(21) ~ SBoxE6!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(22) ~ SBoxE7!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(23) ~ SBoxE8!("B0", "B1", "B2", "B3") ~ transform);

    mixin(key_xor!(24) ~ SBoxE1!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(25) ~ SBoxE2!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(26) ~ SBoxE3!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(27) ~ SBoxE4!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(28) ~ SBoxE5!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(29) ~ SBoxE6!("B0", "B1", "B2", "B3") ~ transform);
    mixin(key_xor!(30) ~ SBoxE7!("B0", "B1", "B2", "B3") ~ transform);
    // last round: no linear transform, round key 32 is XORed in instead
    mixin(key_xor!(31) ~ SBoxE8!("B0", "B1", "B2", "B3") ~ key_xor!(32));

    SIMD32.transpose(B0, B1, B2, B3);

    B0.storeLittleEndian(output.ptr);
    B1.storeLittleEndian(output.ptr + 16);
    B2.storeLittleEndian(output.ptr + 32);
    B3.storeLittleEndian(output.ptr + 48);
}

/*
* SIMD Serpent Decryption of 4 blocks in parallel
*
* Mirror image of serpent_encrypt_4: round key 32 is XORed in first, then
* each round applies an inverse S-box followed by a key XOR, with the inverse
* linear transform between rounds, walking the round keys from 31 down to 0.
*
* The mixed-in code strings refer to the locals B0..B3 and to the parameter
* `keys` by name, so those identifiers must not be renamed. The SBoxD*
* strings are defined outside this file.
*/
void serpent_decrypt_4(in ubyte[64] input, ref ubyte[64] output, in uint[132] keys)
{
    SIMD32 B0 = SIMD32.loadLittleEndian(input.ptr);
    SIMD32 B1 = SIMD32.loadLittleEndian(input.ptr + 16);
    SIMD32 B2 = SIMD32.loadLittleEndian(input.ptr + 32);
    SIMD32 B3 = SIMD32.loadLittleEndian(input.ptr + 48);

    SIMD32.transpose(B0, B1, B2, B3);

    // undo the final encryption round, which had no linear transform
    mixin(key_xor!(32));  mixin(SBoxD8); mixin(key_xor!(31));
    mixin(i_transform); mixin(SBoxD7); mixin(key_xor!(30));
    mixin(i_transform); mixin(SBoxD6); mixin(key_xor!(29));
    mixin(i_transform); mixin(SBoxD5); mixin(key_xor!(28));
    mixin(i_transform); mixin(SBoxD4); mixin(key_xor!(27));
    mixin(i_transform); mixin(SBoxD3); mixin(key_xor!(26));
    mixin(i_transform); mixin(SBoxD2); mixin(key_xor!(25));
    mixin(i_transform); mixin(SBoxD1); mixin(key_xor!(24));

    mixin(i_transform); mixin(SBoxD8); mixin(key_xor!(23));
    mixin(i_transform); mixin(SBoxD7); mixin(key_xor!(22));
    mixin(i_transform); mixin(SBoxD6); mixin(key_xor!(21));
    mixin(i_transform); mixin(SBoxD5); mixin(key_xor!(20));
    mixin(i_transform); mixin(SBoxD4); mixin(key_xor!(19));
    mixin(i_transform); mixin(SBoxD3); mixin(key_xor!(18));
    mixin(i_transform); mixin(SBoxD2); mixin(key_xor!(17));
    mixin(i_transform); mixin(SBoxD1); mixin(key_xor!(16));

    mixin(i_transform); mixin(SBoxD8); mixin(key_xor!(15));
    mixin(i_transform); mixin(SBoxD7); mixin(key_xor!(14));
    mixin(i_transform); mixin(SBoxD6); mixin(key_xor!(13));
    mixin(i_transform); mixin(SBoxD5); mixin(key_xor!(12));
    mixin(i_transform); mixin(SBoxD4); mixin(key_xor!(11));
    mixin(i_transform); mixin(SBoxD3); mixin(key_xor!(10));
    mixin(i_transform); mixin(SBoxD2); mixin(key_xor!( 9));
    mixin(i_transform); mixin(SBoxD1); mixin(key_xor!( 8));

    mixin(i_transform); mixin(SBoxD8); mixin(key_xor!( 7));
    mixin(i_transform); mixin(SBoxD7); mixin(key_xor!( 6));
    mixin(i_transform); mixin(SBoxD6); mixin(key_xor!( 5));
    mixin(i_transform); mixin(SBoxD5); mixin(key_xor!( 4));
    mixin(i_transform); mixin(SBoxD4); mixin(key_xor!( 3));
    mixin(i_transform); mixin(SBoxD3); mixin(key_xor!( 2));
    mixin(i_transform); mixin(SBoxD2); mixin(key_xor!( 1));
    mixin(i_transform); mixin(SBoxD1); mixin(key_xor!( 0));

    SIMD32.transpose(B0, B1, B2, B3);

    B0.storeLittleEndian(output.ptr);
    B1.storeLittleEndian(output.ptr + 16);
    B2.storeLittleEndian(output.ptr + 32);
    B3.storeLittleEndian(output.ptr + 48);
}

private:

/*
* Serpent's linear transformations
*
* Both are code strings meant to be mixed into a scope where the SIMD32
* locals B0..B3 exist. The rotate calls are used as statements, so they
* presumably modify the value in place, while lshift is used as an operand
* and presumably returns the shifted value -- confirm against SIMD32.
*/
enum string transform =
    `B0.rotateLeft!13();
    B2.rotateLeft!3();
    B1 ^= B0 ^ B2;
    B3 ^= B2 ^ (B0.lshift!3());
    B1.rotateLeft!1();
    B3.rotateLeft!7();
    B0 ^= B1 ^ B3;
    B2 ^= B3 ^ (B1.lshift!7());
    B0.rotateLeft!5();
    B2.rotateLeft!22();`;

// Inverse of `transform`: the same steps in reverse order with right rotations.
enum string i_transform =
    `B2.rotateRight!22();
    B0.rotateRight!5();
    B2 ^= B3 ^ (B1.lshift!7());
    B0 ^= B1 ^ B3;
    B3.rotateRight!7();
    B1.rotateRight!1();
    B3 ^= B2 ^ (B0.lshift!3());
    B1 ^= B0 ^ B2;
    B2.rotateRight!3();
    B0.rotateRight!13();`;

/*
* Code string that XORs the four key words of `round` (keys[4*round] through
* keys[4*round + 3]) into B0..B3. The round number is spliced into the string
* as text; each key word is wrapped in a SIMD32 (presumably replicated across
* all lanes -- confirm against the SIMD32 constructor). Requires `keys` and
* B0..B3 to be in scope at the mixin site.
*/
enum string key_xor(uint round) =
    `B0 ^= SIMD32(keys[4*` ~ round.stringof ~ `  ]);
    B1 ^= SIMD32(keys[4*` ~ round.stringof ~ `+1]);
    B2 ^= SIMD32(keys[4*` ~ round.stringof ~ `+2]);
    B3 ^= SIMD32(keys[4*` ~ round.stringof ~ `+3]);`;