/**
* 64x64.128 bit multiply operation
* 
* Copyright:
* (C) 2013 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan.utils.mul128;
import botan.constants;
import botan.utils.types;

/**
* Multiply two unsigned 64-bit integers and produce the full 128-bit product.
*
* Params:
*  a = first factor
*  b = second factor
*  res = receives the product: res[0] holds the low 64 bits,
*        res[1] holds the high 64 bits
*
* TODO: Optimize this further
*/
void mul64x64_128(ulong a, ulong b, ref ulong[2] res) pure
{
    version (D_InlineAsm_X86_64) {
        // Addresses of the two output words, so the asm block can store
        // through them.
        ulong* lowWord = &res[0];
        ulong* highWord = &res[1];
        asm pure nothrow @nogc {
            // Fetch the destination addresses first; MUL only writes
            // RAX and RDX, so RBX/RCX survive it.
            mov RBX, lowWord;
            mov RCX, highWord;
            // Unsigned multiply: RDX:RAX = RAX * b
            mov RAX, a;
            mul b;
            mov [RBX], RAX;
            mov [RCX], RDX;
        }
    }
    else {
        /*
        * Portable path: build the 128-bit product out of four 32x32->64
        * partial products combined with shifts and adds. Used where no
        * 64x64->128 inline-asm multiply is available.
        */
        enum uint HALF_BITS = 32;
        enum ulong LOW_MASK = 0xFFFF_FFFF;

        // Split each operand into its upper and lower 32-bit halves.
        const uint aUpper = cast(uint)(a >> HALF_BITS);
        const uint aLower = cast(uint)(a & LOW_MASK);
        const uint bUpper = cast(uint)(b >> HALF_BITS);
        const uint bLower = cast(uint)(b & LOW_MASK);

        // The four partial products.
        ulong top = (cast(ulong) aUpper) * bUpper;
        const ulong crossA = (cast(ulong) aLower) * bUpper;
        ulong middle = (cast(ulong) aUpper) * bLower;
        const ulong bottom = (cast(ulong) aLower) * bLower;

        // Cannot wrap: (2^32-1)^2 + (2^32-1) < 2^64 - 1
        middle += bottom >> HALF_BITS;

        // This addition may wrap around 2^64 ...
        middle += crossA;

        // ... in which case the lost carry is worth 2^32 in the high word.
        if (middle < crossA)
            top += 1UL << HALF_BITS;

        res[1] = top + (middle >> HALF_BITS);
        res[0] = ((middle & LOW_MASK) << HALF_BITS) + (bottom & LOW_MASK);
    }
}