00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017 #include "cpu.h"
00018
00019 #include <iostream>
00020
00021 #if _MSC_VER >= 1400
00022 #include <intrin.h>
00023 #endif
00024
00025 #ifdef __DECCXX
00026 #include <c_asm.h>
00027 #endif
00028
00029 #ifdef CRYPTOPP_MSVC6_NO_PP
00030 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00031 #endif
00032
00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00034
00035 NAMESPACE_BEGIN(CryptoPP)
00036
00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00038 {
00039 if (valueType != typeid(Integer))
00040 return false;
00041 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00042 return true;
00043 }
00044
00045 inline static int Compare(const word *A, const word *B, size_t N)
00046 {
00047 while (N--)
00048 if (A[N] > B[N])
00049 return 1;
00050 else if (A[N] < B[N])
00051 return -1;
00052
00053 return 0;
00054 }
00055
00056 inline static int Increment(word *A, size_t N, word B=1)
00057 {
00058 assert(N);
00059 word t = A[0];
00060 A[0] = t+B;
00061 if (A[0] >= t)
00062 return 0;
00063 for (unsigned i=1; i<N; i++)
00064 if (++A[i])
00065 return 0;
00066 return 1;
00067 }
00068
00069 inline static int Decrement(word *A, size_t N, word B=1)
00070 {
00071 assert(N);
00072 word t = A[0];
00073 A[0] = t-B;
00074 if (A[0] <= t)
00075 return 0;
00076 for (unsigned i=1; i<N; i++)
00077 if (A[i]--)
00078 return 0;
00079 return 1;
00080 }
00081
00082 static void TwosComplement(word *A, size_t N)
00083 {
00084 Decrement(A, N);
00085 for (unsigned i=0; i<N; i++)
00086 A[i] = ~A[i];
00087 }
00088
00089 static word AtomicInverseModPower2(word A)
00090 {
00091 assert(A%2==1);
00092
00093 word R=A%8;
00094
00095 for (unsigned i=3; i<WORD_BITS; i*=2)
00096 R = R*(2-R*A);
00097
00098 assert(R*A==1);
00099 return R;
00100 }
00101
00102
00103
00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || CRYPTOPP_BOOL_X64
00105 #define Declare2Words(x) word x##0, x##1;
00106 #define AssignWord(a, b) a##0 = b; a##1 = 0;
00107 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00108 #define LowWord(a) a##0
00109 #define HighWord(a) a##1
00110 #ifdef _MSC_VER
00111 #define MultiplyWords(p, a, b) p##0 = _umul128(a, b, &p##1);
00112 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00113 #elif defined(__DECCXX)
00114 #define MultiplyWords(p, a, b) p##0 = a*b; p##1 = asm("umulh %a0, %a1, %v0", a, b);
00115 #elif CRYPTOPP_BOOL_X64
00116 #define MultiplyWords(p, a, b) asm ("mulq %3" : "=a"(p##0), "=d"(p##1) : "a"(a), "g"(b) : "cc");
00117 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00118 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00119 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00120 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00121 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00122 #endif
00123 #ifndef Double3Words
00124 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00125 #endif
00126 #ifndef Acc2WordsBy2
00127 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00128 #endif
00129 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00130 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00131 #define GetCarry(u) u##1
00132 #define GetBorrow(u) u##1
00133 #else
00134 #define Declare2Words(x) dword x;
00135 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00136 #define MultiplyWords(p, a, b) p = __emulu(a, b);
00137 #else
00138 #define MultiplyWords(p, a, b) p = (dword)a*b;
00139 #endif
00140 #define AssignWord(a, b) a = b;
00141 #define Add2WordsBy1(a, b, c) a = b + c;
00142 #define Acc2WordsBy2(a, b) a += b;
00143 #define LowWord(a) word(a)
00144 #define HighWord(a) word(a>>WORD_BITS)
00145 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00146 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
00147 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
00148 #define GetCarry(u) HighWord(u)
00149 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
00150 #endif
00151 #ifndef MulAcc
00152 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00153 #endif
00154 #ifndef Acc2WordsBy1
00155 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
00156 #endif
00157 #ifndef Acc3WordsBy2
00158 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00159 #endif
00160
00161 class DWord
00162 {
00163 public:
00164 DWord() {}
00165
00166 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00167 explicit DWord(word low)
00168 {
00169 m_whole = low;
00170 }
00171 #else
00172 explicit DWord(word low)
00173 {
00174 m_halfs.low = low;
00175 m_halfs.high = 0;
00176 }
00177 #endif
00178
00179 DWord(word low, word high)
00180 {
00181 m_halfs.low = low;
00182 m_halfs.high = high;
00183 }
00184
00185 static DWord Multiply(word a, word b)
00186 {
00187 DWord r;
00188 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00189 r.m_whole = (dword)a * b;
00190 #else
00191 r.m_halfs.low = _umul128(a, b, &r.m_halfs.high);
00192 #endif
00193 return r;
00194 }
00195
00196 static DWord MultiplyAndAdd(word a, word b, word c)
00197 {
00198 DWord r = Multiply(a, b);
00199 return r += c;
00200 }
00201
00202 DWord & operator+=(word a)
00203 {
00204 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00205 m_whole = m_whole + a;
00206 #else
00207 m_halfs.low += a;
00208 m_halfs.high += (m_halfs.low < a);
00209 #endif
00210 return *this;
00211 }
00212
00213 DWord operator+(word a)
00214 {
00215 DWord r;
00216 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00217 r.m_whole = m_whole + a;
00218 #else
00219 r.m_halfs.low = m_halfs.low + a;
00220 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00221 #endif
00222 return r;
00223 }
00224
00225 DWord operator-(DWord a)
00226 {
00227 DWord r;
00228 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00229 r.m_whole = m_whole - a.m_whole;
00230 #else
00231 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00232 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00233 #endif
00234 return r;
00235 }
00236
00237 DWord operator-(word a)
00238 {
00239 DWord r;
00240 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00241 r.m_whole = m_whole - a;
00242 #else
00243 r.m_halfs.low = m_halfs.low - a;
00244 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00245 #endif
00246 return r;
00247 }
00248
00249
00250 word operator/(word divisor);
00251
00252 word operator%(word a);
00253
00254 bool operator!() const
00255 {
00256 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00257 return !m_whole;
00258 #else
00259 return !m_halfs.high && !m_halfs.low;
00260 #endif
00261 }
00262
00263 word GetLowHalf() const {return m_halfs.low;}
00264 word GetHighHalf() const {return m_halfs.high;}
00265 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00266
00267 private:
00268 union
00269 {
00270 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00271 dword m_whole;
00272 #endif
00273 struct
00274 {
00275 #ifdef IS_LITTLE_ENDIAN
00276 word low;
00277 word high;
00278 #else
00279 word high;
00280 word low;
00281 #endif
00282 } m_halfs;
00283 };
00284 };
00285
00286 class Word
00287 {
00288 public:
00289 Word() {}
00290
00291 Word(word value)
00292 {
00293 m_whole = value;
00294 }
00295
00296 Word(hword low, hword high)
00297 {
00298 m_whole = low | (word(high) << (WORD_BITS/2));
00299 }
00300
00301 static Word Multiply(hword a, hword b)
00302 {
00303 Word r;
00304 r.m_whole = (word)a * b;
00305 return r;
00306 }
00307
00308 Word operator-(Word a)
00309 {
00310 Word r;
00311 r.m_whole = m_whole - a.m_whole;
00312 return r;
00313 }
00314
00315 Word operator-(hword a)
00316 {
00317 Word r;
00318 r.m_whole = m_whole - a;
00319 return r;
00320 }
00321
00322
00323 hword operator/(hword divisor)
00324 {
00325 return hword(m_whole / divisor);
00326 }
00327
00328 bool operator!() const
00329 {
00330 return !m_whole;
00331 }
00332
00333 word GetWhole() const {return m_whole;}
00334 hword GetLowHalf() const {return hword(m_whole);}
00335 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00336 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00337
00338 private:
00339 word m_whole;
00340 };
00341
00342
00343 template <class S, class D>
00344 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00345 {
00346
00347 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00348
00349
00350 S Q;
00351 if (S(B1+1) == 0)
00352 Q = A[2];
00353 else
00354 Q = D(A[1], A[2]) / S(B1+1);
00355
00356
00357 D p = D::Multiply(B0, Q);
00358 D u = (D) A[0] - p.GetLowHalf();
00359 A[0] = u.GetLowHalf();
00360 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00361 A[1] = u.GetLowHalf();
00362 A[2] += u.GetHighHalf();
00363
00364
00365 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00366 {
00367 u = (D) A[0] - B0;
00368 A[0] = u.GetLowHalf();
00369 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00370 A[1] = u.GetLowHalf();
00371 A[2] += u.GetHighHalf();
00372 Q++;
00373 assert(Q);
00374 }
00375
00376 return Q;
00377 }
00378
00379
00380 template <class S, class D>
00381 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00382 {
00383 if (!B)
00384 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00385 else
00386 {
00387 S Q[2];
00388 T[0] = Al.GetLowHalf();
00389 T[1] = Al.GetHighHalf();
00390 T[2] = Ah.GetLowHalf();
00391 T[3] = Ah.GetHighHalf();
00392 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00393 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00394 return D(Q[0], Q[1]);
00395 }
00396 }
00397
00398
00399 inline word DWord::operator/(word a)
00400 {
00401 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00402 return word(m_whole / a);
00403 #else
00404 hword r[4];
00405 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00406 #endif
00407 }
00408
00409 inline word DWord::operator%(word a)
00410 {
00411 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00412 return word(m_whole % a);
00413 #else
00414 if (a < (word(1) << (WORD_BITS/2)))
00415 {
00416 hword h = hword(a);
00417 word r = m_halfs.high % h;
00418 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00419 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00420 }
00421 else
00422 {
00423 hword r[4];
00424 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00425 return Word(r[0], r[1]).GetWhole();
00426 }
00427 #endif
00428 }
00429
00430
00431
00432
00433 #if defined(__GNUC__)
00434 #define AddPrologue \
00435 int result; \
00436 __asm__ __volatile__ \
00437 ( \
00438 ".intel_syntax noprefix;"
00439 #define AddEpilogue \
00440 ".att_syntax prefix;" \
00441 : "=a" (result)\
00442 : "d" (C), "a" (A), "D" (B), "c" (N) \
00443 : "%esi", "memory", "cc" \
00444 );\
00445 return result;
00446 #define MulPrologue \
00447 __asm__ __volatile__ \
00448 ( \
00449 ".intel_syntax noprefix;" \
00450 AS1( push ebx) \
00451 AS2( mov ebx, edx)
00452 #define MulEpilogue \
00453 AS1( pop ebx) \
00454 ".att_syntax prefix;" \
00455 : \
00456 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00457 : "%esi", "memory", "cc" \
00458 );
00459 #define SquPrologue MulPrologue
00460 #define SquEpilogue \
00461 AS1( pop ebx) \
00462 ".att_syntax prefix;" \
00463 : \
00464 : "d" (s_maskLow16), "c" (C), "a" (A) \
00465 : "%esi", "%edi", "memory", "cc" \
00466 );
00467 #define TopPrologue MulPrologue
00468 #define TopEpilogue \
00469 AS1( pop ebx) \
00470 ".att_syntax prefix;" \
00471 : \
00472 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00473 : "memory", "cc" \
00474 );
00475 #else
00476 #define AddPrologue \
00477 __asm push edi \
00478 __asm push esi \
00479 __asm mov eax, [esp+12] \
00480 __asm mov edi, [esp+16]
00481 #define AddEpilogue \
00482 __asm pop esi \
00483 __asm pop edi \
00484 __asm ret 8
00485 #if _MSC_VER < 1300
00486 #define SaveEBX __asm push ebx
00487 #define RestoreEBX __asm pop ebx
00488 #else
00489 #define SaveEBX
00490 #define RestoreEBX
00491 #endif
00492 #define SquPrologue \
00493 AS2( mov eax, A) \
00494 AS2( mov ecx, C) \
00495 SaveEBX \
00496 AS2( lea ebx, s_maskLow16)
00497 #define MulPrologue \
00498 AS2( mov eax, A) \
00499 AS2( mov edi, B) \
00500 AS2( mov ecx, C) \
00501 SaveEBX \
00502 AS2( lea ebx, s_maskLow16)
00503 #define TopPrologue \
00504 AS2( mov eax, A) \
00505 AS2( mov edi, B) \
00506 AS2( mov ecx, C) \
00507 AS2( mov esi, L) \
00508 SaveEBX \
00509 AS2( lea ebx, s_maskLow16)
00510 #define SquEpilogue RestoreEBX
00511 #define MulEpilogue RestoreEBX
00512 #define TopEpilogue RestoreEBX
00513 #endif
00514
00515 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00516 extern "C" {
00517 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00518 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00519 }
00520 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__)
00521 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00522 {
00523 word result;
00524 __asm__ __volatile__
00525 (
00526 ".intel_syntax;"
00527 AS1( neg %1)
00528 ASJ( jz, 1, f)
00529 AS2( mov %0,[%3+8*%1])
00530 AS2( add %0,[%4+8*%1])
00531 AS2( mov [%2+8*%1],%0)
00532 ASL(0)
00533 AS2( mov %0,[%3+8*%1+8])
00534 AS2( adc %0,[%4+8*%1+8])
00535 AS2( mov [%2+8*%1+8],%0)
00536 AS2( lea %1,[%1+2])
00537 ASJ( jrcxz, 1, f)
00538 AS2( mov %0,[%3+8*%1])
00539 AS2( adc %0,[%4+8*%1])
00540 AS2( mov [%2+8*%1],%0)
00541 ASJ( jmp, 0, b)
00542 ASL(1)
00543 AS2( mov %0, 0)
00544 AS2( adc %0, %0)
00545 ".att_syntax;"
00546 : "=&r" (result)
00547 : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N)
00548 : "memory", "cc"
00549 );
00550 return (int)result;
00551 }
00552
00553 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00554 {
00555 word result;
00556 __asm__ __volatile__
00557 (
00558 ".intel_syntax;"
00559 AS1( neg %1)
00560 ASJ( jz, 1, f)
00561 AS2( mov %0,[%3+8*%1])
00562 AS2( sub %0,[%4+8*%1])
00563 AS2( mov [%2+8*%1],%0)
00564 ASL(0)
00565 AS2( mov %0,[%3+8*%1+8])
00566 AS2( sbb %0,[%4+8*%1+8])
00567 AS2( mov [%2+8*%1+8],%0)
00568 AS2( lea %1,[%1+2])
00569 ASJ( jrcxz, 1, f)
00570 AS2( mov %0,[%3+8*%1])
00571 AS2( sbb %0,[%4+8*%1])
00572 AS2( mov [%2+8*%1],%0)
00573 ASJ( jmp, 0, b)
00574 ASL(1)
00575 AS2( mov %0, 0)
00576 AS2( adc %0, %0)
00577 ".att_syntax;"
00578 : "=&r" (result)
00579 : "c" (N), "r" (C+N), "r" (A+N), "r" (B+N)
00580 : "memory", "cc"
00581 );
00582 return (int)result;
00583 }
00584 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00585 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00586 {
00587 AddPrologue
00588
00589
00590 AS2( lea eax, [eax+4*ecx])
00591 AS2( lea edi, [edi+4*ecx])
00592 AS2( lea edx, [edx+4*ecx])
00593
00594 AS1( neg ecx)
00595 AS2( test ecx, 2)
00596 ASJ( jz, 0, f)
00597 AS2( sub ecx, 2)
00598 ASJ( jmp, 1, f)
00599
00600 ASL(0)
00601 ASJ( jecxz, 2, f)
00602 AS2( mov esi,[eax+4*ecx])
00603 AS2( adc esi,[edi+4*ecx])
00604 AS2( mov [edx+4*ecx],esi)
00605 AS2( mov esi,[eax+4*ecx+4])
00606 AS2( adc esi,[edi+4*ecx+4])
00607 AS2( mov [edx+4*ecx+4],esi)
00608 ASL(1)
00609 AS2( mov esi,[eax+4*ecx+8])
00610 AS2( adc esi,[edi+4*ecx+8])
00611 AS2( mov [edx+4*ecx+8],esi)
00612 AS2( mov esi,[eax+4*ecx+12])
00613 AS2( adc esi,[edi+4*ecx+12])
00614 AS2( mov [edx+4*ecx+12],esi)
00615
00616 AS2( lea ecx,[ecx+4])
00617 ASJ( jmp, 0, b)
00618
00619 ASL(2)
00620 AS2( mov eax, 0)
00621 AS1( setc al)
00622
00623 AddEpilogue
00624 }
00625
00626 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00627 {
00628 AddPrologue
00629
00630
00631 AS2( lea eax, [eax+4*ecx])
00632 AS2( lea edi, [edi+4*ecx])
00633 AS2( lea edx, [edx+4*ecx])
00634
00635 AS1( neg ecx)
00636 AS2( test ecx, 2)
00637 ASJ( jz, 0, f)
00638 AS2( sub ecx, 2)
00639 ASJ( jmp, 1, f)
00640
00641 ASL(0)
00642 ASJ( jecxz, 2, f)
00643 AS2( mov esi,[eax+4*ecx])
00644 AS2( sbb esi,[edi+4*ecx])
00645 AS2( mov [edx+4*ecx],esi)
00646 AS2( mov esi,[eax+4*ecx+4])
00647 AS2( sbb esi,[edi+4*ecx+4])
00648 AS2( mov [edx+4*ecx+4],esi)
00649 ASL(1)
00650 AS2( mov esi,[eax+4*ecx+8])
00651 AS2( sbb esi,[edi+4*ecx+8])
00652 AS2( mov [edx+4*ecx+8],esi)
00653 AS2( mov esi,[eax+4*ecx+12])
00654 AS2( sbb esi,[edi+4*ecx+12])
00655 AS2( mov [edx+4*ecx+12],esi)
00656
00657 AS2( lea ecx,[ecx+4])
00658 ASJ( jmp, 0, b)
00659
00660 ASL(2)
00661 AS2( mov eax, 0)
00662 AS1( setc al)
00663
00664 AddEpilogue
00665 }
00666
00667 #if CRYPTOPP_INTEGER_SSE2
00668 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00669 {
00670 AddPrologue
00671
00672
00673 AS2( lea eax, [eax+4*ecx])
00674 AS2( lea edi, [edi+4*ecx])
00675 AS2( lea edx, [edx+4*ecx])
00676
00677 AS1( neg ecx)
00678 AS2( pxor mm2, mm2)
00679 ASJ( jz, 2, f)
00680 AS2( test ecx, 2)
00681 ASJ( jz, 0, f)
00682 AS2( sub ecx, 2)
00683 ASJ( jmp, 1, f)
00684
00685 ASL(0)
00686 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00687 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00688 AS2( paddq mm0, mm1)
00689 AS2( paddq mm2, mm0)
00690 AS2( movd DWORD PTR [edx+4*ecx], mm2)
00691 AS2( psrlq mm2, 32)
00692
00693 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
00694 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00695 AS2( paddq mm0, mm1)
00696 AS2( paddq mm2, mm0)
00697 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00698 AS2( psrlq mm2, 32)
00699
00700 ASL(1)
00701 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00702 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00703 AS2( paddq mm0, mm1)
00704 AS2( paddq mm2, mm0)
00705 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
00706 AS2( psrlq mm2, 32)
00707
00708 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
00709 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00710 AS2( paddq mm0, mm1)
00711 AS2( paddq mm2, mm0)
00712 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00713 AS2( psrlq mm2, 32)
00714
00715 AS2( add ecx, 4)
00716 ASJ( jnz, 0, b)
00717
00718 ASL(2)
00719 AS2( movd eax, mm2)
00720 AS1( emms)
00721
00722 AddEpilogue
00723 }
00724 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00725 {
00726 AddPrologue
00727
00728
00729 AS2( lea eax, [eax+4*ecx])
00730 AS2( lea edi, [edi+4*ecx])
00731 AS2( lea edx, [edx+4*ecx])
00732
00733 AS1( neg ecx)
00734 AS2( pxor mm2, mm2)
00735 ASJ( jz, 2, f)
00736 AS2( test ecx, 2)
00737 ASJ( jz, 0, f)
00738 AS2( sub ecx, 2)
00739 ASJ( jmp, 1, f)
00740
00741 ASL(0)
00742 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00743 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00744 AS2( psubq mm0, mm1)
00745 AS2( psubq mm0, mm2)
00746 AS2( movd DWORD PTR [edx+4*ecx], mm0)
00747 AS2( psrlq mm0, 63)
00748
00749 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
00750 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00751 AS2( psubq mm2, mm1)
00752 AS2( psubq mm2, mm0)
00753 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00754 AS2( psrlq mm2, 63)
00755
00756 ASL(1)
00757 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00758 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00759 AS2( psubq mm0, mm1)
00760 AS2( psubq mm0, mm2)
00761 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
00762 AS2( psrlq mm0, 63)
00763
00764 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
00765 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00766 AS2( psubq mm2, mm1)
00767 AS2( psubq mm2, mm0)
00768 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00769 AS2( psrlq mm2, 63)
00770
00771 AS2( add ecx, 4)
00772 ASJ( jnz, 0, b)
00773
00774 ASL(2)
00775 AS2( movd eax, mm2)
00776 AS1( emms)
00777
00778 AddEpilogue
00779 }
00780 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00781 #else
00782 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00783 {
00784 assert (N%2 == 0);
00785
00786 Declare2Words(u);
00787 AssignWord(u, 0);
00788 for (size_t i=0; i<N; i+=2)
00789 {
00790 AddWithCarry(u, A[i], B[i]);
00791 C[i] = LowWord(u);
00792 AddWithCarry(u, A[i+1], B[i+1]);
00793 C[i+1] = LowWord(u);
00794 }
00795 return int(GetCarry(u));
00796 }
00797
00798 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00799 {
00800 assert (N%2 == 0);
00801
00802 Declare2Words(u);
00803 AssignWord(u, 0);
00804 for (size_t i=0; i<N; i+=2)
00805 {
00806 SubtractWithBorrow(u, A[i], B[i]);
00807 C[i] = LowWord(u);
00808 SubtractWithBorrow(u, A[i+1], B[i+1]);
00809 C[i+1] = LowWord(u);
00810 }
00811 return int(GetBorrow(u));
00812 }
00813 #endif
00814
00815 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00816 {
00817 word carry=0;
00818 for(unsigned i=0; i<N; i++)
00819 {
00820 Declare2Words(p);
00821 MultiplyWords(p, A[i], B);
00822 Acc2WordsBy1(p, carry);
00823 C[i] = LowWord(p);
00824 carry = HighWord(p);
00825 }
00826 return carry;
00827 }
00828
00829 #define Mul_2 \
00830 Mul_Begin(2) \
00831 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00832 Mul_End(1, 1)
00833
00834 #define Mul_4 \
00835 Mul_Begin(4) \
00836 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00837 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00838 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00839 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
00840 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00841 Mul_End(5, 3)
00842
00843 #define Mul_8 \
00844 Mul_Begin(8) \
00845 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00846 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00847 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00848 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00849 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00850 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00851 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00852 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00853 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00854 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00855 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00856 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00857 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00858 Mul_End(13, 7)
00859
00860 #define Mul_16 \
00861 Mul_Begin(16) \
00862 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00863 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00864 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00865 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00866 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00867 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00868 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00869 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00870 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00871 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00872 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00873 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00874 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00875 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00876 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00877 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00878 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00879 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00880 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00881 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00882 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00883 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00884 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00885 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00886 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00887 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00888 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00889 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00890 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00891 Mul_End(29, 15)
00892
00893 #define Squ_2 \
00894 Squ_Begin(2) \
00895 Squ_End(2)
00896
00897 #define Squ_4 \
00898 Squ_Begin(4) \
00899 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00900 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00901 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00902 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00903 Squ_End(4)
00904
00905 #define Squ_8 \
00906 Squ_Begin(8) \
00907 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00908 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00909 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00910 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00911 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00912 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00913 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00914 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00915 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00916 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00917 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00918 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00919 Squ_End(8)
00920
00921 #define Squ_16 \
00922 Squ_Begin(16) \
00923 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00924 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00925 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00926 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00927 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00928 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00929 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00930 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00931 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00932 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00933 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
00934 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
00935 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
00936 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
00937 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
00938 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
00939 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
00940 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
00941 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
00942 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
00943 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
00944 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
00945 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
00946 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
00947 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
00948 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
00949 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
00950 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
00951 Squ_End(16)
00952
00953 #define Bot_2 \
00954 Mul_Begin(2) \
00955 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
00956 Bot_End(2)
00957
00958 #define Bot_4 \
00959 Mul_Begin(4) \
00960 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00961 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
00962 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
00963 Bot_End(4)
00964
00965 #define Bot_8 \
00966 Mul_Begin(8) \
00967 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00968 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00969 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00970 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00971 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00972 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00973 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
00974 Bot_End(8)
00975
00976 #define Bot_16 \
00977 Mul_Begin(16) \
00978 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00979 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00980 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00981 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00982 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00983 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00984 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00985 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00986 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00987 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00988 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00989 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00990 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00991 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00992 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
00993 Bot_End(16)
00994
00995 #if 0
00996 #define Mul_Begin(n) \
00997 Declare2Words(p) \
00998 Declare2Words(c) \
00999 Declare2Words(d) \
01000 MultiplyWords(p, A[0], B[0]) \
01001 AssignWord(c, LowWord(p)) \
01002 AssignWord(d, HighWord(p))
01003
01004 #define Mul_Acc(i, j) \
01005 MultiplyWords(p, A[i], B[j]) \
01006 Acc2WordsBy1(c, LowWord(p)) \
01007 Acc2WordsBy1(d, HighWord(p))
01008
01009 #define Mul_SaveAcc(k, i, j) \
01010 R[k] = LowWord(c); \
01011 Add2WordsBy1(c, d, HighWord(c)) \
01012 MultiplyWords(p, A[i], B[j]) \
01013 AssignWord(d, HighWord(p)) \
01014 Acc2WordsBy1(c, LowWord(p))
01015
01016 #define Mul_End(n) \
01017 R[2*n-3] = LowWord(c); \
01018 Acc2WordsBy1(d, HighWord(c)) \
01019 MultiplyWords(p, A[n-1], B[n-1])\
01020 Acc2WordsBy2(d, p) \
01021 R[2*n-2] = LowWord(d); \
01022 R[2*n-1] = HighWord(d);
01023
01024 #define Bot_SaveAcc(k, i, j) \
01025 R[k] = LowWord(c); \
01026 word e = LowWord(d) + HighWord(c); \
01027 e += A[i] * B[j];
01028
01029 #define Bot_Acc(i, j) \
01030 e += A[i] * B[j];
01031
01032 #define Bot_End(n) \
01033 R[n-1] = e;
01034 #else
01035 #define Mul_Begin(n) \
01036 Declare2Words(p) \
01037 word c; \
01038 Declare2Words(d) \
01039 MultiplyWords(p, A[0], B[0]) \
01040 c = LowWord(p); \
01041 AssignWord(d, HighWord(p))
01042
01043 #define Mul_Acc(i, j) \
01044 MulAcc(c, d, A[i], B[j])
01045
01046 #define Mul_SaveAcc(k, i, j) \
01047 R[k] = c; \
01048 c = LowWord(d); \
01049 AssignWord(d, HighWord(d)) \
01050 MulAcc(c, d, A[i], B[j])
01051
01052 #define Mul_End(k, i) \
01053 R[k] = c; \
01054 MultiplyWords(p, A[i], B[i]) \
01055 Acc2WordsBy2(p, d) \
01056 R[k+1] = LowWord(p); \
01057 R[k+2] = HighWord(p);
01058
01059 #define Bot_SaveAcc(k, i, j) \
01060 R[k] = c; \
01061 c = LowWord(d); \
01062 c += A[i] * B[j];
01063
01064 #define Bot_Acc(i, j) \
01065 c += A[i] * B[j];
01066
01067 #define Bot_End(n) \
01068 R[n-1] = c;
01069 #endif
01070
01071 #define Squ_Begin(n) \
01072 Declare2Words(p) \
01073 word c; \
01074 Declare2Words(d) \
01075 Declare2Words(e) \
01076 MultiplyWords(p, A[0], A[0]) \
01077 R[0] = LowWord(p); \
01078 AssignWord(e, HighWord(p)) \
01079 MultiplyWords(p, A[0], A[1]) \
01080 c = LowWord(p); \
01081 AssignWord(d, HighWord(p)) \
01082 Squ_NonDiag \
01083
01084 #define Squ_NonDiag \
01085 Double3Words(c, d)
01086
01087 #define Squ_SaveAcc(k, i, j) \
01088 Acc3WordsBy2(c, d, e) \
01089 R[k] = c; \
01090 MultiplyWords(p, A[i], A[j]) \
01091 c = LowWord(p); \
01092 AssignWord(d, HighWord(p)) \
01093
01094 #define Squ_Acc(i, j) \
01095 MulAcc(c, d, A[i], A[j])
01096
01097 #define Squ_Diag(i) \
01098 Squ_NonDiag \
01099 MulAcc(c, d, A[i], A[i])
01100
01101 #define Squ_End(n) \
01102 Acc3WordsBy2(c, d, e) \
01103 R[2*n-3] = c; \
01104 MultiplyWords(p, A[n-1], A[n-1])\
01105 Acc2WordsBy2(p, e) \
01106 R[2*n-2] = LowWord(p); \
01107 R[2*n-1] = HighWord(p);
01108
01109 void Baseline_Multiply2(word *R, const word *A, const word *B)
01110 {
01111 Mul_2
01112 }
01113
01114 void Baseline_Multiply4(word *R, const word *A, const word *B)
01115 {
01116 Mul_4
01117 }
01118
01119 void Baseline_Multiply8(word *R, const word *A, const word *B)
01120 {
01121 Mul_8
01122 }
01123
01124 void Baseline_Square2(word *R, const word *A)
01125 {
01126 Squ_2
01127 }
01128
01129 void Baseline_Square4(word *R, const word *A)
01130 {
01131 Squ_4
01132 }
01133
01134 void Baseline_Square8(word *R, const word *A)
01135 {
01136 Squ_8
01137 }
01138
01139 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01140 {
01141 Bot_2
01142 }
01143
01144 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01145 {
01146 Bot_4
01147 }
01148
01149 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01150 {
01151 Bot_8
01152 }
01153
01154 #define Top_Begin(n) \
01155 Declare2Words(p) \
01156 word c; \
01157 Declare2Words(d) \
01158 MultiplyWords(p, A[0], B[n-2]);\
01159 AssignWord(d, HighWord(p));
01160
01161 #define Top_Acc(i, j) \
01162 MultiplyWords(p, A[i], B[j]);\
01163 Acc2WordsBy1(d, HighWord(p));
01164
01165 #define Top_SaveAcc0(i, j) \
01166 c = LowWord(d); \
01167 AssignWord(d, HighWord(d)) \
01168 MulAcc(c, d, A[i], B[j])
01169
01170 #define Top_SaveAcc1(i, j) \
01171 c = L<c; \
01172 Acc2WordsBy1(d, c); \
01173 c = LowWord(d); \
01174 AssignWord(d, HighWord(d)) \
01175 MulAcc(c, d, A[i], B[j])
01176
01177 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01178 {
01179 word T[4];
01180 Baseline_Multiply2(T, A, B);
01181 R[0] = T[2];
01182 R[1] = T[3];
01183 }
01184
01185 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01186 {
01187 Top_Begin(4)
01188 Top_Acc(1, 1) Top_Acc(2, 0) \
01189 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01190 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
01191 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01192 Mul_End(1, 3)
01193 }
01194
01195 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01196 {
01197 Top_Begin(8)
01198 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01199 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01200 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01201 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01202 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01203 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01204 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01205 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01206 Mul_End(5, 7)
01207 }
01208
01209 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
01210 void Baseline_Multiply16(word *R, const word *A, const word *B)
01211 {
01212 Mul_16
01213 }
01214
01215 void Baseline_Square16(word *R, const word *A)
01216 {
01217 Squ_16
01218 }
01219
01220 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01221 {
01222 Bot_16
01223 }
01224
01225 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01226 {
01227 Top_Begin(16)
01228 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01229 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01230 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01231 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01232 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01233 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01234 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01235 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01236 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01237 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01238 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01239 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01240 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01241 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01242 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01243 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01244 Mul_End(13, 15)
01245 }
01246 #endif
01247
01248
01249
01250 #if CRYPTOPP_INTEGER_SSE2
01251
01252 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01253
01254 #undef Mul_Begin
01255 #undef Mul_Acc
01256 #undef Top_Begin
01257 #undef Top_Acc
01258 #undef Squ_Acc
01259 #undef Squ_NonDiag
01260 #undef Squ_Diag
01261 #undef Squ_SaveAcc
01262 #undef Squ_Begin
01263 #undef Mul_SaveAcc
01264 #undef Bot_Acc
01265 #undef Bot_SaveAcc
01266 #undef Bot_End
01267 #undef Squ_End
01268 #undef Mul_End
01269
01270 #define SSE2_FinalSave(k) \
01271 AS2( psllq xmm5, 16) \
01272 AS2( paddq xmm4, xmm5) \
01273 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
01274
01275 #define SSE2_SaveShift(k) \
01276 AS2( movq xmm0, xmm6) \
01277 AS2( punpckhqdq xmm6, xmm0) \
01278 AS2( movq xmm1, xmm7) \
01279 AS2( punpckhqdq xmm7, xmm1) \
01280 AS2( paddd xmm6, xmm0) \
01281 AS2( pslldq xmm6, 4) \
01282 AS2( paddd xmm7, xmm1) \
01283 AS2( paddd xmm4, xmm6) \
01284 AS2( pslldq xmm7, 4) \
01285 AS2( movq xmm6, xmm4) \
01286 AS2( paddd xmm5, xmm7) \
01287 AS2( movq xmm7, xmm5) \
01288 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01289 AS2( psrlq xmm6, 16) \
01290 AS2( paddq xmm6, xmm7) \
01291 AS2( punpckhqdq xmm4, xmm0) \
01292 AS2( punpckhqdq xmm5, xmm0) \
01293 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
01294 AS2( psrlq xmm6, 3*16) \
01295 AS2( paddd xmm4, xmm6) \
01296
01297 #define Squ_SSE2_SaveShift(k) \
01298 AS2( movq xmm0, xmm6) \
01299 AS2( punpckhqdq xmm6, xmm0) \
01300 AS2( movq xmm1, xmm7) \
01301 AS2( punpckhqdq xmm7, xmm1) \
01302 AS2( paddd xmm6, xmm0) \
01303 AS2( pslldq xmm6, 4) \
01304 AS2( paddd xmm7, xmm1) \
01305 AS2( paddd xmm4, xmm6) \
01306 AS2( pslldq xmm7, 4) \
01307 AS2( movhlps xmm6, xmm4) \
01308 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01309 AS2( paddd xmm5, xmm7) \
01310 AS2( movhps QWORD PTR [esp+12], xmm5)\
01311 AS2( psrlq xmm4, 16) \
01312 AS2( paddq xmm4, xmm5) \
01313 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
01314 AS2( psrlq xmm4, 3*16) \
01315 AS2( paddd xmm4, xmm6) \
01316 AS2( movq QWORD PTR [esp+4], xmm4)\
01317
01318 #define SSE2_FirstMultiply(i) \
01319 AS2( movdqa xmm7, [esi+(i)*16])\
01320 AS2( movdqa xmm5, [edi-(i)*16])\
01321 AS2( pmuludq xmm5, xmm7) \
01322 AS2( movdqa xmm4, [ebx])\
01323 AS2( movdqa xmm6, xmm4) \
01324 AS2( pand xmm4, xmm5) \
01325 AS2( psrld xmm5, 16) \
01326 AS2( pmuludq xmm7, [edx-(i)*16])\
01327 AS2( pand xmm6, xmm7) \
01328 AS2( psrld xmm7, 16)
01329
01330 #define Squ_Begin(n) \
01331 SquPrologue \
01332 AS2( mov esi, esp)\
01333 AS2( and esp, 0xfffffff0)\
01334 AS2( lea edi, [esp-32*n])\
01335 AS2( sub esp, 32*n+16)\
01336 AS1( push esi)\
01337 AS2( mov esi, edi) \
01338 AS2( xor edx, edx) \
01339 ASL(1) \
01340 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01341 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01342 AS2( movdqa [edi+2*edx], xmm0) \
01343 AS2( psrlq xmm0, 32) \
01344 AS2( movdqa [edi+2*edx+16], xmm0) \
01345 AS2( movdqa [edi+16*n+2*edx], xmm1) \
01346 AS2( psrlq xmm1, 32) \
01347 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
01348 AS2( add edx, 16) \
01349 AS2( cmp edx, 8*(n)) \
01350 ASJ( jne, 1, b) \
01351 AS2( lea edx, [edi+16*n])\
01352 SSE2_FirstMultiply(0) \
01353
01354 #define Squ_Acc(i) \
01355 ASL(LSqu##i) \
01356 AS2( movdqa xmm1, [esi+(i)*16]) \
01357 AS2( movdqa xmm0, [edi-(i)*16]) \
01358 AS2( movdqa xmm2, [ebx]) \
01359 AS2( pmuludq xmm0, xmm1) \
01360 AS2( pmuludq xmm1, [edx-(i)*16]) \
01361 AS2( movdqa xmm3, xmm2) \
01362 AS2( pand xmm2, xmm0) \
01363 AS2( psrld xmm0, 16) \
01364 AS2( paddd xmm4, xmm2) \
01365 AS2( paddd xmm5, xmm0) \
01366 AS2( pand xmm3, xmm1) \
01367 AS2( psrld xmm1, 16) \
01368 AS2( paddd xmm6, xmm3) \
01369 AS2( paddd xmm7, xmm1) \
01370
01371 #define Squ_Acc1(i)
01372 #define Squ_Acc2(i) ASC(call, LSqu##i)
01373 #define Squ_Acc3(i) Squ_Acc2(i)
01374 #define Squ_Acc4(i) Squ_Acc2(i)
01375 #define Squ_Acc5(i) Squ_Acc2(i)
01376 #define Squ_Acc6(i) Squ_Acc2(i)
01377 #define Squ_Acc7(i) Squ_Acc2(i)
01378 #define Squ_Acc8(i) Squ_Acc2(i)
01379
01380 #define SSE2_End(E, n) \
01381 SSE2_SaveShift(2*(n)-3) \
01382 AS2( movdqa xmm7, [esi+16]) \
01383 AS2( movdqa xmm0, [edi]) \
01384 AS2( pmuludq xmm0, xmm7) \
01385 AS2( movdqa xmm2, [ebx]) \
01386 AS2( pmuludq xmm7, [edx]) \
01387 AS2( movdqa xmm6, xmm2) \
01388 AS2( pand xmm2, xmm0) \
01389 AS2( psrld xmm0, 16) \
01390 AS2( paddd xmm4, xmm2) \
01391 AS2( paddd xmm5, xmm0) \
01392 AS2( pand xmm6, xmm7) \
01393 AS2( psrld xmm7, 16) \
01394 SSE2_SaveShift(2*(n)-2) \
01395 SSE2_FinalSave(2*(n)-1) \
01396 AS1( pop esp)\
01397 E
01398
01399 #define Squ_End(n) SSE2_End(SquEpilogue, n)
01400 #define Mul_End(n) SSE2_End(MulEpilogue, n)
01401 #define Top_End(n) SSE2_End(TopEpilogue, n)
01402
01403 #define Squ_Column1(k, i) \
01404 Squ_SSE2_SaveShift(k) \
01405 AS2( add esi, 16) \
01406 SSE2_FirstMultiply(1)\
01407 Squ_Acc##i(i) \
01408 AS2( paddd xmm4, xmm4) \
01409 AS2( paddd xmm5, xmm5) \
01410 AS2( movdqa xmm3, [esi]) \
01411 AS2( movq xmm1, QWORD PTR [esi+8]) \
01412 AS2( pmuludq xmm1, xmm3) \
01413 AS2( pmuludq xmm3, xmm3) \
01414 AS2( movdqa xmm0, [ebx])\
01415 AS2( movdqa xmm2, xmm0) \
01416 AS2( pand xmm0, xmm1) \
01417 AS2( psrld xmm1, 16) \
01418 AS2( paddd xmm6, xmm0) \
01419 AS2( paddd xmm7, xmm1) \
01420 AS2( pand xmm2, xmm3) \
01421 AS2( psrld xmm3, 16) \
01422 AS2( paddd xmm6, xmm6) \
01423 AS2( paddd xmm7, xmm7) \
01424 AS2( paddd xmm4, xmm2) \
01425 AS2( paddd xmm5, xmm3) \
01426 AS2( movq xmm0, QWORD PTR [esp+4])\
01427 AS2( movq xmm1, QWORD PTR [esp+12])\
01428 AS2( paddd xmm4, xmm0)\
01429 AS2( paddd xmm5, xmm1)\
01430
01431 #define Squ_Column0(k, i) \
01432 Squ_SSE2_SaveShift(k) \
01433 AS2( add edi, 16) \
01434 AS2( add edx, 16) \
01435 SSE2_FirstMultiply(1)\
01436 Squ_Acc##i(i) \
01437 AS2( paddd xmm6, xmm6) \
01438 AS2( paddd xmm7, xmm7) \
01439 AS2( paddd xmm4, xmm4) \
01440 AS2( paddd xmm5, xmm5) \
01441 AS2( movq xmm0, QWORD PTR [esp+4])\
01442 AS2( movq xmm1, QWORD PTR [esp+12])\
01443 AS2( paddd xmm4, xmm0)\
01444 AS2( paddd xmm5, xmm1)\
01445
01446 #define SSE2_MulAdd45 \
01447 AS2( movdqa xmm7, [esi]) \
01448 AS2( movdqa xmm0, [edi]) \
01449 AS2( pmuludq xmm0, xmm7) \
01450 AS2( movdqa xmm2, [ebx]) \
01451 AS2( pmuludq xmm7, [edx]) \
01452 AS2( movdqa xmm6, xmm2) \
01453 AS2( pand xmm2, xmm0) \
01454 AS2( psrld xmm0, 16) \
01455 AS2( paddd xmm4, xmm2) \
01456 AS2( paddd xmm5, xmm0) \
01457 AS2( pand xmm6, xmm7) \
01458 AS2( psrld xmm7, 16)
01459
01460 #define Mul_Begin(n) \
01461 MulPrologue \
01462 AS2( mov esi, esp)\
01463 AS2( and esp, 0xfffffff0)\
01464 AS2( sub esp, 48*n+16)\
01465 AS1( push esi)\
01466 AS2( xor edx, edx) \
01467 ASL(1) \
01468 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01469 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01470 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01471 AS2( movdqa [esp+20+2*edx], xmm0) \
01472 AS2( psrlq xmm0, 32) \
01473 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01474 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01475 AS2( psrlq xmm1, 32) \
01476 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01477 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01478 AS2( psrlq xmm2, 32) \
01479 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01480 AS2( add edx, 16) \
01481 AS2( cmp edx, 8*(n)) \
01482 ASJ( jne, 1, b) \
01483 AS2( lea edi, [esp+20])\
01484 AS2( lea edx, [esp+20+16*n])\
01485 AS2( lea esi, [esp+20+32*n])\
01486 SSE2_FirstMultiply(0) \
01487
01488 #define Mul_Acc(i) \
01489 ASL(LMul##i) \
01490 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01491 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01492 AS2( movdqa xmm2, [ebx]) \
01493 AS2( pmuludq xmm0, xmm1) \
01494 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01495 AS2( movdqa xmm3, xmm2) \
01496 AS2( pand xmm2, xmm0) \
01497 AS2( psrld xmm0, 16) \
01498 AS2( paddd xmm4, xmm2) \
01499 AS2( paddd xmm5, xmm0) \
01500 AS2( pand xmm3, xmm1) \
01501 AS2( psrld xmm1, 16) \
01502 AS2( paddd xmm6, xmm3) \
01503 AS2( paddd xmm7, xmm1) \
01504
01505 #define Mul_Acc1(i)
01506 #define Mul_Acc2(i) ASC(call, LMul##i)
01507 #define Mul_Acc3(i) Mul_Acc2(i)
01508 #define Mul_Acc4(i) Mul_Acc2(i)
01509 #define Mul_Acc5(i) Mul_Acc2(i)
01510 #define Mul_Acc6(i) Mul_Acc2(i)
01511 #define Mul_Acc7(i) Mul_Acc2(i)
01512 #define Mul_Acc8(i) Mul_Acc2(i)
01513 #define Mul_Acc9(i) Mul_Acc2(i)
01514 #define Mul_Acc10(i) Mul_Acc2(i)
01515 #define Mul_Acc11(i) Mul_Acc2(i)
01516 #define Mul_Acc12(i) Mul_Acc2(i)
01517 #define Mul_Acc13(i) Mul_Acc2(i)
01518 #define Mul_Acc14(i) Mul_Acc2(i)
01519 #define Mul_Acc15(i) Mul_Acc2(i)
01520 #define Mul_Acc16(i) Mul_Acc2(i)
01521
01522 #define Mul_Column1(k, i) \
01523 SSE2_SaveShift(k) \
01524 AS2( add esi, 16) \
01525 SSE2_MulAdd45\
01526 Mul_Acc##i(i) \
01527
01528 #define Mul_Column0(k, i) \
01529 SSE2_SaveShift(k) \
01530 AS2( add edi, 16) \
01531 AS2( add edx, 16) \
01532 SSE2_MulAdd45\
01533 Mul_Acc##i(i) \
01534
01535 #define Bot_Acc(i) \
01536 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01537 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01538 AS2( pmuludq xmm0, xmm1) \
01539 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01540 AS2( paddq xmm4, xmm0) \
01541 AS2( paddd xmm6, xmm1)
01542
01543 #define Bot_SaveAcc(k) \
01544 SSE2_SaveShift(k) \
01545 AS2( add edi, 16) \
01546 AS2( add edx, 16) \
01547 AS2( movdqa xmm6, [esi]) \
01548 AS2( movdqa xmm0, [edi]) \
01549 AS2( pmuludq xmm0, xmm6) \
01550 AS2( paddq xmm4, xmm0) \
01551 AS2( psllq xmm5, 16) \
01552 AS2( paddq xmm4, xmm5) \
01553 AS2( pmuludq xmm6, [edx])
01554
01555 #define Bot_End(n) \
01556 AS2( movhlps xmm7, xmm6) \
01557 AS2( paddd xmm6, xmm7) \
01558 AS2( psllq xmm6, 32) \
01559 AS2( paddd xmm4, xmm6) \
01560 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
01561 AS1( pop esp)\
01562 MulEpilogue
01563
01564 #define Top_Begin(n) \
01565 TopPrologue \
01566 AS2( mov edx, esp)\
01567 AS2( and esp, 0xfffffff0)\
01568 AS2( sub esp, 48*n+16)\
01569 AS1( push edx)\
01570 AS2( xor edx, edx) \
01571 ASL(1) \
01572 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01573 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01574 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01575 AS2( movdqa [esp+20+2*edx], xmm0) \
01576 AS2( psrlq xmm0, 32) \
01577 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01578 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01579 AS2( psrlq xmm1, 32) \
01580 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01581 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01582 AS2( psrlq xmm2, 32) \
01583 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01584 AS2( add edx, 16) \
01585 AS2( cmp edx, 8*(n)) \
01586 ASJ( jne, 1, b) \
01587 AS2( mov eax, esi) \
01588 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
01589 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
01590 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
01591 AS2( pxor xmm4, xmm4)\
01592 AS2( pxor xmm5, xmm5)
01593
01594 #define Top_Acc(i) \
01595 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
01596 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01597 AS2( psrlq xmm0, 48) \
01598 AS2( paddd xmm5, xmm0)\
01599
01600 #define Top_Column0(i) \
01601 AS2( psllq xmm5, 32) \
01602 AS2( add edi, 16) \
01603 AS2( add edx, 16) \
01604 SSE2_MulAdd45\
01605 Mul_Acc##i(i) \
01606
01607 #define Top_Column1(i) \
01608 SSE2_SaveShift(0) \
01609 AS2( add esi, 16) \
01610 SSE2_MulAdd45\
01611 Mul_Acc##i(i) \
01612 AS2( shr eax, 16) \
01613 AS2( movd xmm0, eax)\
01614 AS2( movd xmm1, [ecx+4])\
01615 AS2( psrld xmm1, 16)\
01616 AS2( pcmpgtd xmm1, xmm0)\
01617 AS2( psrld xmm1, 31)\
01618 AS2( paddd xmm4, xmm1)\
01619
01620 void SSE2_Square4(word *C, const word *A)
01621 {
01622 Squ_Begin(2)
01623 Squ_Column0(0, 1)
01624 Squ_End(2)
01625 }
01626
01627 void SSE2_Square8(word *C, const word *A)
01628 {
01629 Squ_Begin(4)
01630 #ifndef __GNUC__
01631 ASJ( jmp, 0, f)
01632 Squ_Acc(2)
01633 AS1( ret) ASL(0)
01634 #endif
01635 Squ_Column0(0, 1)
01636 Squ_Column1(1, 1)
01637 Squ_Column0(2, 2)
01638 Squ_Column1(3, 1)
01639 Squ_Column0(4, 1)
01640 Squ_End(4)
01641 }
01642
01643 void SSE2_Square16(word *C, const word *A)
01644 {
01645 Squ_Begin(8)
01646 #ifndef __GNUC__
01647 ASJ( jmp, 0, f)
01648 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01649 AS1( ret) ASL(0)
01650 #endif
01651 Squ_Column0(0, 1)
01652 Squ_Column1(1, 1)
01653 Squ_Column0(2, 2)
01654 Squ_Column1(3, 2)
01655 Squ_Column0(4, 3)
01656 Squ_Column1(5, 3)
01657 Squ_Column0(6, 4)
01658 Squ_Column1(7, 3)
01659 Squ_Column0(8, 3)
01660 Squ_Column1(9, 2)
01661 Squ_Column0(10, 2)
01662 Squ_Column1(11, 1)
01663 Squ_Column0(12, 1)
01664 Squ_End(8)
01665 }
01666
01667 void SSE2_Square32(word *C, const word *A)
01668 {
01669 Squ_Begin(16)
01670 ASJ( jmp, 0, f)
01671 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01672 AS1( ret) ASL(0)
01673 Squ_Column0(0, 1)
01674 Squ_Column1(1, 1)
01675 Squ_Column0(2, 2)
01676 Squ_Column1(3, 2)
01677 Squ_Column0(4, 3)
01678 Squ_Column1(5, 3)
01679 Squ_Column0(6, 4)
01680 Squ_Column1(7, 4)
01681 Squ_Column0(8, 5)
01682 Squ_Column1(9, 5)
01683 Squ_Column0(10, 6)
01684 Squ_Column1(11, 6)
01685 Squ_Column0(12, 7)
01686 Squ_Column1(13, 7)
01687 Squ_Column0(14, 8)
01688 Squ_Column1(15, 7)
01689 Squ_Column0(16, 7)
01690 Squ_Column1(17, 6)
01691 Squ_Column0(18, 6)
01692 Squ_Column1(19, 5)
01693 Squ_Column0(20, 5)
01694 Squ_Column1(21, 4)
01695 Squ_Column0(22, 4)
01696 Squ_Column1(23, 3)
01697 Squ_Column0(24, 3)
01698 Squ_Column1(25, 2)
01699 Squ_Column0(26, 2)
01700 Squ_Column1(27, 1)
01701 Squ_Column0(28, 1)
01702 Squ_End(16)
01703 }
01704
01705 void SSE2_Multiply4(word *C, const word *A, const word *B)
01706 {
01707 Mul_Begin(2)
01708 #ifndef __GNUC__
01709 ASJ( jmp, 0, f)
01710 Mul_Acc(2)
01711 AS1( ret) ASL(0)
01712 #endif
01713 Mul_Column0(0, 2)
01714 Mul_End(2)
01715 }
01716
01717 void SSE2_Multiply8(word *C, const word *A, const word *B)
01718 {
01719 Mul_Begin(4)
01720 #ifndef __GNUC__
01721 ASJ( jmp, 0, f)
01722 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01723 AS1( ret) ASL(0)
01724 #endif
01725 Mul_Column0(0, 2)
01726 Mul_Column1(1, 3)
01727 Mul_Column0(2, 4)
01728 Mul_Column1(3, 3)
01729 Mul_Column0(4, 2)
01730 Mul_End(4)
01731 }
01732
01733 void SSE2_Multiply16(word *C, const word *A, const word *B)
01734 {
01735 Mul_Begin(8)
01736 #ifndef __GNUC__
01737 ASJ( jmp, 0, f)
01738 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01739 AS1( ret) ASL(0)
01740 #endif
01741 Mul_Column0(0, 2)
01742 Mul_Column1(1, 3)
01743 Mul_Column0(2, 4)
01744 Mul_Column1(3, 5)
01745 Mul_Column0(4, 6)
01746 Mul_Column1(5, 7)
01747 Mul_Column0(6, 8)
01748 Mul_Column1(7, 7)
01749 Mul_Column0(8, 6)
01750 Mul_Column1(9, 5)
01751 Mul_Column0(10, 4)
01752 Mul_Column1(11, 3)
01753 Mul_Column0(12, 2)
01754 Mul_End(8)
01755 }
01756
01757 void SSE2_Multiply32(word *C, const word *A, const word *B)
01758 {
01759 Mul_Begin(16)
01760 ASJ( jmp, 0, f)
01761 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01762 AS1( ret) ASL(0)
01763 Mul_Column0(0, 2)
01764 Mul_Column1(1, 3)
01765 Mul_Column0(2, 4)
01766 Mul_Column1(3, 5)
01767 Mul_Column0(4, 6)
01768 Mul_Column1(5, 7)
01769 Mul_Column0(6, 8)
01770 Mul_Column1(7, 9)
01771 Mul_Column0(8, 10)
01772 Mul_Column1(9, 11)
01773 Mul_Column0(10, 12)
01774 Mul_Column1(11, 13)
01775 Mul_Column0(12, 14)
01776 Mul_Column1(13, 15)
01777 Mul_Column0(14, 16)
01778 Mul_Column1(15, 15)
01779 Mul_Column0(16, 14)
01780 Mul_Column1(17, 13)
01781 Mul_Column0(18, 12)
01782 Mul_Column1(19, 11)
01783 Mul_Column0(20, 10)
01784 Mul_Column1(21, 9)
01785 Mul_Column0(22, 8)
01786 Mul_Column1(23, 7)
01787 Mul_Column0(24, 6)
01788 Mul_Column1(25, 5)
01789 Mul_Column0(26, 4)
01790 Mul_Column1(27, 3)
01791 Mul_Column0(28, 2)
01792 Mul_End(16)
01793 }
01794
01795 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01796 {
01797 Mul_Begin(2)
01798 Bot_SaveAcc(0) Bot_Acc(2)
01799 Bot_End(2)
01800 }
01801
01802 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01803 {
01804 Mul_Begin(4)
01805 #ifndef __GNUC__
01806 ASJ( jmp, 0, f)
01807 Mul_Acc(3) Mul_Acc(2)
01808 AS1( ret) ASL(0)
01809 #endif
01810 Mul_Column0(0, 2)
01811 Mul_Column1(1, 3)
01812 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01813 Bot_End(4)
01814 }
01815
01816 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01817 {
01818 Mul_Begin(8)
01819 #ifndef __GNUC__
01820 ASJ( jmp, 0, f)
01821 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01822 AS1( ret) ASL(0)
01823 #endif
01824 Mul_Column0(0, 2)
01825 Mul_Column1(1, 3)
01826 Mul_Column0(2, 4)
01827 Mul_Column1(3, 5)
01828 Mul_Column0(4, 6)
01829 Mul_Column1(5, 7)
01830 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01831 Bot_End(8)
01832 }
01833
01834 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01835 {
01836 Mul_Begin(16)
01837 #ifndef __GNUC__
01838 ASJ( jmp, 0, f)
01839 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01840 AS1( ret) ASL(0)
01841 #endif
01842 Mul_Column0(0, 2)
01843 Mul_Column1(1, 3)
01844 Mul_Column0(2, 4)
01845 Mul_Column1(3, 5)
01846 Mul_Column0(4, 6)
01847 Mul_Column1(5, 7)
01848 Mul_Column0(6, 8)
01849 Mul_Column1(7, 9)
01850 Mul_Column0(8, 10)
01851 Mul_Column1(9, 11)
01852 Mul_Column0(10, 12)
01853 Mul_Column1(11, 13)
01854 Mul_Column0(12, 14)
01855 Mul_Column1(13, 15)
01856 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01857 Bot_End(16)
01858 }
01859
01860 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01861 {
01862 Top_Begin(4)
01863 Top_Acc(3) Top_Acc(2) Top_Acc(1)
01864 #ifndef __GNUC__
01865 ASJ( jmp, 0, f)
01866 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01867 AS1( ret) ASL(0)
01868 #endif
01869 Top_Column0(4)
01870 Top_Column1(3)
01871 Mul_Column0(0, 2)
01872 Top_End(2)
01873 }
01874
01875 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01876 {
01877 Top_Begin(8)
01878 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01879 #ifndef __GNUC__
01880 ASJ( jmp, 0, f)
01881 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01882 AS1( ret) ASL(0)
01883 #endif
01884 Top_Column0(8)
01885 Top_Column1(7)
01886 Mul_Column0(0, 6)
01887 Mul_Column1(1, 5)
01888 Mul_Column0(2, 4)
01889 Mul_Column1(3, 3)
01890 Mul_Column0(4, 2)
01891 Top_End(4)
01892 }
01893
01894 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01895 {
01896 Top_Begin(16)
01897 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01898 #ifndef __GNUC__
01899 ASJ( jmp, 0, f)
01900 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01901 AS1( ret) ASL(0)
01902 #endif
01903 Top_Column0(16)
01904 Top_Column1(15)
01905 Mul_Column0(0, 14)
01906 Mul_Column1(1, 13)
01907 Mul_Column0(2, 12)
01908 Mul_Column1(3, 11)
01909 Mul_Column0(4, 10)
01910 Mul_Column1(5, 9)
01911 Mul_Column0(6, 8)
01912 Mul_Column1(7, 7)
01913 Mul_Column0(8, 6)
01914 Mul_Column1(9, 5)
01915 Mul_Column0(10, 4)
01916 Mul_Column1(11, 3)
01917 Mul_Column0(12, 2)
01918 Top_End(8)
01919 }
01920
01921 #endif // #if CRYPTOPP_INTEGER_SSE2
01922
01923
01924
01925 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
01926 typedef void (* PMul)(word *C, const word *A, const word *B);
01927 typedef void (* PSqu)(word *C, const word *A);
01928 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
01929
01930 #if CRYPTOPP_INTEGER_SSE2
01931 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
01932 static size_t s_recursionLimit = 8;
01933 #else
01934 static const size_t s_recursionLimit = 16;
01935 #endif
01936
01937 static PMul s_pMul[9], s_pBot[9];
01938 static PSqu s_pSqu[9];
01939 static PMulTop s_pTop[9];
01940
01941 static void SetFunctionPointers()
01942 {
01943 s_pMul[0] = &Baseline_Multiply2;
01944 s_pBot[0] = &Baseline_MultiplyBottom2;
01945 s_pSqu[0] = &Baseline_Square2;
01946 s_pTop[0] = &Baseline_MultiplyTop2;
01947 s_pTop[1] = &Baseline_MultiplyTop4;
01948
01949 #if CRYPTOPP_INTEGER_SSE2
01950 if (HasSSE2())
01951 {
01952 if (IsP4())
01953 {
01954 s_pAdd = &SSE2_Add;
01955 s_pSub = &SSE2_Sub;
01956 }
01957
01958 s_recursionLimit = 32;
01959
01960 s_pMul[1] = &SSE2_Multiply4;
01961 s_pMul[2] = &SSE2_Multiply8;
01962 s_pMul[4] = &SSE2_Multiply16;
01963 s_pMul[8] = &SSE2_Multiply32;
01964
01965 s_pBot[1] = &SSE2_MultiplyBottom4;
01966 s_pBot[2] = &SSE2_MultiplyBottom8;
01967 s_pBot[4] = &SSE2_MultiplyBottom16;
01968 s_pBot[8] = &SSE2_MultiplyBottom32;
01969
01970 s_pSqu[1] = &SSE2_Square4;
01971 s_pSqu[2] = &SSE2_Square8;
01972 s_pSqu[4] = &SSE2_Square16;
01973 s_pSqu[8] = &SSE2_Square32;
01974
01975 s_pTop[2] = &SSE2_MultiplyTop8;
01976 s_pTop[4] = &SSE2_MultiplyTop16;
01977 s_pTop[8] = &SSE2_MultiplyTop32;
01978 }
01979 else
01980 #endif
01981 {
01982 s_pMul[1] = &Baseline_Multiply4;
01983 s_pMul[2] = &Baseline_Multiply8;
01984
01985 s_pBot[1] = &Baseline_MultiplyBottom4;
01986 s_pBot[2] = &Baseline_MultiplyBottom8;
01987
01988 s_pSqu[1] = &Baseline_Square4;
01989 s_pSqu[2] = &Baseline_Square8;
01990
01991 s_pTop[2] = &Baseline_MultiplyTop8;
01992
01993 #if !CRYPTOPP_INTEGER_SSE2
01994 s_pMul[4] = &Baseline_Multiply16;
01995 s_pBot[4] = &Baseline_MultiplyBottom16;
01996 s_pSqu[4] = &Baseline_Square16;
01997 s_pTop[4] = &Baseline_MultiplyTop16;
01998 #endif
01999 }
02000 }
02001
02002 inline int Add(word *C, const word *A, const word *B, size_t N)
02003 {
02004 #if CRYPTOPP_INTEGER_SSE2
02005 return s_pAdd(N, C, A, B);
02006 #else
02007 return Baseline_Add(N, C, A, B);
02008 #endif
02009 }
02010
02011 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02012 {
02013 #if CRYPTOPP_INTEGER_SSE2
02014 return s_pSub(N, C, A, B);
02015 #else
02016 return Baseline_Sub(N, C, A, B);
02017 #endif
02018 }
02019
02020
02021
02022
02023 #define A0 A
02024 #define A1 (A+N2)
02025 #define B0 B
02026 #define B1 (B+N2)
02027
02028 #define T0 T
02029 #define T1 (T+N2)
02030 #define T2 (T+N)
02031 #define T3 (T+N+N2)
02032
02033 #define R0 R
02034 #define R1 (R+N2)
02035 #define R2 (R+N)
02036 #define R3 (R+N+N2)
02037
02038
02039
02040
02041
02042
02043 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02044 {
02045 assert(N>=2 && N%2==0);
02046
02047 if (N <= s_recursionLimit)
02048 s_pMul[N/4](R, A, B);
02049 else
02050 {
02051 const size_t N2 = N/2;
02052
02053 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02054 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02055
02056 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02057 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02058
02059 RecursiveMultiply(R2, T2, A1, B1, N2);
02060 RecursiveMultiply(T0, T2, R0, R1, N2);
02061 RecursiveMultiply(R0, T2, A0, B0, N2);
02062
02063
02064
02065 int c2 = Add(R2, R2, R1, N2);
02066 int c3 = c2;
02067 c2 += Add(R1, R2, R0, N2);
02068 c3 += Add(R2, R2, R3, N2);
02069
02070 if (AN2 == BN2)
02071 c3 -= Subtract(R1, R1, T0, N);
02072 else
02073 c3 += Add(R1, R1, T0, N);
02074
02075 c3 += Increment(R2, N2, c2);
02076 assert (c3 >= 0 && c3 <= 2);
02077 Increment(R3, N2, c3);
02078 }
02079 }
02080
02081
02082
02083
02084
02085 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02086 {
02087 assert(N && N%2==0);
02088
02089 if (N <= s_recursionLimit)
02090 s_pSqu[N/4](R, A);
02091 else
02092 {
02093 const size_t N2 = N/2;
02094
02095 RecursiveSquare(R0, T2, A0, N2);
02096 RecursiveSquare(R2, T2, A1, N2);
02097 RecursiveMultiply(T0, T2, A0, A1, N2);
02098
02099 int carry = Add(R1, R1, T0, N);
02100 carry += Add(R1, R1, T0, N);
02101 Increment(R3, N2, carry);
02102 }
02103 }
02104
02105
02106
02107
02108
02109
02110 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02111 {
02112 assert(N>=2 && N%2==0);
02113
02114 if (N <= s_recursionLimit)
02115 s_pBot[N/4](R, A, B);
02116 else
02117 {
02118 const size_t N2 = N/2;
02119
02120 RecursiveMultiply(R, T, A0, B0, N2);
02121 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02122 Add(R1, R1, T0, N2);
02123 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02124 Add(R1, R1, T0, N2);
02125 }
02126 }
02127
02128
02129
02130
02131
02132
02133
02134 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02135 {
02136 assert(N>=2 && N%2==0);
02137
02138 if (N <= s_recursionLimit)
02139 s_pTop[N/4](R, A, B, L[N-1]);
02140 else
02141 {
02142 const size_t N2 = N/2;
02143
02144 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02145 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02146
02147 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02148 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02149
02150 RecursiveMultiply(T0, T2, R0, R1, N2);
02151 RecursiveMultiply(R0, T2, A1, B1, N2);
02152
02153
02154
02155 int t, c3;
02156 int c2 = Subtract(T2, L+N2, L, N2);
02157
02158 if (AN2 == BN2)
02159 {
02160 c2 -= Add(T2, T2, T0, N2);
02161 t = (Compare(T2, R0, N2) == -1);
02162 c3 = t - Subtract(T2, T2, T1, N2);
02163 }
02164 else
02165 {
02166 c2 += Subtract(T2, T2, T0, N2);
02167 t = (Compare(T2, R0, N2) == -1);
02168 c3 = t + Add(T2, T2, T1, N2);
02169 }
02170
02171 c2 += t;
02172 if (c2 >= 0)
02173 c3 += Increment(T2, N2, c2);
02174 else
02175 c3 -= Decrement(T2, N2, -c2);
02176 c3 += Add(R0, T2, R1, N2);
02177
02178 assert (c3 >= 0 && c3 <= 2);
02179 Increment(R1, N2, c3);
02180 }
02181 }
02182
02183 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02184 {
02185 RecursiveMultiply(R, T, A, B, N);
02186 }
02187
02188 inline void Square(word *R, word *T, const word *A, size_t N)
02189 {
02190 RecursiveSquare(R, T, A, N);
02191 }
02192
02193 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02194 {
02195 RecursiveMultiplyBottom(R, T, A, B, N);
02196 }
02197
02198
02199
02200
02201
02202
02203 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02204 {
02205 if (NA == NB)
02206 {
02207 if (A == B)
02208 Square(R, T, A, NA);
02209 else
02210 Multiply(R, T, A, B, NA);
02211
02212 return;
02213 }
02214
02215 if (NA > NB)
02216 {
02217 std::swap(A, B);
02218 std::swap(NA, NB);
02219 }
02220
02221 assert(NB % NA == 0);
02222
02223 if (NA==2 && !A[1])
02224 {
02225 switch (A[0])
02226 {
02227 case 0:
02228 SetWords(R, 0, NB+2);
02229 return;
02230 case 1:
02231 CopyWords(R, B, NB);
02232 R[NB] = R[NB+1] = 0;
02233 return;
02234 default:
02235 R[NB] = LinearMultiply(R, B, A[0], NB);
02236 R[NB+1] = 0;
02237 return;
02238 }
02239 }
02240
02241 size_t i;
02242 if ((NB/NA)%2 == 0)
02243 {
02244 Multiply(R, T, A, B, NA);
02245 CopyWords(T+2*NA, R+NA, NA);
02246
02247 for (i=2*NA; i<NB; i+=2*NA)
02248 Multiply(T+NA+i, T, A, B+i, NA);
02249 for (i=NA; i<NB; i+=2*NA)
02250 Multiply(R+i, T, A, B+i, NA);
02251 }
02252 else
02253 {
02254 for (i=0; i<NB; i+=2*NA)
02255 Multiply(R+i, T, A, B+i, NA);
02256 for (i=NA; i<NB; i+=2*NA)
02257 Multiply(T+NA+i, T, A, B+i, NA);
02258 }
02259
02260 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02261 Increment(R+NB, NA);
02262 }
02263
02264
02265
02266
02267
02268 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02269 {
02270 if (N==2)
02271 {
02272 T[0] = AtomicInverseModPower2(A[0]);
02273 T[1] = 0;
02274 s_pBot[0](T+2, T, A);
02275 TwosComplement(T+2, 2);
02276 Increment(T+2, 2, 2);
02277 s_pBot[0](R, T, T+2);
02278 }
02279 else
02280 {
02281 const size_t N2 = N/2;
02282 RecursiveInverseModPower2(R0, T0, A0, N2);
02283 T0[0] = 1;
02284 SetWords(T0+1, 0, N2-1);
02285 MultiplyTop(R1, T1, T0, R0, A0, N2);
02286 MultiplyBottom(T0, T1, R0, A1, N2);
02287 Add(T0, R1, T0, N2);
02288 TwosComplement(T0, N2);
02289 MultiplyBottom(R1, T1, R0, T0, N2);
02290 }
02291 }
02292
02293
02294
02295
02296
02297
02298
02299 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02300 {
02301 #if 1
02302 MultiplyBottom(R, T, X, U, N);
02303 MultiplyTop(T, T+N, X, R, M, N);
02304 word borrow = Subtract(T, X+N, T, N);
02305
02306 word carry = Add(T+N, T, M, N);
02307 assert(carry || !borrow);
02308 CopyWords(R, T + (borrow ? N : 0), N);
02309 #elif 0
02310 const word u = 0-U[0];
02311 Declare2Words(p)
02312 for (size_t i=0; i<N; i++)
02313 {
02314 const word t = u * X[i];
02315 word c = 0;
02316 for (size_t j=0; j<N; j+=2)
02317 {
02318 MultiplyWords(p, t, M[j]);
02319 Acc2WordsBy1(p, X[i+j]);
02320 Acc2WordsBy1(p, c);
02321 X[i+j] = LowWord(p);
02322 c = HighWord(p);
02323 MultiplyWords(p, t, M[j+1]);
02324 Acc2WordsBy1(p, X[i+j+1]);
02325 Acc2WordsBy1(p, c);
02326 X[i+j+1] = LowWord(p);
02327 c = HighWord(p);
02328 }
02329
02330 if (Increment(X+N+i, N-i, c))
02331 while (!Subtract(X+N, X+N, M, N)) {}
02332 }
02333
02334 memcpy(R, X+N, N*WORD_SIZE);
02335 #else
02336 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02337 for (size_t i=0; i<N; i++)
02338 {
02339 __m64 t = _mm_cvtsi32_si64(X[i]);
02340 t = _mm_mul_su32(t, u);
02341 __m64 c = _mm_setzero_si64();
02342 for (size_t j=0; j<N; j+=2)
02343 {
02344 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02345 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02346 c = _mm_add_si64(c, p);
02347 X[i+j] = _mm_cvtsi64_si32(c);
02348 c = _mm_srli_si64(c, 32);
02349 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02350 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02351 c = _mm_add_si64(c, p);
02352 X[i+j+1] = _mm_cvtsi64_si32(c);
02353 c = _mm_srli_si64(c, 32);
02354 }
02355
02356 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02357 while (!Subtract(X+N, X+N, M, N)) {}
02358 }
02359
02360 memcpy(R, X+N, N*WORD_SIZE);
02361 _mm_empty();
02362 #endif
02363 }
02364
02365
02366
02367
02368
02369
02370
02371
02372 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02373 {
02374 assert(N%2==0 && N>=4);
02375
02376 #define M0 M
02377 #define M1 (M+N2)
02378 #define V0 V
02379 #define V1 (V+N2)
02380
02381 #define X0 X
02382 #define X1 (X+N2)
02383 #define X2 (X+N)
02384 #define X3 (X+N+N2)
02385
02386 const size_t N2 = N/2;
02387 Multiply(T0, T2, V0, X3, N2);
02388 int c2 = Add(T0, T0, X0, N);
02389 MultiplyBottom(T3, T2, T0, U, N2);
02390 MultiplyTop(T2, R, T0, T3, M0, N2);
02391 c2 -= Subtract(T2, T1, T2, N2);
02392 Multiply(T0, R, T3, M1, N2);
02393 c2 -= Subtract(T0, T2, T0, N2);
02394 int c3 = -(int)Subtract(T1, X2, T1, N2);
02395 Multiply(R0, T2, V1, X3, N2);
02396 c3 += Add(R, R, T, N);
02397
02398 if (c2>0)
02399 c3 += Increment(R1, N2);
02400 else if (c2<0)
02401 c3 -= Decrement(R1, N2, -c2);
02402
02403 assert(c3>=-1 && c3<=1);
02404 if (c3>0)
02405 Subtract(R, R, M, N);
02406 else if (c3<0)
02407 Add(R, R, M, N);
02408
02409 #undef M0
02410 #undef M1
02411 #undef V0
02412 #undef V1
02413
02414 #undef X0
02415 #undef X1
02416 #undef X2
02417 #undef X3
02418 }
02419
02420 #undef A0
02421 #undef A1
02422 #undef B0
02423 #undef B1
02424
02425 #undef T0
02426 #undef T1
02427 #undef T2
02428 #undef T3
02429
02430 #undef R0
02431 #undef R1
02432 #undef R2
02433 #undef R3
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02500 {
02501 word T[4];
02502 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02503 Q[0] = q.GetLowHalf();
02504 Q[1] = q.GetHighHalf();
02505
02506 #ifndef NDEBUG
02507 if (B[0] || B[1])
02508 {
02509
02510 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02511 word P[4];
02512 s_pMul[0](P, Q, B);
02513 Add(P, P, T, 4);
02514 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02515 }
02516 #endif
02517 }
02518
02519
02520 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02521 {
02522 assert(N && N%2==0);
02523
02524 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02525
02526 word borrow = Subtract(R, R, T, N+2);
02527 assert(!borrow && !R[N+1]);
02528
02529 while (R[N] || Compare(R, B, N) >= 0)
02530 {
02531 R[N] -= Subtract(R, R, B, N);
02532 Q[1] += (++Q[0]==0);
02533 assert(Q[0] || Q[1]);
02534 }
02535 }
02536
02537
02538
02539
02540
02541
02542
02543 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02544 {
02545 assert(NA && NB && NA%2==0 && NB%2==0);
02546 assert(B[NB-1] || B[NB-2]);
02547 assert(NB <= NA);
02548
02549
02550 word *const TA=T;
02551 word *const TB=T+NA+2;
02552 word *const TP=T+NA+2+NB;
02553
02554
02555 unsigned shiftWords = (B[NB-1]==0);
02556 TB[0] = TB[NB-1] = 0;
02557 CopyWords(TB+shiftWords, B, NB-shiftWords);
02558 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02559 assert(shiftBits < WORD_BITS);
02560 ShiftWordsLeftByBits(TB, NB, shiftBits);
02561
02562
02563 TA[0] = TA[NA] = TA[NA+1] = 0;
02564 CopyWords(TA+shiftWords, A, NA);
02565 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02566
02567 if (TA[NA+1]==0 && TA[NA] <= 1)
02568 {
02569 Q[NA-NB+1] = Q[NA-NB] = 0;
02570 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02571 {
02572 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02573 ++Q[NA-NB];
02574 }
02575 }
02576 else
02577 {
02578 NA+=2;
02579 assert(Compare(TA+NA-NB, TB, NB) < 0);
02580 }
02581
02582 word BT[2];
02583 BT[0] = TB[NB-2] + 1;
02584 BT[1] = TB[NB-1] + (BT[0]==0);
02585
02586
02587 for (size_t i=NA-2; i>=NB; i-=2)
02588 {
02589 AtomicDivide(Q+i-NB, TA+i-2, BT);
02590 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02591 }
02592
02593
02594 CopyWords(R, TA+shiftWords, NB);
02595 ShiftWordsRightByBits(R, NB, shiftBits);
02596 }
02597
02598 static inline size_t EvenWordCount(const word *X, size_t N)
02599 {
02600 while (N && X[N-2]==0 && X[N-1]==0)
02601 N-=2;
02602 return N;
02603 }
02604
02605
02606
02607
02608
02609
02610
02611 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02612 {
02613 assert(NA<=N && N && N%2==0);
02614
02615 word *b = T;
02616 word *c = T+N;
02617 word *f = T+2*N;
02618 word *g = T+3*N;
02619 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02620 unsigned int k=0, s=0;
02621
02622 SetWords(T, 0, 3*N);
02623 b[0]=1;
02624 CopyWords(f, A, NA);
02625 CopyWords(g, M, N);
02626
02627 while (1)
02628 {
02629 word t=f[0];
02630 while (!t)
02631 {
02632 if (EvenWordCount(f, fgLen)==0)
02633 {
02634 SetWords(R, 0, N);
02635 return 0;
02636 }
02637
02638 ShiftWordsRightByWords(f, fgLen, 1);
02639 if (c[bcLen-1]) bcLen+=2;
02640 assert(bcLen <= N);
02641 ShiftWordsLeftByWords(c, bcLen, 1);
02642 k+=WORD_BITS;
02643 t=f[0];
02644 }
02645
02646 unsigned int i=0;
02647 while (t%2 == 0)
02648 {
02649 t>>=1;
02650 i++;
02651 }
02652 k+=i;
02653
02654 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02655 {
02656 if (s%2==0)
02657 CopyWords(R, b, N);
02658 else
02659 Subtract(R, M, b, N);
02660 return k;
02661 }
02662
02663 ShiftWordsRightByBits(f, fgLen, i);
02664 t=ShiftWordsLeftByBits(c, bcLen, i);
02665 if (t)
02666 {
02667 c[bcLen] = t;
02668 bcLen+=2;
02669 assert(bcLen <= N);
02670 }
02671
02672 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02673 fgLen-=2;
02674
02675 if (Compare(f, g, fgLen)==-1)
02676 {
02677 std::swap(f, g);
02678 std::swap(b, c);
02679 s++;
02680 }
02681
02682 Subtract(f, f, g, fgLen);
02683
02684 if (Add(b, b, c, bcLen))
02685 {
02686 b[bcLen] = 1;
02687 bcLen+=2;
02688 assert(bcLen <= N);
02689 }
02690 }
02691 }
02692
02693
02694
02695
02696
02697 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02698 {
02699 CopyWords(R, A, N);
02700
02701 while (k--)
02702 {
02703 if (R[0]%2==0)
02704 ShiftWordsRightByBits(R, N, 1);
02705 else
02706 {
02707 word carry = Add(R, R, M, N);
02708 ShiftWordsRightByBits(R, N, 1);
02709 R[N-1] += carry<<(WORD_BITS-1);
02710 }
02711 }
02712 }
02713
02714
02715
02716
02717
02718 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02719 {
02720 CopyWords(R, A, N);
02721
02722 while (k--)
02723 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02724 Subtract(R, R, M, N);
02725 }
02726
02727
02728
02729 InitializeInteger::InitializeInteger()
02730 {
02731 if (!g_pAssignIntToInteger)
02732 {
02733 SetFunctionPointers();
02734 g_pAssignIntToInteger = AssignIntToInteger;
02735 }
02736 }
02737
02738 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02739
02740 static inline size_t RoundupSize(size_t n)
02741 {
02742 if (n<=8)
02743 return RoundupSizeTable[n];
02744 else if (n<=16)
02745 return 16;
02746 else if (n<=32)
02747 return 32;
02748 else if (n<=64)
02749 return 64;
02750 else return size_t(1) << BitPrecision(n-1);
02751 }
02752
02753 Integer::Integer()
02754 : reg(2), sign(POSITIVE)
02755 {
02756 reg[0] = reg[1] = 0;
02757 }
02758
02759 Integer::Integer(const Integer& t)
02760 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02761 {
02762 CopyWords(reg, t.reg, reg.size());
02763 }
02764
02765 Integer::Integer(Sign s, lword value)
02766 : reg(2), sign(s)
02767 {
02768 reg[0] = word(value);
02769 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02770 }
02771
02772 Integer::Integer(signed long value)
02773 : reg(2)
02774 {
02775 if (value >= 0)
02776 sign = POSITIVE;
02777 else
02778 {
02779 sign = NEGATIVE;
02780 value = -value;
02781 }
02782 reg[0] = word(value);
02783 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02784 }
02785
02786 Integer::Integer(Sign s, word high, word low)
02787 : reg(2), sign(s)
02788 {
02789 reg[0] = low;
02790 reg[1] = high;
02791 }
02792
02793 bool Integer::IsConvertableToLong() const
02794 {
02795 if (ByteCount() > sizeof(long))
02796 return false;
02797
02798 unsigned long value = (unsigned long)reg[0];
02799 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02800
02801 if (sign==POSITIVE)
02802 return (signed long)value >= 0;
02803 else
02804 return -(signed long)value < 0;
02805 }
02806
02807 signed long Integer::ConvertToLong() const
02808 {
02809 assert(IsConvertableToLong());
02810
02811 unsigned long value = (unsigned long)reg[0];
02812 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02813 return sign==POSITIVE ? value : -(signed long)value;
02814 }
02815
02816 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02817 {
02818 Decode(encodedInteger, byteCount, s);
02819 }
02820
02821 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02822 {
02823 Decode(encodedInteger, byteCount, s);
02824 }
02825
02826 Integer::Integer(BufferedTransformation &bt)
02827 {
02828 BERDecode(bt);
02829 }
02830
02831 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02832 {
02833 Randomize(rng, bitcount);
02834 }
02835
02836 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02837 {
02838 if (!Randomize(rng, min, max, rnType, equiv, mod))
02839 throw Integer::RandomNumberNotFound();
02840 }
02841
02842 Integer Integer::Power2(size_t e)
02843 {
02844 Integer r((word)0, BitsToWords(e+1));
02845 r.SetBit(e);
02846 return r;
02847 }
02848
02849 template <long i>
02850 struct NewInteger
02851 {
02852 Integer * operator()() const
02853 {
02854 return new Integer(i);
02855 }
02856 };
02857
02858 const Integer &Integer::Zero()
02859 {
02860 return Singleton<Integer>().Ref();
02861 }
02862
02863 const Integer &Integer::One()
02864 {
02865 return Singleton<Integer, NewInteger<1> >().Ref();
02866 }
02867
02868 const Integer &Integer::Two()
02869 {
02870 return Singleton<Integer, NewInteger<2> >().Ref();
02871 }
02872
02873 bool Integer::operator!() const
02874 {
02875 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02876 }
02877
02878 Integer& Integer::operator=(const Integer& t)
02879 {
02880 if (this != &t)
02881 {
02882 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02883 reg.New(RoundupSize(t.WordCount()));
02884 CopyWords(reg, t.reg, reg.size());
02885 sign = t.sign;
02886 }
02887 return *this;
02888 }
02889
02890 bool Integer::GetBit(size_t n) const
02891 {
02892 if (n/WORD_BITS >= reg.size())
02893 return 0;
02894 else
02895 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02896 }
02897
02898 void Integer::SetBit(size_t n, bool value)
02899 {
02900 if (value)
02901 {
02902 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02903 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02904 }
02905 else
02906 {
02907 if (n/WORD_BITS < reg.size())
02908 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02909 }
02910 }
02911
02912 byte Integer::GetByte(size_t n) const
02913 {
02914 if (n/WORD_SIZE >= reg.size())
02915 return 0;
02916 else
02917 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02918 }
02919
02920 void Integer::SetByte(size_t n, byte value)
02921 {
02922 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02923 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02924 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02925 }
02926
02927 lword Integer::GetBits(size_t i, size_t n) const
02928 {
02929 lword v = 0;
02930 assert(n <= sizeof(v)*8);
02931 for (unsigned int j=0; j<n; j++)
02932 v |= lword(GetBit(i+j)) << j;
02933 return v;
02934 }
02935
02936 Integer Integer::operator-() const
02937 {
02938 Integer result(*this);
02939 result.Negate();
02940 return result;
02941 }
02942
02943 Integer Integer::AbsoluteValue() const
02944 {
02945 Integer result(*this);
02946 result.sign = POSITIVE;
02947 return result;
02948 }
02949
02950 void Integer::swap(Integer &a)
02951 {
02952 reg.swap(a.reg);
02953 std::swap(sign, a.sign);
02954 }
02955
02956 Integer::Integer(word value, size_t length)
02957 : reg(RoundupSize(length)), sign(POSITIVE)
02958 {
02959 reg[0] = value;
02960 SetWords(reg+1, 0, reg.size()-1);
02961 }
02962
02963 template <class T>
02964 static Integer StringToInteger(const T *str)
02965 {
02966 int radix;
02967
02968
02969 unsigned int length;
02970 for (length = 0; str[length] != 0; length++) {}
02971
02972 Integer v;
02973
02974 if (length == 0)
02975 return v;
02976
02977 switch (str[length-1])
02978 {
02979 case 'h':
02980 case 'H':
02981 radix=16;
02982 break;
02983 case 'o':
02984 case 'O':
02985 radix=8;
02986 break;
02987 case 'b':
02988 case 'B':
02989 radix=2;
02990 break;
02991 default:
02992 radix=10;
02993 }
02994
02995 if (length > 2 && str[0] == '0' && str[1] == 'x')
02996 radix = 16;
02997
02998 for (unsigned i=0; i<length; i++)
02999 {
03000 int digit;
03001
03002 if (str[i] >= '0' && str[i] <= '9')
03003 digit = str[i] - '0';
03004 else if (str[i] >= 'A' && str[i] <= 'F')
03005 digit = str[i] - 'A' + 10;
03006 else if (str[i] >= 'a' && str[i] <= 'f')
03007 digit = str[i] - 'a' + 10;
03008 else
03009 digit = radix;
03010
03011 if (digit < radix)
03012 {
03013 v *= radix;
03014 v += digit;
03015 }
03016 }
03017
03018 if (str[0] == '-')
03019 v.Negate();
03020
03021 return v;
03022 }
03023
03024 Integer::Integer(const char *str)
03025 : reg(2), sign(POSITIVE)
03026 {
03027 *this = StringToInteger(str);
03028 }
03029
03030 Integer::Integer(const wchar_t *str)
03031 : reg(2), sign(POSITIVE)
03032 {
03033 *this = StringToInteger(str);
03034 }
03035
03036 unsigned int Integer::WordCount() const
03037 {
03038 return (unsigned int)CountWords(reg, reg.size());
03039 }
03040
03041 unsigned int Integer::ByteCount() const
03042 {
03043 unsigned wordCount = WordCount();
03044 if (wordCount)
03045 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03046 else
03047 return 0;
03048 }
03049
03050 unsigned int Integer::BitCount() const
03051 {
03052 unsigned wordCount = WordCount();
03053 if (wordCount)
03054 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03055 else
03056 return 0;
03057 }
03058
03059 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03060 {
03061 StringStore store(input, inputLen);
03062 Decode(store, inputLen, s);
03063 }
03064
03065 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03066 {
03067 assert(bt.MaxRetrievable() >= inputLen);
03068
03069 byte b;
03070 bt.Peek(b);
03071 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03072
03073 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03074 {
03075 bt.Skip(1);
03076 inputLen--;
03077 bt.Peek(b);
03078 }
03079
03080 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03081
03082 for (size_t i=inputLen; i > 0; i--)
03083 {
03084 bt.Get(b);
03085 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03086 }
03087
03088 if (sign == NEGATIVE)
03089 {
03090 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03091 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03092 TwosComplement(reg, reg.size());
03093 }
03094 }
03095
03096 size_t Integer::MinEncodedSize(Signedness signedness) const
03097 {
03098 unsigned int outputLen = STDMAX(1U, ByteCount());
03099 if (signedness == UNSIGNED)
03100 return outputLen;
03101 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03102 outputLen++;
03103 if (IsNegative() && *this < -Power2(outputLen*8-1))
03104 outputLen++;
03105 return outputLen;
03106 }
03107
03108 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03109 {
03110 ArraySink sink(output, outputLen);
03111 Encode(sink, outputLen, signedness);
03112 }
03113
03114 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03115 {
03116 if (signedness == UNSIGNED || NotNegative())
03117 {
03118 for (size_t i=outputLen; i > 0; i--)
03119 bt.Put(GetByte(i-1));
03120 }
03121 else
03122 {
03123
03124 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03125 temp.Encode(bt, outputLen, UNSIGNED);
03126 }
03127 }
03128
03129 void Integer::DEREncode(BufferedTransformation &bt) const
03130 {
03131 DERGeneralEncoder enc(bt, INTEGER);
03132 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03133 enc.MessageEnd();
03134 }
03135
03136 void Integer::BERDecode(const byte *input, size_t len)
03137 {
03138 StringStore store(input, len);
03139 BERDecode(store);
03140 }
03141
03142 void Integer::BERDecode(BufferedTransformation &bt)
03143 {
03144 BERGeneralDecoder dec(bt, INTEGER);
03145 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03146 BERDecodeError();
03147 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03148 dec.MessageEnd();
03149 }
03150
03151 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03152 {
03153 DERGeneralEncoder enc(bt, OCTET_STRING);
03154 Encode(enc, length);
03155 enc.MessageEnd();
03156 }
03157
03158 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03159 {
03160 BERGeneralDecoder dec(bt, OCTET_STRING);
03161 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03162 BERDecodeError();
03163 Decode(dec, length);
03164 dec.MessageEnd();
03165 }
03166
03167 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03168 {
03169 ArraySink sink(output, len);
03170 return OpenPGPEncode(sink);
03171 }
03172
03173 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03174 {
03175 word16 bitCount = BitCount();
03176 bt.PutWord16(bitCount);
03177 size_t byteCount = BitsToBytes(bitCount);
03178 Encode(bt, byteCount);
03179 return 2 + byteCount;
03180 }
03181
03182 void Integer::OpenPGPDecode(const byte *input, size_t len)
03183 {
03184 StringStore store(input, len);
03185 OpenPGPDecode(store);
03186 }
03187
03188 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03189 {
03190 word16 bitCount;
03191 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03192 throw OpenPGPDecodeErr();
03193 Decode(bt, BitsToBytes(bitCount));
03194 }
03195
03196 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03197 {
03198 const size_t nbytes = nbits/8 + 1;
03199 SecByteBlock buf(nbytes);
03200 rng.GenerateBlock(buf, nbytes);
03201 if (nbytes)
03202 buf[0] = (byte)Crop(buf[0], nbits % 8);
03203 Decode(buf, nbytes, UNSIGNED);
03204 }
03205
03206 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03207 {
03208 if (min > max)
03209 throw InvalidArgument("Integer: Min must be no greater than Max");
03210
03211 Integer range = max - min;
03212 const unsigned int nbits = range.BitCount();
03213
03214 do
03215 {
03216 Randomize(rng, nbits);
03217 }
03218 while (*this > range);
03219
03220 *this += min;
03221 }
03222
03223 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03224 {
03225 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03226 }
03227
03228 class KDF2_RNG : public RandomNumberGenerator
03229 {
03230 public:
03231 KDF2_RNG(const byte *seed, size_t seedSize)
03232 : m_counter(0), m_counterAndSeed(seedSize + 4)
03233 {
03234 memcpy(m_counterAndSeed + 4, seed, seedSize);
03235 }
03236
03237 void GenerateBlock(byte *output, size_t size)
03238 {
03239 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03240 ++m_counter;
03241 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03242 }
03243
03244 private:
03245 word32 m_counter;
03246 SecByteBlock m_counterAndSeed;
03247 };
03248
03249 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03250 {
03251 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03252 Integer max;
03253 if (!params.GetValue("Max", max))
03254 {
03255 int bitLength;
03256 if (params.GetIntValue("BitLength", bitLength))
03257 max = Integer::Power2(bitLength);
03258 else
03259 throw InvalidArgument("Integer: missing Max argument");
03260 }
03261 if (min > max)
03262 throw InvalidArgument("Integer: Min must be no greater than Max");
03263
03264 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03265 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03266
03267 if (equiv.IsNegative() || equiv >= mod)
03268 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03269
03270 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03271
03272 member_ptr<KDF2_RNG> kdf2Rng;
03273 ConstByteArrayParameter seed;
03274 if (params.GetValue("Seed", seed))
03275 {
03276 ByteQueue bq;
03277 DERSequenceEncoder seq(bq);
03278 min.DEREncode(seq);
03279 max.DEREncode(seq);
03280 equiv.DEREncode(seq);
03281 mod.DEREncode(seq);
03282 DEREncodeUnsigned(seq, rnType);
03283 DEREncodeOctetString(seq, seed.begin(), seed.size());
03284 seq.MessageEnd();
03285
03286 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03287 bq.Get(finalSeed, finalSeed.size());
03288 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03289 }
03290 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03291
03292 switch (rnType)
03293 {
03294 case ANY:
03295 if (mod == One())
03296 Randomize(rng, min, max);
03297 else
03298 {
03299 Integer min1 = min + (equiv-min)%mod;
03300 if (max < min1)
03301 return false;
03302 Randomize(rng, Zero(), (max - min1) / mod);
03303 *this *= mod;
03304 *this += min1;
03305 }
03306 return true;
03307
03308 case PRIME:
03309 {
03310 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03311
03312 int i;
03313 i = 0;
03314 while (1)
03315 {
03316 if (++i==16)
03317 {
03318
03319 Integer first = min;
03320 if (FirstPrime(first, max, equiv, mod, pSelector))
03321 {
03322
03323 *this = first;
03324 if (!FirstPrime(first, max, equiv, mod, pSelector))
03325 return true;
03326 }
03327 else
03328 return false;
03329 }
03330
03331 Randomize(rng, min, max);
03332 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03333 return true;
03334 }
03335 }
03336
03337 default:
03338 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03339 }
03340 }
03341
03342 std::istream& operator>>(std::istream& in, Integer &a)
03343 {
03344 char c;
03345 unsigned int length = 0;
03346 SecBlock<char> str(length + 16);
03347
03348 std::ws(in);
03349
03350 do
03351 {
03352 in.read(&c, 1);
03353 str[length++] = c;
03354 if (length >= str.size())
03355 str.Grow(length + 16);
03356 }
03357 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03358
03359 if (in.gcount())
03360 in.putback(c);
03361 str[length-1] = '\0';
03362 a = Integer(str);
03363
03364 return in;
03365 }
03366
03367 std::ostream& operator<<(std::ostream& out, const Integer &a)
03368 {
03369
03370 long f = out.flags() & std::ios::basefield;
03371 int base, block;
03372 char suffix;
03373 switch(f)
03374 {
03375 case std::ios::oct :
03376 base = 8;
03377 block = 8;
03378 suffix = 'o';
03379 break;
03380 case std::ios::hex :
03381 base = 16;
03382 block = 4;
03383 suffix = 'h';
03384 break;
03385 default :
03386 base = 10;
03387 block = 3;
03388 suffix = '.';
03389 }
03390
03391 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03392 Integer temp1=a, temp2;
03393 unsigned i=0;
03394 const char vec[]="0123456789ABCDEF";
03395
03396 if (a.IsNegative())
03397 {
03398 out << '-';
03399 temp1.Negate();
03400 }
03401
03402 if (!a)
03403 out << '0';
03404
03405 while (!!temp1)
03406 {
03407 word digit;
03408 Integer::Divide(digit, temp2, temp1, base);
03409 s[i++]=vec[digit];
03410 temp1=temp2;
03411 }
03412
03413 while (i--)
03414 {
03415 out << s[i];
03416
03417
03418 }
03419 return out << suffix;
03420 }
03421
03422 Integer& Integer::operator++()
03423 {
03424 if (NotNegative())
03425 {
03426 if (Increment(reg, reg.size()))
03427 {
03428 reg.CleanGrow(2*reg.size());
03429 reg[reg.size()/2]=1;
03430 }
03431 }
03432 else
03433 {
03434 word borrow = Decrement(reg, reg.size());
03435 assert(!borrow);
03436 if (WordCount()==0)
03437 *this = Zero();
03438 }
03439 return *this;
03440 }
03441
03442 Integer& Integer::operator--()
03443 {
03444 if (IsNegative())
03445 {
03446 if (Increment(reg, reg.size()))
03447 {
03448 reg.CleanGrow(2*reg.size());
03449 reg[reg.size()/2]=1;
03450 }
03451 }
03452 else
03453 {
03454 if (Decrement(reg, reg.size()))
03455 *this = -One();
03456 }
03457 return *this;
03458 }
03459
03460 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03461 {
03462 int carry;
03463 if (a.reg.size() == b.reg.size())
03464 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03465 else if (a.reg.size() > b.reg.size())
03466 {
03467 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03468 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03469 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03470 }
03471 else
03472 {
03473 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03474 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03475 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03476 }
03477
03478 if (carry)
03479 {
03480 sum.reg.CleanGrow(2*sum.reg.size());
03481 sum.reg[sum.reg.size()/2] = 1;
03482 }
03483 sum.sign = Integer::POSITIVE;
03484 }
03485
03486 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03487 {
03488 unsigned aSize = a.WordCount();
03489 aSize += aSize%2;
03490 unsigned bSize = b.WordCount();
03491 bSize += bSize%2;
03492
03493 if (aSize == bSize)
03494 {
03495 if (Compare(a.reg, b.reg, aSize) >= 0)
03496 {
03497 Subtract(diff.reg, a.reg, b.reg, aSize);
03498 diff.sign = Integer::POSITIVE;
03499 }
03500 else
03501 {
03502 Subtract(diff.reg, b.reg, a.reg, aSize);
03503 diff.sign = Integer::NEGATIVE;
03504 }
03505 }
03506 else if (aSize > bSize)
03507 {
03508 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03509 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03510 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03511 assert(!borrow);
03512 diff.sign = Integer::POSITIVE;
03513 }
03514 else
03515 {
03516 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03517 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03518 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03519 assert(!borrow);
03520 diff.sign = Integer::NEGATIVE;
03521 }
03522 }
03523
03524
03525 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03526 {
03527 return a < b ? b : a;
03528 }
03529
03530 Integer Integer::Plus(const Integer& b) const
03531 {
03532 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03533 if (NotNegative())
03534 {
03535 if (b.NotNegative())
03536 PositiveAdd(sum, *this, b);
03537 else
03538 PositiveSubtract(sum, *this, b);
03539 }
03540 else
03541 {
03542 if (b.NotNegative())
03543 PositiveSubtract(sum, b, *this);
03544 else
03545 {
03546 PositiveAdd(sum, *this, b);
03547 sum.sign = Integer::NEGATIVE;
03548 }
03549 }
03550 return sum;
03551 }
03552
03553 Integer& Integer::operator+=(const Integer& t)
03554 {
03555 reg.CleanGrow(t.reg.size());
03556 if (NotNegative())
03557 {
03558 if (t.NotNegative())
03559 PositiveAdd(*this, *this, t);
03560 else
03561 PositiveSubtract(*this, *this, t);
03562 }
03563 else
03564 {
03565 if (t.NotNegative())
03566 PositiveSubtract(*this, t, *this);
03567 else
03568 {
03569 PositiveAdd(*this, *this, t);
03570 sign = Integer::NEGATIVE;
03571 }
03572 }
03573 return *this;
03574 }
03575
03576 Integer Integer::Minus(const Integer& b) const
03577 {
03578 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03579 if (NotNegative())
03580 {
03581 if (b.NotNegative())
03582 PositiveSubtract(diff, *this, b);
03583 else
03584 PositiveAdd(diff, *this, b);
03585 }
03586 else
03587 {
03588 if (b.NotNegative())
03589 {
03590 PositiveAdd(diff, *this, b);
03591 diff.sign = Integer::NEGATIVE;
03592 }
03593 else
03594 PositiveSubtract(diff, b, *this);
03595 }
03596 return diff;
03597 }
03598
03599 Integer& Integer::operator-=(const Integer& t)
03600 {
03601 reg.CleanGrow(t.reg.size());
03602 if (NotNegative())
03603 {
03604 if (t.NotNegative())
03605 PositiveSubtract(*this, *this, t);
03606 else
03607 PositiveAdd(*this, *this, t);
03608 }
03609 else
03610 {
03611 if (t.NotNegative())
03612 {
03613 PositiveAdd(*this, *this, t);
03614 sign = Integer::NEGATIVE;
03615 }
03616 else
03617 PositiveSubtract(*this, t, *this);
03618 }
03619 return *this;
03620 }
03621
03622 Integer& Integer::operator<<=(size_t n)
03623 {
03624 const size_t wordCount = WordCount();
03625 const size_t shiftWords = n / WORD_BITS;
03626 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03627
03628 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03629 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03630 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03631 return *this;
03632 }
03633
03634 Integer& Integer::operator>>=(size_t n)
03635 {
03636 const size_t wordCount = WordCount();
03637 const size_t shiftWords = n / WORD_BITS;
03638 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03639
03640 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03641 if (wordCount > shiftWords)
03642 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03643 if (IsNegative() && WordCount()==0)
03644 *this = Zero();
03645 return *this;
03646 }
03647
03648 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03649 {
03650 size_t aSize = RoundupSize(a.WordCount());
03651 size_t bSize = RoundupSize(b.WordCount());
03652
03653 product.reg.CleanNew(RoundupSize(aSize+bSize));
03654 product.sign = Integer::POSITIVE;
03655
03656 IntegerSecBlock workspace(aSize + bSize);
03657 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03658 }
03659
03660 void Multiply(Integer &product, const Integer &a, const Integer &b)
03661 {
03662 PositiveMultiply(product, a, b);
03663
03664 if (a.NotNegative() != b.NotNegative())
03665 product.Negate();
03666 }
03667
03668 Integer Integer::Times(const Integer &b) const
03669 {
03670 Integer product;
03671 Multiply(product, *this, b);
03672 return product;
03673 }
03674
03675
03676
03677
03678
03679
03680
03681
03682
03683
03684
03685
03686
03687
03688
03689
03690
03691
03692
03693
03694
03695
03696
03697 void PositiveDivide(Integer &remainder, Integer "ient,
03698 const Integer &a, const Integer &b)
03699 {
03700 unsigned aSize = a.WordCount();
03701 unsigned bSize = b.WordCount();
03702
03703 if (!bSize)
03704 throw Integer::DivideByZero();
03705
03706 if (a.PositiveCompare(b) == -1)
03707 {
03708 remainder = a;
03709 remainder.sign = Integer::POSITIVE;
03710 quotient = Integer::Zero();
03711 return;
03712 }
03713
03714 aSize += aSize%2;
03715 bSize += bSize%2;
03716
03717 remainder.reg.CleanNew(RoundupSize(bSize));
03718 remainder.sign = Integer::POSITIVE;
03719 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03720 quotient.sign = Integer::POSITIVE;
03721
03722 IntegerSecBlock T(aSize+3*(bSize+2));
03723 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03724 }
03725
03726 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03727 {
03728 PositiveDivide(remainder, quotient, dividend, divisor);
03729
03730 if (dividend.IsNegative())
03731 {
03732 quotient.Negate();
03733 if (remainder.NotZero())
03734 {
03735 --quotient;
03736 remainder = divisor.AbsoluteValue() - remainder;
03737 }
03738 }
03739
03740 if (divisor.IsNegative())
03741 quotient.Negate();
03742 }
03743
03744 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03745 {
03746 q = a;
03747 q >>= n;
03748
03749 const size_t wordCount = BitsToWords(n);
03750 if (wordCount <= a.WordCount())
03751 {
03752 r.reg.resize(RoundupSize(wordCount));
03753 CopyWords(r.reg, a.reg, wordCount);
03754 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03755 if (n % WORD_BITS != 0)
03756 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03757 }
03758 else
03759 {
03760 r.reg.resize(RoundupSize(a.WordCount()));
03761 CopyWords(r.reg, a.reg, r.reg.size());
03762 }
03763 r.sign = POSITIVE;
03764
03765 if (a.IsNegative() && r.NotZero())
03766 {
03767 --q;
03768 r = Power2(n) - r;
03769 }
03770 }
03771
03772 Integer Integer::DividedBy(const Integer &b) const
03773 {
03774 Integer remainder, quotient;
03775 Integer::Divide(remainder, quotient, *this, b);
03776 return quotient;
03777 }
03778
03779 Integer Integer::Modulo(const Integer &b) const
03780 {
03781 Integer remainder, quotient;
03782 Integer::Divide(remainder, quotient, *this, b);
03783 return remainder;
03784 }
03785
03786 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03787 {
03788 if (!divisor)
03789 throw Integer::DivideByZero();
03790
03791 assert(divisor);
03792
03793 if ((divisor & (divisor-1)) == 0)
03794 {
03795 quotient = dividend >> (BitPrecision(divisor)-1);
03796 remainder = dividend.reg[0] & (divisor-1);
03797 return;
03798 }
03799
03800 unsigned int i = dividend.WordCount();
03801 quotient.reg.CleanNew(RoundupSize(i));
03802 remainder = 0;
03803 while (i--)
03804 {
03805 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03806 remainder = DWord(dividend.reg[i], remainder) % divisor;
03807 }
03808
03809 if (dividend.NotNegative())
03810 quotient.sign = POSITIVE;
03811 else
03812 {
03813 quotient.sign = NEGATIVE;
03814 if (remainder)
03815 {
03816 --quotient;
03817 remainder = divisor - remainder;
03818 }
03819 }
03820 }
03821
03822 Integer Integer::DividedBy(word b) const
03823 {
03824 word remainder;
03825 Integer quotient;
03826 Integer::Divide(remainder, quotient, *this, b);
03827 return quotient;
03828 }
03829
03830 word Integer::Modulo(word divisor) const
03831 {
03832 if (!divisor)
03833 throw Integer::DivideByZero();
03834
03835 assert(divisor);
03836
03837 word remainder;
03838
03839 if ((divisor & (divisor-1)) == 0)
03840 remainder = reg[0] & (divisor-1);
03841 else
03842 {
03843 unsigned int i = WordCount();
03844
03845 if (divisor <= 5)
03846 {
03847 DWord sum(0, 0);
03848 while (i--)
03849 sum += reg[i];
03850 remainder = sum % divisor;
03851 }
03852 else
03853 {
03854 remainder = 0;
03855 while (i--)
03856 remainder = DWord(reg[i], remainder) % divisor;
03857 }
03858 }
03859
03860 if (IsNegative() && remainder)
03861 remainder = divisor - remainder;
03862
03863 return remainder;
03864 }
03865
03866 void Integer::Negate()
03867 {
03868 if (!!(*this))
03869 sign = Sign(1-sign);
03870 }
03871
03872 int Integer::PositiveCompare(const Integer& t) const
03873 {
03874 unsigned size = WordCount(), tSize = t.WordCount();
03875
03876 if (size == tSize)
03877 return CryptoPP::Compare(reg, t.reg, size);
03878 else
03879 return size > tSize ? 1 : -1;
03880 }
03881
03882 int Integer::Compare(const Integer& t) const
03883 {
03884 if (NotNegative())
03885 {
03886 if (t.NotNegative())
03887 return PositiveCompare(t);
03888 else
03889 return 1;
03890 }
03891 else
03892 {
03893 if (t.NotNegative())
03894 return -1;
03895 else
03896 return -PositiveCompare(t);
03897 }
03898 }
03899
03900 Integer Integer::SquareRoot() const
03901 {
03902 if (!IsPositive())
03903 return Zero();
03904
03905
03906 Integer x, y = Power2((BitCount()+1)/2);
03907 assert(y*y >= *this);
03908
03909 do
03910 {
03911 x = y;
03912 y = (x + *this/x) >> 1;
03913 } while (y<x);
03914
03915 return x;
03916 }
03917
03918 bool Integer::IsSquare() const
03919 {
03920 Integer r = SquareRoot();
03921 return *this == r.Squared();
03922 }
03923
03924 bool Integer::IsUnit() const
03925 {
03926 return (WordCount() == 1) && (reg[0] == 1);
03927 }
03928
03929 Integer Integer::MultiplicativeInverse() const
03930 {
03931 return IsUnit() ? *this : Zero();
03932 }
03933
03934 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03935 {
03936 return x*y%m;
03937 }
03938
03939 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03940 {
03941 ModularArithmetic mr(m);
03942 return mr.Exponentiate(x, e);
03943 }
03944
03945 Integer Integer::Gcd(const Integer &a, const Integer &b)
03946 {
03947 return EuclideanDomainOf<Integer>().Gcd(a, b);
03948 }
03949
03950 Integer Integer::InverseMod(const Integer &m) const
03951 {
03952 assert(m.NotNegative());
03953
03954 if (IsNegative() || *this>=m)
03955 return (*this%m).InverseMod(m);
03956
03957 if (m.IsEven())
03958 {
03959 if (!m || IsEven())
03960 return Zero();
03961 if (*this == One())
03962 return One();
03963
03964 Integer u = m.InverseMod(*this);
03965 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03966 }
03967
03968 SecBlock<word> T(m.reg.size() * 4);
03969 Integer r((word)0, m.reg.size());
03970 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03971 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03972 return r;
03973 }
03974
03975 word Integer::InverseMod(word mod) const
03976 {
03977 word g0 = mod, g1 = *this % mod;
03978 word v0 = 0, v1 = 1;
03979 word y;
03980
03981 while (g1)
03982 {
03983 if (g1 == 1)
03984 return v1;
03985 y = g0 / g1;
03986 g0 = g0 % g1;
03987 v0 += y * v1;
03988
03989 if (!g0)
03990 break;
03991 if (g0 == 1)
03992 return mod-v0;
03993 y = g1 / g0;
03994 g1 = g1 % g0;
03995 v1 += y * v0;
03996 }
03997 return 0;
03998 }
03999
04000
04001
04002 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04003 {
04004 BERSequenceDecoder seq(bt);
04005 OID oid(seq);
04006 if (oid != ASN1::prime_field())
04007 BERDecodeError();
04008 m_modulus.BERDecode(seq);
04009 seq.MessageEnd();
04010 m_result.reg.resize(m_modulus.reg.size());
04011 }
04012
04013 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04014 {
04015 DERSequenceEncoder seq(bt);
04016 ASN1::prime_field().DEREncode(seq);
04017 m_modulus.DEREncode(seq);
04018 seq.MessageEnd();
04019 }
04020
04021 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04022 {
04023 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04024 }
04025
04026 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04027 {
04028 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04029 }
04030
04031 const Integer& ModularArithmetic::Half(const Integer &a) const
04032 {
04033 if (a.reg.size()==m_modulus.reg.size())
04034 {
04035 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04036 return m_result;
04037 }
04038 else
04039 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04040 }
04041
04042 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04043 {
04044 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04045 {
04046 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04047 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04048 {
04049 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04050 }
04051 return m_result;
04052 }
04053 else
04054 {
04055 m_result1 = a+b;
04056 if (m_result1 >= m_modulus)
04057 m_result1 -= m_modulus;
04058 return m_result1;
04059 }
04060 }
04061
04062 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04063 {
04064 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04065 {
04066 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04067 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04068 {
04069 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04070 }
04071 }
04072 else
04073 {
04074 a+=b;
04075 if (a>=m_modulus)
04076 a-=m_modulus;
04077 }
04078
04079 return a;
04080 }
04081
04082 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04083 {
04084 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04085 {
04086 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04087 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04088 return m_result;
04089 }
04090 else
04091 {
04092 m_result1 = a-b;
04093 if (m_result1.IsNegative())
04094 m_result1 += m_modulus;
04095 return m_result1;
04096 }
04097 }
04098
04099 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04100 {
04101 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04102 {
04103 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04104 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04105 }
04106 else
04107 {
04108 a-=b;
04109 if (a.IsNegative())
04110 a+=m_modulus;
04111 }
04112
04113 return a;
04114 }
04115
04116 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04117 {
04118 if (!a)
04119 return a;
04120
04121 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04122 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04123 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04124
04125 return m_result;
04126 }
04127
04128 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04129 {
04130 if (m_modulus.IsOdd())
04131 {
04132 MontgomeryRepresentation dr(m_modulus);
04133 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04134 }
04135 else
04136 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04137 }
04138
04139 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04140 {
04141 if (m_modulus.IsOdd())
04142 {
04143 MontgomeryRepresentation dr(m_modulus);
04144 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04145 for (unsigned int i=0; i<exponentsCount; i++)
04146 results[i] = dr.ConvertOut(results[i]);
04147 }
04148 else
04149 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04150 }
04151
04152 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04153 : ModularArithmetic(m),
04154 m_u((word)0, m_modulus.reg.size()),
04155 m_workspace(5*m_modulus.reg.size())
04156 {
04157 if (!m_modulus.IsOdd())
04158 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04159
04160 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04161 }
04162
04163 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04164 {
04165 word *const T = m_workspace.begin();
04166 word *const R = m_result.reg.begin();
04167 const size_t N = m_modulus.reg.size();
04168 assert(a.reg.size()<=N && b.reg.size()<=N);
04169
04170 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04171 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04172 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04173 return m_result;
04174 }
04175
04176 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04177 {
04178 word *const T = m_workspace.begin();
04179 word *const R = m_result.reg.begin();
04180 const size_t N = m_modulus.reg.size();
04181 assert(a.reg.size()<=N);
04182
04183 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04184 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04185 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04186 return m_result;
04187 }
04188
04189 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04190 {
04191 word *const T = m_workspace.begin();
04192 word *const R = m_result.reg.begin();
04193 const size_t N = m_modulus.reg.size();
04194 assert(a.reg.size()<=N);
04195
04196 CopyWords(T, a.reg, a.reg.size());
04197 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04198 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04199 return m_result;
04200 }
04201
04202 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04203 {
04204
04205 word *const T = m_workspace.begin();
04206 word *const R = m_result.reg.begin();
04207 const size_t N = m_modulus.reg.size();
04208 assert(a.reg.size()<=N);
04209
04210 CopyWords(T, a.reg, a.reg.size());
04211 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04212 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04213 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04214
04215
04216
04217 if (k>N*WORD_BITS)
04218 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04219 else
04220 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04221
04222 return m_result;
04223 }
04224
04225 NAMESPACE_END
04226
04227 #endif