mirror of
https://github.com/lcn2/calc.git
synced 2025-08-16 01:03:29 +03:00
Converted all ASCII tabs to ASCII spaces using a 8 character tab stop, for all files, except for all Makefiles (plus rpm.mk). The `git diff -w` reports no changes.
1108 lines
40 KiB
C
1108 lines
40 KiB
C
/*
|
|
* zmul - faster than usual multiplying and squaring routines
|
|
*
|
|
* Copyright (C) 1999-2007,2021-2023 David I. Bell
|
|
*
|
|
* Calc is open software; you can redistribute it and/or modify it under
|
|
* the terms of the version 2.1 of the GNU Lesser General Public License
|
|
* as published by the Free Software Foundation.
|
|
*
|
|
* Calc is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
|
|
* Public License for more details.
|
|
*
|
|
* A copy of version 2.1 of the GNU Lesser General Public License is
|
|
* distributed with calc under the filename COPYING-LGPL. You should have
|
|
* received a copy with calc; if not, write to Free Software Foundation, Inc.
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* Under source code control: 1991/01/09 20:01:31
|
|
* File existed as early as: 1991
|
|
*
|
|
* Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/
|
|
*/
|
|
|
|
/*
|
|
* Faster than usual multiplying and squaring routines.
|
|
* The algorithm used is the reasonably simple one from Knuth, volume 2,
|
|
* section 4.3.3. These recursive routines are of speed O(N^1.585)
|
|
* instead of O(N^2). The usual multiplication and (almost usual) squaring
|
|
* algorithms are used for small numbers. On a 386 with its compiler, the
|
|
* two algorithms are equal in speed at about 100 decimal digits.
|
|
*/
|
|
|
|
|
|
#include "config.h"
|
|
#include "zmath.h"
|
|
|
|
|
|
#include "errtbl.h"
|
|
#include "banned.h" /* include after system header <> includes */
|
|
|
|
|
|
STATIC HALF *tempbuf; /* temporary buffer for multiply and square */
|
|
|
|
S_FUNC LEN domul(HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans);
|
|
S_FUNC LEN dosquare(HALF *vp, LEN size, HALF *ans);
|
|
|
|
|
|
/*
|
|
* Multiply two numbers using the following formula recursively:
|
|
* (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
|
|
* where S is a power of 2^16, and so multiplies by it are shifts, and
|
|
* A,B,C,D are the left and right HALFs of the numbers to be multiplied.
|
|
*
|
|
* given:
|
|
* z1 numbers to multiply
|
|
* z2 numbers to multiply
|
|
* res result of multiplication
|
|
*/
|
|
void
|
|
zmul(ZVALUE z1, ZVALUE z2, ZVALUE *res)
|
|
{
|
|
LEN len; /* size of array */
|
|
|
|
if (ziszero(z1) || ziszero(z2)) {
|
|
*res = _zero_;
|
|
return;
|
|
}
|
|
if (zisunit(z1)) {
|
|
zcopy(z2, res);
|
|
res->sign = (z1.sign != z2.sign);
|
|
return;
|
|
}
|
|
if (zisunit(z2)) {
|
|
zcopy(z1, res);
|
|
res->sign = (z1.sign != z2.sign);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Allocate a temporary buffer for the recursion levels to use.
|
|
* An array needs to be allocated large enough for all of the
|
|
* temporary results to fit in. This size is about twice the size
|
|
* of the largest original number, since each recursion level uses
|
|
* the size of its given number, and whose size is 1/2 the size of
|
|
* the previous level. The sum of the infinite series is 2.
|
|
* Add some extra words because of rounding when dividing by 2
|
|
* and also because of the extra word that each multiply needs.
|
|
*/
|
|
len = z1.len;
|
|
if (len < z2.len)
|
|
len = z2.len;
|
|
len = 2 * len + 64;
|
|
tempbuf = zalloctemp(len);
|
|
|
|
res->sign = (z1.sign != z2.sign);
|
|
res->v = alloc(z1.len + z2.len + 2);
|
|
res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);
|
|
}
|
|
|
|
|
|
/*
|
|
* Recursive routine to multiply two numbers by splitting them up into
|
|
* two numbers of half the size, and using the results of multiplying the
|
|
* sub-pieces. The result is placed in the indicated array, which must be
|
|
* large enough for the result plus one extra word (size1 + size2 + 1).
|
|
* Returns the actual size of the result with leading zeroes stripped.
|
|
* This also uses a temporary array which must be twice as large as
|
|
* one more than the size of the number at the top level recursive call.
|
|
*
|
|
* given:
|
|
* v1 first number
|
|
* size1 size of first number
|
|
* v2 second number
|
|
* size2 size of second number
|
|
* ans location for result
|
|
*/
|
|
S_FUNC LEN
|
|
domul(HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans)
|
|
{
|
|
LEN shift; /* amount numbers are shifted by */
|
|
LEN sizeA; /* size of left half of first number */
|
|
LEN sizeB; /* size of right half of first number */
|
|
LEN sizeC; /* size of left half of second number */
|
|
LEN sizeD; /* size of right half of second number */
|
|
LEN sizeAB; /* size of subtraction of A and B */
|
|
LEN sizeDC; /* size of subtraction of D and C */
|
|
LEN sizeABDC; /* size of product of above two results */
|
|
LEN subsize; /* size of difference of HALFs */
|
|
LEN copysize; /* size of number left to copy */
|
|
LEN sizetotal; /* total size of product */
|
|
LEN len; /* temporary length */
|
|
HALF *baseA; /* base of left half of first number */
|
|
HALF *baseB; /* base of right half of first number */
|
|
HALF *baseC; /* base of left half of second number */
|
|
HALF *baseD; /* base of right half of second number */
|
|
HALF *baseAB; /* base of result of subtraction of A and B */
|
|
HALF *baseDC; /* base of result of subtraction of D and C */
|
|
HALF *baseABDC; /* base of product of above two results */
|
|
HALF *baseAC; /* base of product of A and C */
|
|
HALF *baseBD; /* base of product of B and D */
|
|
FULL carry; /* carry digit for small multiplications */
|
|
FULL carryACBD; /* carry from addition of A*C and B*D */
|
|
FULL digit; /* single digit multiplying by */
|
|
HALF *temp; /* base for temporary calculations */
|
|
bool neg; /* whether intermediate term is negative */
|
|
register HALF *hd, *h1=NULL, *h2=NULL; /* for inner loops */
|
|
SIUNION sival; /* for addition of digits */
|
|
|
|
/* firewall */
|
|
if (v1 == NULL) {
|
|
math_error("%s: v1 NULL", __func__);
|
|
not_reached();
|
|
}
|
|
if (ans == NULL) {
|
|
math_error("%s: ans NULL", __func__);
|
|
not_reached();
|
|
}
|
|
|
|
/*
|
|
* Trim the numbers of leading zeroes and initialize the
|
|
* estimated size of the result.
|
|
*/
|
|
hd = &v1[size1 - 1];
|
|
while ((*hd == 0) && (size1 > 1)) {
|
|
hd--;
|
|
size1--;
|
|
}
|
|
hd = &v2[size2 - 1];
|
|
while ((*hd == 0) && (size2 > 1)) {
|
|
hd--;
|
|
size2--;
|
|
}
|
|
sizetotal = size1 + size2;
|
|
|
|
/*
|
|
* First check for zero answer.
|
|
*/
|
|
if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) {
|
|
*ans = 0;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Exchange the two numbers if necessary to make the number of
|
|
* digits of the first number be greater than or equal to the
|
|
* second number.
|
|
*/
|
|
if (size1 < size2) {
|
|
len = size1; size1 = size2; size2 = len;
|
|
hd = v1; v1 = v2; v2 = hd;
|
|
}
|
|
|
|
/*
|
|
* If the smaller number has only a few digits, then calculate
|
|
* the result in the normal manner in order to avoid the overhead
|
|
* of the recursion for small numbers. The number of digits where
|
|
* the algorithm changes is settable from 2 to maxint.
|
|
*/
|
|
if (size2 < conf->mul2) {
|
|
/*
|
|
* First clear the top part of the result, and then multiply
|
|
* by the lowest digit to get the first partial sum. Later
|
|
* products will then add into this result.
|
|
*/
|
|
hd = &ans[size1];
|
|
len = size2;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
digit = *v2++;
|
|
h1 = v1;
|
|
hd = ans;
|
|
carry = 0;
|
|
len = size1;
|
|
while (len >= 4) { /* expand the loop some */
|
|
len -= 4;
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
/* ignore Saber-C warning #112 - get ushort from uint */
|
|
/* OK to ignore on name domul`sival */
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
*hd = (HALF)carry;
|
|
|
|
/*
|
|
* Now multiply by the remaining digits of the second number,
|
|
* adding each product into the final result.
|
|
*/
|
|
h2 = ans;
|
|
while (--size2 > 0) {
|
|
digit = *v2++;
|
|
h1 = v1;
|
|
hd = ++h2;
|
|
if (digit == 0)
|
|
continue;
|
|
carry = 0;
|
|
len = size1;
|
|
while (len >= 4) { /* expand the loop some */
|
|
len -= 4;
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now return the true size of the number.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
hd--;
|
|
len--;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* Need to multiply by a large number.
|
|
* Allocate temporary space for calculations, and calculate the
|
|
* value for the shift. The shift value is 1/2 the size of the
|
|
* larger (first) number (rounded up). The amount of temporary
|
|
* space needed is twice the size of the shift, plus one more word
|
|
* for the multiply to use.
|
|
*/
|
|
shift = (size1 + 1) / 2;
|
|
temp = tempbuf;
|
|
tempbuf += (2 * shift) + 1;
|
|
|
|
/*
|
|
* Determine the sizes and locations of all the numbers.
|
|
* The value of sizeC can be negative, and this is checked later.
|
|
* The value of sizeD is limited by the full size of the number.
|
|
*/
|
|
baseA = v1 + shift;
|
|
baseB = v1;
|
|
baseC = v2 + ((shift <= size2) ? shift : size2);
|
|
baseD = v2;
|
|
baseAB = ans;
|
|
baseDC = ans + shift;
|
|
baseAC = ans + shift * 2;
|
|
baseBD = ans;
|
|
|
|
sizeA = size1 - shift;
|
|
sizeC = size2 - shift;
|
|
|
|
sizeB = shift;
|
|
hd = &baseB[shift - 1];
|
|
while ((*hd == 0) && (sizeB > 1)) {
|
|
hd--;
|
|
sizeB--;
|
|
}
|
|
|
|
sizeD = shift;
|
|
if (sizeD > size2)
|
|
sizeD = size2;
|
|
hd = &baseD[sizeD - 1];
|
|
while ((*hd == 0) && (sizeD > 1)) {
|
|
hd--;
|
|
sizeD--;
|
|
}
|
|
|
|
/*
|
|
* If the smaller number has a high half of zero, then calculate
|
|
* the result by breaking up the first number into two numbers
|
|
* and combining the results using the obvious formula:
|
|
* (A*S+B) * D = (A*D)*S + B*D
|
|
*/
|
|
if (sizeC <= 0) {
|
|
len = domul(baseB, sizeB, baseD, sizeD, ans);
|
|
hd = &ans[len];
|
|
len = sizetotal - len;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
/*
|
|
* Add the second number into the first number, shifted
|
|
* over at the correct position.
|
|
*/
|
|
len = domul(baseA, sizeA, baseD, sizeD, temp);
|
|
h1 = temp;
|
|
hd = ans + shift;
|
|
carry = 0;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Determine the final size of the number and return it.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
hd--;
|
|
len--;
|
|
}
|
|
tempbuf = temp;
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* Now we know that the high HALFs of the numbers are nonzero,
|
|
* so we can use the complete formula.
|
|
* (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D.
|
|
* The steps are done in the following order:
|
|
* A-B
|
|
* D-C
|
|
* (A-B)*(D-C)
|
|
* S^2*A*C + B*D
|
|
* (S^2+S)*A*C + (S+1)*B*D (*)
|
|
* (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
|
|
*
|
|
* Note: step (*) above can produce a result which is larger than
|
|
* the final product will be, and this is where the extra word
|
|
* needed in the product comes from. After the final subtraction is
|
|
* done, the result fits in the expected size. Using the extra word
|
|
* is easier than suppressing the carries and borrows everywhere.
|
|
*
|
|
* Begin by forming the product (A-B)*(D-C) into a temporary
|
|
* location that we save until the final step. Do each subtraction
|
|
* at positions 0 and S. Be very careful about the relative sizes
|
|
* of the numbers since this result can be negative. For the first
|
|
* step calculate the absolute difference of A and B into a temporary
|
|
* location at position 0 of the result. Negate the sign if A is
|
|
* smaller than B.
|
|
*/
|
|
neg = false;
|
|
if (sizeA == sizeB) {
|
|
len = sizeA;
|
|
h1 = &baseA[len - 1];
|
|
h2 = &baseB[len - 1];
|
|
while ((len > 1) && (*h1 == *h2)) {
|
|
len--;
|
|
h1--;
|
|
h2--;
|
|
}
|
|
}
|
|
if ((sizeA > sizeB) || ((sizeA == sizeB) && h1 && h2 && (*h1 > *h2))) {
|
|
h1 = baseA;
|
|
h2 = baseB;
|
|
sizeAB = sizeA;
|
|
subsize = sizeB;
|
|
} else {
|
|
neg = !neg;
|
|
h1 = baseB;
|
|
h2 = baseA;
|
|
sizeAB = sizeB;
|
|
subsize = sizeA;
|
|
}
|
|
copysize = sizeAB - subsize;
|
|
|
|
hd = baseAB;
|
|
carry = 0;
|
|
while (subsize--) {
|
|
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (copysize--) {
|
|
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
hd = &baseAB[sizeAB - 1];
|
|
while ((*hd == 0) && (sizeAB > 1)) {
|
|
hd--;
|
|
sizeAB--;
|
|
}
|
|
|
|
/*
|
|
* This completes the calculation of abs(A-B). For the next step
|
|
* calculate the absolute difference of D and C into a temporary
|
|
* location at position S of the result. Negate the sign if C is
|
|
* larger than D.
|
|
*/
|
|
if (sizeC == sizeD) {
|
|
len = sizeC;
|
|
h1 = &baseC[len - 1];
|
|
h2 = &baseD[len - 1];
|
|
while ((len > 1) && (*h1 == *h2)) {
|
|
len--;
|
|
h1--;
|
|
h2--;
|
|
}
|
|
}
|
|
if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2))) {
|
|
neg = !neg;
|
|
h1 = baseC;
|
|
h2 = baseD;
|
|
sizeDC = sizeC;
|
|
subsize = sizeD;
|
|
} else {
|
|
h1 = baseD;
|
|
h2 = baseC;
|
|
sizeDC = sizeD;
|
|
subsize = sizeC;
|
|
}
|
|
copysize = sizeDC - subsize;
|
|
|
|
hd = baseDC;
|
|
carry = 0;
|
|
while (subsize--) {
|
|
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (copysize--) {
|
|
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
hd = &baseDC[sizeDC - 1];
|
|
while ((*hd == 0) && (sizeDC > 1)) {
|
|
hd--;
|
|
sizeDC--;
|
|
}
|
|
|
|
/*
|
|
* This completes the calculation of abs(D-C). Now multiply
|
|
* together abs(A-B) and abs(D-C) into a temporary location,
|
|
* which is preserved until the final steps.
|
|
*/
|
|
baseABDC = temp;
|
|
sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC);
|
|
|
|
/*
|
|
* Now calculate B*D and A*C into one of their two final locations.
|
|
* Make sure the high order digits of the products are zeroed since
|
|
* this initializes the final result. Be careful about this zeroing
|
|
* since the size of the high order words might be smaller than
|
|
* the shift size. Do B*D first since the multiplies use one more
|
|
* word than the size of the product. Also zero the final extra
|
|
* word in the result for possible carries to use.
|
|
*/
|
|
len = domul(baseB, sizeB, baseD, sizeD, baseBD);
|
|
hd = &baseBD[len];
|
|
len = shift * 2 - len;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
len = domul(baseA, sizeA, baseC, sizeC, baseAC);
|
|
hd = &baseAC[len];
|
|
len = sizetotal - shift * 2 - len + 1;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
/*
|
|
* Now add in A*C and B*D into themselves at the other shifted
|
|
* position that they need. This addition is tricky in order to
|
|
* make sure that the two additions cannot interfere with each other.
|
|
* Therefore we first add in the top half of B*D and the lower half
|
|
* of A*C. The sources and destinations of these two additions
|
|
* overlap, and so the same answer results from the two additions,
|
|
* thus only two pointers suffice for both additions. Keep the
|
|
* final carry from these additions for later use since we cannot
|
|
* afford to change the top half of A*C yet.
|
|
*/
|
|
h1 = baseBD + shift;
|
|
h2 = baseAC;
|
|
carryACBD = 0;
|
|
len = shift;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD;
|
|
*h1++ = sival.silow;
|
|
*h2++ = sival.silow;
|
|
carryACBD = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Now add in the bottom half of B*D and the top half of A*C.
|
|
* These additions are straightforward, except that A*C should
|
|
* be done first because of possible carries from B*D, and the
|
|
* top half of A*C might not exist. Add in one of the carries
|
|
* from the previous addition while we are at it.
|
|
*/
|
|
h1 = baseAC + shift;
|
|
hd = baseAC;
|
|
carry = carryACBD;
|
|
len = sizetotal - 3 * shift;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
h1 = baseBD;
|
|
hd = baseBD + shift;
|
|
carry = 0;
|
|
len = shift;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Now finally add in the other delayed carry from the
|
|
* above addition.
|
|
*/
|
|
hd = baseAC + shift;
|
|
while (carryACBD) {
|
|
sival.ivalue = ((FULL) *hd) + carryACBD;
|
|
*hd++ = sival.silow;
|
|
carryACBD = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Now finally add or subtract (A-B)*(D-C) into the final result at
|
|
* the correct position (S), according to whether it is positive or
|
|
* negative. When subtracting, the answer cannot go negative.
|
|
*/
|
|
h1 = baseABDC;
|
|
hd = ans + shift;
|
|
carry = 0;
|
|
len = sizeABDC;
|
|
if (neg) {
|
|
while (len--) {
|
|
sival.ivalue = BASE1 - ((FULL) *hd) +
|
|
((FULL) *h1++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = BASE1 - ((FULL) *hd) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
} else {
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Finally determine the size of the final result and return that.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
hd--;
|
|
len--;
|
|
}
|
|
tempbuf = temp;
|
|
return len;
|
|
}
|
|
|
|
|
|
/*
|
|
* Square a number by using the following formula recursively:
|
|
* (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2
|
|
* where S is a power of 2^16, and so multiplies by it are shifts,
|
|
* and A and B are the left and right HALFs of the number to square.
|
|
*/
|
|
void
|
|
zsquare(ZVALUE z, ZVALUE *res)
|
|
{
|
|
LEN len;
|
|
|
|
/* firewall */
|
|
if (res == NULL) {
|
|
math_error("%s: res NULL", __func__);
|
|
not_reached();
|
|
}
|
|
|
|
if (ziszero(z)) {
|
|
*res = _zero_;
|
|
return;
|
|
}
|
|
if (zisunit(z)) {
|
|
*res = _one_;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Allocate a temporary array if necessary for the recursion to use.
|
|
* The array needs to be allocated large enough for all of the
|
|
* temporary results to fit in. This size is about 3 times the
|
|
* size of the original number, since each recursion level uses 3/2
|
|
* of the size of its given number, and whose size is 1/2 the size
|
|
* of the previous level. The sum of the infinite series is 3.
|
|
* Allocate some extra words for rounding up the sizes.
|
|
*/
|
|
len = 3 * z.len + 32;
|
|
tempbuf = zalloctemp(len);
|
|
|
|
res->sign = 0;
|
|
res->v = alloc((z.len+2) * 2);
|
|
/*
|
|
* Without the memset below, Purify reports that dosquare()
|
|
* will read uninitialized memory at the dosquare() line below
|
|
* the comment:
|
|
*
|
|
* uninitialized memory read (see zsquare)
|
|
*
|
|
* This problem occurs during regression test #622 and may
|
|
* be duplicated by executing:
|
|
*
|
|
* config("sq2", 2);
|
|
* 0xffff0000ffffffff00000000ffff0000000000000000ffff^2;
|
|
*/
|
|
memset((char *)res->v, 0, ((z.len+2) * 2)*sizeof(HALF));
|
|
res->len = dosquare(z.v, z.len, res->v);
|
|
}
|
|
|
|
|
|
/*
|
|
* Recursive routine to square a number by splitting it up into two numbers
|
|
* of half the size, and using the results of squaring the sub-pieces.
|
|
* The result is placed in the indicated array, which must be large
|
|
* enough for the result (size * 2). Returns the size of the result.
|
|
* This uses a temporary array which must be 3 times as large as the
|
|
* size of the number at the top level recursive call.
|
|
*
|
|
* given:
|
|
* vp value to be squared
|
|
* size length of value to square
|
|
* ans location for result
|
|
*/
|
|
S_FUNC LEN
|
|
dosquare(HALF *vp, LEN size, HALF *ans)
|
|
{
|
|
LEN shift; /* amount numbers are shifted by */
|
|
LEN sizeA; /* size of left half of number to square */
|
|
LEN sizeB; /* size of right half of number to square */
|
|
LEN sizeAA; /* size of square of left half */
|
|
LEN sizeBB; /* size of square of right half */
|
|
LEN sizeAABB; /* size of sum of squares of A and B */
|
|
LEN sizeAB; /* size of difference of A and B */
|
|
LEN sizeABAB; /* size of square of difference of A and B */
|
|
LEN subsize; /* size of difference of HALFs */
|
|
LEN copysize; /* size of number left to copy */
|
|
LEN sumsize; /* size of sum */
|
|
LEN sizetotal; /* total size of square */
|
|
LEN len; /* temporary length */
|
|
LEN len1; /* another temporary length */
|
|
FULL carry; /* carry digit for small multiplications */
|
|
FULL digit; /* single digit multiplying by */
|
|
HALF *temp; /* base for temporary calculations */
|
|
HALF *baseA; /* base of left half of number */
|
|
HALF *baseB; /* base of right half of number */
|
|
HALF *baseAA; /* base of square of left half of number */
|
|
HALF *baseBB; /* base of square of right half of number */
|
|
HALF *baseAABB; /* base of sum of squares of A and B */
|
|
HALF *baseAB; /* base of difference of A and B */
|
|
HALF *baseABAB; /* base of square of difference of A and B */
|
|
register HALF *hd, *h1, *h2, *h3; /* for inner loops */
|
|
SIUNION sival; /* for addition of digits */
|
|
|
|
/* firewall */
|
|
if (vp == NULL) {
|
|
math_error("%s: vp NULL", __func__);
|
|
not_reached();
|
|
}
|
|
if (ans == NULL) {
|
|
math_error("%s: ans NULL", __func__);
|
|
not_reached();
|
|
}
|
|
|
|
/*
|
|
* First trim the number of leading zeroes.
|
|
*/
|
|
hd = &vp[size - 1];
|
|
while ((*hd == 0) && (size > 1)) {
|
|
size--;
|
|
hd--;
|
|
}
|
|
sizetotal = size + size;
|
|
|
|
/*
|
|
* If the number has only a small number of digits, then use the
|
|
* (almost) normal multiplication method. Multiply each halfword
|
|
* only by those halfwords further on in the number. Missed terms
|
|
* will then be the same pairs of products repeated, and the squares
|
|
* of each halfword. The first case is handled by doubling the
|
|
* result. The second case is handled explicitly. The number of
|
|
* digits where the algorithm changes is settable from 2 to maxint.
|
|
*/
|
|
if (size < conf->sq2) {
|
|
hd = ans;
|
|
len = sizetotal;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
h2 = vp;
|
|
hd = ans + 1;
|
|
for (len = size; len--; hd += 2) {
|
|
digit = (FULL) *h2++;
|
|
if (digit == 0)
|
|
continue;
|
|
h3 = h2;
|
|
h1 = hd;
|
|
carry = 0;
|
|
len1 = len;
|
|
while (len1 >= 4) { /* expand the loop some */
|
|
len1 -= 4;
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + carry;
|
|
*h1++ = sival.silow;
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + ((FULL) sival.sihigh);
|
|
*h1++ = sival.silow;
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + ((FULL) sival.sihigh);
|
|
*h1++ = sival.silow;
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + ((FULL) sival.sihigh);
|
|
*h1++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (len1--) {
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + carry;
|
|
*h1++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *h1) + carry;
|
|
*h1++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now double the result.
|
|
* There is no final carry to worry about because we
|
|
* handle all digits of the result which must fit.
|
|
*/
|
|
carry = 0;
|
|
hd = ans;
|
|
len = sizetotal;
|
|
while (len--) {
|
|
digit = ((FULL) *hd);
|
|
sival.ivalue = digit + digit + carry;
|
|
/* ignore Saber-C warning #112 - get ushort from uint */
|
|
/* OK to ignore on name dosquare`sival */
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Now add in the squares of each halfword.
|
|
*/
|
|
carry = 0;
|
|
hd = ans;
|
|
h3 = vp;
|
|
len = size;
|
|
while (len--) {
|
|
digit = ((FULL) *h3++);
|
|
sival.ivalue = digit * digit + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Finally return the size of the result.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
len--;
|
|
hd--;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* The number to be squared is large.
|
|
* Allocate temporary space and determine the sizes and
|
|
* positions of the values to be calculated.
|
|
*/
|
|
temp = tempbuf;
|
|
tempbuf += (3 * (size + 1) / 2);
|
|
|
|
sizeA = size / 2;
|
|
sizeB = size - sizeA;
|
|
shift = sizeB;
|
|
baseA = vp + sizeB;
|
|
baseB = vp;
|
|
baseAA = &ans[shift * 2];
|
|
baseBB = ans;
|
|
baseAABB = temp;
|
|
baseAB = temp;
|
|
baseABAB = &temp[shift];
|
|
|
|
/*
|
|
* Trim the second number of leading zeroes.
|
|
*/
|
|
hd = &baseB[sizeB - 1];
|
|
while ((*hd == 0) && (sizeB > 1)) {
|
|
sizeB--;
|
|
hd--;
|
|
}
|
|
|
|
/*
|
|
* Now to proceed to calculate the result using the formula.
|
|
* (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
|
|
* The steps are done in the following order:
|
|
* S^2*A^2 + B^2
|
|
* A^2 + B^2
|
|
* (S^2+S)*A^2 + (S+1)*B^2
|
|
* (A-B)^2
|
|
* (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
|
|
*
|
|
* Begin by forming the squares of two the HALFs concatenated
|
|
* together in the final result location. Make sure that the
|
|
* highest words of the results are zero.
|
|
*/
|
|
sizeBB = dosquare(baseB, sizeB, baseBB);
|
|
hd = &baseBB[sizeBB];
|
|
len = shift * 2 - sizeBB;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
sizeAA = dosquare(baseA, sizeA, baseAA);
|
|
hd = &baseAA[sizeAA];
|
|
len = sizetotal - shift * 2 - sizeAA;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
/*
|
|
* Sum the two squares into a temporary location.
|
|
*/
|
|
if (sizeAA >= sizeBB) {
|
|
h1 = baseAA;
|
|
h2 = baseBB;
|
|
sizeAABB = sizeAA;
|
|
sumsize = sizeBB;
|
|
} else {
|
|
h1 = baseBB;
|
|
h2 = baseAA;
|
|
sizeAABB = sizeBB;
|
|
sumsize = sizeAA;
|
|
}
|
|
copysize = sizeAABB - sumsize;
|
|
|
|
hd = baseAABB;
|
|
carry = 0;
|
|
while (sumsize--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *h2++) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (copysize--) {
|
|
sival.ivalue = ((FULL) *h1++) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
if (carry) {
|
|
*hd = (HALF)carry;
|
|
sizeAABB++;
|
|
}
|
|
|
|
/*
|
|
* Add the sum back into the previously calculated squares
|
|
* shifted over to the proper location.
|
|
*/
|
|
h1 = baseAABB;
|
|
hd = ans + shift;
|
|
carry = 0;
|
|
len = sizeAABB;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *hd) + ((FULL) *h1++) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
/* uninitialized memory read (see zsquare) */
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Calculate the absolute value of the difference of the two HALFs
|
|
* into a temporary location.
|
|
*/
|
|
if (sizeA == sizeB) {
|
|
len = sizeA;
|
|
h1 = &baseA[len - 1];
|
|
h2 = &baseB[len - 1];
|
|
while ((len > 1) && (*h1 == *h2)) {
|
|
len--;
|
|
h1--;
|
|
h2--;
|
|
}
|
|
}
|
|
if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2))) {
|
|
h1 = baseA;
|
|
h2 = baseB;
|
|
sizeAB = sizeA;
|
|
subsize = sizeB;
|
|
} else {
|
|
h1 = baseB;
|
|
h2 = baseA;
|
|
sizeAB = sizeB;
|
|
subsize = sizeA;
|
|
}
|
|
copysize = sizeAB - subsize;
|
|
|
|
hd = baseAB;
|
|
carry = 0;
|
|
while (subsize--) {
|
|
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (copysize--) {
|
|
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
hd = &baseAB[sizeAB - 1];
|
|
while ((*hd == 0) && (sizeAB > 1)) {
|
|
sizeAB--;
|
|
hd--;
|
|
}
|
|
|
|
/*
|
|
* Now square the number into another temporary location,
|
|
* and subtract that from the final result.
|
|
*/
|
|
sizeABAB = dosquare(baseAB, sizeAB, baseABAB);
|
|
|
|
h1 = baseABAB;
|
|
hd = ans + shift;
|
|
carry = 0;
|
|
while (sizeABAB--) {
|
|
sival.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = BASE1 - ((FULL) *hd) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Return the size of the result.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
len--;
|
|
hd--;
|
|
}
|
|
tempbuf = temp;
|
|
return len;
|
|
}
|
|
|
|
|
|
/*
|
|
* Return a pointer to a buffer to be used for holding a temporary number.
|
|
* The buffer will be at least as large as the specified number of HALFs,
|
|
* and remains valid until the next call to this routine. The buffer cannot
|
|
* be freed by the caller. There is only one temporary buffer, and so as to
|
|
* avoid possible conflicts this is only used by the lowest level routines
|
|
* such as divide, multiply, and square.
|
|
*
|
|
* given:
|
|
* len required number of HALFs in buffer
|
|
*/
|
|
HALF *
|
|
zalloctemp(LEN len)
|
|
{
|
|
HALF *hp;
|
|
STATIC LEN buflen; /* current length of temp buffer */
|
|
STATIC HALF *bufptr; /* pointer to current temp buffer */
|
|
|
|
if (len <= buflen)
|
|
return bufptr;
|
|
|
|
/*
|
|
* We need to grow the temporary buffer.
|
|
* First free any existing buffer, and then allocate the new one.
|
|
* While we are at it, make the new buffer bigger than necessary
|
|
* in order to reduce the number of reallocations.
|
|
*/
|
|
len += 100;
|
|
if (buflen) {
|
|
buflen = 0;
|
|
free(bufptr);
|
|
}
|
|
/* don't call alloc() because _math_abort_ may not be set right */
|
|
hp = (HALF *) malloc((len+1) * sizeof(HALF));
|
|
if (hp == NULL) {
|
|
math_error("No memory for temp buffer");
|
|
not_reached();
|
|
}
|
|
bufptr = hp;
|
|
buflen = len;
|
|
return hp;
|
|
}
|