calc/zmod.c

/*
 * zmod - modulo arithmetic routines
 *
 * Copyright (C) 1999-2007,2021-2023  David I. Bell, Landon Curt Noll and Ernest Bowen
 *
 * Primary author:  David I. Bell
 *
 * Calc is open software; you can redistribute it and/or modify it under
 * the terms of the version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * Calc is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
 * Public License for more details.
 *
 * A copy of version 2.1 of the GNU Lesser General Public License is
 * distributed with calc under the filename COPYING-LGPL.  You should have
 * received a copy with calc; if not, write to Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Under source code control:   1991/05/22 23:03:55
 * File existed as early as:    1991
 *
 * Share and enjoy!  :-)        http://www.isthe.com/chongo/tech/comp/calc/
 */

/*
 * Routines to do modulo arithmetic both normally and also using the REDC
 * algorithm given by Peter L. Montgomery in Mathematics of Computation,
 * volume 44, number 170 (April, 1985).  For multiple multiplies using
 * the same large modulus, the REDC algorithm avoids the usual division
 * by the modulus, instead replacing it with two multiplies or else a
 * special algorithm.  When these two multiplies or the special algorithm
 * are faster than the division, then the REDC algorithm is better.
 */


#include "alloc.h"
#include "config.h"
#include "zmath.h"


#include "errtbl.h"
#include "banned.h"     /* include after system header <> includes */


/*
 * Exponent window used by zpowermod: the power is consumed POWBITS bits
 * at a time and a table of POWNUMS small powers of the base is kept.
 */
#define POWBITS 4               /* bits for power chunks (must divide BASEB) */
#define POWNUMS (1<<POWBITS)    /* number of powers needed in table */

/* forward declarations of file-local helpers defined further down */
S_FUNC void zmod5(ZVALUE *zp);
S_FUNC void zmod6(ZVALUE z1, ZVALUE *res);
S_FUNC void zredcmodinv(ZVALUE z1, ZVALUE *res);

/*
 * REDC structure remembered between zpowermod calls; it is replaced when
 * zpowermod is called with a modulus that differs from powermodredc->mod.
 */
STATIC REDC *powermodredc = NULL;       /* REDC info for raising to power */

/*
 * Cached large-modulus state used by zmod5 and zmod6.
 *
 * havelastmod is true while lastmod and lastmodinv hold values.  The code
 * that fills the cache stores a copy of the modulus in lastmod and, in
 * lastmodinv, the zquo() quotient of zbitvalue(2 * len * BASEB) by that
 * modulus, where len is the word length of the modulus.  Both are freed
 * and recomputed when a different modulus is seen.
 */
bool havelastmod = false;
STATIC ZVALUE lastmod[1];
STATIC ZVALUE lastmodinv[1];


/*
 * zsquaremod - compute the square of a number modulo a second number
 *
 * The number being squared may be negative or outside the modulo range.
 * The result lies in the range 0 to the modulus - 1.  A modulus that is
 * zero or negative is reported through math_error().
 *
 * given:
 *      z1              number to be squared
 *      z2              number to take mod with
 *      res             where the result is stored
 */
void
zsquaremod(ZVALUE z1, ZVALUE z2, ZVALUE *res)
{
        ZVALUE square;          /* the unreduced square of z1 */
        FULL small_mod;         /* modulus when it fits in one word */
        FULL val;               /* scratch value for the one word case */
        bool in_range;          /* true => square is already below z2 */

        /* firewall */
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        if (zisneg(z2) || ziszero(z2)) {
                math_error("Mod of non-positive integer");
                not_reached();
        }

        /* 0 squared, or anything mod 1 (or -1), is zero */
        if (ziszero(z1) || zisunit(z2)) {
                *res = _zero_;
                return;
        }

        /*
         * One word modulus: everything fits in a FULL, so avoid the
         * multi-word square entirely.
         */
        if (zistiny(z2)) {
                small_mod = z2.v[0];
                if ((small_mod & -small_mod) != small_mod) {
                        /* general case: reduce |z1| first, then square */
                        z1.sign = 0;
                        val = (FULL) zmodi(z1, (long) small_mod);
                        val = (val * val) % small_mod;
                } else {
                        /*
                         * Power of two (NEEDS 2'S COMP for the test above):
                         * only the low word of z1 matters and the mod is
                         * a mask.
                         */
                        val = (FULL) z1.v[0];
                        val = (val * val) & (small_mod - 1);
                }
                itoz((long) val, res);
                return;
        }

        /*
         * Multi-word modulus: form the square, and hand it back as is when
         * a look at the lengths and top words shows it is below z2.
         */
        zsquare(z1, &square);
        in_range = (square.len < z2.len);
        if (!in_range && (square.len == z2.len)) {
                in_range = (square.v[square.len-1] < z2.v[z2.len-1]);
        }
        if (in_range) {
                *res = square;
                return;
        }

        /* otherwise a real division is needed */
        zmod(square, z2, res, 0);
        zfree(square);
}


/*
 * zminmod - find the congruent value of least absolute value
 *
 * The number to be reduced can be negative or out of modulo range.
 * The result lies within -int((modulus-1)/2) to int(modulus/2) inclusive:
 * modulus 7 reduces to [-3, 3] and modulus 8 reduces to [-3, 4].
 * A modulus that is zero or negative is reported through math_error().
 *
 * given:
 *      z1              number to find minimum congruence of
 *      z2              number to take mod with
 *      res             where the result is stored
 */
void
zminmod(ZVALUE z1, ZVALUE z2, ZVALUE *res)
{
        ZVALUE rem;             /* |z1|, or z1 mod z2 when z1 was too large */
        ZVALUE comp;            /* z2 - rem */
        int neg;                /* sign that still applies to rem */
        int rel;                /* result of the latest zrel() */

        /* firewall */
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        if (ziszero(z2) || zisneg(z2)) {
                math_error("Mod of non-positive integer");
                not_reached();
        }
        if (ziszero(z1) || zisunit(z2)) {
                *res = _zero_;
                return;
        }
        if (zistwo(z2)) {
                /* mod 2 is just the parity of z1 */
                if (!zisodd(z1))
                        *res = _zero_;
                else
                        *res = _one_;
                return;
        }

        /*
         * A number at least two words shorter than the modulus is
         * already its own minimal residue.
         */
        if (z1.len < z2.len - 1) {
                zcopy(z1, res);
                return;
        }

        /*
         * Compare |z1| with the modulus.  When it is not below the
         * modulus, do a true mod (with the original sign restored) and
         * repeat the short-length test on the remainder.
         */
        neg = z1.sign;
        z1.sign = 0;
        rel = zrel(z1, z2);
        if (rel == 0) {
                *res = _zero_;
                return;
        }
        rem = z1;               /* shares storage with z1 unless replaced */
        if (rel > 0) {
                z1.sign = (bool)neg;
                zmod(z1, z2, &rem, 0);
                if (rem.len < z2.len - 1) {
                        *res = rem;
                        return;
                }
                neg = 0;        /* the remainder is non-negative */
        }

        /*
         * Choose between rem (carrying the sign in neg) and z2 - rem
         * (carrying the opposite sign), whichever is smaller in absolute
         * value.  On a tie the positive value z2 - rem is returned.
         */
        zsub(z2, rem, &comp);
        rel = zrel(rem, comp);
        if (rel >= 0) {
                if (rel != 0)
                        comp.sign = !neg;
                if (rem.v != z1.v) {
                        zfree(rem);
                }
                *res = comp;
                return;
        }

        zfree(comp);
        rem.sign = (bool)neg;
        if (rem.v != z1.v) {
                /* rem is our own storage: hand it over */
                *res = rem;
        } else {
                /* rem still aliases the caller's z1: return a copy */
                zcopy(rem, res);
        }
}


/*
 * zcmpmod - test two numbers for inequality modulo a third number
 *
 * The two numbers to be compared can be negative or out of modulo range.
 * A modulus that is zero or negative is reported through math_error().
 *
 * given:
 *      z1              first number to be compared
 *      z2              second number to be compared
 *      z3              modulus
 *
 * returns:
 *      false           the numbers are congruent
 *      true            the numbers are not congruent
 */
bool
zcmpmod(ZVALUE z1, ZVALUE z2, ZVALUE z3)
{
        ZVALUE a, b;            /* working forms of z1 and z2 */
        ZVALUE diff;            /* a - b */
        ZVALUE rem;             /* |diff| mod z3 */
        FULL topword;           /* most significant word of z3 */
        LEN modlen;             /* word length of z3 */
        int rel;
        bool decided;           /* true => answer known without subtracting */
        bool differ;            /* the answer */

        if (zisneg(z3) || ziszero(z3)) {
                math_error("Non-positive modulus in zcmpmod");
                not_reached();
        }

        /* mod 2: congruent exactly when the low words have equal parity */
        if (zistwo(z3))
                return (((z1.v[0] + z2.v[0]) & 0x1) != 0);

        /*
         * Identical numbers are trivially congruent.  The cheap field
         * tests come first so zcmp() runs only when they all agree.
         */
        if ((z1.sign == z2.sign) && (z1.len == z2.len) &&
            (z1.v[0] == z2.v[0]) && (zcmp(z1, z2) == 0)) {
                return false;
        }

        /*
         * Negating both numbers does not change whether they are
         * congruent, so a pair of negatives becomes a pair of positives.
         */
        if (zisneg(z1) && zisneg(z2)) {
                z1.sign = 0;
                z2.sign = 0;
        }

        /*
         * A remaining negative number that is visibly smaller in size
         * than the modulus is made positive by adding the modulus.
         * a and b alias z1 and z2 unless replaced here.
         */
        a = z1;
        b = z2;
        modlen = z3.len;
        topword = z3.v[modlen - 1];

        if (zisneg(z1) && ((z1.len < modlen) ||
            ((z1.len == modlen) && (z1.v[z1.len - 1] < topword)))) {
                zadd(z1, z3, &a);
        }
        if (zisneg(z2) && ((z2.len < modlen) ||
            ((z2.len == modlen) && (z2.v[z2.len - 1] < topword)))) {
                zadd(z2, z3, &b);
        }

        /*
         * Equal values are congruent.  Unequal values of the same sign
         * that are both below the modulus cannot be congruent.  Anything
         * else needs the difference.
         */
        decided = true;
        differ = false;
        if (zcmp(a, b) == 0) {
                differ = false;
        } else if ((a.sign == b.sign) &&
                   ((a.len < modlen) || (zrel(a, z3) < 0)) &&
                   ((b.len < modlen) || (zrel(b, z3) < 0))) {
                differ = true;
        } else {
                decided = false;
                zsub(a, b, &diff);
        }

        /* release a and b only when they are not the caller's storage */
        if (a.v != z1.v) {
                zfree(a);
        }
        if (b.v != z2.v) {
                zfree(b);
        }
        if (decided)
                return differ;

        /*
         * |diff| equal to the modulus means congruent; |diff| below the
         * modulus (and known to be non-zero) means not congruent.
         */
        diff.sign = 0;
        rel = zrel(diff, z3);
        if (rel <= 0) {
                zfree(diff);
                return (rel < 0);
        }

        /* no shortcut left: congruent exactly when the remainder is zero */
        zmod(diff, z3, &rem, 0);
        zfree(diff);
        differ = !ziszero(rem);
        zfree(rem);
        return differ;
}


/*
 * zmod5 - reduce a value in place modulo the cached modulus lastmod
 *
 * Given the address of a positive integer whose word count does not
 * exceed twice that of the modulus stored at lastmod, evaluate the integer
 * modulo that modulus and store the result at the same address.
 *
 * The words of zp->v are overwritten and zp->len is adjusted; the result
 * is not given new storage.  A quotient estimate is formed from the high
 * words of *zp and the high words of lastmodinv, the estimate times
 * lastmod is subtracted, and the remainder is then corrected by at most
 * two further subtractions of lastmod (more than two is reported through
 * math_error()).
 *
 * NOTE(review): lastmod and lastmodinv are read unconditionally; nothing
 * in this routine checks havelastmod, so the caller is presumably
 * responsible for having set up the cache -- confirm against callers.
 *
 * given:
 *      zp              value to reduce, updated in place
 */
S_FUNC void
zmod5(ZVALUE *zp)
{
        LEN len, modlen, j;
        ZVALUE tmp1, tmp2;
        ZVALUE z1, z2, z3;
        HALF *a, *b;
        FULL f;
        HALF u;

        /* firewall */
        if (zp == NULL) {
                math_error("%s: zp NULL", __func__);
                not_reached();
        }

        int subcount = 0;       /* number of correcting subtractions done */

        /* nothing to do when the value is already below the modulus */
        if (zrel(*zp, *lastmod) < 0)
                return;
        modlen = lastmod->len;
        len = zp->len;

        /* z1 is a view (no copy) of *zp from word modlen-1 upward */
        z1.v = zp->v + modlen - 1;
        z1.len = len - modlen + 1;
        z1.sign = z2.sign = z3.sign = 0;

        /* z1.len > modlen + 1 means zp->len > 2 * modlen: contract broken */
        if (z1.len > modlen + 1) {
                math_error("Bad call to zmod5!!!");
                not_reached();
        }

        /*
         * z2 is a view of lastmodinv with its low (modlen + 1 - z1.len)
         * words skipped.
         */
        z2.v = lastmodinv->v + modlen + 1 - z1.len;
        z2.len = lastmodinv->len - modlen - 1 + z1.len;
        zmul(z1, z2, &tmp1);

        /* z3 is a view of tmp1 without its low z1.len words */
        z3.v = tmp1.v + z1.len;
        z3.len = tmp1.len - z1.len;
        if (z3.len > 0) {
                /* subtract z3 * lastmod from *zp, low word first */
                zmul(z3, *lastmod, &tmp2);
                j = modlen;
                a = zp->v;
                b = tmp2.v;
                u = 0;
                len = modlen;
                while (j-- > 0) {
                        f = (FULL) *a - (FULL) *b++ - (FULL) u;
                        *a++ = (HALF) f;
                        /*
                         * borrow for the next word: the bits of f above
                         * BASEB are negated and truncated to a HALF --
                         * presumably 1 after a wrap and 0 otherwise,
                         * assuming FULL is twice the width of HALF
                         */
                        u = - (HALF) (f >> BASEB);
                }
                /* one more word is handled when z1 spans more than one word */
                if (z1.len > 1) {
                        len++;
                        if (tmp2.len > modlen)
                                f = (FULL) *a - (FULL) *b - (FULL) u;
                        else
                                f = (FULL) *a - (FULL) u;
                        *a++ = (HALF) f;
                }
                /* drop leading zero words; len may reach 0 here */
                while (len > 0 && *--a == 0)
                        len--;
                zp->len = len;
                zfree(tmp2);
        }
        zfree(tmp1);

        /* correct the estimate: subtract lastmod while *zp >= lastmod */
        while (len > 0 && zrel(*zp, *lastmod) >= 0) {
                subcount++;
                if (subcount > 2) {
                        math_error("Too many subtractions in zmod5");
                        not_reached();
                }
                j = modlen;
                a = zp->v;
                b = lastmod->v;
                u = 0;
                while (j-- > 0) {
                        f = (FULL) *a - (FULL) *b++ - (FULL) u;
                        *a++ = (HALF) f;
                        u = - (HALF) (f >> BASEB);
                }
                /* propagate the borrow into the extra high word, if any */
                if (len > modlen) {
                        f = (FULL) *a - (FULL) u;
                        *a++ = (HALF) f;
                }
                while (len > 0 && *--a == 0)
                        len--;
                zp->len = len;
        }

        /* a zero result is stored with a length of one word */
        if (len == 0)
                zp->len = 1;
}

/*
 * zmod6 - reduce a number modulo the cached modulus lastmod
 *
 * A private copy of |z1| is reduced by repeated calls to zmod5() on a view
 * of its highest words (at most twice the word length of lastmod per call)
 * until the copy is below lastmod.  For a negative z1 the value stored is
 * lastmod minus that remainder; a remainder of zero is stored as zero.
 *
 * NOTE(review): like zmod5(), this reads lastmod without checking
 * havelastmod -- presumably the caller has set up the cache.
 *
 * given:
 *      z1              number to reduce
 *      res             where the result is stored
 */
S_FUNC void
zmod6(ZVALUE z1, ZVALUE *res)
{
        ZVALUE work;            /* writable copy of |z1| being reduced */
        ZVALUE top;             /* view of the high words of work */
        LEN modlen;             /* word length of lastmod */
        LEN worklen;            /* current word length of work */
        LEN below;              /* words of work that lie under the view */
        int neg;                /* sign of the original z1 */

        /* firewall */
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        if (ziszero(z1) || zisone(*lastmod)) {
                *res = _zero_;
                return;
        }

        neg = z1.sign;
        z1.sign = 0;
        zcopy(z1, &work);
        modlen = lastmod->len;
        top.sign = 0;

        while (zrel(work, *lastmod) >= 0) {
                /* select at most 2 * modlen of the highest words */
                worklen = work.len;
                if (worklen > 2 * modlen) {
                        top.len = 2 * modlen;
                        below = worklen - 2 * modlen;
                } else {
                        top.len = worklen;
                        below = 0;
                }
                top.v = work.v + below;

                /* zmod5 rewrites those words of work in place */
                zmod5(&top);

                /* new length of work, with leading zero words removed */
                worklen = below + top.len;
                while (worklen > 0 && work.v[worklen - 1] == 0)
                        worklen--;
                if (worklen == 0) {
                        zfree(work);
                        *res = _zero_;
                        return;
                }
                work.len = worklen;
        }

        if (neg)
                zsub(*lastmod, work, res);
        else
                zcopy(work, res);
        zfree(work);
}


/*
 * Compute the result of raising one number to a power modulo another number.
 * That is, this computes:  a^b (modulo c).
 * This calculates the result by examining the power POWBITS bits at a time,
 * using a small table of POWNUMS low powers to calculate powers for those bits,
 * and repeated squaring and multiplying by the partial powers to generate
 * the complete power.  If the power being raised to is high enough, then
 * this uses the REDC algorithm to avoid doing many divisions.  When using
 * REDC, multiple calls to this routine using the same modulus will be
 * slightly faster.
 */
void
zpowermod(ZVALUE z1, ZVALUE z2, ZVALUE z3, ZVALUE *res)
{
        HALF *hp;               /* pointer to current word of the power */
        REDC *rp;               /* REDC information to be used */
        ZVALUE *pp;             /* pointer to low power table */
        ZVALUE ans, temp;       /* calculation values */
        ZVALUE modpow;          /* current small power */
        ZVALUE lowpowers[POWNUMS];      /* low powers */
        ZVALUE ztmp;
        int curshift;           /* shift value for word of power */
        HALF curhalf;           /* current word of power */
        unsigned int curpow;    /* current low power */
        unsigned int curbit;    /* current bit of low power */
        bool free_z1;           /* true => need to free z1 */
        int i;

        /* firewall */
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        if (zisneg(z3) || ziszero(z3)) {
                math_error("Non-positive modulus in zpowermod");
                not_reached();
        }
        if (zisneg(z2)) {
                math_error("Negative power in zpowermod");
                not_reached();
        }


        /*
         * Check easy cases first.
         */
        if ((ziszero(z1) && !ziszero(z2)) || zisunit(z3)) {
                /* 0^(non_zero) or x^y mod 1 always produces zero */
                *res = _zero_;
                return;
        }
        if (ziszero(z2)) {                      /* x^0 == 1 */
                *res = _one_;
                return;
        }
        if (zistwo(z3)) {                       /* mod 2 */
                if (zisodd(z1))
                        *res = _one_;
                else
                        *res = _zero_;
                return;
        }
        if (zisunit(z1) && (!z1.sign || ziseven(z2))) {
                /* 1^x or (-1)^(2x) */
                *res = _one_;
                return;
        }

        /*
         * Normalize the number being raised to be non-negative and to lie
         * within the modulo range.  Then check for zero or one specially.
         */
        ztmp.len = 0;
        free_z1 = false;
        if (zisneg(z1) || zrel(z1, z3) >= 0) {
                zmod(z1, z3, &ztmp, 0);
                zfree(z1);
                z1 = ztmp;
                free_z1 = true;
        }
        if (ziszero(z1)) {
                zfree(z1);
                if (ztmp.len)
                        zfree(ztmp);
                *res = _zero_;
                return;
        }
        if (zisone(z1)) {
                zfree(z1);
                if (ztmp.len)
                        zfree(ztmp);
                *res = _one_;
                return;
        }

        /*
         * If modulus is large enough use zmod5
         */
        if (z3.len >= conf->pow2) {
                if (havelastmod && zcmp(z3, *lastmod)) {
                        zfree(*lastmod);
                        zfree(*lastmodinv);
                        havelastmod = false;
                }
                if (!havelastmod) {
                        zcopy(z3, lastmod);
                        zbitvalue(2 * z3.len * BASEB, &temp);
                        zquo(temp, z3, lastmodinv, 0);
                        zfree(temp);
                        havelastmod = true;
                }

                /* zzz */
                for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
                        pp->len = 0;
                        pp->v = NULL;
                }
                lowpowers[0] = _one_;
                lowpowers[1] = z1;
                ans = _one_;

                hp = &z2.v[z2.len - 1];
                curhalf = *hp;
                curshift = BASEB - POWBITS;
                while (curshift && ((curhalf >> curshift) == 0))
                        curshift -= POWBITS;

                /*
                 * Calculate the result by examining the power POWBITS bits at
                 * a time, and use the table of low powers at each iteration.
                 */
                for (;;) {
                        curpow = (curhalf >> curshift) & (POWNUMS - 1);
                        pp = &lowpowers[curpow];

                        /*
                         * If the small power is not yet saved in the table,
                         * then calculate it and remember it in the table for
                         * future use.
                         */
                        if (pp->v == NULL) {
                                if (curpow & 0x1) {
                                        zcopy(z1, &modpow);
                                        free_z1 = false;
                                } else {
                                        modpow = _one_;
                                }

                                for (curbit = 0x2;
                                     curbit <= curpow;
                                     curbit *= 2) {
                                        pp = &lowpowers[curbit];
                                        if (pp->v == NULL) {
                                                zsquare(lowpowers[curbit/2],
                                                        &temp);
                                                zmod5(&temp);
                                                zcopy(temp, pp);
                                                zfree(temp);
                                        }
                                        if (curbit & curpow) {
                                                zmul(*pp, modpow, &temp);
                                                zfree(modpow);
                                                zmod5(&temp);
                                                zcopy(temp, &modpow);
                                                zfree(temp);
                                        }
                                }
                                pp = &lowpowers[curpow];
                                if (pp->v != NULL) {
                                        zfree(*pp);
                                }
                                *pp = modpow;
                        }

                        /*
                         * If the power is nonzero, then accumulate the small
                         * power into the result.
                         */
                        if (curpow) {
                                zmul(ans, *pp, &temp);
                                zfree(ans);
                                zmod5(&temp);
                                zcopy(temp, &ans);
                                zfree(temp);
                        }

                        /*
                         * Select the next POWBITS bits of the power, if
                         * there is any more to generate.
                         */
                        curshift -= POWBITS;
                        if (curshift < 0) {
                                if (hp == z2.v)
                                        break;
                                curhalf = *--hp;
                                curshift = BASEB - POWBITS;
                        }

                        /*
                         * Square the result POWBITS times to make room for
                         * the next chunk of bits.
                         */
                        for (i = 0; i < POWBITS; i++) {
                                zsquare(ans, &temp);
                                zfree(ans);
                                zmod5(&temp);
                                zcopy(temp, &ans);
                                zfree(temp);
                        }
                }

                for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
                        zfree(*pp);
                }
                *res = ans;
                if (ztmp.len)
                        zfree(ztmp);
                return;
        }

        /*
         * If the modulus is odd and small enough then use
         * the REDC algorithm.  The size where this is done is configurable.
         */
        if (z3.len < conf->redc2 && zisodd(z3)) {
                if (powermodredc && zcmp(powermodredc->mod, z3)) {
                        zredcfree(powermodredc);
                        powermodredc = NULL;
                }
                if (powermodredc == NULL)
                        powermodredc = zredcalloc(z3);
                rp = powermodredc;
                zredcencode(rp, z1, &temp);
                if (free_z1 == true) {
                        zfree(z1);
                }
                zredcpower(rp, temp, z2, &z1);
                zfree(temp);
                zredcdecode(rp, z1, res);
                zfree(z1);
                return;
        }

        /*
         * Modulus or power is small enough to perform the power raising
         * directly.  Initialize the table of powers.
         */
        for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
                pp->len = 0;
                pp->v = NULL;
        }
        lowpowers[0] = _one_;
        lowpowers[1] = z1;
        ans = _one_;

        hp = &z2.v[z2.len - 1];
        curhalf = *hp;
        curshift = BASEB - POWBITS;
        while (curshift && ((curhalf >> curshift) == 0))
                curshift -= POWBITS;

        /*
         * Calculate the result by examining the power POWBITS bits at a time,
         * and use the table of low powers at each iteration.
         */
        for (;;) {
                curpow = (curhalf >> curshift) & (POWNUMS - 1);
                pp = &lowpowers[curpow];

                /*
                 * If the small power is not yet saved in the table, then
                 * calculate it and remember it in the table for future use.
                 */
                if (pp->v == NULL) {
                        if (curpow & 0x1) {
                                zcopy(z1, &modpow);
                                free_z1 = false;
                        } else {
                                modpow = _one_;
                        }

                        for (curbit = 0x2; curbit <= curpow; curbit *= 2) {
                                pp = &lowpowers[curbit];
                                if (pp->v == NULL) {
                                        zsquare(lowpowers[curbit/2], &temp);
                                        zmod(temp, z3, pp, 0);
                                        zfree(temp);
                                }
                                if (curbit & curpow) {
                                        zmul(*pp, modpow, &temp);
                                        zfree(modpow);
                                        zmod(temp, z3, &modpow, 0);
                                        zfree(temp);
                                }
                        }
                        pp = &lowpowers[curpow];
                        if (pp->v != NULL) {
                                zfree(*pp);
                        }
                        *pp = modpow;
                }

                /*
                 * If the power is nonzero, then accumulate the small power
                 * into the result.
                 */
                if (curpow) {
                        zmul(ans, *pp, &temp);
                        zfree(ans);
                        zmod(temp, z3, &ans, 0);
                        zfree(temp);
                }

                /*
                 * Select the next POWBITS bits of the power, if there is
                 * any more to generate.
                 */
                curshift -= POWBITS;
                if (curshift < 0) {
                        if (hp-- == z2.v)
                                break;
                        curhalf = *hp;
                        curshift = BASEB - POWBITS;
                }

                /*
                 * Square the result POWBITS times to make room for the next
                 * chunk of bits.
                 */
                for (i = 0; i < POWBITS; i++) {
                        zsquare(ans, &temp);
                        zfree(ans);
                        zmod(temp, z3, &ans, 0);
                        zfree(temp);
                }
        }

        for (pp = &lowpowers[2]; pp <= &lowpowers[POWNUMS-1]; pp++) {
                zfree(*pp);
        }
        *res = ans;
        if (ztmp.len)
                zfree(ztmp);
        if (free_z1 == true) {
                zfree(z1);
        }
}

/*
 * zredcmodinv - compute the inverse constant used by the REDC routines
 *
 * Given a positive odd N-word integer z, evaluate minv(-z, BASEB^N).
 *
 * NOTE(review): BASEB^N presumably stands for the word base raised to the
 * Nth power, i.e. 2^(BASEB*N), since BASEB is used elsewhere in this file
 * as a bit count -- confirm.
 *
 * The work is done in an N-word scratch value which is then copied into
 * *res with leading zero words removed; the scratch value is freed.
 * Nothing here checks that z is odd or positive; the first loop ends only
 * when h wraps to zero, which relies on the low word of z being odd.
 *
 * given:
 *      z               modulus
 *      res             where the result is stored
 */
S_FUNC void
zredcmodinv(ZVALUE z, ZVALUE *res)
{
        ZVALUE tmp;
        HALF *a0, *a, *b;
        HALF bit, h, inv, v;
        FULL f;
        LEN N, i, j, len;

        /* firewall */
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        /* scratch value of N words, cleared and then set to 1 */
        N = z.len;
        tmp.sign = 0;
        tmp.len = N;
        tmp.v = alloc(N);
        zclearval(tmp);
        *tmp.v = 1;

        /*
         * Single word step.  h always equals 1 + inv * (low word of z) in
         * HALF arithmetic.  Each pass moves to the next bit; when that bit
         * of h is set, the bit is added to inv and bit * (low word of z)
         * is added to h, which clears that bit of h when the low word of z
         * is odd.  The loop stops once h is zero, i.e. when
         * inv * (low word of z) is -1 in HALF arithmetic.
         */
        h = 1 + *z.v;
        bit = 1;
        inv = 1;
        while (h) {
                bit <<= 1;
                if (bit & h) {
                        inv |= bit;
                        h += bit * *z.v;
                }
        }

        /*
         * Multi-word step.  For the word at a0, v = inv * *a0 is formed,
         * v * z is added into tmp starting at a0 (limited to the j words
         * that remain, carrying through the bits of f above BASEB), and v
         * itself is stored at a0 in place of the low word of that sum.
         */
        j = N;
        a0 = tmp.v;
        while (j-- > 0) {
                v = inv * *a0;
                i = j;
                a = a0;
                b = z.v;
                f = (FULL) v * (FULL) *b++ + (FULL) *a++;
                *a0 = v;
                while (i-- > 0) {
                        f = (FULL) v * (FULL) *b++  + (FULL) *a + (f >> BASEB);
                        *a++ = (HALF) f;
                }
                /* step to the next word, passing over words that are zero */
                while (j > 0 && *++a0 == 0)
                        j--;
        }

        /* remove leading zero words, then return an exactly sized copy */
        a = tmp.v + N;
        len = N;
        while (*--a == 0)
                len--;
        tmp.len = len;
        zcopy(tmp, res);
        zfree(tmp);
}


/*
 * zredcalloc - set up REDC information for a modulus
 *
 * Returns a pointer to a newly allocated structure that is passed to the
 * other REDC calls.  The modulus must be odd and positive; a bad modulus
 * or a failed allocation is reported through math_error().
 *
 * given:
 *      z1              modulus to initialize for
 */
REDC *
zredcalloc(ZVALUE z1)
{
        REDC *redc;             /* structure being built */
        ZVALUE radix;           /* the binary modulus as a number */
        long nbits;             /* bit length of the binary modulus */
        long excess;            /* bits past the last word boundary */

        if (zisneg(z1) || ziseven(z1)) {
                math_error("REDC requires positive odd modulus");
                not_reached();
        }

        redc = (REDC *) malloc(sizeof(REDC));
        if (redc == NULL) {
                math_error("Cannot allocate REDC structure");
                not_reached();
        }

        /* keep a private copy of the modulus along with its inverse value */
        zcopy(z1, &redc->mod);
        zredcmodinv(z1, &redc->inv);

        /*
         * The binary modulus is the bit length of z1 rounded up to a whole
         * number of words, which keeps shifts and mods by it cheap.
         */
        nbits = zhighbit(z1) + 1;
        excess = nbits % BASEB;
        if (excess != 0)
                nbits += (BASEB - excess);

        /* precompute the REDC format of the number 1 */
        zbitvalue(nbits, &radix);
        zmod(radix, redc->mod, &redc->one, 0);
        zfree(radix);

        redc->len = (LEN)(nbits / BASEB);
        return redc;
}


/*
 * zredcfree - release a REDC structure
 *
 * Frees the three numbers held by the structure (one, inv and mod) and
 * then the structure itself.  A NULL pointer is reported through
 * math_error().
 *
 * given:
 *      rp              REDC information to be cleared
 */
void
zredcfree(REDC *rp)
{
        /* firewall */
        if (rp == NULL) {
                math_error("%s: rp NULL", __func__);
                not_reached();
        }

        /* the member numbers go first, then the container */
        zfree(rp->one);
        zfree(rp->inv);
        zfree(rp->mod);

        free(rp);
}


/*
 * Convert a normal number into the specified REDC format.
 * The number to be converted can be negative or out of modulo range.
 * The resulting number can be used for multiplying, adding, subtracting,
 * or comparing with any other such converted numbers, as if the numbers
 * were being calculated modulo the number which initialized the REDC
 * information.  When the final value is converted back out of REDC format,
 * the result is the same as if the usual operations were done with the
 * original numbers.
 *
 * given:
 *      rp              REDC information
 *      z1              number to be converted
 *      res             returned converted number
 */
void
zredcencode(REDC *rp, ZVALUE z1, ZVALUE *res)
{
        ZVALUE tmp1;

        /* firewall */
        if (rp == NULL) {
                math_error("%s: rp NULL", __func__);
                not_reached();
        }
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        /*
         * Confirm or initialize lastmod information when modulus is a
         * big number.
         */

        if (rp->len >= conf->pow2) {
                if (havelastmod && zcmp(rp->mod, *lastmod)) {
                        zfree(*lastmod);
                        zfree(*lastmodinv);
                        havelastmod = false;
                }
                if (!havelastmod) {
                        zcopy(rp->mod, lastmod);
                        zbitvalue(2 * rp->len * BASEB, &tmp1);
                        zquo(tmp1, rp->mod, lastmodinv, 0);
                        zfree(tmp1);
                        havelastmod = true;
                }
        }
        /*
         * Handle the cases 0, 1, -1, and 2 specially since these are
         * easy to calculate.  Zero transforms to zero, and the others
         * can be obtained from the precomputed REDC format for 1 since
         * addition and subtraction act normally for REDC format numbers.
         */
        if (ziszero(z1)) {
                *res = _zero_;
                return;
        }
        if (zisone(z1)) {
                zcopy(rp->one, res);
                return;
        }
        if (zisunit(z1)) {
                zsub(rp->mod, rp->one, res);
                return;
        }
        if (zistwo(z1)) {
                zadd(rp->one, rp->one, &tmp1);
                if (zrel(tmp1, rp->mod) < 0) {
                        *res = tmp1;
                        return;
                }
                zsub(tmp1, rp->mod, res);
                zfree(tmp1);
                return;
        }

        /*
         * Not a trivial number to convert, so do the full transformation.
         */
        zshift(z1, rp->len * BASEB, &tmp1);
        if (rp->len < conf->pow2)
                zmod(tmp1, rp->mod, res, 0);
        else
                zmod6(tmp1, res);
        zfree(tmp1);
}


/*
 * The REDC algorithm used to convert numbers out of REDC format and also
 * used after multiplication of two REDC numbers.  Using this routine
 * avoids any divides, replacing the divide by two multiplications.
 * If the numbers are very large, then these two multiplies will be
 * quicker than the divide, since dividing is harder than multiplying.
 *
 * The magnitude of z1 is split at rp->len words.  The low part is run
 * through the REDC reduction; the high part (if any) is reduced modulo
 * the modulus when necessary and added to the reduced low part.  When z1
 * is negative the nonzero result is replaced by the modulus minus the
 * result.  This holds on every return path, including the shortcut taken
 * when the low rp->len words of z1 are all zero.
 *
 * given:
 *      rp              REDC information
 *      z1              number to be transformed
 *      res             returned transformed number
 */
void
zredcdecode(REDC *rp, ZVALUE z1, ZVALUE *res)
{
        ZVALUE tmp1, tmp2;
        ZVALUE ztmp;
        ZVALUE ztop;
        ZVALUE zp1;
        FULL muln;
        HALF *h1;
        HALF *h3;
        HALF *hd = NULL;
        HALF Ninv;
        LEN modlen;
        LEN len;
        FULL f;
        int sign;
        int i, j;

        /* firewall */
        if (rp == NULL) {
                math_error("%s: rp NULL", __func__);
                not_reached();
        }
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        /*
         * Check first for the special values for 0 and 1 that are easy.
         */
        if (ziszero(z1)) {
                *res = _zero_;
                return;
        }
        if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
                (zcmp(z1, rp->one) == 0)) {
                        *res = _one_;
                        return;
        }

        /*
         * A zero length in ztop / ztmp means "not in use".  ztop is a
         * view of the words of z1 above modlen (or of ztmp once reduced);
         * ztmp owns storage only when that high part had to be reduced.
         */
        ztop.len = 0;
        ztmp.len = 0;
        modlen = rp->len;

        /* work on the magnitude and reapply the sign at the end */
        sign = z1.sign;
        z1.sign = 0;
        if (z1.len > modlen) {
                ztop.v = z1.v + modlen;
                ztop.len = z1.len - modlen;
                ztop.sign = 0;
                if (zrel(ztop, rp->mod) >= 0) {
                        zmod(ztop, rp->mod, &ztmp, 0);
                        ztop = ztmp;
                }

                /* trim the low part to its significant words */
                len = modlen;
                h1 = z1.v + len;
                while (len > 0 && *--h1 == 0)
                        len--;
                if (len == 0) {
                        /*
                         * The low part is zero, so the answer is just the
                         * (reduced) high part, negated modulo the modulus
                         * when z1 was negative.
                         */
                        if (ztmp.len)
                                *res = ztmp;
                        else
                                zcopy(ztop, res);
                        if (sign && !ziszero(*res)) {
                                zsub(rp->mod, *res, &tmp1);
                                zfree(*res);
                                *res = tmp1;
                        }
                        return;
                }
                z1.len = len;

        }
        if (rp->mod.len < conf->pow2) {
                /*
                 * Small modulus: word by word reduction using only the
                 * lowest word of the inverse.  Each pass adds a multiple
                 * of the modulus that clears the low word and then moves
                 * the accumulator down by one word.
                 */
                Ninv = rp->inv.v[0];
                res->sign = 0;
                res->len = modlen;
                res->v = alloc(modlen);
                zclearval(*res);
                h1 = z1.v;
                for (i = 0; i < modlen; i++) {
                        h3 = rp->mod.v;
                        hd = res->v;
                        f = (FULL) *hd++;
                        /* feed in the next word of z1 while any remain */
                        if (i < z1.len)
                                f += (FULL) *h1++;
                        muln = (HALF) ((f & BASE1) * Ninv);
                        f = ((muln * (FULL) *h3++) + f) >> BASEB;
                        j = modlen;
                        while (--j > 0) {
                                f += (muln * (FULL) *h3++) + (FULL) *hd;
                                hd[-1] = (HALF) f;
                                f >>= BASEB;
                                hd++;
                        }
                        hd[-1] = (HALF) f;
                }

                /* strip leading zero words, keeping at least one word */
                len = modlen;
                while (*--hd == 0 && len > 1)
                        len--;
                if (len == 0)
                        len = 1;
                res->len = len;
        } else {
                /* Here 0 < z1 < 2^bitnum */

                /*
                 * First calculate the following:
                 *      tmp2 = ((z1 * inv) % 2^bitnum.
                 * The mod operations can be done with no work since the bit
                 * number was selected as a multiple of the word size.  Just
                 * reduce the sizes of the numbers as required.
                 */
                zmul(z1, rp->inv, &tmp2);
                if (tmp2.len > modlen) {
                        h1 = tmp2.v + modlen;
                        len = modlen;
                        while (len > 0 && *--h1 == 0)
                                len--;
                        tmp2.len = len;
                }

                /*
                 * Next calculate the following:
                 *      res = (z1 + tmp2 * modulus) / 2^bitnum
                 * Since 0 < z1 < 2^bitnum and the division is always exact,
                 * the quotient can be evaluated by rounding up
                 * (tmp2 * modulus)/2^bitnum.  This can be achieved by defining
                 * zp1 by an appropriate shift and then adding one.
                 */
                zmul(tmp2, rp->mod, &tmp1);
                zfree(tmp2);
                if (tmp1.len > modlen) {
                        /* zp1 is a view into tmp1, not a separate number */
                        zp1.v = tmp1.v + modlen;
                        zp1.len = tmp1.len - modlen;
                        zp1.sign = 0;
                        zadd(zp1, _one_, res);
                } else {
                        *res = _one_;
                }
                zfree(tmp1);
        }

        /* add in the contribution of the high part of z1, if there was one */
        if (ztop.len) {
                zadd(*res, ztop, &tmp1);
                zfree(*res);
                if (ztmp.len)
                        zfree(ztmp);
                *res = tmp1;
        }

        /*
         * Finally do a final modulo by a simple subtraction if necessary.
         * This is all that is needed because the previous calculation is
         * guaranteed to always be less than twice the modulus.
         */

        if (zrel(*res, rp->mod) >= 0) {
                zsub(*res, rp->mod, &tmp1);
                zfree(*res);
                *res = tmp1;
        }

        /* a negative input yields the negative of the result mod modulus */
        if (sign && !ziszero(*res)) {
                zsub(rp->mod, *res, &tmp1);
                zfree(*res);
                *res = tmp1;
        }
        return;
}


/*
 * Multiply two numbers in REDC format together producing a result also
 * in REDC format.  If the result is converted back to a normal number,
 * then the result is the same as the modulo'd multiplication of the
 * original numbers before they were converted to REDC format.  This
 * calculation is done in one of two ways, depending on the size of the
 * modulus.  For large numbers, the REDC definition is used directly
 * which involves three multiplies overall.  For small numbers, a
 * complicated routine is used which does the indicated multiplication
 * and the REDC algorithm at the same time to produce the result.
 *
 * The routine works on the magnitudes of z1 and z2.  An operand whose
 * magnitude is not less than the modulus is first reduced with zmod().
 * When exactly one of the operands is negative, a nonzero result is
 * replaced by the modulus minus the result.
 *
 * given:
 *      rp              REDC information
 *      z1              first REDC number to be multiplied
 *      z2              second REDC number to be multiplied
 *      res             resulting REDC number
 */
void
zredcmul(REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res)
{
        FULL mulb;
        FULL muln;
        HALF *h1;
        HALF *h2;
        HALF *h3;
        HALF *hd;
        HALF Ninv;
        HALF topdigit = 0;
        LEN modlen;
        LEN len;
        LEN len2;
        SIUNION sival1;
        SIUNION sival2;
        SIUNION carry;
        ZVALUE tmp;
        ZVALUE z1tmp, z2tmp;
        int sign;

        /* firewall */
        if (rp == NULL) {
                math_error("%s: rp NULL", __func__);
                not_reached();
        }
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        /*
         * Remember the sign of the product and continue with magnitudes.
         * z1tmp / z2tmp hold a reduced copy of an operand when one was
         * needed; a zero length marks them as unused, which is what the
         * "if (z1tmp.len)" / "if (z2tmp.len)" cleanup tests below check.
         */
        sign = z1.sign ^ z2.sign;
        z1.sign = 0;
        z2.sign = 0;
        z1tmp.len = 0;
        if (zrel(z1, rp->mod) >= 0) {
                zmod(z1, rp->mod, &z1tmp, 0);
                z1 = z1tmp;
        }
        z2tmp.len = 0;
        if (zrel(z2, rp->mod) >= 0) {
                zmod(z2, rp->mod, &z2tmp, 0);
                z2 = z2tmp;
        }


        /*
         * Check for special values which we easily know the answer.
         */
        if (ziszero(z1) || ziszero(z2)) {
                *res = _zero_;
                if (z1tmp.len)
                        zfree(z1tmp);
                if (z2tmp.len)
                        zfree(z2tmp);
                return;
        }

        /*
         * z1 is the REDC format for one, so the product is z2 (or its
         * negative modulo the modulus).  The length and low word are
         * compared first as a cheap filter before the full zcmp().
         */
        if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
                (zcmp(z1, rp->one) == 0)) {
                        if (sign)
                                zsub(rp->mod, z2, res);
                        else
                                zcopy(z2, res);
                        if (z1tmp.len)
                                zfree(z1tmp);
                        if (z2tmp.len)
                                zfree(z2tmp);
                        return;
        }

        /*
         * Likewise when z2 is the REDC format for one.
         */
        if ((z2.len == rp->one.len) && (z2.v[0] == rp->one.v[0]) &&
                (zcmp(z2, rp->one) == 0)) {
                        if (sign)
                                zsub(rp->mod, z1, res);
                        else
                                zcopy(z1, res);
                        if (z1tmp.len)
                                zfree(z1tmp);
                        if (z2tmp.len)
                                zfree(z2tmp);
                        return;
        }

        /*
         * If the size of the modulus is large, then just do the multiply,
         * followed by the two multiplies contained in the REDC routine.
         * This will be quicker than directly doing the REDC calculation
         * because of the O(N^1.585) speed of the multiplies.  The size
         * of the number which this is done is configurable.
         */
        if (rp->mod.len >= conf->redc2) {
                zmul(z1, z2, &tmp);
                zredcdecode(rp, tmp, res);
                zfree(tmp);
                if (sign && !ziszero(*res)) {
                        zsub(rp->mod, *res, &tmp);
                        zfree(*res);
                        *res = tmp;
                }
                if (z1tmp.len)
                        zfree(z1tmp);
                if (z2tmp.len)
                        zfree(z2tmp);
                return;
        }

        /*
         * The number is small enough to calculate by doing the O(N^2) REDC
         * algorithm directly.  This algorithm performs the multiplication and
         * the reduction at the same time.  Notice the obscure facts that
         * only the lowest word of the inverse value is used, and that
         * there is no shifting of the partial products as there is in a
         * normal multiply.
         */
        modlen = rp->mod.len;
        Ninv = rp->inv.v[0];

        /*
         * Allocate the result and clear it.
         * The size of the result will be equal to or smaller than
         * the modulus size.
         */
        res->sign = 0;
        res->len = modlen;
        res->v = alloc(modlen);

        hd = res->v;
        len = modlen;
        zclearval(*res);

        /*
         * Do this outermost loop over all the digits of z1.
         *
         * Within one pass mulb is the current digit of z1 and muln is
         * the multiple of the modulus chosen so that the lowest word of
         * the sum becomes zero.  Every word is written one position
         * lower than it was read (hd[-1]), which drops that zero word.
         */
        h1 = z1.v;
        len = z1.len;
        while (len--) {
                /*
                 * Start off with the next digit of z1, the first
                 * digit of z2, and the first digit of the modulus.
                 */
                mulb = (FULL) *h1++;
                h2 = z2.v;
                h3 = rp->mod.v;
                hd = res->v;
                sival1.ivalue = mulb * ((FULL) *h2++) + ((FULL) *hd++);
                muln = ((HALF) (sival1.silow * Ninv));
                sival2.ivalue = muln * ((FULL) *h3++) + ((FULL) sival1.silow);
                carry.ivalue = ((FULL) sival1.sihigh) + ((FULL) sival2.sihigh);

                /*
                 * Do this innermost loop for each digit of z2, except
                 * for the first digit which was just done above.
                 */
                len2 = z2.len;
                while (--len2 > 0) {
                        sival1.ivalue = mulb * ((FULL) *h2++)
                                + ((FULL) *hd) + ((FULL) carry.silow);
                        sival2.ivalue = muln * ((FULL) *h3++)
                                + ((FULL) sival1.silow);
                        carry.ivalue = ((FULL) sival1.sihigh)
                                + ((FULL) sival2.sihigh)
                                + ((FULL) carry.sihigh);

                        hd[-1] = sival2.silow;
                        hd++;
                }

                /*
                 * Now continue the loop as necessary so the total number
                 * of iterations is equal to the size of the modulus.
                 * This acts as if the innermost loop was repeated for
                 * high digits of z2 that are zero.
                 */
                len2 = modlen - z2.len;
                while (len2--) {
                        sival2.ivalue = muln * ((FULL) *h3++)
                                + ((FULL) *hd)
                                + ((FULL) carry.silow);
                        carry.ivalue = ((FULL) sival2.sihigh)
                                + ((FULL) carry.sihigh);

                        hd[-1] = sival2.silow;
                        hd++;
                }

                /*
                 * Fold in the overflow word left by the previous pass,
                 * store the new top word of the result and keep whatever
                 * still overflows in topdigit for the next pass.
                 */
                carry.ivalue += topdigit;
                hd[-1] = carry.silow;
                topdigit = carry.sihigh;
        }

        /*
         * Now continue the loop as necessary so the total number
         * of iterations is equal to the size of the modulus.
         * This acts as if the outermost loop was repeated for high
         * digits of z1 that are zero.
         */
        len = modlen - z1.len;
        while (len--) {
                /*
                 * Start off with the first digit of the modulus.
                 */
                h3 = rp->mod.v;
                hd = res->v;
                muln = ((HALF) (*hd * Ninv));
                sival2.ivalue = muln * ((FULL) *h3++) + (FULL) *hd++;
                carry.ivalue = ((FULL) sival2.sihigh);

                /*
                 * Do this innermost loop for each digit of the modulus,
                 * except for the first digit which was just done above.
                 */
                len2 = modlen;
                while (--len2 > 0) {
                        sival2.ivalue = muln * ((FULL) *h3++)
                                + ((FULL) *hd) + ((FULL) carry.silow);
                        carry.ivalue = ((FULL) sival2.sihigh)
                                + ((FULL) carry.sihigh);

                        hd[-1] = sival2.silow;
                        hd++;
                }
                carry.ivalue += topdigit;
                hd[-1] = carry.silow;
                topdigit = carry.sihigh;
        }

        /*
         * Determine the true size of the result, taking the top digit of
         * the current result into account.  The top digit is not stored in
         * the number because it is temporary and would become zero anyway
         * after the final subtraction is done.
         */
        if (topdigit == 0) {
                /*
                 * hd is one past the top word here; walk down over
                 * leading zero words but keep at least one word.
                 */
                len = modlen;
                while (*--hd == 0 && len > 1) {
                        len--;
                }
                res->len = len;

        /*
         * Compare the result with the modulus.
         * If it is less than the modulus, then the calculation is complete.
         */

                if (zrel(*res, rp->mod) < 0) {
                        if (z1tmp.len)
                                zfree(z1tmp);
                        if (z2tmp.len)
                                zfree(z2tmp);
                        if (sign && !ziszero(*res)) {
                                zsub(rp->mod, *res, &tmp);
                                zfree(*res);
                                *res = tmp;
                        }
                        return;
                }
        }

        /*
         * Do a subtraction to reduce the result to a value less than
         * the modulus.  The REDC algorithm guarantees that a single subtract
         * is all that is needed.  Ignore any borrowing from the possible
         * highest word of the current result because that would affect
         * only the top digit value that was not stored and would become
         * zero anyway.
         *
         * The subtraction is done on complemented words: each pass forms
         * BASE1 - result + modulus + borrow, stores BASE1 minus the low
         * half of that as the new result word, and carries the high half
         * forward as the borrow for the next word.
         */
        carry.ivalue = 0;
        h1 = rp->mod.v;
        hd = res->v;
        len = modlen;
        while (len--) {
                carry.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++)
                        + ((FULL) carry.silow);
                *hd++ = (HALF)(BASE1 - carry.silow);
                carry.silow = carry.sihigh;
        }

        /*
         * Now finally recompute the size of the result.
         */
        len = modlen;
        hd = &res->v[len - 1];
        while ((*hd == 0) && (len > 1)) {
                hd--;
                len--;
        }
        res->len = len;
        if (z1tmp.len)
                zfree(z1tmp);
        if (z2tmp.len)
                zfree(z2tmp);

        /* apply the sign of the product as a negation modulo the modulus */
        if (sign && !ziszero(*res)) {
                zsub(rp->mod, *res, &tmp);
                zfree(*res);
                *res = tmp;
        }

}

/*
 * Square a number in REDC format producing a result also in REDC format.
 *
 * The sign of z1 is discarded, and a value whose magnitude is not less
 * than the modulus is first reduced with zmod().  Zero and the REDC
 * format for one are returned directly.  Otherwise the square is formed
 * either by zsquare() followed by zredcdecode(), or by a combined
 * square-and-reduce loop, as selected by the test on conf->redc2 and
 * the length of z1 below.
 *
 * given:
 *      rp              REDC information
 *      z1              REDC number to be squared
 *      res             resulting REDC number
 */
void
zredcsquare(REDC *rp, ZVALUE z1, ZVALUE *res)
{
        FULL mulb;
        FULL muln;
        HALF *h1;
        HALF *h2;
        HALF *h3;
        HALF *hd = NULL;
        HALF Ninv;
        HALF topdigit = 0;
        LEN modlen;
        LEN len;
        SIUNION sival1;
        SIUNION sival2;
        SIUNION sival3;
        SIUNION carry;
        ZVALUE tmp, ztmp;
        FULL f;
        int i, j;

        /* firewall */
        if (rp == NULL) {
                math_error("%s: rp NULL", __func__);
                not_reached();
        }
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        /*
         * ztmp holds a reduced copy of z1 when one was needed; a zero
         * length marks it as unused for the cleanup tests below.
         */
        ztmp.len = 0;
        z1.sign = 0;
        if (zrel(z1, rp->mod) >= 0) {
                zmod(z1, rp->mod, &ztmp, 0);
                z1 = ztmp;
        }
        if (ziszero(z1)) {
                *res = _zero_;
                if (ztmp.len)
                        zfree(ztmp);
                return;
        }
        /* the REDC format for one squares to itself */
        if ((z1.len == rp->one.len) && (z1.v[0] == rp->one.v[0]) &&
                (zcmp(z1, rp->one) == 0)) {
                        zcopy(z1, res);
                        if (ztmp.len)
                                zfree(ztmp);
                        return;
        }


        /*
         * If the modulus has at least conf->redc2 words, or z1 is
         * shorter than two thirds of the modulus length, then square
         * with zsquare() and reduce the product with zredcdecode().
         * Otherwise fall through to the combined square-and-reduce
         * loop below.
         */
        if (rp->mod.len >= conf->redc2
                        || 3 * z1.len < 2 * rp->mod.len) {
                zsquare(z1, &tmp);
                zredcdecode(rp, tmp, res);
                zfree(tmp);
                if (ztmp.len)
                        zfree(ztmp);
                return;
        }
        modlen = rp->mod.len;
        Ninv = rp->inv.v[0];

        /* allocate a cleared result of the same length as the modulus */
        res->sign = 0;
        res->len = modlen;
        res->v = alloc(modlen);

        zclearval(*res);

        h1 = z1.v;

        /*
         * One pass per digit of z1.  mulb is the current digit, h2
         * points at the digits of z1 above the current one, and muln is
         * the multiple of the modulus that makes the lowest word of the
         * running sum zero.  Words are stored one position lower than
         * they were read (hd[-1]), which drops that zero word.
         */
        for (i = 0; i < z1.len; i++) {
                mulb = (FULL) *h1++;
                h2 = h1;
                h3 = rp->mod.v;
                hd = res->v;
                if (i == 0) {
                        /* first digit: its own square starts the sum */
                        sival1.ivalue = mulb * mulb;
                        muln = (HALF) (sival1.silow * Ninv);
                        sival2.ivalue = muln * ((FULL) *h3++)
                                + (FULL) sival1.silow;
                        carry.ivalue = (FULL) sival1.sihigh
                                + (FULL) sival2.sihigh;
                        hd++;
                } else {
                        /*
                         * Later digits: the lowest i words receive only
                         * the modulus multiple, and the square of the
                         * current digit is then added in at the next
                         * word.
                         */
                        muln = (HALF) (*hd * Ninv);
                        f = (muln * ((FULL) *h3++) + (FULL) *hd++) >> BASEB;
                        j = i;
                        while (--j > 0) {
                                f += muln * ((FULL) *h3++) + *hd;
                                hd[-1] = (HALF) f;
                                f >>= BASEB;
                                hd++;
                        }
                        carry.ivalue = f;
                        sival1.ivalue = mulb * mulb + (FULL) carry.silow;
                        sival2.ivalue = muln * ((FULL) *h3++)
                                + (FULL) *hd
                                + (FULL) sival1.silow;
                        carry.ivalue = (FULL) sival1.sihigh
                                + (FULL) sival2.sihigh
                                + (FULL) carry.sihigh;
                        hd[-1] = sival2.silow;
                        hd++;
                }

                /*
                 * Cross products of the current digit with each higher
                 * digit of z1.  Both halves of the product are doubled
                 * (<< 1) before being added in.
                 */
                j = z1.len - i;
                while (--j > 0) {
                        sival1.ivalue = mulb * ((FULL) *h2++);
                        sival2.ivalue = ((FULL) sival1.silow << 1)
                                + muln * ((FULL) *h3++);
                        sival3.ivalue = (FULL) sival2.silow
                                + (FULL) *hd
                                + (FULL) carry.silow;
                        carry.ivalue = ((FULL) sival1.sihigh << 1)
                                + (FULL) sival2.sihigh
                                + (FULL) sival3.sihigh
                                + (FULL) carry.sihigh;
                        hd[-1] = sival3.silow;
                        hd++;
                }

                /*
                 * Remaining words of the modulus beyond the length of
                 * z1: only the modulus multiple contributes.
                 */
                j = modlen - z1.len;
                while (j-- > 0) {
                        sival1.ivalue = muln * ((FULL) *h3++)
                                + (FULL) *hd
                                + (FULL) carry.silow;
                        carry.ivalue = (FULL) sival1.sihigh
                                + (FULL) carry.sihigh;
                        hd[-1] = sival1.silow;
                        hd++;
                }

                /*
                 * Fold in the overflow word from the previous pass,
                 * store the new top word and keep any further overflow
                 * in topdigit.
                 */
                carry.ivalue += (FULL) topdigit;
                hd[-1] = carry.silow;
                topdigit = carry.sihigh;
        }

        /*
         * Extra passes so that the total number of passes equals the
         * length of the modulus.  These passes add only the modulus
         * multiple; no digit of z1 is involved.
         */
        i = modlen - z1.len;
        while (i-- > 0) {
                h3 = rp->mod.v;
                hd = res->v;
                muln = (HALF) (*hd * Ninv);
                sival1.ivalue = muln * ((FULL) *h3++) + (FULL) *hd++;
                carry.ivalue = (FULL) sival1.sihigh;
                j = modlen;
                while (--j > 0) {
                        sival1.ivalue = muln * ((FULL) *h3++)
                                + (FULL) *hd
                                + (FULL) carry.silow;
                        carry.ivalue = (FULL) sival1.sihigh
                                + (FULL) carry.sihigh;
                        hd[-1] = sival1.silow;
                        hd++;
                }
                carry.ivalue += (FULL) topdigit;
                hd[-1] = carry.silow;
                topdigit = carry.sihigh;
        }

        /*
         * With no overflow word, trim leading zero words (keeping at
         * least one) and finish if the value is already less than the
         * modulus.
         */
        if (topdigit == 0) {
                len = modlen;
                while (*--hd == 0 && len > 1) {
                        len--;
                }
                res->len = len;
                if  (zrel(*res, rp->mod) < 0) {
                        if (ztmp.len)
                                zfree(ztmp);
                        return;
                }
        }

        /*
         * Subtract the modulus once, word by word.  Each pass forms
         * BASE1 - result + modulus + borrow, stores BASE1 minus the low
         * half of that as the new result word, and carries the high
         * half forward as the borrow.  A borrow out of the top word is
         * not stored anywhere.
         */
        carry.ivalue = 0;
        h1 = rp->mod.v;
        hd = res->v;
        len = modlen;
        while (len--) {
                carry.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++)
                        + ((FULL) carry.silow);
                *hd++ = (HALF)(BASE1 - carry.silow);
                carry.silow = carry.sihigh;
        }

        /* recompute the length of the result after the subtraction */
        len = modlen;
        hd = &res->v[len - 1];
        while ((*hd == 0) && (len > 1)) {
                hd--;
                len--;
        }
        res->len = len;
        if (ztmp.len)
                zfree(ztmp);
}


/*
 * Compute the result of raising a REDC format number to a power.
 * The result is in REDC format and within the range 0 to the modulus - 1.
 * This calculates the result by examining the power POWBITS bits at a time,
 * using a small table of POWNUMS low powers to calculate powers for those bits,
 * and repeated squaring and multiplying by the partial powers to generate
 * the complete power.
 *
 * The sign of z1 is removed before the calculation; when z1 is negative
 * and the power is odd, a nonzero result is replaced by the modulus
 * minus the result.  A base of magnitude not less than the modulus is
 * first reduced with zmod().  Zero raised to the power zero gives the
 * REDC format for one, the same as any other base raised to the power
 * zero.  A negative power is reported through math_error().
 *
 * given:
 *      rp              REDC information
 *      z1              REDC number to be raised
 *      z2              normal number to raise number to
 *      res             result
 */
void
zredcpower(REDC *rp, ZVALUE z1, ZVALUE z2, ZVALUE *res)
{
        HALF *hp;               /* pointer to current word of the power */
        ZVALUE *pp;             /* pointer to low power table */
        ZVALUE ans, temp;       /* calculation values */
        ZVALUE ztmp;            /* reduced copy of z1, len 0 if unused */
        ZVALUE modpow;          /* current small power */
        ZVALUE lowpowers[POWNUMS];      /* low powers */
        int curshift;           /* shift value for word of power */
        HALF curhalf;           /* current word of power */
        unsigned int curpow;    /* current low power */
        unsigned int curbit;    /* current bit of low power */
        int sign;               /* nonzero if the result must be negated */
        int i;

        /* firewall */
        if (rp == NULL) {
                math_error("%s: rp NULL", __func__);
                not_reached();
        }
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }

        if (zisneg(z2)) {
                math_error("Negative power in zredcpower");
                not_reached();
        }

        /* everything is zero modulo one */
        if (zisunit(rp->mod)) {
                *res = _zero_;
                return;
        }

        /*
         * Only an odd power keeps the sign of the base.  Continue with
         * the magnitude, reduced below the modulus if necessary.
         */
        sign = zisodd(z2) ? z1.sign : 0;
        z1.sign = 0;
        ztmp.len = 0;
        if (zrel(z1, rp->mod) >= 0) {
                zmod(z1, rp->mod, &ztmp, 0);
                z1 = ztmp;
        }

        /*
         * Check for zero or the REDC format for one.
         *
         * Zero to the power zero is one, which must be returned in REDC
         * format (rp->one) like every other result of this routine, not
         * as the normal number 1.
         */
        if (ziszero(z1)) {
                if (ziszero(z2))
                        zcopy(rp->one, res);
                else
                        *res = _zero_;
                if (ztmp.len)
                        zfree(ztmp);
                return;
        }
        if (zcmp(z1, rp->one) == 0) {
                if (sign)
                        zsub(rp->mod, rp->one, res);
                else
                        zcopy(rp->one, res);
                if (ztmp.len)
                        zfree(ztmp);
                return;
        }

        /*
         * See if the number being raised is the REDC format for -1.
         * If so, then the answer is the REDC format for one or minus one.
         * To do this check, calculate the REDC format for -1.
         * The comparison of the low words is a cheap filter that avoids
         * the subtraction in most cases.
         */
        if (((HALF)(z1.v[0] + rp->one.v[0])) == rp->mod.v[0]) {
                zsub(rp->mod, rp->one, &temp);
                if (zcmp(z1, temp) == 0) {
                        if (zisodd(z2) ^ sign) {
                                /* result is minus one: hand over temp */
                                *res = temp;
                                if (ztmp.len)
                                        zfree(ztmp);
                                return;
                        }
                        zfree(temp);
                        zcopy(rp->one, res);
                        if (ztmp.len)
                                zfree(ztmp);
                        return;
                }
                zfree(temp);
        }

        /*
         * Set up the table of low powers.  Entries 0 and 1 are filled
         * in now; a zero length marks the others as not yet calculated.
         */
        for (pp = &lowpowers[2]; pp < &lowpowers[POWNUMS]; pp++)
                pp->len = 0;
        zcopy(rp->one, &lowpowers[0]);
        zcopy(z1, &lowpowers[1]);
        zcopy(rp->one, &ans);

        /*
         * Start at the highest word of the power and skip over leading
         * chunks of that word that are zero.
         */
        hp = &z2.v[z2.len - 1];
        curhalf = *hp;
        curshift = BASEB - POWBITS;
        while (curshift && ((curhalf >> curshift) == 0))
                curshift -= POWBITS;

        /*
         * Calculate the result by examining the power POWBITS bits at a time,
         * and use the table of low powers at each iteration.
         */
        for (;;) {
                curpow = (curhalf >> curshift) & (POWNUMS - 1);
                pp = &lowpowers[curpow];

                /*
                 * If the small power is not yet saved in the table, then
                 * calculate it and remember it in the table for future use.
                 * The power is assembled from the bits of curpow: table
                 * entries for powers of two are obtained by squaring the
                 * entry for half the power, and are multiplied into
                 * modpow for each bit that is set.
                 */
                if (pp->len == 0) {
                        if (curpow & 0x1)
                                zcopy(z1, &modpow);
                        else
                                zcopy(rp->one, &modpow);

                        for (curbit = 0x2; curbit <= curpow; curbit *= 2) {
                                pp = &lowpowers[curbit];
                                if (pp->len == 0)
                                        zredcsquare(rp, lowpowers[curbit/2],
                                                pp);
                                if (curbit & curpow) {
                                        zredcmul(rp, *pp, modpow, &temp);
                                        zfree(modpow);
                                        modpow = temp;
                                }
                        }

                        /*
                         * When curpow is itself a power of two, its entry
                         * was filled in by the loop above; release that
                         * value before storing modpow in its place.
                         */
                        pp = &lowpowers[curpow];
                        if (pp->len > 0) {
                                zfree(*pp);
                        }
                        *pp = modpow;
                }

                /*
                 * If the power is nonzero, then accumulate the small power
                 * into the result.
                 */
                if (curpow) {
                        zredcmul(rp, ans, *pp, &temp);
                        zfree(ans);
                        ans = temp;
                }

                /*
                 * Select the next POWBITS bits of the power, if there is
                 * any more to generate.
                 */
                curshift -= POWBITS;
                if (curshift < 0) {
                        if (hp-- == z2.v)
                                break;
                        curhalf = *hp;
                        curshift = BASEB - POWBITS;
                }

                /*
                 * Square the result POWBITS times to make room for the next
                 * chunk of bits.
                 */
                for (i = 0; i < POWBITS; i++) {
                        zredcsquare(rp, ans, &temp);
                        zfree(ans);
                        ans = temp;
                }
        }

        /* release the table of low powers */
        for (pp = lowpowers; pp < &lowpowers[POWNUMS]; pp++) {
                zfree(*pp);
        }

        /* a negative base with an odd power negates the result */
        if (sign && !ziszero(ans)) {
                zsub(rp->mod, ans, res);
                zfree(ans);
        } else {
                *res = ans;
        }
        if (ztmp.len)
                zfree(ztmp);
}


/*
 * zhnrmod - compute v mod h*2^n+r
 *
 * We compute v mod h*2^n+r, where h>0, n>0, abs(r) <= 1, as follows:
 *
 *      Let v = b*2^n + a, where 0 <= a < 2^n
 *
 *      Now v mod h*2^n+r == b*2^n + a mod h*2^n+r,
 *      and thus v mod h*2^n+r == b*2^n mod h*2^n+r + a mod h*2^n+r.
 *
 *      Because 0 <= a < 2^n < h*2^n+r, a mod h*2^n+r == a.
 *      Thus v mod h*2^n+r == b*2^n mod h*2^n+r + a.
 *
 *      It can be shown that b*2^n mod h*2^n == 2^n * (b mod h).
 *
 *      Thus for r == 0, v mod h*2^n+r == (2^n)*(b mod h) + a.
 *
 *      It can be shown that v mod 2^n-1 == a+b mod 2^n-1.
 *
 *      Thus for r == -1, v mod h*2^n+r == (2^n)*(b mod h) + a + int(b/h).
 *
 *      It can be shown that v mod 2^n+1 == a-b mod 2^n+1.
 *
 *      Thus for r == +1, v mod h*2^n+r == (2^n)*(b mod h) + a - int(b/h).
 *
 *      Therefore, v mod h*2^n+r == (2^n)*(b mod h) + a - r*int(b/h).
 *
 * The above proof leads to the following calc resource file which computes
 * the value v mod h*2^n+r:
 *
 *    define hnrmod(v,h,n,r)
 *    {
 *      local a,b,modulus,tquo,tmod,lbit,ret;
 *
 *      if (!isint(h) || h < 1) {
 *          quit "h must be an integer be > 0";
 *      }
 *      if (!isint(n) || n < 1) {
 *          quit "n must be an integer be > 0";
 *      }
 *      if (r != 1 && r != 0 && r != -1) {
 *          quit "r must be -1, 0 or 1";
 *      }
 *
 *      lbit = lowbit(h);
 *      if (lbit > 0) {
 *          n += lbit;
 *          h >>= lbit;
 *      }
 *
 *      modulus = h<<n+r;
 *      if (modulus <= 2^31-1) {
 *          return v % modulus;
 *      }
 *      ret = v;
 *
 *      do {
 *          if (highbit(ret) < n) {
 *              break;
 *          }
 *          b = ret>>n;
 *          a = ret - (b<<n);
 *
 *          switch (r) {
 *          case -1:
 *              if (h == 1) {
 *                  ret = a + b;
 *              } else {
 *                  quomod(b, h, tquo, tmod);
 *                  ret = tmod<<n + a + tquo;
 *              }
 *              break;
 *          case 0:
 *              if (h == 1) {
 *                  ret = a;
 *              } else {
 *                  ret = (b%h)<<n + a;
 *              }
 *              break;
 *          case 1:
 *              if (h == 1) {
 *                  ret = ((a > b) ? a-b : modulus+a-b);
 *              } else {
 *                  quomod(b, h, tquo, tmod);
 *                  tmod = tmod<<n + a;
 *                  ret = ((tmod >= tquo) ? tmod-tquo : modulus+tmod-tquo);
 *              }
 *              break;
 *          }
 *      } while (ret > modulus);
 *      ret = ((ret < 0) ? ret+modulus : ((ret == modulus) ? 0 : ret));
 *
 *      return ret;
 *    }
 *
 * This function implements the above calc resource file.
 *
 * given:
 *      v               take mod of this value, v >= 0
 *      zh              h from modulus h*2^n+r, h > 0
 *      zn              n from modulus h*2^n+r, n > 0
 *      zr              r from modulus h*2^n+r, abs(r) <= 1
 *      res             v mod h*2^n+r
 */
void
zhnrmod(ZVALUE v, ZVALUE zh, ZVALUE zn, ZVALUE zr, ZVALUE *res)
{
        ZVALUE a;               /* low n bits of ret */
        ZVALUE b;               /* ret with its low n bits shifted away */
        ZVALUE h;               /* zh with its trailing zero bits removed */
        ZVALUE modulus;         /* h*2^n + r */
        ZVALUE tquo;            /* b // h */
        ZVALUE tmod;            /* b % h, then (b%h)<<n + a when r == 1 */
        ZVALUE shifted;         /* h<<n, or (b%h)<<n inside the loop */
        ZVALUE addend;          /* a + b//h, used when r == -1 */
        ZVALUE ret;             /* running value, what *res is set to */
        long n;                 /* integer value of zn, plus lowbit(zh) */
        long r;                 /* integer value of zr: -1, 0 or 1 */
        long twos;              /* lowbit(zh) */
        long partial;           /* n % BASEB */
        int rel;                /* ret versus modulus, 0 => equal */
        int unit_h;             /* non-zero => h == 1 */

        /*
         * firewall - reject a missing result pointer and out of range args
         */
        if (res == NULL) {
                math_error("%s: res NULL", __func__);
                not_reached();
        }
        if (ziszero(zh) || zisneg(zh)) {
                math_error("h must be > 0");
                not_reached();
        }
        if (ziszero(zn) || zisneg(zn)) {
                math_error("n must be > 0");
                not_reached();
        }
        if (zge31b(zn)) {
                math_error("n must be < 2^31");
                not_reached();
        }
        if (!zisabsleone(zr)) {
                math_error("r must be -1, 0 or 1");
                not_reached();
        }

        /*
         * convert n and r to native integers
         *
         * The sign of zr is applied separately from the ztolong() value.
         */
        n = ztolong(zn);
        r = (zisneg(zr) ? -ztolong(zr) : ztolong(zr));

        /*
         * move the factors of 2 in zh over to the 2^n term
         *
         * When zh has no low zero bits, h simply shares the storage of zh,
         * which is why h is freed later only when twos > 0.
         */
        h = zh;
        twos = zlowbit(zh);
        if (twos > 0) {
                zshift(zh, -twos, &h);
                n += twos;
        }

        /*
         * modulus = (h << n) + r
         */
        zshift(h, n, &shifted);
        if (r == 1) {
                zadd(shifted, _one_, &modulus);
                zfree(shifted);
        } else if (r == 0) {
                modulus = shifted;
        } else if (r == -1) {
                zsub(shifted, _one_, &modulus);
                zfree(shifted);
        }

        /*
         * a modulus that fits in a long is handled directly by zmodi()
         */
        if (!zgtmaxlong(modulus)) {
                itoz(zmodi(v, ztolong(modulus)), res);
                zfree(modulus);
                if (twos > 0) {
                        zfree(h);
                }
                return;
        }

        /*
         * shift-add reduction, starting from a private copy of v
         */
        zcopy(v, &ret);
        unit_h = zisone(h);
        partial = n % BASEB;
        for (;;) {

                /*
                 * nothing above the low n bits: stop reducing
                 *
                 * Record only whether ret is exactly the modulus.
                 */
                if ((long)zhighbit(ret) < n) {
                        rel = (zcmp(ret, modulus) ? -1 : 0);
                        break;
                }

                /*
                 * split ret into two chunks:
                 *
                 *      b = ret >> n
                 *      a = ret - (b << n)
                 *
                 * Both chunks carry the sign of ret.
                 */
                zshift(ret, -n, &b);
                b.sign = ret.sign;
                a.len = (n+BASEB-1)/BASEB;
                a.v = alloc(a.len);
                a.sign = ret.sign;
                memcpy(a.v, ret.v, a.len*sizeof(HALF));
                if (partial != 0) {
                        /* mask off the bits at and above bit n */
                        a.v[a.len - 1] &= lowhalf[partial];
                }
                ztrim(&a);

                /*
                 * fold b back into a according to h and r
                 */
                if (unit_h) {

                        /*
                         * h == 1: no division by h is needed
                         *
                         *      r == -1:        ret = a + b
                         *      r ==  0:        ret = a
                         *      r ==  1:        ret = a - b
                         */
                        zfree(ret);
                        if (r == -1) {
                                zadd(a, b, &ret);
                        } else if (r == 0) {
                                zcopy(a, &ret);
                        } else if (r == 1) {
                                zsub(a, b, &ret);
                        }

                } else if (r == -1) {

                        /*
                         * v mod h*2^n-1:
                         *
                         *      ret = ((b%h) << n) + a + b//h
                         */
                        (void) zdiv(b, h, &tquo, &tmod, 0);
                        zshift(tmod, n, &shifted);
                        zfree(tmod);
                        zadd(a, tquo, &addend);
                        zfree(tquo);
                        zfree(ret);
                        zadd(shifted, addend, &ret);
                        zfree(shifted);
                        zfree(addend);

                } else if (r == 0) {

                        /*
                         * v mod h*2^n:
                         *
                         *      ret = ((b%h) << n) + a
                         */
                        (void) zmod(b, h, &tmod, 0);
                        zshift(tmod, n, &shifted);
                        zfree(tmod);
                        zfree(ret);
                        zadd(shifted, a, &ret);
                        zfree(shifted);

                } else if (r == 1) {

                        /*
                         * v mod h*2^n+1:
                         *
                         *      ret = ((b%h) << n) + a - b//h
                         *
                         * tmod is reused to hold ((b%h) << n) + a.
                         */
                        (void) zdiv(b, h, &tquo, &tmod, 0);
                        zshift(tmod, n, &shifted);
                        zfree(tmod);
                        zadd(shifted, a, &tmod);
                        zfree(shifted);
                        zfree(ret);
                        zsub(tmod, tquo, &ret);
                        zfree(tquo);
                        zfree(tmod);
                }
                zfree(a);
                zfree(b);

                /*
                 * keep reducing only while abs(ret) > modulus
                 */
                rel = zabsrel(ret, modulus);
                if (rel <= 0) {
                        break;
                }
        }

        /*
         * final fix-up:
         *
         *      ret < 0         ==> ret + modulus
         *      ret == modulus  ==> 0
         */
        if (ret.sign) {
                zadd(ret, modulus, &shifted);
                zfree(ret);
                ret = shifted;
        } else if (rel == 0) {
                zfree(ret);
                ret = _zero_;
        }

        /*
         * release working values, h only when it is not sharing zh
         */
        zfree(modulus);
        if (twos > 0) {
                zfree(h);
        }

        /*
         * return ret
         */
        *res = ret;
        return;
}