mirror of
https://github.com/lcn2/calc.git
synced 2025-08-19 01:13:27 +03:00
While help/errstr has been added, the errstr builtin function is not yet written. In anticipation of the new errstr builtin, the rest of the calc error system has been updated to associate errsym E_STRINGs with errnum error codes and errmsg error messages. Minor improvements to help/rand. The verify_error_table() function, which verifies the error_table[] array and sets up the private_error_alias[] array, is now called by libcalc_call_me_first(). Fix comment about wrong include file in have_sys_mount.h. Removed unused booltostr() and strtobool() macros from bool.h. Moved define of math_error(char *, ...) from zmath.h to errtbl.h. The errtbl.h include file, unless ERRCODE_SRC is defined, also includes attribute.h and errsym.h. Group calc error related builtin support functions together in func.c. Make switch indenting in func.c consistent. Passing an invalid argument to error(), errno() or strerror() will set errno AND throw a math error. Previously, errno would be set and an error value was returned. As a result, there was no way to tell whether the error value was a result of the arg or whether an error had been detected. Added E_STRING to error([errnum | "E_STRING"]) builtin function. Added E_STRING to errno([errnum | "E_STRING"]) builtin function. Added E_STRING to strerror([errnum | "E_STRING"]) builtin function. Calling these functions with an E_STRING errsym is the same as calling them with the matching errnum code. Standardized on calc computation error related E_STRING strings where there is a set of related codes. Changed "E_...digits" into "E_..._digits". For example, E_FPUTC1 became E_FPUTC_1, E_FPUTC2 became E_FPUTC_2, and E_FPUTC3 became E_FPUTC_3. In a few cases a name that had no digits gained a suffix: for example, E_APPR became E_APPR_1, because there was an E_APPR2 (which became E_APPR_2) and an E_APPR3 (which became E_APPR_3). Two other special cases: E_ILOG10 became E_IBASE10_LOG and E_ILOG2 became E_IBASE2_LOG, because E_ILOG10 and E_ILOG2 are both independent calc computation error related E_STRING strings. 
Now related sets of E_STRING strings end in _ (underscore) followed by digits. The following is the list of E_STRING strings changes: E_APPR ==> E_APPR_1 E_ROUND ==> E_ROUND_1 E_SQRT ==> E_SQRT_1 E_ROOT ==> E_ROOT_1 E_SHIFT ==> E_SHIFT_1 E_SCALE ==> E_SCALE_1 E_POWI ==> E_POWI_1 E_POWER ==> E_POWER_1 E_QUO ==> E_QUO_1 E_MOD ==> E_MOD_1 E_ABS ==> E_ABS_1 E_APPR2 ==> E_APPR_2 E_APPR3 ==> E_APPR_3 E_ROUND2 ==> E_ROUND_2 E_ROUND3 ==> E_ROUND_3 E_BROUND2 ==> E_BROUND_2 E_BROUND3 ==> E_BROUND_3 E_SQRT2 ==> E_SQRT_2 E_SQRT3 ==> E_SQRT_3 E_ROOT2 ==> E_ROOT_2 E_ROOT3 ==> E_ROOT_3 E_SHIFT2 ==> E_SHIFT_2 E_SCALE2 ==> E_SCALE_2 E_POWI2 ==> E_POWI_2 E_POWER2 ==> E_POWER_2 E_POWER3 ==> E_POWER_3 E_QUO2 ==> E_QUO_2 E_QUO3 ==> E_QUO_3 E_MOD2 ==> E_MOD_2 E_MOD3 ==> E_MOD_3 E_ABS2 ==> E_ABS_2 E_EXP1 ==> E_EXP_1 E_EXP2 ==> E_EXP_2 E_FPUTC1 ==> E_FPUTC_1 E_FPUTC2 ==> E_FPUTC_2 E_FPUTC3 ==> E_FPUTC_3 E_FGETC1 ==> E_FGETC_1 E_FGETC2 ==> E_FGETC_2 E_FOPEN1 ==> E_FOPEN_1 E_FOPEN2 ==> E_FOPEN_2 E_FREOPEN1 ==> E_FREOPEN_1 E_FREOPEN2 ==> E_FREOPEN_2 E_FREOPEN3 ==> E_FREOPEN_3 E_FCLOSE1 ==> E_FCLOSE_1 E_FPUTS1 ==> E_FPUTS_1 E_FPUTS2 ==> E_FPUTS_2 E_FPUTS3 ==> E_FPUTS_3 E_FGETS1 ==> E_FGETS_1 E_FGETS2 ==> E_FGETS_2 E_FPUTSTR1 ==> E_FPUTSTR_1 E_FPUTSTR2 ==> E_FPUTSTR_2 E_FPUTSTR3 ==> E_FPUTSTR_3 E_FGETSTR1 ==> E_FGETSTR_1 E_FGETSTR2 ==> E_FGETSTR_2 E_FGETLINE1 ==> E_FGETLINE_1 E_FGETLINE2 ==> E_FGETLINE_2 E_FGETFIELD1 ==> E_FGETFIELD_1 E_FGETFIELD2 ==> E_FGETFIELD_2 E_REWIND1 ==> E_REWIND_1 E_PRINTF1 ==> E_PRINTF_1 E_PRINTF2 ==> E_PRINTF_2 E_FPRINTF1 ==> E_FPRINTF_1 E_FPRINTF2 ==> E_FPRINTF_2 E_FPRINTF3 ==> E_FPRINTF_3 E_STRPRINTF1 ==> E_STRPRINTF_1 E_STRPRINTF2 ==> E_STRPRINTF_2 E_FSCAN1 ==> E_FSCAN_1 E_FSCAN2 ==> E_FSCAN_2 E_FSCANF1 ==> E_FSCANF_1 E_FSCANF2 ==> E_FSCANF_2 E_FSCANF3 ==> E_FSCANF_3 E_FSCANF4 ==> E_FSCANF_4 E_STRSCANF1 ==> E_STRSCANF_1 E_STRSCANF2 ==> E_STRSCANF_2 E_STRSCANF3 ==> E_STRSCANF_3 E_STRSCANF4 ==> E_STRSCANF_4 E_SCANF1 ==> E_SCANF_1 E_SCANF2 ==> E_SCANF_2 E_SCANF3 ==> 
E_SCANF_3 E_FTELL1 ==> E_FTELL_1 E_FTELL2 ==> E_FTELL_2 E_FSEEK1 ==> E_FSEEK_1 E_FSEEK2 ==> E_FSEEK_2 E_FSEEK3 ==> E_FSEEK_3 E_FSIZE1 ==> E_FSIZE_1 E_FSIZE2 ==> E_FSIZE_2 E_FEOF1 ==> E_FEOF_1 E_FEOF2 ==> E_FEOF_2 E_FERROR1 ==> E_FERROR_1 E_FERROR2 ==> E_FERROR_2 E_UNGETC1 ==> E_UNGETC_1 E_UNGETC2 ==> E_UNGETC_2 E_UNGETC3 ==> E_UNGETC_3 E_ISATTY1 ==> E_ISATTY_1 E_ISATTY2 ==> E_ISATTY_2 E_ACCESS1 ==> E_ACCESS_1 E_ACCESS2 ==> E_ACCESS_2 E_SEARCH1 ==> E_SEARCH_1 E_SEARCH2 ==> E_SEARCH_2 E_SEARCH3 ==> E_SEARCH_3 E_SEARCH4 ==> E_SEARCH_4 E_SEARCH5 ==> E_SEARCH_5 E_SEARCH6 ==> E_SEARCH_6 E_RSEARCH1 ==> E_RSEARCH_1 E_RSEARCH2 ==> E_RSEARCH_2 E_RSEARCH3 ==> E_RSEARCH_3 E_RSEARCH4 ==> E_RSEARCH_4 E_RSEARCH5 ==> E_RSEARCH_5 E_RSEARCH6 ==> E_RSEARCH_6 E_REWIND2 ==> E_REWIND_2 E_STRERROR1 ==> E_STRERROR_1 E_STRERROR2 ==> E_STRERROR_2 E_COS1 ==> E_COS_1 E_COS2 ==> E_COS_2 E_SIN1 ==> E_SIN_1 E_SIN2 ==> E_SIN_2 E_EVAL2 ==> E_EVAL_2 E_ARG1 ==> E_ARG_1 E_ARG2 ==> E_ARG_2 E_POLAR1 ==> E_POLAR_1 E_POLAR2 ==> E_POLAR_2 E_MATFILL1 ==> E_MATFILL_1 E_MATFILL2 ==> E_MATFILL_2 E_MATTRANS1 ==> E_MATTRANS_1 E_MATTRANS2 ==> E_MATTRANS_2 E_DET1 ==> E_DET_1 E_DET2 ==> E_DET_2 E_DET3 ==> E_DET_3 E_MATMIN1 ==> E_MATMIN_1 E_MATMIN2 ==> E_MATMIN_2 E_MATMIN3 ==> E_MATMIN_3 E_MATMAX1 ==> E_MATMAX_1 E_MATMAX2 ==> E_MATMAX_2 E_MATMAX3 ==> E_MATMAX_3 E_CP1 ==> E_CP_1 E_CP2 ==> E_CP_2 E_CP3 ==> E_CP_3 E_DP1 ==> E_DP_1 E_DP2 ==> E_DP_2 E_DP3 ==> E_DP_3 E_SUBSTR1 ==> E_SUBSTR_1 E_SUBSTR2 ==> E_SUBSTR_2 E_INSERT1 ==> E_INSERT_1 E_INSERT2 ==> E_INSERT_2 E_DELETE1 ==> E_DELETE_1 E_DELETE2 ==> E_DELETE_2 E_LN1 ==> E_LN_1 E_LN2 ==> E_LN_2 E_ERROR1 ==> E_ERROR_1 E_ERROR2 ==> E_ERROR_2 E_EVAL3 ==> E_EVAL_3 E_EVAL4 ==> E_EVAL_4 E_RM1 ==> E_RM_1 E_RM2 ==> E_RM_2 E_BLK1 ==> E_BLK_1 E_BLK2 ==> E_BLK_2 E_BLK3 ==> E_BLK_3 E_BLK4 ==> E_BLK_4 E_BLKFREE1 ==> E_BLKFREE_1 E_BLKFREE2 ==> E_BLKFREE_2 E_BLKFREE3 ==> E_BLKFREE_3 E_BLKFREE4 ==> E_BLKFREE_4 E_BLKFREE5 ==> E_BLKFREE_5 E_BLOCKS1 ==> E_BLOCKS_1 E_BLOCKS2 ==> 
E_BLOCKS_2 E_COPY1 ==> E_COPY_01 E_COPY2 ==> E_COPY_02 E_COPY3 ==> E_COPY_03 E_COPY4 ==> E_COPY_04 E_COPY5 ==> E_COPY_05 E_COPY6 ==> E_COPY_06 E_COPY7 ==> E_COPY_07 E_COPY8 ==> E_COPY_08 E_COPY9 ==> E_COPY_09 E_COPY10 ==> E_COPY_10 E_COPY11 ==> E_COPY_11 E_COPY12 ==> E_COPY_12 E_COPY13 ==> E_COPY_13 E_COPY14 ==> E_COPY_14 E_COPY15 ==> E_COPY_15 E_COPY16 ==> E_COPY_16 E_COPY17 ==> E_COPY_17 E_COPYF1 ==> E_COPYF_1 E_COPYF2 ==> E_COPYF_2 E_COPYF3 ==> E_COPYF_3 E_COPYF4 ==> E_COPYF_4 E_PROTECT1 ==> E_PROTECT_1 E_PROTECT2 ==> E_PROTECT_2 E_PROTECT3 ==> E_PROTECT_3 E_MATFILL3 ==> E_MATFILL_3 E_MATFILL4 ==> E_MATFILL_4 E_MATTRACE1 ==> E_MATTRACE_1 E_MATTRACE2 ==> E_MATTRACE_2 E_MATTRACE3 ==> E_MATTRACE_3 E_TAN1 ==> E_TAN_1 E_TAN2 ==> E_TAN_2 E_COT1 ==> E_COT_1 E_COT2 ==> E_COT_2 E_SEC1 ==> E_SEC_1 E_SEC2 ==> E_SEC_2 E_CSC1 ==> E_CSC_1 E_CSC2 ==> E_CSC_2 E_SINH1 ==> E_SINH_1 E_SINH2 ==> E_SINH_2 E_COSH1 ==> E_COSH_1 E_COSH2 ==> E_COSH_2 E_TANH1 ==> E_TANH_1 E_TANH2 ==> E_TANH_2 E_COTH1 ==> E_COTH_1 E_COTH2 ==> E_COTH_2 E_SECH1 ==> E_SECH_1 E_SECH2 ==> E_SECH_2 E_CSCH1 ==> E_CSCH_1 E_CSCH2 ==> E_CSCH_2 E_ASIN1 ==> E_ASIN_1 E_ASIN2 ==> E_ASIN_2 E_ACOS1 ==> E_ACOS_1 E_ACOS2 ==> E_ACOS_2 E_ATAN1 ==> E_ATAN_1 E_ATAN2 ==> E_ATAN_2 E_ACOT1 ==> E_ACOT_1 E_ACOT2 ==> E_ACOT_2 E_ASEC1 ==> E_ASEC_1 E_ASEC2 ==> E_ASEC_2 E_ACSC1 ==> E_ACSC_1 E_ACSC2 ==> E_ACSC_2 E_ASINH1 ==> E_ASINH_1 E_ASINH2 ==> E_ASINH_2 E_ACOSH1 ==> E_ACOSH_1 E_ACOSH2 ==> E_ACOSH_2 E_ATANH1 ==> E_ATANH_1 E_ATANH2 ==> E_ATANH_2 E_ACOTH1 ==> E_ACOTH_1 E_ACOTH2 ==> E_ACOTH_2 E_ASECH1 ==> E_ASECH_1 E_ASECH2 ==> E_ASECH_2 E_ACSCH1 ==> E_ACSCH_1 E_ACSCH2 ==> E_ACSCH_2 E_GD1 ==> E_GD_1 E_GD2 ==> E_GD_2 E_AGD1 ==> E_AGD_1 E_AGD2 ==> E_AGD_2 E_BIT1 ==> E_BIT_1 E_BIT2 ==> E_BIT_2 E_SETBIT1 ==> E_SETBIT_1 E_SETBIT2 ==> E_SETBIT_2 E_SETBIT3 ==> E_SETBIT_3 E_SEG1 ==> E_SEG_1 E_SEG2 ==> E_SEG_2 E_SEG3 ==> E_SEG_3 E_HIGHBIT1 ==> E_HIGHBIT_1 E_HIGHBIT2 ==> E_HIGHBIT_2 E_LOWBIT1 ==> E_LOWBIT_1 E_LOWBIT2 ==> E_LOWBIT_2 E_HEAD1 ==> 
E_HEAD_1 E_HEAD2 ==> E_HEAD_2 E_TAIL1 ==> E_TAIL_1 E_TAIL2 ==> E_TAIL_2 E_XOR1 ==> E_XOR_1 E_XOR2 ==> E_XOR_2 E_INDICES1 ==> E_INDICES_1 E_INDICES2 ==> E_INDICES_2 E_EXP3 ==> E_EXP_3 E_SINH3 ==> E_SINH_3 E_COSH3 ==> E_COSH_3 E_SIN3 ==> E_SIN_3 E_COS3 ==> E_COS_3 E_GD3 ==> E_GD_3 E_AGD3 ==> E_AGD_3 E_POWER4 ==> E_POWER_4 E_ROOT4 ==> E_ROOT_4 E_DGT1 ==> E_DGT_1 E_DGT2 ==> E_DGT_2 E_DGT3 ==> E_DGT_3 E_PLCS1 ==> E_PLCS_1 E_PLCS2 ==> E_PLCS_2 E_DGTS1 ==> E_DGTS_1 E_DGTS2 ==> E_DGTS_2 E_ILOG10 ==> E_IBASE10_LOG E_ILOG2 ==> E_IBASE2_LOG E_COMB1 ==> E_COMB_1 E_COMB2 ==> E_COMB_2 E_ASSIGN1 ==> E_ASSIGN_1 E_ASSIGN2 ==> E_ASSIGN_2 E_ASSIGN3 ==> E_ASSIGN_3 E_ASSIGN4 ==> E_ASSIGN_4 E_ASSIGN5 ==> E_ASSIGN_5 E_ASSIGN6 ==> E_ASSIGN_6 E_ASSIGN7 ==> E_ASSIGN_7 E_ASSIGN8 ==> E_ASSIGN_8 E_ASSIGN9 ==> E_ASSIGN_9 E_SWAP1 ==> E_SWAP_1 E_SWAP2 ==> E_SWAP_2 E_SWAP3 ==> E_SWAP_3 E_QUOMOD1 ==> E_QUOMOD_1 E_QUOMOD2 ==> E_QUOMOD_2 E_QUOMOD3 ==> E_QUOMOD_3 E_PREINC1 ==> E_PREINC_1 E_PREINC2 ==> E_PREINC_2 E_PREINC3 ==> E_PREINC_3 E_PREDEC1 ==> E_PREDEC_1 E_PREDEC2 ==> E_PREDEC_2 E_PREDEC3 ==> E_PREDEC_3 E_POSTINC1 ==> E_POSTINC_1 E_POSTINC2 ==> E_POSTINC_2 E_POSTINC3 ==> E_POSTINC_3 E_POSTDEC1 ==> E_POSTDEC_1 E_POSTDEC2 ==> E_POSTDEC_2 E_POSTDEC3 ==> E_POSTDEC_3 E_INIT1 ==> E_INIT_01 E_INIT2 ==> E_INIT_02 E_INIT3 ==> E_INIT_03 E_INIT4 ==> E_INIT_04 E_INIT5 ==> E_INIT_05 E_INIT6 ==> E_INIT_06 E_INIT7 ==> E_INIT_07 E_INIT8 ==> E_INIT_08 E_INIT9 ==> E_INIT_09 E_INIT10 ==> E_INIT_10 E_LIST1 ==> E_LIST_1 E_LIST2 ==> E_LIST_2 E_LIST3 ==> E_LIST_3 E_LIST4 ==> E_LIST_4 E_LIST5 ==> E_LIST_5 E_LIST6 ==> E_LIST_6 E_MODIFY1 ==> E_MODIFY_1 E_MODIFY2 ==> E_MODIFY_2 E_MODIFY3 ==> E_MODIFY_3 E_MODIFY4 ==> E_MODIFY_4 E_MODIFY5 ==> E_MODIFY_5 E_FPATHOPEN1 ==> E_FPATHOPEN_1 E_FPATHOPEN2 ==> E_FPATHOPEN_2 E_LOG1 ==> E_LOG_1 E_LOG2 ==> E_LOG_2 E_LOG3 ==> E_LOG_3 E_FGETFILE1 ==> E_FGETFILE_1 E_FGETFILE2 ==> E_FGETFILE_2 E_FGETFILE3 ==> E_FGETFILE_3 E_TAN3 ==> E_TAN_3 E_TAN4 ==> E_TAN_4 E_COT3 ==> E_COT_3 E_COT4 ==> 
E_COT_4 E_SEC3 ==> E_SEC_3 E_CSC3 ==> E_CSC_3 E_TANH3 ==> E_TANH_3 E_TANH4 ==> E_TANH_4 E_COTH3 ==> E_COTH_3 E_COTH4 ==> E_COTH_4 E_SECH3 ==> E_SECH_3 E_CSCH3 ==> E_CSCH_3 E_ASIN3 ==> E_ASIN_3 E_ACOS3 ==> E_ACOS_3 E_ASINH3 ==> E_ASINH_3 E_ACOSH3 ==> E_ACOSH_3 E_ATAN3 ==> E_ATAN_3 E_ACOT3 ==> E_ACOT_3 E_ASEC3 ==> E_ASEC_3 E_ACSC3 ==> E_ACSC_3 E_ATANH3 ==> E_ATANH_3 E_ACOTH3 ==> E_ACOTH_3 E_ASECH3 ==> E_ASECH_3 E_ACSCH3 ==> E_ACSCH_3 E_D2R1 ==> E_D2R_1 E_D2R2 ==> E_D2R_2 E_R2D1 ==> E_R2D_1 E_R2D2 ==> E_R2D_2 E_G2R1 ==> E_G2R_1 E_G2R2 ==> E_G2R_2 E_R2G1 ==> E_R2G_1 E_R2G2 ==> E_R2G_2 E_D2G1 ==> E_D2G_1 E_G2D1 ==> E_G2D_1 E_D2DMS1 ==> E_D2DMS_1 E_D2DMS2 ==> E_D2DMS_2 E_D2DMS3 ==> E_D2DMS_3 E_D2DMS4 ==> E_D2DMS_4 E_D2DM1 ==> E_D2DM_1 E_D2DM2 ==> E_D2DM_2 E_D2DM3 ==> E_D2DM_3 E_D2DM4 ==> E_D2DM_4 E_G2GMS1 ==> E_G2GMS_1 E_G2GMS2 ==> E_G2GMS_2 E_G2GMS3 ==> E_G2GMS_3 E_G2GMS4 ==> E_G2GMS_4 E_G2GM1 ==> E_G2GM_1 E_G2GM2 ==> E_G2GM_2 E_G2GM3 ==> E_G2GM_3 E_G2GM4 ==> E_G2GM_4 E_H2HMS1 ==> E_H2HMS_1 E_H2HMS2 ==> E_H2HMS_2 E_H2HMS3 ==> E_H2HMS_3 E_H2HMS4 ==> E_H2HMS_4 E_H2HM1 ==> E_H2HM_1 E_H2HM2 ==> E_H2HM_2 E_H2HM3 ==> E_H2HM_3 E_H2HM4 ==> E_H2HM_4 E_DMS2D1 ==> E_DMS2D_1 E_DMS2D2 ==> E_DMS2D_2 E_DM2D1 ==> E_DM2D_1 E_DM2D2 ==> E_DM2D_2 E_GMS2G1 ==> E_GMS2G_1 E_GMS2G2 ==> E_GMS2G_2 E_GM2G1 ==> E_GM2G_1 E_GM2G2 ==> E_GM2G_2 E_HMS2H1 ==> E_HMS2H_1 E_HMS2H2 ==> E_HMS2H_2 E_HM2H1 ==> E_HM2H_1 E_HM2H2 ==> E_HM2H_2 E_VERSIN1 ==> E_VERSIN_1 E_VERSIN2 ==> E_VERSIN_2 E_VERSIN3 ==> E_VERSIN_3 E_AVERSIN1 ==> E_AVERSIN_1 E_AVERSIN2 ==> E_AVERSIN_2 E_AVERSIN3 ==> E_AVERSIN_3 E_COVERSIN1 ==> E_COVERSIN_1 E_COVERSIN2 ==> E_COVERSIN_2 E_COVERSIN3 ==> E_COVERSIN_3 E_ACOVERSIN1 ==> E_ACOVERSIN_1 E_ACOVERSIN2 ==> E_ACOVERSIN_2 E_ACOVERSIN3 ==> E_ACOVERSIN_3 E_VERCOS1 ==> E_VERCOS_1 E_VERCOS2 ==> E_VERCOS_2 E_VERCOS3 ==> E_VERCOS_3 E_AVERCOS1 ==> E_AVERCOS_1 E_AVERCOS2 ==> E_AVERCOS_2 E_AVERCOS3 ==> E_AVERCOS_3 E_COVERCOS1 ==> E_COVERCOS_1 E_COVERCOS2 ==> E_COVERCOS_2 E_COVERCOS3 ==> E_COVERCOS_3 
E_ACOVERCOS1 ==> E_ACOVERCOS_1 E_ACOVERCOS2 ==> E_ACOVERCOS_2 E_ACOVERCOS3 ==> E_ACOVERCOS_3 E_TAN5 ==> E_TAN_5 E_COT5 ==> E_COT_5 E_COT6 ==> E_COT_6 E_SEC5 ==> E_SEC_5 E_CSC5 ==> E_CSC_5 E_CSC6 ==> E_CSC_6
1108 lines
29 KiB
C
1108 lines
29 KiB
C
/*
|
|
* zmul - faster than usual multiplying and squaring routines
|
|
*
|
|
* Copyright (C) 1999-2007,2021-2023 David I. Bell
|
|
*
|
|
* Calc is open software; you can redistribute it and/or modify it under
|
|
* the terms of the version 2.1 of the GNU Lesser General Public License
|
|
* as published by the Free Software Foundation.
|
|
*
|
|
* Calc is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General
|
|
* Public License for more details.
|
|
*
|
|
* A copy of version 2.1 of the GNU Lesser General Public License is
|
|
* distributed with calc under the filename COPYING-LGPL. You should have
|
|
* received a copy with calc; if not, write to Free Software Foundation, Inc.
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* Under source code control: 1991/01/09 20:01:31
|
|
* File existed as early as: 1991
|
|
*
|
|
* Share and enjoy! :-) http://www.isthe.com/chongo/tech/comp/calc/
|
|
*/
|
|
|
|
/*
|
|
* Faster than usual multiplying and squaring routines.
|
|
* The algorithm used is the reasonably simple one from Knuth, volume 2,
|
|
* section 4.3.3. These recursive routines are of speed O(N^1.585)
|
|
* instead of O(N^2). The usual multiplication and (almost usual) squaring
|
|
* algorithms are used for small numbers. On a 386 with its compiler, the
|
|
* two algorithms are equal in speed at about 100 decimal digits.
|
|
*/
|
|
|
|
|
|
#include "config.h"
|
|
#include "zmath.h"
|
|
|
|
|
|
#include "errtbl.h"
|
|
#include "banned.h" /* include after system header <> includes */
|
|
|
|
|
|
/*
 * Scratch space shared by the multiply and square routines in this file.
 *
 * zmul() and zsquare() point this at a buffer obtained from zalloctemp()
 * before starting their recursion.  Each recursion level of domul() that
 * needs workspace takes it from the front by advancing this pointer, and
 * puts the pointer back to its entry value before returning.
 */
STATIC HALF *tempbuf; /* temporary buffer for multiply and square */
|
|
|
|
/* recursive multiply worker used by zmul(); returns the result length */
S_FUNC LEN domul(HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans);
|
|
/* recursive square worker used by zsquare(); returns the result length */
S_FUNC LEN dosquare(HALF *vp, LEN size, HALF *ans);
|
|
|
|
|
|
/*
|
|
* Multiply two numbers using the following formula recursively:
|
|
* (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
|
|
* where S is a power of 2^16, and so multiplies by it are shifts, and
|
|
* A,B,C,D are the left and right HALFs of the numbers to be multiplied.
|
|
*
|
|
* given:
|
|
* z1 numbers to multiply
|
|
* z2 numbers to multiply
|
|
* res result of multiplication
|
|
*/
|
|
void
|
|
zmul(ZVALUE z1, ZVALUE z2, ZVALUE *res)
|
|
{
|
|
LEN len; /* size of array */
|
|
|
|
if (ziszero(z1) || ziszero(z2)) {
|
|
*res = _zero_;
|
|
return;
|
|
}
|
|
if (zisunit(z1)) {
|
|
zcopy(z2, res);
|
|
res->sign = (z1.sign != z2.sign);
|
|
return;
|
|
}
|
|
if (zisunit(z2)) {
|
|
zcopy(z1, res);
|
|
res->sign = (z1.sign != z2.sign);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Allocate a temporary buffer for the recursion levels to use.
|
|
* An array needs to be allocated large enough for all of the
|
|
* temporary results to fit in. This size is about twice the size
|
|
* of the largest original number, since each recursion level uses
|
|
* the size of its given number, and whose size is 1/2 the size of
|
|
* the previous level. The sum of the infinite series is 2.
|
|
* Add some extra words because of rounding when dividing by 2
|
|
* and also because of the extra word that each multiply needs.
|
|
*/
|
|
len = z1.len;
|
|
if (len < z2.len)
|
|
len = z2.len;
|
|
len = 2 * len + 64;
|
|
tempbuf = zalloctemp(len);
|
|
|
|
res->sign = (z1.sign != z2.sign);
|
|
res->v = alloc(z1.len + z2.len + 2);
|
|
res->len = domul(z1.v, z1.len, z2.v, z2.len, res->v);
|
|
}
|
|
|
|
|
|
/*
|
|
* Recursive routine to multiply two numbers by splitting them up into
|
|
* two numbers of half the size, and using the results of multiplying the
|
|
* sub-pieces. The result is placed in the indicated array, which must be
|
|
* large enough for the result plus one extra word (size1 + size2 + 1).
|
|
* Returns the actual size of the result with leading zeroes stripped.
|
|
* This also uses a temporary array which must be twice as large as
|
|
* one more than the size of the number at the top level recursive call.
|
|
*
|
|
* given:
|
|
* v1 first number
|
|
* size1 size of first number
|
|
* v2 second number
|
|
* size2 size of second number
|
|
* ans location for result
|
|
*/
|
|
S_FUNC LEN
|
|
domul(HALF *v1, LEN size1, HALF *v2, LEN size2, HALF *ans)
|
|
{
|
|
LEN shift; /* amount numbers are shifted by */
|
|
LEN sizeA; /* size of left half of first number */
|
|
LEN sizeB; /* size of right half of first number */
|
|
LEN sizeC; /* size of left half of second number */
|
|
LEN sizeD; /* size of right half of second number */
|
|
LEN sizeAB; /* size of subtraction of A and B */
|
|
LEN sizeDC; /* size of subtraction of D and C */
|
|
LEN sizeABDC; /* size of product of above two results */
|
|
LEN subsize; /* size of difference of HALFs */
|
|
LEN copysize; /* size of number left to copy */
|
|
LEN sizetotal; /* total size of product */
|
|
LEN len; /* temporary length */
|
|
HALF *baseA; /* base of left half of first number */
|
|
HALF *baseB; /* base of right half of first number */
|
|
HALF *baseC; /* base of left half of second number */
|
|
HALF *baseD; /* base of right half of second number */
|
|
HALF *baseAB; /* base of result of subtraction of A and B */
|
|
HALF *baseDC; /* base of result of subtraction of D and C */
|
|
HALF *baseABDC; /* base of product of above two results */
|
|
HALF *baseAC; /* base of product of A and C */
|
|
HALF *baseBD; /* base of product of B and D */
|
|
FULL carry; /* carry digit for small multiplications */
|
|
FULL carryACBD; /* carry from addition of A*C and B*D */
|
|
FULL digit; /* single digit multiplying by */
|
|
HALF *temp; /* base for temporary calculations */
|
|
bool neg; /* whether intermediate term is negative */
|
|
register HALF *hd, *h1=NULL, *h2=NULL; /* for inner loops */
|
|
SIUNION sival; /* for addition of digits */
|
|
|
|
/* firewall */
|
|
if (v1 == NULL) {
|
|
math_error("%s: v1 NULL", __func__);
|
|
not_reached();
|
|
}
|
|
if (ans == NULL) {
|
|
math_error("%s: ans NULL", __func__);
|
|
not_reached();
|
|
}
|
|
|
|
/*
|
|
* Trim the numbers of leading zeroes and initialize the
|
|
* estimated size of the result.
|
|
*/
|
|
hd = &v1[size1 - 1];
|
|
while ((*hd == 0) && (size1 > 1)) {
|
|
hd--;
|
|
size1--;
|
|
}
|
|
hd = &v2[size2 - 1];
|
|
while ((*hd == 0) && (size2 > 1)) {
|
|
hd--;
|
|
size2--;
|
|
}
|
|
sizetotal = size1 + size2;
|
|
|
|
/*
|
|
* First check for zero answer.
|
|
*/
|
|
if (((size1 == 1) && (*v1 == 0)) || ((size2 == 1) && (*v2 == 0))) {
|
|
*ans = 0;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Exchange the two numbers if necessary to make the number of
|
|
* digits of the first number be greater than or equal to the
|
|
* second number.
|
|
*/
|
|
if (size1 < size2) {
|
|
len = size1; size1 = size2; size2 = len;
|
|
hd = v1; v1 = v2; v2 = hd;
|
|
}
|
|
|
|
/*
|
|
* If the smaller number has only a few digits, then calculate
|
|
* the result in the normal manner in order to avoid the overhead
|
|
* of the recursion for small numbers. The number of digits where
|
|
* the algorithm changes is settable from 2 to maxint.
|
|
*/
|
|
if (size2 < conf->mul2) {
|
|
/*
|
|
* First clear the top part of the result, and then multiply
|
|
* by the lowest digit to get the first partial sum. Later
|
|
* products will then add into this result.
|
|
*/
|
|
hd = &ans[size1];
|
|
len = size2;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
digit = *v2++;
|
|
h1 = v1;
|
|
hd = ans;
|
|
carry = 0;
|
|
len = size1;
|
|
while (len >= 4) { /* expand the loop some */
|
|
len -= 4;
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
/* ignore Saber-C warning #112 - get ushort from uint */
|
|
/* OK to ignore on name domul`sival */
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) * digit + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
*hd = (HALF)carry;
|
|
|
|
/*
|
|
* Now multiply by the remaining digits of the second number,
|
|
* adding each product into the final result.
|
|
*/
|
|
h2 = ans;
|
|
while (--size2 > 0) {
|
|
digit = *v2++;
|
|
h1 = v1;
|
|
hd = ++h2;
|
|
if (digit == 0)
|
|
continue;
|
|
carry = 0;
|
|
len = size1;
|
|
while (len >= 4) { /* expand the loop some */
|
|
len -= 4;
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) * digit
|
|
+ ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now return the true size of the number.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
hd--;
|
|
len--;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* Need to multiply by a large number.
|
|
* Allocate temporary space for calculations, and calculate the
|
|
* value for the shift. The shift value is 1/2 the size of the
|
|
* larger (first) number (rounded up). The amount of temporary
|
|
* space needed is twice the size of the shift, plus one more word
|
|
* for the multiply to use.
|
|
*/
|
|
shift = (size1 + 1) / 2;
|
|
temp = tempbuf;
|
|
tempbuf += (2 * shift) + 1;
|
|
|
|
/*
|
|
* Determine the sizes and locations of all the numbers.
|
|
* The value of sizeC can be negative, and this is checked later.
|
|
* The value of sizeD is limited by the full size of the number.
|
|
*/
|
|
baseA = v1 + shift;
|
|
baseB = v1;
|
|
baseC = v2 + ((shift <= size2) ? shift : size2);
|
|
baseD = v2;
|
|
baseAB = ans;
|
|
baseDC = ans + shift;
|
|
baseAC = ans + shift * 2;
|
|
baseBD = ans;
|
|
|
|
sizeA = size1 - shift;
|
|
sizeC = size2 - shift;
|
|
|
|
sizeB = shift;
|
|
hd = &baseB[shift - 1];
|
|
while ((*hd == 0) && (sizeB > 1)) {
|
|
hd--;
|
|
sizeB--;
|
|
}
|
|
|
|
sizeD = shift;
|
|
if (sizeD > size2)
|
|
sizeD = size2;
|
|
hd = &baseD[sizeD - 1];
|
|
while ((*hd == 0) && (sizeD > 1)) {
|
|
hd--;
|
|
sizeD--;
|
|
}
|
|
|
|
/*
|
|
* If the smaller number has a high half of zero, then calculate
|
|
* the result by breaking up the first number into two numbers
|
|
* and combining the results using the obvious formula:
|
|
* (A*S+B) * D = (A*D)*S + B*D
|
|
*/
|
|
if (sizeC <= 0) {
|
|
len = domul(baseB, sizeB, baseD, sizeD, ans);
|
|
hd = &ans[len];
|
|
len = sizetotal - len;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
/*
|
|
* Add the second number into the first number, shifted
|
|
* over at the correct position.
|
|
*/
|
|
len = domul(baseA, sizeA, baseD, sizeD, temp);
|
|
h1 = temp;
|
|
hd = ans + shift;
|
|
carry = 0;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Determine the final size of the number and return it.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
hd--;
|
|
len--;
|
|
}
|
|
tempbuf = temp;
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* Now we know that the high HALFs of the numbers are nonzero,
|
|
* so we can use the complete formula.
|
|
* (A*S+B)*(C*S+D) = (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D.
|
|
* The steps are done in the following order:
|
|
* A-B
|
|
* D-C
|
|
* (A-B)*(D-C)
|
|
* S^2*A*C + B*D
|
|
* (S^2+S)*A*C + (S+1)*B*D (*)
|
|
* (S^2+S)*A*C + S*(A-B)*(D-C) + (S+1)*B*D
|
|
*
|
|
* Note: step (*) above can produce a result which is larger than
|
|
* the final product will be, and this is where the extra word
|
|
* needed in the product comes from. After the final subtraction is
|
|
* done, the result fits in the expected size. Using the extra word
|
|
* is easier than suppressing the carries and borrows everywhere.
|
|
*
|
|
* Begin by forming the product (A-B)*(D-C) into a temporary
|
|
* location that we save until the final step. Do each subtraction
|
|
* at positions 0 and S. Be very careful about the relative sizes
|
|
* of the numbers since this result can be negative. For the first
|
|
* step calculate the absolute difference of A and B into a temporary
|
|
* location at position 0 of the result. Negate the sign if A is
|
|
* smaller than B.
|
|
*/
|
|
neg = false;
|
|
if (sizeA == sizeB) {
|
|
len = sizeA;
|
|
h1 = &baseA[len - 1];
|
|
h2 = &baseB[len - 1];
|
|
while ((len > 1) && (*h1 == *h2)) {
|
|
len--;
|
|
h1--;
|
|
h2--;
|
|
}
|
|
}
|
|
if ((sizeA > sizeB) || ((sizeA == sizeB) && h1 && h2 && (*h1 > *h2))) {
|
|
h1 = baseA;
|
|
h2 = baseB;
|
|
sizeAB = sizeA;
|
|
subsize = sizeB;
|
|
} else {
|
|
neg = !neg;
|
|
h1 = baseB;
|
|
h2 = baseA;
|
|
sizeAB = sizeB;
|
|
subsize = sizeA;
|
|
}
|
|
copysize = sizeAB - subsize;
|
|
|
|
hd = baseAB;
|
|
carry = 0;
|
|
while (subsize--) {
|
|
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (copysize--) {
|
|
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
hd = &baseAB[sizeAB - 1];
|
|
while ((*hd == 0) && (sizeAB > 1)) {
|
|
hd--;
|
|
sizeAB--;
|
|
}
|
|
|
|
/*
|
|
* This completes the calculation of abs(A-B). For the next step
|
|
* calculate the absolute difference of D and C into a temporary
|
|
* location at position S of the result. Negate the sign if C is
|
|
* larger than D.
|
|
*/
|
|
if (sizeC == sizeD) {
|
|
len = sizeC;
|
|
h1 = &baseC[len - 1];
|
|
h2 = &baseD[len - 1];
|
|
while ((len > 1) && (*h1 == *h2)) {
|
|
len--;
|
|
h1--;
|
|
h2--;
|
|
}
|
|
}
|
|
if ((sizeC > sizeD) || ((sizeC == sizeD) && (*h1 > *h2))) {
|
|
neg = !neg;
|
|
h1 = baseC;
|
|
h2 = baseD;
|
|
sizeDC = sizeC;
|
|
subsize = sizeD;
|
|
} else {
|
|
h1 = baseD;
|
|
h2 = baseC;
|
|
sizeDC = sizeD;
|
|
subsize = sizeC;
|
|
}
|
|
copysize = sizeDC - subsize;
|
|
|
|
hd = baseDC;
|
|
carry = 0;
|
|
while (subsize--) {
|
|
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (copysize--) {
|
|
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
hd = &baseDC[sizeDC - 1];
|
|
while ((*hd == 0) && (sizeDC > 1)) {
|
|
hd--;
|
|
sizeDC--;
|
|
}
|
|
|
|
/*
|
|
* This completes the calculation of abs(D-C). Now multiply
|
|
* together abs(A-B) and abs(D-C) into a temporary location,
|
|
* which is preserved until the final steps.
|
|
*/
|
|
baseABDC = temp;
|
|
sizeABDC = domul(baseAB, sizeAB, baseDC, sizeDC, baseABDC);
|
|
|
|
/*
|
|
* Now calculate B*D and A*C into one of their two final locations.
|
|
* Make sure the high order digits of the products are zeroed since
|
|
* this initializes the final result. Be careful about this zeroing
|
|
* since the size of the high order words might be smaller than
|
|
* the shift size. Do B*D first since the multiplies use one more
|
|
* word than the size of the product. Also zero the final extra
|
|
* word in the result for possible carries to use.
|
|
*/
|
|
len = domul(baseB, sizeB, baseD, sizeD, baseBD);
|
|
hd = &baseBD[len];
|
|
len = shift * 2 - len;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
len = domul(baseA, sizeA, baseC, sizeC, baseAC);
|
|
hd = &baseAC[len];
|
|
len = sizetotal - shift * 2 - len + 1;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
/*
|
|
* Now add in A*C and B*D into themselves at the other shifted
|
|
* position that they need. This addition is tricky in order to
|
|
* make sure that the two additions cannot interfere with each other.
|
|
* Therefore we first add in the top half of B*D and the lower half
|
|
* of A*C. The sources and destinations of these two additions
|
|
* overlap, and so the same answer results from the two additions,
|
|
* thus only two pointers suffice for both additions. Keep the
|
|
* final carry from these additions for later use since we cannot
|
|
* afford to change the top half of A*C yet.
|
|
*/
|
|
h1 = baseBD + shift;
|
|
h2 = baseAC;
|
|
carryACBD = 0;
|
|
len = shift;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1) + ((FULL) *h2) + carryACBD;
|
|
*h1++ = sival.silow;
|
|
*h2++ = sival.silow;
|
|
carryACBD = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Now add in the bottom half of B*D and the top half of A*C.
|
|
* These additions are straightforward, except that A*C should
|
|
* be done first because of possible carries from B*D, and the
|
|
* top half of A*C might not exist. Add in one of the carries
|
|
* from the previous addition while we are at it.
|
|
*/
|
|
h1 = baseAC + shift;
|
|
hd = baseAC;
|
|
carry = carryACBD;
|
|
len = sizetotal - 3 * shift;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
h1 = baseBD;
|
|
hd = baseBD + shift;
|
|
carry = 0;
|
|
len = shift;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Now finally add in the other delayed carry from the
|
|
* above addition.
|
|
*/
|
|
hd = baseAC + shift;
|
|
while (carryACBD) {
|
|
sival.ivalue = ((FULL) *hd) + carryACBD;
|
|
*hd++ = sival.silow;
|
|
carryACBD = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Now finally add or subtract (A-B)*(D-C) into the final result at
|
|
* the correct position (S), according to whether it is positive or
|
|
* negative. When subtracting, the answer cannot go negative.
|
|
*/
|
|
h1 = baseABDC;
|
|
hd = ans + shift;
|
|
carry = 0;
|
|
len = sizeABDC;
|
|
if (neg) {
|
|
while (len--) {
|
|
sival.ivalue = BASE1 - ((FULL) *hd) +
|
|
((FULL) *h1++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = BASE1 - ((FULL) *hd) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
} else {
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Finally determine the size of the final result and return that.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
hd--;
|
|
len--;
|
|
}
|
|
tempbuf = temp;
|
|
return len;
|
|
}
|
|
|
|
|
|
/*
|
|
* Square a number by using the following formula recursively:
|
|
* (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2
|
|
* where S is a power of 2^16, and so multiplies by it are shifts,
|
|
* and A and B are the left and right HALFs of the number to square.
|
|
*/
|
|
void
|
|
zsquare(ZVALUE z, ZVALUE *res)
|
|
{
|
|
LEN len;
|
|
|
|
/* firewall */
|
|
if (res == NULL) {
|
|
math_error("%s: res NULL", __func__);
|
|
not_reached();
|
|
}
|
|
|
|
if (ziszero(z)) {
|
|
*res = _zero_;
|
|
return;
|
|
}
|
|
if (zisunit(z)) {
|
|
*res = _one_;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Allocate a temporary array if necessary for the recursion to use.
|
|
* The array needs to be allocated large enough for all of the
|
|
* temporary results to fit in. This size is about 3 times the
|
|
* size of the original number, since each recursion level uses 3/2
|
|
* of the size of its given number, and whose size is 1/2 the size
|
|
* of the previous level. The sum of the infinite series is 3.
|
|
* Allocate some extra words for rounding up the sizes.
|
|
*/
|
|
len = 3 * z.len + 32;
|
|
tempbuf = zalloctemp(len);
|
|
|
|
res->sign = 0;
|
|
res->v = alloc((z.len+2) * 2);
|
|
/*
|
|
* Without the memset below, Purify reports that dosquare()
|
|
* will read uninitialized memory at the dosquare() line below
|
|
* the comment:
|
|
*
|
|
* uninitialized memory read (see zsquare)
|
|
*
|
|
* This problem occurs during regression test #622 and may
|
|
* be duplicated by executing:
|
|
*
|
|
* config("sq2", 2);
|
|
* 0xffff0000ffffffff00000000ffff0000000000000000ffff^2;
|
|
*/
|
|
memset((char *)res->v, 0, ((z.len+2) * 2)*sizeof(HALF));
|
|
res->len = dosquare(z.v, z.len, res->v);
|
|
}
|
|
|
|
|
|
/*
|
|
* Recursive routine to square a number by splitting it up into two numbers
|
|
* of half the size, and using the results of squaring the sub-pieces.
|
|
* The result is placed in the indicated array, which must be large
|
|
* enough for the result (size * 2). Returns the size of the result.
|
|
* This uses a temporary array which must be 3 times as large as the
|
|
* size of the number at the top level recursive call.
|
|
*
|
|
* given:
|
|
* vp value to be squared
|
|
* size length of value to square
|
|
* ans location for result
|
|
*/
|
|
S_FUNC LEN
|
|
dosquare(HALF *vp, LEN size, HALF *ans)
|
|
{
|
|
LEN shift; /* amount numbers are shifted by */
|
|
LEN sizeA; /* size of left half of number to square */
|
|
LEN sizeB; /* size of right half of number to square */
|
|
LEN sizeAA; /* size of square of left half */
|
|
LEN sizeBB; /* size of square of right half */
|
|
LEN sizeAABB; /* size of sum of squares of A and B */
|
|
LEN sizeAB; /* size of difference of A and B */
|
|
LEN sizeABAB; /* size of square of difference of A and B */
|
|
LEN subsize; /* size of difference of HALFs */
|
|
LEN copysize; /* size of number left to copy */
|
|
LEN sumsize; /* size of sum */
|
|
LEN sizetotal; /* total size of square */
|
|
LEN len; /* temporary length */
|
|
LEN len1; /* another temporary length */
|
|
FULL carry; /* carry digit for small multiplications */
|
|
FULL digit; /* single digit multiplying by */
|
|
HALF *temp; /* base for temporary calculations */
|
|
HALF *baseA; /* base of left half of number */
|
|
HALF *baseB; /* base of right half of number */
|
|
HALF *baseAA; /* base of square of left half of number */
|
|
HALF *baseBB; /* base of square of right half of number */
|
|
HALF *baseAABB; /* base of sum of squares of A and B */
|
|
HALF *baseAB; /* base of difference of A and B */
|
|
HALF *baseABAB; /* base of square of difference of A and B */
|
|
register HALF *hd, *h1, *h2, *h3; /* for inner loops */
|
|
SIUNION sival; /* for addition of digits */
|
|
|
|
/* firewall */
|
|
if (vp == NULL) {
|
|
math_error("%s: vp NULL", __func__);
|
|
not_reached();
|
|
}
|
|
if (ans == NULL) {
|
|
math_error("%s: ans NULL", __func__);
|
|
not_reached();
|
|
}
|
|
|
|
/*
|
|
* First trim the number of leading zeroes.
|
|
*/
|
|
hd = &vp[size - 1];
|
|
while ((*hd == 0) && (size > 1)) {
|
|
size--;
|
|
hd--;
|
|
}
|
|
sizetotal = size + size;
|
|
|
|
/*
|
|
* If the number has only a small number of digits, then use the
|
|
* (almost) normal multiplication method. Multiply each halfword
|
|
* only by those halfwords further on in the number. Missed terms
|
|
* will then be the same pairs of products repeated, and the squares
|
|
* of each halfword. The first case is handled by doubling the
|
|
* result. The second case is handled explicitly. The number of
|
|
* digits where the algorithm changes is settable from 2 to maxint.
|
|
*/
|
|
if (size < conf->sq2) {
|
|
hd = ans;
|
|
len = sizetotal;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
h2 = vp;
|
|
hd = ans + 1;
|
|
for (len = size; len--; hd += 2) {
|
|
digit = (FULL) *h2++;
|
|
if (digit == 0)
|
|
continue;
|
|
h3 = h2;
|
|
h1 = hd;
|
|
carry = 0;
|
|
len1 = len;
|
|
while (len1 >= 4) { /* expand the loop some */
|
|
len1 -= 4;
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + carry;
|
|
*h1++ = sival.silow;
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + ((FULL) sival.sihigh);
|
|
*h1++ = sival.silow;
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + ((FULL) sival.sihigh);
|
|
*h1++ = sival.silow;
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + ((FULL) sival.sihigh);
|
|
*h1++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (len1--) {
|
|
sival.ivalue = (digit * ((FULL) *h3++))
|
|
+ ((FULL) *h1) + carry;
|
|
*h1++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *h1) + carry;
|
|
*h1++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now double the result.
|
|
* There is no final carry to worry about because we
|
|
* handle all digits of the result which must fit.
|
|
*/
|
|
carry = 0;
|
|
hd = ans;
|
|
len = sizetotal;
|
|
while (len--) {
|
|
digit = ((FULL) *hd);
|
|
sival.ivalue = digit + digit + carry;
|
|
/* ignore Saber-C warning #112 - get ushort from uint */
|
|
/* OK to ignore on name dosquare`sival */
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Now add in the squares of each halfword.
|
|
*/
|
|
carry = 0;
|
|
hd = ans;
|
|
h3 = vp;
|
|
len = size;
|
|
while (len--) {
|
|
digit = ((FULL) *h3++);
|
|
sival.ivalue = digit * digit + ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Finally return the size of the result.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
len--;
|
|
hd--;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* The number to be squared is large.
|
|
* Allocate temporary space and determine the sizes and
|
|
* positions of the values to be calculated.
|
|
*/
|
|
temp = tempbuf;
|
|
tempbuf += (3 * (size + 1) / 2);
|
|
|
|
sizeA = size / 2;
|
|
sizeB = size - sizeA;
|
|
shift = sizeB;
|
|
baseA = vp + sizeB;
|
|
baseB = vp;
|
|
baseAA = &ans[shift * 2];
|
|
baseBB = ans;
|
|
baseAABB = temp;
|
|
baseAB = temp;
|
|
baseABAB = &temp[shift];
|
|
|
|
/*
|
|
* Trim the second number of leading zeroes.
|
|
*/
|
|
hd = &baseB[sizeB - 1];
|
|
while ((*hd == 0) && (sizeB > 1)) {
|
|
sizeB--;
|
|
hd--;
|
|
}
|
|
|
|
/*
|
|
* Now to proceed to calculate the result using the formula.
|
|
* (A*S+B)^2 = (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
|
|
* The steps are done in the following order:
|
|
* S^2*A^2 + B^2
|
|
* A^2 + B^2
|
|
* (S^2+S)*A^2 + (S+1)*B^2
|
|
* (A-B)^2
|
|
* (S^2+S)*A^2 + (S+1)*B^2 - S*(A-B)^2.
|
|
*
|
|
* Begin by forming the squares of two the HALFs concatenated
|
|
* together in the final result location. Make sure that the
|
|
* highest words of the results are zero.
|
|
*/
|
|
sizeBB = dosquare(baseB, sizeB, baseBB);
|
|
hd = &baseBB[sizeBB];
|
|
len = shift * 2 - sizeBB;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
sizeAA = dosquare(baseA, sizeA, baseAA);
|
|
hd = &baseAA[sizeAA];
|
|
len = sizetotal - shift * 2 - sizeAA;
|
|
while (len--)
|
|
*hd++ = 0;
|
|
|
|
/*
|
|
* Sum the two squares into a temporary location.
|
|
*/
|
|
if (sizeAA >= sizeBB) {
|
|
h1 = baseAA;
|
|
h2 = baseBB;
|
|
sizeAABB = sizeAA;
|
|
sumsize = sizeBB;
|
|
} else {
|
|
h1 = baseBB;
|
|
h2 = baseAA;
|
|
sizeAABB = sizeBB;
|
|
sumsize = sizeAA;
|
|
}
|
|
copysize = sizeAABB - sumsize;
|
|
|
|
hd = baseAABB;
|
|
carry = 0;
|
|
while (sumsize--) {
|
|
sival.ivalue = ((FULL) *h1++) + ((FULL) *h2++) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (copysize--) {
|
|
sival.ivalue = ((FULL) *h1++) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
if (carry) {
|
|
*hd = (HALF)carry;
|
|
sizeAABB++;
|
|
}
|
|
|
|
/*
|
|
* Add the sum back into the previously calculated squares
|
|
* shifted over to the proper location.
|
|
*/
|
|
h1 = baseAABB;
|
|
hd = ans + shift;
|
|
carry = 0;
|
|
len = sizeAABB;
|
|
while (len--) {
|
|
sival.ivalue = ((FULL) *hd) + ((FULL) *h1++) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
/* uninitialized memory read (see zsquare) */
|
|
sival.ivalue = ((FULL) *hd) + carry;
|
|
*hd++ = sival.silow;
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Calculate the absolute value of the difference of the two HALFs
|
|
* into a temporary location.
|
|
*/
|
|
if (sizeA == sizeB) {
|
|
len = sizeA;
|
|
h1 = &baseA[len - 1];
|
|
h2 = &baseB[len - 1];
|
|
while ((len > 1) && (*h1 == *h2)) {
|
|
len--;
|
|
h1--;
|
|
h2--;
|
|
}
|
|
}
|
|
if ((sizeA > sizeB) || ((sizeA == sizeB) && (*h1 > *h2))) {
|
|
h1 = baseA;
|
|
h2 = baseB;
|
|
sizeAB = sizeA;
|
|
subsize = sizeB;
|
|
} else {
|
|
h1 = baseB;
|
|
h2 = baseA;
|
|
sizeAB = sizeB;
|
|
subsize = sizeA;
|
|
}
|
|
copysize = sizeAB - subsize;
|
|
|
|
hd = baseAB;
|
|
carry = 0;
|
|
while (subsize--) {
|
|
sival.ivalue = BASE1 - ((FULL) *h1++) + ((FULL) *h2++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (copysize--) {
|
|
sival.ivalue = (BASE1 - ((FULL) *h1++)) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
hd = &baseAB[sizeAB - 1];
|
|
while ((*hd == 0) && (sizeAB > 1)) {
|
|
sizeAB--;
|
|
hd--;
|
|
}
|
|
|
|
/*
|
|
* Now square the number into another temporary location,
|
|
* and subtract that from the final result.
|
|
*/
|
|
sizeABAB = dosquare(baseAB, sizeAB, baseABAB);
|
|
|
|
h1 = baseABAB;
|
|
hd = ans + shift;
|
|
carry = 0;
|
|
while (sizeABAB--) {
|
|
sival.ivalue = BASE1 - ((FULL) *hd) + ((FULL) *h1++) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
while (carry) {
|
|
sival.ivalue = BASE1 - ((FULL) *hd) + carry;
|
|
*hd++ = (HALF)(BASE1 - sival.silow);
|
|
carry = sival.sihigh;
|
|
}
|
|
|
|
/*
|
|
* Return the size of the result.
|
|
*/
|
|
len = sizetotal;
|
|
hd = &ans[len - 1];
|
|
while ((*hd == 0) && (len > 1)) {
|
|
len--;
|
|
hd--;
|
|
}
|
|
tempbuf = temp;
|
|
return len;
|
|
}
|
|
|
|
|
|
/*
|
|
* Return a pointer to a buffer to be used for holding a temporary number.
|
|
* The buffer will be at least as large as the specified number of HALFs,
|
|
* and remains valid until the next call to this routine. The buffer cannot
|
|
* be freed by the caller. There is only one temporary buffer, and so as to
|
|
* avoid possible conflicts this is only used by the lowest level routines
|
|
* such as divide, multiply, and square.
|
|
*
|
|
* given:
|
|
* len required number of HALFs in buffer
|
|
*/
|
|
HALF *
|
|
zalloctemp(LEN len)
|
|
{
|
|
HALF *hp;
|
|
STATIC LEN buflen; /* current length of temp buffer */
|
|
STATIC HALF *bufptr; /* pointer to current temp buffer */
|
|
|
|
if (len <= buflen)
|
|
return bufptr;
|
|
|
|
/*
|
|
* We need to grow the temporary buffer.
|
|
* First free any existing buffer, and then allocate the new one.
|
|
* While we are at it, make the new buffer bigger than necessary
|
|
* in order to reduce the number of reallocations.
|
|
*/
|
|
len += 100;
|
|
if (buflen) {
|
|
buflen = 0;
|
|
free(bufptr);
|
|
}
|
|
/* don't call alloc() because _math_abort_ may not be set right */
|
|
hp = (HALF *) malloc((len+1) * sizeof(HALF));
|
|
if (hp == NULL) {
|
|
math_error("No memory for temp buffer");
|
|
not_reached();
|
|
}
|
|
bufptr = hp;
|
|
buflen = len;
|
|
return hp;
|
|
}
|