calc/lib/mfactor.cal

/*
 * Copyright (c) 1997 Landon Curt Noll
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby granted,
 * provided that the above copyright, this permission notice and text
 * this comment, and the disclaimer below appear in all of the following:
 *
 *	supporting documentation
 *	source copies
 *	source works derived from this source
 *	binaries derived from this source or from derived source
 *
 * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
 * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 *
 *      Landon Curt Noll
 *      http://reality.sgi.com/chongo
 *
 *      chongo <was here> /\../\
 */


/*
 * hset method
 *
 * We will assume that mfactor is called with p_elim == 17.
 *
 *	n = (the Mersenne exponent we are testing)
 *	Q = 4*2*3*5*7*11*13*17 (4 * pfact(of some reasonable integer))
 *
 * We first determine all values of h mod Q such that:
 *
 *	gcd(h*n+1, Q) == 1   and   h*n+1 == +/-1 mod 8
 *
 * There will be 2*1*2*4*6*10*12*16 such values of h.
 *
 * For efficiency, we keep the difference between consecutive h values
 * in the hset[] difference array with hset[0] being the first h value.
 * Last, we multiply the hset[] values by n so that we only need
 * to add sequential values of hset[] to get factor candidates.
 *
 * We need only test factors of the form:
 *
 *	(Q*g*n + hx) + 1
 *
 * where:
 *
 *	g is an integer >= 0
 *	hx is computed from hset[] difference value described above
 *
 * Note that (Q*g*n + hx) is always even and that hx is a multiple
 * of n.  Thus the typical factor form:
 *
 *	2*k*n + 1
 *
 * implies that:
 *
 *	k = (Q*g + hx/n)/2
 *
 * This allows us to quickly eliminate factor values that are divisible
 * by 2, 3, 5, 7, 11, 13 or 17.  (well <= p value found below)
 *
 * The following loop shows how test_factor is advanced to higher test
 * values using hset[].  Here, hcount is the number of elements in hset[].
 * It can be shown that hset[0] == 0.  We add hset[hcount] to the hset[]
 * array for looping control convenience.
 *
 *	(* increase test_factor thru other possible test values *)
 *	test_factor = 0;
 *	hindx = 0;
 *	do {
 *		while (++hindx <= hcount) {
 *			test_factor += hset[hindx];
 *		}
 *		hindx = 0;
 *	} while (test_factor < some_limit);
 *
 * The test, mfactor(67, 1, 10000) took on an 200 Mhz r4k (user CPU seconds):
 *
 *	210.83 	(prior to use of hset[])
 * 	 78.35 	(hset[] for p_elim = 7)
 * 	 73.87 	(hset[] for p_elim = 11)
 * 	 73.92 	(hset[] for p_elim = 13)
 * 	234.16 	(hset[] for p_elim = 17)
 *	p_elim == 19 requires over 190 Megs of memory
 *
 * Over a long period of time, the call to load_hset() becomes insignificant.
 * If we look at the user CPU seconds from the first 10000 cycle to the
 * end of the test we find:
 *
 *	205.00 	(prior to use of hset[])
 * 	 75.89 	(hset[] for p_elim = 7)
 * 	 73.74 	(hset[] for p_elim = 11)
 * 	 70.61 	(hset[] for p_elim = 13)
 * 	 57.78 	(hset[] for p_elim = 17)
 *	 p_elim == 19 rejected because of memory size
 *
 * The p_elim == 17 overhead takes ~3 minutes on an 200 Mhz r4k CPU and
 * requires about ~13 Megs of memory.  The p_elim == 13 overhead
 * takes about 3 seconds and requires ~1.5 Megs of memory.
 *
 * The value p_elim == 17 is best for long factorizations.  It is the
 * fastest even thought the initial startup overhead is larger than
 * for p_elim == 13.
 *
 * NOTE: The values above are prior to optimizations where hset[] was
 *	 multiplied by n plus other optimizations.  Thus, the CPU
 *	 times you may get will not likely match the above values.
 */


/*
 * mfactor - find a factor of a Mersenne Number
 *
 * Mersenne numbers are numbers of the form:
 *
 *	2^n-1	for integer n > 0
 *
 * We know that factors of a Mersenne number are of the form:
 *
 *	2*k*n+1   and   +/- 1 mod 8
 *
 * We make use of the hset[] difference array to eliminate factor
 * candidates that would otherwise be divisible by 2, 3, 5, 7 ... p_elim.
 *
 * given:
 *	n		attempt to factor M(n) = 2^n-1
 *	start_k		the value k in 2*k*n+1 to start the search (def: 1)
 *	rept_loop	loop cycle reporting (def: 10000)
 *	p_elim		largest prime to eliminate from test factors (def: 17)
 *
 * returns:
 *	factor of (2^n)-1
 *
 * NOTE: The p_elim argument is optional and defaults to 17.  A p_elim value
 *	 of 17 is faster than 13 for even medium length runs.  However 13
 * 	 uses less memory and has a shorter startup time.
 */
define mfactor(n, start_k, rept_loop, p_elim)
{
	local Q;	/* 4*pfact(p_elim), hset[] cycle size */
	local hcount;	/* elements in the hset[] difference array */
	local loop;	/* report loop count */
	local q;	/* test factor of 2^n-1 */
	local g;	/* g as in test candidate form: (Q*g*hset[i])*n + 1 */
	local hindx;	/* hset[] index */
	local i;
	local tmp;
	local tmp2;

	/*
	 * firewall
	 */
	if (!isint(n) || n <= 0) {
		quit "n must be an integer > 0";
	}
	if (!isint(start_k)) {
		start_k = 1;
	} else if (!isint(start_k) || start_k <= 0) {
		quit "start_k must be an integer > 0";
	}
	if (!isint(rept_loop)) {
		rept_loop = 10000;
	}
	if (rept_loop < 1) {
		quit "rept_loop must be an integer > 0";
	}
	if (!isint(p_elim)) {
		p_elim = 17;
	}
	if (p_elim < 3) {
		quit "p_elim must be an integer > 2 (try 13 or 17)";
	}

	/*
	 * declare our global values
	 */
	Q = 4*pfact(p_elim);
	hcount = 2;
	/* allocate the h difference array */
	for (i=2; i <= p_elim; i = nextcand(i)) {
		hcount *= (i-1);
	}
	local mat hset[hcount+1];

	/*
	 * load the hset[] difference array
	 */
	{
		local x;	/* h*n+1 mod 8 */
		local h;	/* potential h value */
		local last_h;	/* previous valid h value */

		last_h = 0;
		for (i=0,h=0; h < Q; ++h) {
			if (gcd(h*n+1,Q) == 1) {
				x = (h*n+1) % 8;
				if (x == 1 || x == 7) {
					hset[i++] = (h-last_h) * n;
					last_h = h;
				}
			}
		}
		hset[hcount] = Q*n - last_h*n;
	}

	/*
	 * setup
	 *
	 * determine the next g and hset[] index (hindx) values such that:
	 *
	 *	2*start_k <= (Q*g + hset[hindx])
	 *
	 * and (Q*g + hset[hindx]) is a minimum and where:
	 *
	 *	Q = (4 * pfact(of some reasonable integer))
	 *	g = (some integer) (hset[] cycle number)
	 *
	 * We also compute 'q', the next test candidate.
	 */
	g = (2*start_k) // Q;
	tmp = 2*start_k - Q*g;
	for (tmp2=0, hindx=0;
	     hindx < hcount && (tmp2 += hset[hindx]/n) < tmp;
	     ++hindx) {
	}
	if (hindx == hcount) {
		/* we are beyond the end of a hset[] cycle, start at the next */
		++g;
		hindx = 0;
		tmp2 = hset[0]/n;
	}
	q = (Q*g + tmp2)*n + 1;

	/*
	 * look for a factor
	 *
	 * We ignore factors that themselves are divisible by a prime <=
	 * some small prime p.
	 *
	 * This process is guaranteed to find the smallest factor
	 * of 2^n-1.  A smallest factor of 2^n-1 must be prime, otherwise
	 * the divisors of that factor would also be factors of 2^n-1.
	 * Thus we know that if a test factor itself is not prime, it
	 * cannot be the smallest factor of 2^n-1.
	 *
	 * Eliminating all non-prime test factors would take too long.
	 * However we can eliminate 80.81% of the test factors
	 * by not using test factors that are divisible by a prime <= 17.
	 */
	if (pmod(2,n,q) == 1) {
		return q;
	} else {
		/* report this loop */
		printf("at 2*%d*%d+1, cpu: %f\n",
			(q-1)/(2*n), n, runtime());
		fflush(files(1));
		loop = 0;
	}
	do {
		/*
		 * determine if we need to report
		 *
		 * NOTE: (q-1)/(2*n) is the k value from 2*k*n + 1.
		 */
		if (rept_loop <= ++loop) {
			/* report this loop */
			printf("at 2*%d*%d+1, cpu: %f\n",
				(q-1)/(2*n), n, runtime());
			fflush(files(1));
			loop = 0;
		}

		/*
		 * skip if divisable by a prime <= 449
		 *
		 * The value 281 was determined by timing loops
		 * which found that 281 was at or near the
		 * minimum time to factor 2^(2^127-1)-1.
		 *
		 * The addition of the do { ... } while (factor(q, 449)>1);
		 * loop reduced the factoring loop time (36504 k values with
		 * the hset[] initialization time removed) from 25.69 sec to
		 * 15.62 sec of CPU time on a 200Mhz r4k.
		 */
		do {
			/*
			 * determine the next factor candidate
			 */
			q += hset[++hindx];
			if (hindx >= hcount) {
				hindx = 0;
				/*
				 * if we cared about g,
				 * then we wound ++g here too
				 */
			}
		} while (factor(q, 449) > 1);
	} while (pmod(2,n,q) != 1);

	/*
	 * return the factor found
	 *
	 * q is a factor of (2^n)-1
	 */
	return q;
}

if (config("lib_debug") >= 0) {
	print "mfactor(n [, start_k=1 [, rept_loop=10000 [, p_elim=17]]])"
}