diff --git a/CHANGES b/CHANGES index 05a10bd..99c9832 100644 --- a/CHANGES +++ b/CHANGES @@ -44,6 +44,12 @@ Following is the change from calc version 2.11.0t8 to date: Misc source file cleanup for things such as } else { style consistency. + Fixed the basis for FNV-1 hashes. Prior to this fix, the hash() + builtin produced FNV hash values that did not match the FNV-1 + algorithm as specified in: + + http://reality.sgi.com/chongo/tech/comp/fnv/index.html + Following is the change from calc version 2.11.0t7 to 2.11.0t7.5: diff --git a/assocfunc.c b/assocfunc.c index d5e276f..5cc2300 100644 --- a/assocfunc.c +++ b/assocfunc.c @@ -57,7 +57,7 @@ associndex(ASSOC *ap, BOOL create, long dim, VALUE *indices) * so that we can first select the correct hash chain, and * also so we can quickly compare each element for a match. */ - hash = (QCKHASH)0; + hash = FNV1_32_BASIS; for (i = 0; i < dim; i++) hash = hashvalue(&indices[i], hash); diff --git a/func.c b/func.c index 27a56fe..3b032fe 100644 --- a/func.c +++ b/func.c @@ -1527,7 +1527,7 @@ f_hash(int count, VALUE **vals) long lhash; VALUE result; - hash = (QCKHASH)0; + hash = FNV1_32_BASIS; while (count-- > 0) hash = hashvalue(*vals++, hash); lhash = (long) hash; diff --git a/help/hash b/help/hash index 9376a34..ef88a2c 100644 --- a/help/hash +++ b/help/hash @@ -12,10 +12,23 @@ TYPES DESCRIPTION Returns a hash value for one or more values of arbitrary types. + This function implements the Fowler/Noll/Vo hash-1 (FNV-1 hash). + The basis of the hash algorithm was taken from an idea sent + by Email to the IEEE POSIX P1003.2 mailing list from Phong Vo + (kpv@research.att.com) and Glenn Fowler (gsf@research.att.com). + Landon Curt Noll (http://reality.sgi.com/chongo) later improved on + their algorithm to come up with the Fowler/Noll/Vo hash. + + See: + + http://reality.sgi.com/chongo/tech/comp/fnv/index.html + + for more information on this hash. 
+ EXAMPLE > a = isqrt(2e1000); s = "xyz"; > hash(a,s) - 870000771 + 1916476840 LIMITS The number of arguments is not to exceed 100. diff --git a/lib/regress.cal b/lib/regress.cal index 4bd5a8e..8bf9bc2 100644 --- a/lib/regress.cal +++ b/lib/regress.cal @@ -7115,6 +7115,8 @@ print '188: parsed test_natnumset()'; */ define test_somenew() { + local a, s; + print '8200: Starting test_somenew'; vrfy(char(-1) == char(255), '8201: char(-1) == char(255)'); @@ -7139,7 +7141,11 @@ define test_somenew() vrfy(1/(1/0) == 0, '8215: 1/(1/0) == 0'); vrfy(inverse(1/0) == 0, '8216: inverse(1/0) == 0'); - print '8217: Ending test_somenew'; + a = isqrt(2e1000); s = "xyz"; + print '8217: a = isqrt(2e1000); s = "xyz";'; + vrfy(hash(a,s) == 1916476840, '8218: hash(a,s) == 1916476840'); + + print '8219: Ending test_somenew'; } print '189: parsed test_somenew()'; diff --git a/seed.c b/seed.c index 7eb32c2..9fddce0 100644 --- a/seed.c +++ b/seed.c @@ -86,6 +86,50 @@ typedef struct s_hash64 hash64; #endif +/* + * FNV-1 basis + * + * We start the hash at a non-zero value at the beginning so that + * hashing blocks of data with all 0 bits do not map onto the same + * 0 hash value. The virgin value that we use below is the hash value + * that we would get from the following 32 ASCII characters: + * + * chongo <Landon Curt Noll> /\../\ + * + * Note that the \'s above are not back-slashing escape characters. + * They are literal ASCII backslash 0x5c characters. + * + * The effect of this virgin initial value is the same as starting + * with 0 and pre-pending those 32 characters onto the data being + * hashed. + * + * Yes, even with this non-zero virgin value there is a set of data + * that will result in a zero hash value. Worse, appending any + * amount of zero bytes will continue to produce a zero hash value. + * But that would happen with any initial value so long as the + * hash of the initial was the `inverse' of the virgin prefix string. 
+ * + * But then again for any hash function, there exist sets of data + * for which the hash of every member is the same value. That is + * life with many to few mapping functions. All we do here is to + * prevent sets whose members consist of 0 or more bytes of 0's from + * being such an awkward set. + * + * And yes, someone can figure out what the magic 'inverse' of the + * 32 ASCII characters is ... but this hash function is NOT intended + * to be a cryptographic hash function, just a fast and reasonably + * good hash function. + */ +#if defined(HAVE_B64) +# define FNV1_64_BASIS ((hash64)(0xcbf29ce484222325ULL)) +#else +# define FNV1_64_BASIS_0 ((USB32)0x2325) +# define FNV1_64_BASIS_1 ((USB32)0x8422) +# define FNV1_64_BASIS_2 ((USB32)0x9ce4) +# define FNV1_64_BASIS_3 ((USB32)0xcbf2) +#endif + + /* * hash_buf - perform a 64 bit Fowler/Noll/Vo hash on a buffer * @@ -116,24 +160,16 @@ hash_buf(char *buf, unsigned len) * (gsf@research.att.com). * * See: - * http://reality.sgi.com/chongo/src/fnv/fnv_hash.tar.gz - * http://reality.sgi.com/chongo/src/fnv/h32.c - * http://reality.sgi.com/chongo/src/fnv/h64.c + * http://reality.sgi.com/chongo/tech/comp/fnv/index.html * * for information on 32bit and 64bit Fowler/Noll/Vo hashes. * * Landon Curt Noll (http://reality.sgi.com/chongo) later improved * on their algorithm to come up with Fowler/Noll/Vo hash. - * - * The 32 hash was able to process 234936 words from the web2 dictionary - * without any 32 bit collisions using a constant of - * 16777619 = 0x1000193. - * - * The 64 bit hash uses 1099511628211 = 0x100000001b3 instead. 
*/ #if defined(HAVE_B64) /* hash each octet of the buffer */ - for (hval = (hash64)0ULL; buf < buf_end; ++buf) { + for (hval = FNV1_64_BASIS; buf < buf_end; ++buf) { /* multiply by 1099511628211ULL mod 2^64 using 64 bit longs */ hval *= (hash64)1099511628211ULL; @@ -145,7 +181,11 @@ hash_buf(char *buf, unsigned len) #else /* HAVE_B64 */ /* hash each octet of the buffer */ - for (val[0]=val[1]=val[2]=val[3]=0; buf < buf_end; ++buf) { + val[0] = FNV1_64_BASIS_0; + val[1] = FNV1_64_BASIS_1; + val[2] = FNV1_64_BASIS_2; + val[3] = FNV1_64_BASIS_3; + for (; buf < buf_end; ++buf) { /* * multiply by 1099511628211 mod 2^64 using 32 bit longs @@ -167,7 +207,7 @@ hash_buf(char *buf, unsigned len) val[0] = tmp[0] & 0xffff; tmp[2] += (tmp[1] >> 16); val[1] = tmp[1] & 0xffff; - val[3] += (tmp[2] >> 16); + val[3] = tmp[3] + (tmp[2] >> 16); val[2] = tmp[2] & 0xffff; /* * Doing a val[3] &= 0xffff; is not really needed since it simply diff --git a/version.c b/version.c index b8e0366..18d67f3 100644 --- a/version.c +++ b/version.c @@ -12,7 +12,7 @@ #define MAJOR_VER 2 /* major version */ #define MINOR_VER 11 /* minor version */ #define MAJOR_PATCH 0 /* patch level or 0 if no patch */ -#define MINOR_PATCH "8.2" /* test number or empty string if no patch */ +#define MINOR_PATCH "8.3" /* test number or empty string if no patch */ /* * calc version constants diff --git a/zmath.h b/zmath.h index 837b2c2..80d2a21 100644 --- a/zmath.h +++ b/zmath.h @@ -144,6 +144,43 @@ typedef SB32 LEN; /* unit of length storage */ #endif /* LONG_BITS == 64 */ +/* + * FNV-1 basis + * + * We start the hash at a non-zero value at the beginning so that + * hashing blocks of data with all 0 bits do not map onto the same + * 0 hash value. The virgin value that we use below is the hash value + * that we would get from the following 32 ASCII characters: + * + * chongo <Landon Curt Noll> /\../\ + * + * Note that the \'s above are not back-slashing escape characters. + * They are literal ASCII backslash 0x5c characters. 
+ * + * The effect of this virgin initial value is the same as starting + * with 0 and pre-pending those 32 characters onto the data being + * hashed. + * + * Yes, even with this non-zero virgin value there is a set of data + * that will result in a zero hash value. Worse, appending any + * amount of zero bytes will continue to produce a zero hash value. + * But that would happen with any initial value so long as the + * hash of the initial was the `inverse' of the virgin prefix string. + * + * But then again for any hash function, there exist sets of data + * for which the hash of every member is the same value. That is + * life with many to few mapping functions. All we do here is to + * prevent sets whose members consist of 0 or more bytes of 0's from + * being such an awkward set. + * + * And yes, someone can figure out what the magic 'inverse' of the + * 32 ASCII characters is ... but this hash function is NOT intended + * to be a cryptographic hash function, just a fast and reasonably + * good hash function. + */ +#define FNV1_32_BASIS ((QCKHASH)(0x811c9dc5)) + + /* * The largest power of 10 we will compute for our decimal conversion * internal constants is: 10^(2^TEN_MAX).