00001 #ifndef _UTH_LOOKUP3_H_ 00002 #define _UTH_LOOKUP3_H_ 00003 00004 #include <stdint.h> /* defines uint32_t etc */ 00005 00006 /* 00007 ------------------------------------------------------------------------------- 00008 lookup3.c, by Bob Jenkins, May 2006, Public Domain. 00009 00010 These are functions for producing 32-bit hashes for hash table lookup. 00011 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 00012 are externally useful functions. Routines to test the hash are included 00013 if SELF_TEST is defined. You can use this free for any purpose. It's in 00014 the public domain. It has no warranty. 00015 00016 You probably want to use hashlittle(). hashlittle() and hashbig() 00017 hash byte arrays. hashlittle() is is faster than hashbig() on 00018 little-endian machines. Intel and AMD are little-endian machines. 00019 On second thought, you probably want hashlittle2(), which is identical to 00020 hashlittle() except it returns two 32-bit hashes for the price of one. 00021 You could implement hashbig2() if you wanted but I haven't bothered here. 00022 00023 If you want to find a hash of, say, exactly 7 integers, do 00024 a = i1; b = i2; c = i3; 00025 mix(a,b,c); 00026 a += i4; b += i5; c += i6; 00027 mix(a,b,c); 00028 a += i7; 00029 final(a,b,c); 00030 then use c as the hash value. If you have a variable length array of 00031 4-byte integers to hash, use hashword(). If you have a byte array (like 00032 a character string), use hashlittle(). If you have several byte arrays, or 00033 a mix of things, see the comments above hashlittle(). 00034 00035 Why is this so big? I read 12 bytes at a time into 3 4-byte integers, 00036 then mix those integers. This is fast (you can do a lot more thorough 00037 mixing with 12*3 instructions on 3 integers than you can with 3 instructions 00038 on 1 byte), but shoehorning those bytes into integers efficiently is messy. 00039 ------------------------------------------------------------------------------- 00040 */ 00041 00042 #ifdef __cplusplus 00043 extern "C" { 00044 #endif 00045 00046 00047 /* 00048 ------------------------------------------------------------------------------- 00049 hashlittle() -- hash a variable-length key into a 32-bit value 00050 k : the key (the unaligned variable-length array of bytes) 00051 length : the length of the key, counting by bytes 00052 initval : can be any 4-byte value 00053 Returns a 32-bit value. Every bit of the key affects every bit of 00054 the return value. Two keys differing by one or two bits will have 00055 totally different hash values. 00056 00057 The best hash table sizes are powers of 2. There is no need to do 00058 mod a prime (mod is sooo slow!). If you need less than 32 bits, 00059 use a bitmask. For example, if you need only 10 bits, do 00060 h = (h & hashmask(10)); 00061 In which case, the hash table should have hashsize(10) elements. 00062 00063 If you are hashing n strings (uint8_t **)k, do it like this: 00064 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); 00065 00066 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this 00067 code any way you wish, private, educational, or commercial. It's free. 00068 00069 Use for hash table lookup, or anything where one collision in 2^^32 is 00070 acceptable. Do NOT use for cryptographic purposes. 00071 ------------------------------------------------------------------------------- 00072 */ 00073 00074 uint32_t hashlittle( const void *key, size_t length, uint32_t initval); 00075 00076 00077 /* 00078 * hashlittle2: return 2 32-bit hash values 00079 * 00080 * This is identical to hashlittle(), except it returns two 32-bit hash 00081 * values instead of just one. This is good enough for hash table 00082 * lookup with 2^^64 buckets, or if you want a second hash if you're not 00083 * happy with the first, or if you want a probably-unique 64-bit ID for 00084 * the key. *pc is better mixed than *pb, so use *pc first. If you want 00085 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". 00086 */ 00087 void hashlittle2( 00088 const void *key, /* the key to hash */ 00089 size_t length, /* length of the key */ 00090 uint32_t *pc, /* IN: primary initval, OUT: primary hash */ 00091 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */; 00092 00093 #ifdef __cplusplus 00094 } 00095 #endif 00096 00097 00098 #endif