Diffstat (limited to 'src/mum.h')
-rw-r--r--  src/mum.h  365
1 file changed, 194 insertions, 171 deletions
diff --git a/src/mum.h b/src/mum.h
index 318efea50268..aa1ed2d840a3 100644
--- a/src/mum.h
+++ b/src/mum.h
@@ -69,14 +69,14 @@ typedef unsigned __int64 uint64_t;
#endif
#endif
-#if defined(__GNUC__) && ((__GNUC__ == 4) &&  (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
+#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
#define _MUM_FRESH_GCC
#endif
#if defined(__GNUC__) && !defined(__llvm__) && defined(_MUM_FRESH_GCC)
-#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
-#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
-#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
+#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__(opts)))
+#define _MUM_TARGET(opts) __attribute__((__target__(opts)))
#else
#define _MUM_ATTRIBUTE_UNUSED
#define _MUM_OPTIMIZE(opts)
@@ -95,93 +95,108 @@ static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
-static uint64_t _mum_primes [] = {
- 0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
- 0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
- 0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f,
- 0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9,
+static uint64_t _mum_primes[] = {
+ 0X9ebdcae10d981691,
+ 0X32b9b9b97a27ac7d,
+ 0X29b5584d83d35bbd,
+ 0X4b04e0e61401255f,
+ 0X25e8f7b1f1c9d027,
+ 0X80d4c8c000f3e881,
+ 0Xbd1255431904b9dd,
+ 0X8a3bd4485eee6d81,
+ 0X3bc721b2aad05197,
+ 0X71b1a19b907d6e33,
+ 0X525e6c1084a8534b,
+ 0X9e4c2cd340c1299f,
+ 0Xde3add92e94caa37,
+ 0X7e14eadb1f65311d,
+ 0X3f5aa40f89812853,
+ 0X33b15a3b587d15c9,
};
/* Multiply 64-bit V and P and return sum of high and low parts of the
result. */
static inline uint64_t
-_mum (uint64_t v, uint64_t p) {
- uint64_t hi, lo;
+_mum(uint64_t v, uint64_t p)
+{
+ uint64_t hi, lo;
#if _MUM_USE_INT128
#if defined(__aarch64__)
- /* AARCH64 needs 2 insns to calculate 128-bit result of the
+ /* AARCH64 needs 2 insns to calculate 128-bit result of the
multiplication. If we use a generic code we actually call a
function doing 128x128->128 bit multiplication. The function is
very slow. */
- lo = v * p, hi;
- asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
+ lo = v * p, hi;
+ asm("umulh %0, %1, %2" : "=r"(hi) : "r"(v), "r"(p));
#else
- __uint128_t r = (__uint128_t) v * (__uint128_t) p;
- hi = (uint64_t) (r >> 64);
- lo = (uint64_t) r;
+ __uint128_t r = (__uint128_t) v * (__uint128_t) p;
+ hi = (uint64_t) (r >> 64);
+ lo = (uint64_t) r;
#endif
#else
- /* Implementation of 64x64->128-bit multiplication by four 32x32->64
+ /* Implementation of 64x64->128-bit multiplication by four 32x32->64
bit multiplication. */
- uint64_t hv = v >> 32, hp = p >> 32;
- uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
- uint64_t rh = hv * hp;
- uint64_t rm_0 = hv * lp;
- uint64_t rm_1 = hp * lv;
- uint64_t rl = lv * lp;
- uint64_t t, carry = 0;
-
- /* We could ignore a carry bit here if we did not care about the
+ uint64_t hv = v >> 32, hp = p >> 32;
+ uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
+ uint64_t rh = hv * hp;
+ uint64_t rm_0 = hv * lp;
+ uint64_t rm_1 = hp * lv;
+ uint64_t rl = lv * lp;
+ uint64_t t, carry = 0;
+
+ /* We could ignore a carry bit here if we did not care about the
same hash for 32-bit and 64-bit targets. */
- t = rl + (rm_0 << 32);
+ t = rl + (rm_0 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
- carry = t < rl;
+ carry = t < rl;
#endif
- lo = t + (rm_1 << 32);
+ lo = t + (rm_1 << 32);
#ifdef MUM_TARGET_INDEPENDENT_HASH
- carry += lo < t;
+ carry += lo < t;
#endif
- hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
+ hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
#endif
- /* We could use XOR here too but, for some reasons, on Haswell and
+ /* We could use XOR here too but, for some reasons, on Haswell and
Power7 using an addition improves hashing performance by 10% for
small strings. */
- return hi + lo;
+ return hi + lo;
}
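
For readers tracing the portable branch of _mum above: the following standalone sketch shows the same 64x64->128 multiply built from four 32x32->64 products, outside the diff. The name mul64x64_128 and the main driver are ours for illustration; only the arithmetic mirrors the code above.

#include <stdint.h>
#include <stdio.h>

static void
mul64x64_128(uint64_t v, uint64_t p, uint64_t *hi, uint64_t *lo)
{
    uint64_t hv = v >> 32, hp = p >> 32;
    uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
    uint64_t rh = hv * hp;        /* high halves */
    uint64_t rm_0 = hv * lp;      /* cross products */
    uint64_t rm_1 = hp * lv;
    uint64_t rl = lv * lp;        /* low halves */
    uint64_t t = rl + (rm_0 << 32);
    uint64_t carry = t < rl;      /* carry out of the low 64 bits */

    *lo = t + (rm_1 << 32);
    carry += *lo < t;
    *hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
}

int
main(void)
{
    uint64_t hi, lo;

    mul64x64_128(0x9ebdcae10d981691ULL, 3ULL, &hi, &lo);
    printf("%016llx %016llx\n", (unsigned long long) hi,
        (unsigned long long) lo);
    return 0;
}
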
#if defined(_MSC_VER)
-#define _mum_bswap_32(x) _byteswap_uint32_t (x)
-#define _mum_bswap_64(x) _byteswap_uint64_t (x)
+#define _mum_bswap_32(x) _byteswap_uint32_t(x)
+#define _mum_bswap_64(x) _byteswap_uint64_t(x)
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
-#define _mum_bswap_32(x) OSSwapInt32 (x)
-#define _mum_bswap_64(x) OSSwapInt64 (x)
+#define _mum_bswap_32(x) OSSwapInt32(x)
+#define _mum_bswap_64(x) OSSwapInt64(x)
#elif defined(__GNUC__)
-#define _mum_bswap32(x) __builtin_bswap32 (x)
-#define _mum_bswap64(x) __builtin_bswap64 (x)
+#define _mum_bswap32(x) __builtin_bswap32(x)
+#define _mum_bswap64(x) __builtin_bswap64(x)
#else
#include <byteswap.h>
-#define _mum_bswap32(x) bswap32 (x)
-#define _mum_bswap64(x) bswap64 (x)
+#define _mum_bswap32(x) bswap32(x)
+#define _mum_bswap64(x) bswap64(x)
#endif
static inline uint64_t
-_mum_le (uint64_t v) {
+_mum_le(uint64_t v)
+{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
- return v;
+ return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return _mum_bswap64 (v);
+ return _mum_bswap64(v);
#else
#error "Unknown endianness"
#endif
}
static inline uint32_t
-_mum_le32 (uint32_t v) {
+_mum_le32(uint32_t v)
+{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
- return v;
+ return v;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return _mum_bswap32 (v);
+ return _mum_bswap32(v);
#else
#error "Unknown endianness"
#endif
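
A side note on the two helpers above: with MUM_TARGET_INDEPENDENT_HASH defined, big-endian targets byte-swap so that every target feeds _mum the same word for the same key bytes. A minimal illustration of that property, assuming nothing from mum.h (load_le64 is our name):

#include <stdint.h>
#include <stdio.h>

static uint64_t
load_le64(const unsigned char *p)
{
    uint64_t v = 0;
    int i;

    /* Assemble from explicit byte positions: the result is identical
       on little- and big-endian targets, which is the property the
       swap-on-big-endian helpers above provide more cheaply. */
    for (i = 7; i >= 0; i--)
        v = (v << 8) | p[i];
    return v;
}

int
main(void)
{
    unsigned char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

    printf("%016llx\n", (unsigned long long) load_le64(buf));
    /* Always prints 0807060504030201. */
    return 0;
}
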
@@ -214,71 +229,74 @@ _mum_le32 (uint32_t v) {
#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
-_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
- uint64_t result = start;
- const unsigned char *str = (const unsigned char *) key;
- uint64_t u64;
- int i;
- size_t n;
-
- result = _mum (result, _mum_block_start_prime);
- while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
- /* This loop could be vectorized when we have vector insns for
+ _mum_hash_aligned(uint64_t start, const void *key, size_t len)
+{
+ uint64_t result = start;
+ const unsigned char *str = (const unsigned char *) key;
+ uint64_t u64;
+ int i;
+ size_t n;
+
+ result = _mum(result, _mum_block_start_prime);
+ while (len > _MUM_UNROLL_FACTOR * sizeof(uint64_t)) {
+ /* This loop could be vectorized when we have vector insns for
64x64->128-bit multiplication. AVX2 currently only have a
vector insn for 4 32x32->64-bit multiplication. */
- for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
- result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
- len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
- str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
- /* We will use the same prime numbers on the next iterations --
+ for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
+ result ^= _mum(_mum_le(((uint64_t *) str)[i]), _mum_primes[i]);
+ len -= _MUM_UNROLL_FACTOR * sizeof(uint64_t);
+ str += _MUM_UNROLL_FACTOR * sizeof(uint64_t);
+ /* We will use the same prime numbers on the next iterations --
randomize the state. */
- result = _mum (result, _mum_unroll_prime);
- }
- n = len / sizeof (uint64_t);
- for (i = 0; i < (int)n; i++)
- result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
- len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
- switch (len) {
- case 7:
- u64 = _mum_le32 (*(uint32_t *) str);
- u64 |= (uint64_t) str[4] << 32;
- u64 |= (uint64_t) str[5] << 40;
- u64 |= (uint64_t) str[6] << 48;
- return result ^ _mum (u64, _mum_tail_prime);
- case 6:
- u64 = _mum_le32 (*(uint32_t *) str);
- u64 |= (uint64_t) str[4] << 32;
- u64 |= (uint64_t) str[5] << 40;
- return result ^ _mum (u64, _mum_tail_prime);
- case 5:
- u64 = _mum_le32 (*(uint32_t *) str);
- u64 |= (uint64_t) str[4] << 32;
- return result ^ _mum (u64, _mum_tail_prime);
- case 4:
- u64 = _mum_le32 (*(uint32_t *) str);
- return result ^ _mum (u64, _mum_tail_prime);
- case 3:
- u64 = str[0];
- u64 |= (uint64_t) str[1] << 8;
- u64 |= (uint64_t) str[2] << 16;
- return result ^ _mum (u64, _mum_tail_prime);
- case 2:
- u64 = str[0];
- u64 |= (uint64_t) str[1] << 8;
- return result ^ _mum (u64, _mum_tail_prime);
- case 1:
- u64 = str[0];
- return result ^ _mum (u64, _mum_tail_prime);
- }
- return result;
+ result = _mum(result, _mum_unroll_prime);
+ }
+ n = len / sizeof(uint64_t);
+ for (i = 0; i < (int) n; i++)
+ result ^= _mum(_mum_le(((uint64_t *) str)[i]), _mum_primes[i]);
+ len -= n * sizeof(uint64_t);
+ str += n * sizeof(uint64_t);
+ switch (len) {
+ case 7:
+ u64 = _mum_le32(*(uint32_t *) str);
+ u64 |= (uint64_t) str[4] << 32;
+ u64 |= (uint64_t) str[5] << 40;
+ u64 |= (uint64_t) str[6] << 48;
+ return result ^ _mum(u64, _mum_tail_prime);
+ case 6:
+ u64 = _mum_le32(*(uint32_t *) str);
+ u64 |= (uint64_t) str[4] << 32;
+ u64 |= (uint64_t) str[5] << 40;
+ return result ^ _mum(u64, _mum_tail_prime);
+ case 5:
+ u64 = _mum_le32(*(uint32_t *) str);
+ u64 |= (uint64_t) str[4] << 32;
+ return result ^ _mum(u64, _mum_tail_prime);
+ case 4:
+ u64 = _mum_le32(*(uint32_t *) str);
+ return result ^ _mum(u64, _mum_tail_prime);
+ case 3:
+ u64 = str[0];
+ u64 |= (uint64_t) str[1] << 8;
+ u64 |= (uint64_t) str[2] << 16;
+ return result ^ _mum(u64, _mum_tail_prime);
+ case 2:
+ u64 = str[0];
+ u64 |= (uint64_t) str[1] << 8;
+ return result ^ _mum(u64, _mum_tail_prime);
+ case 1:
+ u64 = str[0];
+ return result ^ _mum(u64, _mum_tail_prime);
+ }
+ return result;
}
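
The switch above is an unrolled form of packing the final 1-7 bytes into the low end of a 64-bit word. A loop-based equivalent, under the assumption of little-endian packing order (pack_tail is our illustrative helper, not library code):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static uint64_t
pack_tail(const unsigned char *str, size_t len)
{
    uint64_t u64 = 0;
    size_t i;

    /* Byte i lands at bit offset 8 * i, matching cases 1-7 above. */
    for (i = 0; i < len; i++)
        u64 |= (uint64_t) str[i] << (8 * i);
    return u64;
}

int
main(void)
{
    unsigned char tail[3] = { 0x61, 0x62, 0x63 };

    /* Prints 0000000000636261. */
    printf("%016llx\n", (unsigned long long) pack_tail(tail, 3));
    return 0;
}
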
/* Final randomization of H. */
static inline uint64_t
-_mum_final (uint64_t h) {
- h ^= _mum (h, _mum_finish_prime1);
- h ^= _mum (h, _mum_finish_prime2);
- return h;
+_mum_final(uint64_t h)
+{
+ h ^= _mum(h, _mum_finish_prime1);
+ h ^= _mum(h, _mum_finish_prime2);
+ return h;
}
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
@@ -288,16 +306,14 @@ _mum_final (uint64_t h) {
3-cycles vs. 4 for MULX, MULX permits more freedom in insn
scheduling as it uses less fixed registers. */
static inline uint64_t _MUM_TARGET("arch=haswell")
-_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
- return _mum_final (_mum_hash_aligned (seed + len, key, len));
+ _mum_hash_avx2(const void *key, size_t len, uint64_t seed)
+{
+ return _mum_final(_mum_hash_aligned(seed + len, key, len));
}
#endif
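
A minimal sketch of the GCC __target__ attribute pattern that _MUM_TARGET("arch=haswell") expands to above: compile one function tuned for a specific micro-architecture while the rest of the translation unit stays generic. sum_haswell is our illustrative name, not part of mum.h.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#if defined(__x86_64__) && defined(__GNUC__) && !defined(__llvm__)
__attribute__((__target__("arch=haswell")))
static uint64_t
sum_haswell(const uint64_t *a, size_t n)
{
    uint64_t s = 0;
    size_t i;

    /* Eligible for AVX2 vectorization thanks to the attribute.  A
       real program should gate calls on __builtin_cpu_supports, as
       mum_hash does below, to avoid SIGILL on older CPUs. */
    for (i = 0; i < n; i++)
        s += a[i];
    return s;
}
#endif

int
main(void)
{
#if defined(__x86_64__) && defined(__GNUC__) && !defined(__llvm__)
    uint64_t a[4] = { 1, 2, 3, 4 };

    printf("%llu\n", (unsigned long long) sum_haswell(a, 4));
#endif
    return 0;
}
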
#ifndef _MUM_UNALIGNED_ACCESS
-#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
- || defined(__s390__) || defined(__m32c__) || defined(cris) \
- || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
- || defined(__aarch64__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) || defined(__s390__) || defined(__m32c__) || defined(cris) || defined(__CR16__) || defined(__vax__) || defined(__m68k__) || defined(__aarch64__)
#define _MUM_UNALIGNED_ACCESS 1
#else
#define _MUM_UNALIGNED_ACCESS 0
@@ -317,101 +333,108 @@ _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
static inline uint64_t
#if defined(__x86_64__)
-_MUM_TARGET("inline-all-stringops")
+ _MUM_TARGET("inline-all-stringops")
#endif
-_mum_hash_default (const void *key, size_t len, uint64_t seed) {
- uint64_t result;
- const unsigned char *str = (const unsigned char *) key;
- size_t block_len;
- uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
-
- result = seed + len;
- if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
- result = _mum_hash_aligned (result, key, len);
- else {
- while (len != 0) {
- block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
- memmove (buf, str, block_len);
- result = _mum_hash_aligned (result, buf, block_len);
- len -= block_len;
- str += block_len;
- }
- }
- return _mum_final (result);
+ _mum_hash_default(const void *key, size_t len, uint64_t seed)
+{
+ uint64_t result;
+ const unsigned char *str = (const unsigned char *) key;
+ size_t block_len;
+ uint64_t buf[_MUM_BLOCK_LEN / sizeof(uint64_t)];
+
+ result = seed + len;
+ if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
+ result = _mum_hash_aligned(result, key, len);
+ else {
+ while (len != 0) {
+ block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
+ memmove(buf, str, block_len);
+ result = _mum_hash_aligned(result, buf, block_len);
+ len -= block_len;
+ str += block_len;
+ }
+ }
+ return _mum_final(result);
}
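
_mum_hash_default falls back to copying into an aligned buffer when the key pointer is misaligned. The same idea in miniature is the standard memcpy idiom for an unaligned load (load_u64 is our name, not the library's):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint64_t
load_u64(const void *p)
{
    uint64_t v;

    /* memcpy is the portable, UB-free way to read a possibly
       misaligned uint64_t; compilers emit a single load where the
       target allows it. */
    memcpy(&v, p, sizeof v);
    return v;
}

int
main(void)
{
    static const unsigned char buf[] = "0123456789abcdef";

    /* Read from an odd offset; fine on any target. */
    printf("%016llx\n", (unsigned long long) load_u64(buf + 1));
    return 0;
}
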
static inline uint64_t
-_mum_next_factor (void) {
- uint64_t start = 0;
- int i;
+_mum_next_factor(void)
+{
+ uint64_t start = 0;
+ int i;
- for (i = 0; i < 8; i++)
- start = (start << 8) | rand() % 256;
- return start;
+ for (i = 0; i < 8; i++)
+ start = (start << 8) | rand() % 256;
+ return start;
}
/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */
/* Set random multiplicators depending on SEED. */
static inline void
-mum_hash_randomize (uint64_t seed) {
- int i;
-
- srand (seed);
- _mum_hash_step_prime = _mum_next_factor ();
- _mum_key_step_prime = _mum_next_factor ();
- _mum_finish_prime1 = _mum_next_factor ();
- _mum_finish_prime2 = _mum_next_factor ();
- _mum_block_start_prime = _mum_next_factor ();
- _mum_unroll_prime = _mum_next_factor ();
- _mum_tail_prime = _mum_next_factor ();
- for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
- _mum_primes[i] = _mum_next_factor ();
+mum_hash_randomize(uint64_t seed)
+{
+ int i;
+
+ srand(seed);
+ _mum_hash_step_prime = _mum_next_factor();
+ _mum_key_step_prime = _mum_next_factor();
+ _mum_finish_prime1 = _mum_next_factor();
+ _mum_finish_prime2 = _mum_next_factor();
+ _mum_block_start_prime = _mum_next_factor();
+ _mum_unroll_prime = _mum_next_factor();
+ _mum_tail_prime = _mum_next_factor();
+ for (i = 0; i < (int) (sizeof(_mum_primes) / sizeof(uint64_t)); i++)
+ _mum_primes[i] = _mum_next_factor();
}
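
A hypothetical usage sketch for mum_hash_randomize: reseed the multipliers once at startup so hash values differ between runs, e.g. to harden hash tables against collision flooding. It assumes mum.h is on the include path; get_random_seed is a placeholder, and a real program should draw from the OS RNG instead.

#include <time.h>
#include "mum.h"

static uint64_t
get_random_seed(void)
{
    /* Placeholder entropy source for the sketch. */
    return (uint64_t) time(NULL);
}

int
main(void)
{
    mum_hash_randomize(get_random_seed());
    /* All subsequent mum_hash*() calls use the new multipliers. */
    return 0;
}
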
/* Start hashing data with SEED. Return the state. */
static inline uint64_t
-mum_hash_init (uint64_t seed) {
- return seed;
+mum_hash_init(uint64_t seed)
+{
+ return seed;
}
/* Process data KEY with the state H and return the updated state. */
static inline uint64_t
-mum_hash_step (uint64_t h, uint64_t key)
+mum_hash_step(uint64_t h, uint64_t key)
{
- return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
+ return _mum(h, _mum_hash_step_prime) ^ _mum(key, _mum_key_step_prime);
}
/* Return the result of hashing using the current state H. */
static inline uint64_t
-mum_hash_finish (uint64_t h) {
- return _mum_final (h);
+mum_hash_finish(uint64_t h)
+{
+ return _mum_final(h);
}
/* Fast hashing of KEY with SEED. The hash is always the same for the
same key on any target. */
static inline size_t
-mum_hash64 (uint64_t key, uint64_t seed) {
- return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
+mum_hash64(uint64_t key, uint64_t seed)
+{
+ return mum_hash_finish(mum_hash_step(mum_hash_init(seed), key));
}
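
A minimal driver for the fixed-width entry point, assuming this header is on the include path; per the comment above, the result is the same on any target:

#include <stdint.h>
#include <stdio.h>
#include "mum.h"

int
main(void)
{
    uint64_t h = mum_hash64(0xdeadbeefULL, 42);

    printf("%016llx\n", (unsigned long long) h);
    return 0;
}
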
/* Hash data KEY of length LEN and SEED. The hash depends on the
target endianness and the unroll factor. */
static inline uint64_t
-mum_hash (const void *key, size_t len, uint64_t seed) {
+mum_hash(const void *key, size_t len, uint64_t seed)
+{
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
- static int avx2_support = 0;
-
- if (avx2_support > 0)
- return _mum_hash_avx2 (key, len, seed);
- else if (! avx2_support) {
- __builtin_cpu_init ();
- avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
- if (avx2_support > 0)
- return _mum_hash_avx2 (key, len, seed);
- }
+ static int avx2_support = 0;
+
+ if (avx2_support > 0)
+ return _mum_hash_avx2(key, len, seed);
+ else if (!avx2_support) {
+ __builtin_cpu_init();
+ avx2_support = __builtin_cpu_supports("avx2") ? 1 : -1;
+ if (avx2_support > 0)
+ return _mum_hash_avx2(key, len, seed);
+ }
#endif
- return _mum_hash_default (key, len, seed);
+ return _mum_hash_default(key, len, seed);
}
#endif
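
Finally, a hypothetical end-to-end sketch tying the interface together: the one-shot mum_hash over a byte buffer, and the equivalent init/step/finish sequence for word-at-a-time input. The two results are not expected to match since they hash different inputs; the sketch assumes mum.h is on the include path.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "mum.h"

int
main(void)
{
    const char *key = "hello, mum";
    uint64_t h1 = mum_hash(key, strlen(key), 24);

    /* Incremental form for streams of 64-bit words. */
    uint64_t state = mum_hash_init(24);
    state = mum_hash_step(state, 0x68656c6c6fULL);
    uint64_t h2 = mum_hash_finish(state);

    printf("%016llx %016llx\n", (unsigned long long) h1,
        (unsigned long long) h2);
    return 0;
}
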