This page looks at some non-cryptographic hash designs. The hashes are from n-bit inputs to n-bit outputs. These are mainly for use with PRNG designs.
Methodology
Our goal is to find functions that are good at mixing/diffusing.
Evaluation
We will evaluate how the functions behave using an avalanche diagram. An avalanche diagram is a graph that displays the probability of each output bit flipping in response to each input bit flipping. It allows us to quickly see if the function has any terrible biases.
Design criteria
It is not difficult to make a good hash function by combining a lot of subdiffusions. What we want is to achieve good mixing with the smallest number of operations. Pretty much all uses cases of non-cryptographic hash functions need to execute quickly and consume minimal resources.
Algorithms
Identity function
The identity function (f(x) = x
) does not modify the input value, so each bit only affects itself. This cannot be considered a hash function by any means, but serves as a nice sanity check to see if the graph is what we expect.
%%cython ctypedef unsigned long long u64 cpdef u64 identity_function(u64 x): return x
plot_hash(identity_function)
Non-deterministic random values
On the opposite end of the spectrum, we completely throw out determinism and just use real random numbers.
def urandom64(x): return int.from_bytes(os.urandom(8), 'big') plot_hash(urandom64)
def urandom32(x): return int.from_bytes(os.urandom(4), 'big') plot_hash(urandom32, bits=32)
Multiplication with a large prime
%%cython ctypedef unsigned long long u64 cpdef u64 prime_multiplication(u64 x): cdef u64 prime = 10115642443237858459; return x * prime
plot_hash(prime_multiplication)
Rotate-XOR-Prime (rxprime)
This hash is based on multiplying the input by primes, and XORing it with rotated versions of itself.
32-bit rxprime
%%cython ctypedef unsigned char u8 ctypedef unsigned long u32 cdef u32 rotl32(u32 n, u8 k): cdef u32 a = n << k cdef u32 b = n >> (32 - k) return a | b cpdef u32 rxprime32(u32 x): x *= 7919 x ^= rotl32(x, 7) x *= 7723 x ^= rotl32(x, 11) x *= 7561 x ^= rotl32(x, 13) return x
plot_hash(rxprime32, bits=32)
64-bit rxprime
%%cython ctypedef unsigned char u8 ctypedef unsigned long long u64 cdef u64 rotl64(u64 n, u8 k): cdef u64 a = n << k cdef u64 b = n >> (64 - k) return a | b cpdef u64 rxprime64(u64 x): x *= 7919 x ^= rotl64(x, 7) x *= 7723 x ^= rotl64(x, 11) x *= 7561 x ^= rotl64(x, 13) x *= 7411 x ^= rotl64(x, 17) return x
plot_hash(rxprime64)
SplitMix64
%%cython -a ctypedef unsigned long long u64 cpdef u64 splitmix64(u64 x): x ^= x >> 30 x *= <u64>0xbf58476d1ce4e5b9 x ^= x >> 27 x *= <u64>0x94d049bb133111eb x ^= x >> 31 return x
Cython: _cython_magic_1df062dd335dea8b17c4f61033efc66b.pyx Generated by Cython 0.29.24
Yellow lines hint at Python interaction.
Click on a line that starts with a "+
" to see the C code that Cython generated for it.01:
02: ctypedef unsigned long long u6403:
+04: cpdef u64 splitmix64(u64 x):static PyObject *__pyx_pw_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_1splitmix64(PyObject *__pyx_self, PyObject *__pyx_arg_x); /*proto*/ static __pyx_t_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_u64 __pyx_f_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_splitmix64(__pyx_t_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_u64 __pyx_v_x, CYTHON_UNUSED int __pyx_skip_dispatch) { __pyx_t_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_u64 __pyx_r; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("splitmix64", 0); /* … */ /* function exit code */ __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } /* Python wrapper */ static PyObject *__pyx_pw_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_1splitmix64(PyObject *__pyx_self, PyObject *__pyx_arg_x); /*proto*/ static PyObject *__pyx_pw_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_1splitmix64(PyObject *__pyx_self, PyObject *__pyx_arg_x) { __pyx_t_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_u64 __pyx_v_x; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("splitmix64 (wrapper)", 0); assert(__pyx_arg_x); { __pyx_v_x = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_arg_x); if (unlikely((__pyx_v_x == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 4, __pyx_L3_error) } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; __Pyx_AddTraceback("_cython_magic_1df062dd335dea8b17c4f61033efc66b.splitmix64", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_splitmix64(__pyx_self, ((__pyx_t_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_u64)__pyx_v_x)); int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_splitmix64(CYTHON_UNUSED PyObject *__pyx_self, __pyx_t_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_u64 __pyx_v_x) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("splitmix64", 0); __Pyx_XDECREF(__pyx_r); __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_splitmix64(__pyx_v_x, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 4, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_AddTraceback("_cython_magic_1df062dd335dea8b17c4f61033efc66b.splitmix64", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; }+05: x ^= x >> 30__pyx_v_x = (__pyx_v_x ^ (__pyx_v_x >> 30));+06: x *= <u64>0xbf58476d1ce4e5b9__pyx_v_x = (__pyx_v_x * ((__pyx_t_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_u64)0xbf58476d1ce4e5b9));+07: x ^= x >> 27__pyx_v_x = (__pyx_v_x ^ (__pyx_v_x >> 27));+08: x *= <u64>0x94d049bb133111eb__pyx_v_x = (__pyx_v_x * ((__pyx_t_46_cython_magic_1df062dd335dea8b17c4f61033efc66b_u64)0x94d049bb133111eb));+09: x ^= x >> 31__pyx_v_x = (__pyx_v_x ^ (__pyx_v_x >> 31));+10: return x__pyx_r = __pyx_v_x; goto __pyx_L0;
plot_hash(splitmix64)
Prospector
https://nullprogram.com/blog/2018/07/31/
%%cython ctypedef unsigned long u32 cpdef u32 prospector32(u32 x): x ^= x >> 15 x *= <u32>0x2c1b3c6d x ^= x >> 12 x *= <u32>0x297a2d39 x ^= x >> 15 return x
plot_hash(prospector32, bits=32)
%%cython ctypedef unsigned long u32 cpdef u32 triple32(u32 x): x ^= x >> 17 x *= <u32>0xed5ad4bb x ^= x >> 11 x *= <u32>0xac4c1b51 x ^= x >> 15 x *= <u32>0x31848bab x ^= x >> 14 return x
plot_hash(triple32, bits=32)
ARX - Add, Rotate, XOR
This hash is based on ARX, which is usually cheap to execute in software. One advantage is that there are no multiplications. In exchange for having cheaper operations, we have to execute more of them.
%%cython ctypedef unsigned char u8 ctypedef unsigned long u32 cdef u32 rotl32(u32 n, u8 k): cdef u32 a = n << k cdef u32 b = n >> (32 - k) return a | b cpdef u32 arx32(u32 a): cdef u32 b = 0 cdef u32 c = 0 cdef u32 d = 0 for _ in range(3): b ^= rotl32(a + d, 7) c ^= rotl32(b + a, 9) d ^= rotl32(c + b, 13) a ^= rotl32(d + c, 18) return a ^ c
plot_hash(arx32, bits=32)
%%cython ctypedef unsigned char u8 ctypedef unsigned long long u64 cdef u64 rotl64(u64 n, u8 k): cdef u64 a = n << k cdef u64 b = n >> (64 - k) return a | b cpdef u64 arx64(u64 a): cdef u64 b = 0 cdef u64 c = 0 for _ in range(4): b ^= rotl64(a + c, 7) c ^= rotl64(b + a, 9) a ^= rotl64(c + b, 13) return a
plot_hash(arx64)