001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hive.common.util;
020
021/**
022 * Murmur3 is successor to Murmur2 fast non-crytographic hash algorithms.
023 *
024 * Murmur3 32 and 128 bit variants.
025 * 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94
026 * 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255
027 *
028 * This is a public domain code with no copyrights.
029 * From homepage of MurmurHash (https://code.google.com/p/smhasher/),
030 * "All MurmurHash versions are public domain software, and the author disclaims all copyright
031 * to their code."
032 */
033public class Murmur3 {
034  // from 64-bit linear congruential generator
035  public static final long NULL_HASHCODE = 2862933555777941757L;
036
037  // Constants for 32 bit variant
038  private static final int C1_32 = 0xcc9e2d51;
039  private static final int C2_32 = 0x1b873593;
040  private static final int R1_32 = 15;
041  private static final int R2_32 = 13;
042  private static final int M_32 = 5;
043  private static final int N_32 = 0xe6546b64;
044
045  // Constants for 128 bit variant
046  private static final long C1 = 0x87c37b91114253d5L;
047  private static final long C2 = 0x4cf5ad432745937fL;
048  private static final int R1 = 31;
049  private static final int R2 = 27;
050  private static final int R3 = 33;
051  private static final int M = 5;
052  private static final int N1 = 0x52dce729;
053  private static final int N2 = 0x38495ab5;
054
055  public static final int DEFAULT_SEED = 104729;
056
057  public static int hash32(long l0, long l1) {
058    return hash32(l0, l1, DEFAULT_SEED);
059  }
060
061  public static int hash32(long l0) {
062    return hash32(l0, DEFAULT_SEED);
063  }
064
065  /**
066   * Murmur3 32-bit variant.
067   */
068  public static int hash32(long l0, int seed) {
069    int hash = seed;
070    final long r0 = Long.reverseBytes(l0);
071
072    hash = mix32((int) r0, hash);
073    hash = mix32((int) (r0 >>> 32), hash);
074
075    return fmix32(Long.BYTES, hash);
076  }
077
078  /**
079   * Murmur3 32-bit variant.
080   */
081  public static int hash32(long l0, long l1, int seed) {
082    int hash = seed;
083    final long r0 = Long.reverseBytes(l0);
084    final long r1 = Long.reverseBytes(l1);
085
086    hash = mix32((int) r0, hash);
087    hash = mix32((int) (r0 >>> 32), hash);
088    hash = mix32((int) (r1), hash);
089    hash = mix32((int) (r1 >>> 32), hash);
090
091    return fmix32(Long.BYTES * 2, hash);
092  }
093
094  /**
095   * Murmur3 32-bit variant.
096   *
097   * @param data - input byte array
098   * @return - hashcode
099   */
100  public static int hash32(byte[] data) {
101    return hash32(data, 0, data.length, DEFAULT_SEED);
102  }
103
104  /**
105   * Murmur3 32-bit variant.
106   *
107   * @param data - input byte array
108   * @param length - length of array
109   * @return - hashcode
110   */
111  public static int hash32(byte[] data, int length) {
112    return hash32(data, 0, length, DEFAULT_SEED);
113  }
114
115  /**
116   * Murmur3 32-bit variant.
117   *
118   * @param data   - input byte array
119   * @param length - length of array
120   * @param seed   - seed. (default 0)
121   * @return - hashcode
122   */
123  public static int hash32(byte[] data, int length, int seed) {
124    return hash32(data, 0, length, seed);
125  }
126
127  /**
128   * Murmur3 32-bit variant.
129   *
130   * @param data   - input byte array
131   * @param offset - offset of data
132   * @param length - length of array
133   * @param seed   - seed. (default 0)
134   * @return - hashcode
135   */
136  public static int hash32(byte[] data, int offset, int length, int seed) {
137    int hash = seed;
138    final int nblocks = length >> 2;
139
140    // body
141    for (int i = 0; i < nblocks; i++) {
142      int i_4 = i << 2;
143      int k = (data[offset + i_4] & 0xff)
144          | ((data[offset + i_4 + 1] & 0xff) << 8)
145          | ((data[offset + i_4 + 2] & 0xff) << 16)
146          | ((data[offset + i_4 + 3] & 0xff) << 24);
147
148      hash = mix32(k, hash);
149    }
150
151    // tail
152    int idx = nblocks << 2;
153    int k1 = 0;
154    switch (length - idx) {
155      case 3:
156        k1 ^= data[offset + idx + 2] << 16;
157      case 2:
158        k1 ^= data[offset + idx + 1] << 8;
159      case 1:
160        k1 ^= data[offset + idx];
161
162        // mix functions
163        k1 *= C1_32;
164        k1 = Integer.rotateLeft(k1, R1_32);
165        k1 *= C2_32;
166        hash ^= k1;
167    }
168
169    return fmix32(length, hash);
170  }
171
172  private static int mix32(int k, int hash) {
173    k *= C1_32;
174    k = Integer.rotateLeft(k, R1_32);
175    k *= C2_32;
176    hash ^= k;
177    return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
178  }
179
180  private static int fmix32(int length, int hash) {
181    hash ^= length;
182    hash ^= (hash >>> 16);
183    hash *= 0x85ebca6b;
184    hash ^= (hash >>> 13);
185    hash *= 0xc2b2ae35;
186    hash ^= (hash >>> 16);
187
188    return hash;
189  }
190
191  /**
192   * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant.
193   *
194   * @param data - input byte array
195   * @return - hashcode
196   */
197  public static long hash64(byte[] data) {
198    return hash64(data, 0, data.length, DEFAULT_SEED);
199  }
200
201  public static long hash64(long data) {
202    long hash = DEFAULT_SEED;
203    long k = Long.reverseBytes(data);
204    int length = Long.BYTES;
205    // mix functions
206    k *= C1;
207    k = Long.rotateLeft(k, R1);
208    k *= C2;
209    hash ^= k;
210    hash = Long.rotateLeft(hash, R2) * M + N1;
211    // finalization
212    hash ^= length;
213    hash = fmix64(hash);
214    return hash;
215  }
216
217  public static long hash64(int data) {
218    long k1 = Integer.reverseBytes(data) & (-1L >>> 32);
219    int length = Integer.BYTES;
220    long hash = DEFAULT_SEED;
221    k1 *= C1;
222    k1 = Long.rotateLeft(k1, R1);
223    k1 *= C2;
224    hash ^= k1;
225    // finalization
226    hash ^= length;
227    hash = fmix64(hash);
228    return hash;
229  }
230
231  public static long hash64(short data) {
232    long hash = DEFAULT_SEED;
233    long k1 = 0;
234    k1 ^= ((long) data & 0xff) << 8;
235    k1 ^= ((long)((data & 0xFF00) >> 8) & 0xff);
236    k1 *= C1;
237    k1 = Long.rotateLeft(k1, R1);
238    k1 *= C2;
239    hash ^= k1;
240
241    // finalization
242    hash ^= Short.BYTES;
243    hash = fmix64(hash);
244    return hash;
245  }
246
247  public static long hash64(byte[] data, int offset, int length) {
248    return hash64(data, offset, length, DEFAULT_SEED);
249  }
250
251  /**
252   * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant.
253   *
254   * @param data   - input byte array
255   * @param length - length of array
256   * @param seed   - seed. (default is 0)
257   * @return - hashcode
258   */
259  public static long hash64(byte[] data, int offset, int length, int seed) {
260    long hash = seed;
261    final int nblocks = length >> 3;
262
263    // body
264    for (int i = 0; i < nblocks; i++) {
265      final int i8 = i << 3;
266      long k = ((long) data[offset + i8] & 0xff)
267          | (((long) data[offset + i8 + 1] & 0xff) << 8)
268          | (((long) data[offset + i8 + 2] & 0xff) << 16)
269          | (((long) data[offset + i8 + 3] & 0xff) << 24)
270          | (((long) data[offset + i8 + 4] & 0xff) << 32)
271          | (((long) data[offset + i8 + 5] & 0xff) << 40)
272          | (((long) data[offset + i8 + 6] & 0xff) << 48)
273          | (((long) data[offset + i8 + 7] & 0xff) << 56);
274
275      // mix functions
276      k *= C1;
277      k = Long.rotateLeft(k, R1);
278      k *= C2;
279      hash ^= k;
280      hash = Long.rotateLeft(hash, R2) * M + N1;
281    }
282
283    // tail
284    long k1 = 0;
285    int tailStart = nblocks << 3;
286    switch (length - tailStart) {
287      case 7:
288        k1 ^= ((long) data[offset + tailStart + 6] & 0xff) << 48;
289      case 6:
290        k1 ^= ((long) data[offset + tailStart + 5] & 0xff) << 40;
291      case 5:
292        k1 ^= ((long) data[offset + tailStart + 4] & 0xff) << 32;
293      case 4:
294        k1 ^= ((long) data[offset + tailStart + 3] & 0xff) << 24;
295      case 3:
296        k1 ^= ((long) data[offset + tailStart + 2] & 0xff) << 16;
297      case 2:
298        k1 ^= ((long) data[offset + tailStart + 1] & 0xff) << 8;
299      case 1:
300        k1 ^= ((long) data[offset + tailStart] & 0xff);
301        k1 *= C1;
302        k1 = Long.rotateLeft(k1, R1);
303        k1 *= C2;
304        hash ^= k1;
305    }
306
307    // finalization
308    hash ^= length;
309    hash = fmix64(hash);
310
311    return hash;
312  }
313
314  /**
315   * Murmur3 128-bit variant.
316   *
317   * @param data - input byte array
318   * @return - hashcode (2 longs)
319   */
320  public static long[] hash128(byte[] data) {
321    return hash128(data, 0, data.length, DEFAULT_SEED);
322  }
323
324  /**
325   * Murmur3 128-bit variant.
326   *
327   * @param data   - input byte array
328   * @param offset - the first element of array
329   * @param length - length of array
330   * @param seed   - seed. (default is 0)
331   * @return - hashcode (2 longs)
332   */
333  public static long[] hash128(byte[] data, int offset, int length, int seed) {
334    long h1 = seed;
335    long h2 = seed;
336    final int nblocks = length >> 4;
337
338    // body
339    for (int i = 0; i < nblocks; i++) {
340      final int i16 = i << 4;
341      long k1 = ((long) data[offset + i16] & 0xff)
342          | (((long) data[offset + i16 + 1] & 0xff) << 8)
343          | (((long) data[offset + i16 + 2] & 0xff) << 16)
344          | (((long) data[offset + i16 + 3] & 0xff) << 24)
345          | (((long) data[offset + i16 + 4] & 0xff) << 32)
346          | (((long) data[offset + i16 + 5] & 0xff) << 40)
347          | (((long) data[offset + i16 + 6] & 0xff) << 48)
348          | (((long) data[offset + i16 + 7] & 0xff) << 56);
349
350      long k2 = ((long) data[offset + i16 + 8] & 0xff)
351          | (((long) data[offset + i16 + 9] & 0xff) << 8)
352          | (((long) data[offset + i16 + 10] & 0xff) << 16)
353          | (((long) data[offset + i16 + 11] & 0xff) << 24)
354          | (((long) data[offset + i16 + 12] & 0xff) << 32)
355          | (((long) data[offset + i16 + 13] & 0xff) << 40)
356          | (((long) data[offset + i16 + 14] & 0xff) << 48)
357          | (((long) data[offset + i16 + 15] & 0xff) << 56);
358
359      // mix functions for k1
360      k1 *= C1;
361      k1 = Long.rotateLeft(k1, R1);
362      k1 *= C2;
363      h1 ^= k1;
364      h1 = Long.rotateLeft(h1, R2);
365      h1 += h2;
366      h1 = h1 * M + N1;
367
368      // mix functions for k2
369      k2 *= C2;
370      k2 = Long.rotateLeft(k2, R3);
371      k2 *= C1;
372      h2 ^= k2;
373      h2 = Long.rotateLeft(h2, R1);
374      h2 += h1;
375      h2 = h2 * M + N2;
376    }
377
378    // tail
379    long k1 = 0;
380    long k2 = 0;
381    int tailStart = nblocks << 4;
382    switch (length - tailStart) {
383      case 15:
384        k2 ^= (long) (data[offset + tailStart + 14] & 0xff) << 48;
385      case 14:
386        k2 ^= (long) (data[offset + tailStart + 13] & 0xff) << 40;
387      case 13:
388        k2 ^= (long) (data[offset + tailStart + 12] & 0xff) << 32;
389      case 12:
390        k2 ^= (long) (data[offset + tailStart + 11] & 0xff) << 24;
391      case 11:
392        k2 ^= (long) (data[offset + tailStart + 10] & 0xff) << 16;
393      case 10:
394        k2 ^= (long) (data[offset + tailStart + 9] & 0xff) << 8;
395      case 9:
396        k2 ^= (long) (data[offset + tailStart + 8] & 0xff);
397        k2 *= C2;
398        k2 = Long.rotateLeft(k2, R3);
399        k2 *= C1;
400        h2 ^= k2;
401
402      case 8:
403        k1 ^= (long) (data[offset + tailStart + 7] & 0xff) << 56;
404      case 7:
405        k1 ^= (long) (data[offset + tailStart + 6] & 0xff) << 48;
406      case 6:
407        k1 ^= (long) (data[offset + tailStart + 5] & 0xff) << 40;
408      case 5:
409        k1 ^= (long) (data[offset + tailStart + 4] & 0xff) << 32;
410      case 4:
411        k1 ^= (long) (data[offset + tailStart + 3] & 0xff) << 24;
412      case 3:
413        k1 ^= (long) (data[offset + tailStart + 2] & 0xff) << 16;
414      case 2:
415        k1 ^= (long) (data[offset + tailStart + 1] & 0xff) << 8;
416      case 1:
417        k1 ^= (long) (data[offset + tailStart] & 0xff);
418        k1 *= C1;
419        k1 = Long.rotateLeft(k1, R1);
420        k1 *= C2;
421        h1 ^= k1;
422    }
423
424    // finalization
425    h1 ^= length;
426    h2 ^= length;
427
428    h1 += h2;
429    h2 += h1;
430
431    h1 = fmix64(h1);
432    h2 = fmix64(h2);
433
434    h1 += h2;
435    h2 += h1;
436
437    return new long[]{h1, h2};
438  }
439
440  private static long fmix64(long h) {
441    h ^= (h >>> 33);
442    h *= 0xff51afd7ed558ccdL;
443    h ^= (h >>> 33);
444    h *= 0xc4ceb9fe1a85ec53L;
445    h ^= (h >>> 33);
446    return h;
447  }
448
449  public static class IncrementalHash32 {
450    byte[] tail = new byte[3];
451    int tailLen;
452    int totalLen;
453    int hash;
454
455    public final void start(int hash) {
456      tailLen = totalLen = 0;
457      this.hash = hash;
458    }
459
460    public final void add(byte[] data, int offset, int length) {
461      if (length == 0) return;
462      totalLen += length;
463      if (tailLen + length < 4) {
464        System.arraycopy(data, offset, tail, tailLen, length);
465        tailLen += length;
466        return;
467      }
468      int offset2 = 0;
469      if (tailLen > 0) {
470        offset2 = (4 - tailLen);
471        int k = -1;
472        switch (tailLen) {
473        case 1:
474          k = orBytes(tail[0], data[offset], data[offset + 1], data[offset + 2]);
475          break;
476        case 2:
477          k = orBytes(tail[0], tail[1], data[offset], data[offset + 1]);
478          break;
479        case 3:
480          k = orBytes(tail[0], tail[1], tail[2], data[offset]);
481          break;
482        default: throw new AssertionError(tailLen);
483        }
484        // mix functions
485        k *= C1_32;
486        k = Integer.rotateLeft(k, R1_32);
487        k *= C2_32;
488        hash ^= k;
489        hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
490      }
491      int length2 = length - offset2;
492      offset += offset2;
493      final int nblocks = length2 >> 2;
494
495      for (int i = 0; i < nblocks; i++) {
496        int i_4 = (i << 2) + offset;
497        int k = orBytes(data[i_4], data[i_4 + 1], data[i_4 + 2], data[i_4 + 3]);
498
499        // mix functions
500        k *= C1_32;
501        k = Integer.rotateLeft(k, R1_32);
502        k *= C2_32;
503        hash ^= k;
504        hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
505      }
506
507      int consumed = (nblocks << 2);
508      tailLen = length2 - consumed;
509      if (consumed == length2) return;
510      System.arraycopy(data, offset + consumed, tail, 0, tailLen);
511    }
512
513    public final int end() {
514      int k1 = 0;
515      switch (tailLen) {
516        case 3:
517          k1 ^= tail[2] << 16;
518        case 2:
519          k1 ^= tail[1] << 8;
520        case 1:
521          k1 ^= tail[0];
522
523          // mix functions
524          k1 *= C1_32;
525          k1 = Integer.rotateLeft(k1, R1_32);
526          k1 *= C2_32;
527          hash ^= k1;
528      }
529
530      // finalization
531      hash ^= totalLen;
532      hash ^= (hash >>> 16);
533      hash *= 0x85ebca6b;
534      hash ^= (hash >>> 13);
535      hash *= 0xc2b2ae35;
536      hash ^= (hash >>> 16);
537      return hash;
538    }
539  }
540
541  private static int orBytes(byte b1, byte b2, byte b3, byte b4) {
542    return (b1 & 0xff) | ((b2 & 0xff) << 8) | ((b3 & 0xff) << 16) | ((b4 & 0xff) << 24);
543  }
544}