001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hive.common.util; 020 021/** 022 * Murmur3 is successor to Murmur2 fast non-crytographic hash algorithms. 023 * 024 * Murmur3 32 and 128 bit variants. 025 * 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94 026 * 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255 027 * 028 * This is a public domain code with no copyrights. 029 * From homepage of MurmurHash (https://code.google.com/p/smhasher/), 030 * "All MurmurHash versions are public domain software, and the author disclaims all copyright 031 * to their code." 032 */ 033public class Murmur3 { 034 // from 64-bit linear congruential generator 035 public static final long NULL_HASHCODE = 2862933555777941757L; 036 037 // Constants for 32 bit variant 038 private static final int C1_32 = 0xcc9e2d51; 039 private static final int C2_32 = 0x1b873593; 040 private static final int R1_32 = 15; 041 private static final int R2_32 = 13; 042 private static final int M_32 = 5; 043 private static final int N_32 = 0xe6546b64; 044 045 // Constants for 128 bit variant 046 private static final long C1 = 0x87c37b91114253d5L; 047 private static final long C2 = 0x4cf5ad432745937fL; 048 private static final int R1 = 31; 049 private static final int R2 = 27; 050 private static final int R3 = 33; 051 private static final int M = 5; 052 private static final int N1 = 0x52dce729; 053 private static final int N2 = 0x38495ab5; 054 055 public static final int DEFAULT_SEED = 104729; 056 057 public static int hash32(long l0, long l1) { 058 return hash32(l0, l1, DEFAULT_SEED); 059 } 060 061 public static int hash32(long l0) { 062 return hash32(l0, DEFAULT_SEED); 063 } 064 065 /** 066 * Murmur3 32-bit variant. 067 */ 068 public static int hash32(long l0, int seed) { 069 int hash = seed; 070 final long r0 = Long.reverseBytes(l0); 071 072 hash = mix32((int) r0, hash); 073 hash = mix32((int) (r0 >>> 32), hash); 074 075 return fmix32(Long.BYTES, hash); 076 } 077 078 /** 079 * Murmur3 32-bit variant. 080 */ 081 public static int hash32(long l0, long l1, int seed) { 082 int hash = seed; 083 final long r0 = Long.reverseBytes(l0); 084 final long r1 = Long.reverseBytes(l1); 085 086 hash = mix32((int) r0, hash); 087 hash = mix32((int) (r0 >>> 32), hash); 088 hash = mix32((int) (r1), hash); 089 hash = mix32((int) (r1 >>> 32), hash); 090 091 return fmix32(Long.BYTES * 2, hash); 092 } 093 094 /** 095 * Murmur3 32-bit variant. 096 * 097 * @param data - input byte array 098 * @return - hashcode 099 */ 100 public static int hash32(byte[] data) { 101 return hash32(data, 0, data.length, DEFAULT_SEED); 102 } 103 104 /** 105 * Murmur3 32-bit variant. 106 * 107 * @param data - input byte array 108 * @param length - length of array 109 * @return - hashcode 110 */ 111 public static int hash32(byte[] data, int length) { 112 return hash32(data, 0, length, DEFAULT_SEED); 113 } 114 115 /** 116 * Murmur3 32-bit variant. 117 * 118 * @param data - input byte array 119 * @param length - length of array 120 * @param seed - seed. (default 0) 121 * @return - hashcode 122 */ 123 public static int hash32(byte[] data, int length, int seed) { 124 return hash32(data, 0, length, seed); 125 } 126 127 /** 128 * Murmur3 32-bit variant. 129 * 130 * @param data - input byte array 131 * @param offset - offset of data 132 * @param length - length of array 133 * @param seed - seed. (default 0) 134 * @return - hashcode 135 */ 136 public static int hash32(byte[] data, int offset, int length, int seed) { 137 int hash = seed; 138 final int nblocks = length >> 2; 139 140 // body 141 for (int i = 0; i < nblocks; i++) { 142 int i_4 = i << 2; 143 int k = (data[offset + i_4] & 0xff) 144 | ((data[offset + i_4 + 1] & 0xff) << 8) 145 | ((data[offset + i_4 + 2] & 0xff) << 16) 146 | ((data[offset + i_4 + 3] & 0xff) << 24); 147 148 hash = mix32(k, hash); 149 } 150 151 // tail 152 int idx = nblocks << 2; 153 int k1 = 0; 154 switch (length - idx) { 155 case 3: 156 k1 ^= data[offset + idx + 2] << 16; 157 case 2: 158 k1 ^= data[offset + idx + 1] << 8; 159 case 1: 160 k1 ^= data[offset + idx]; 161 162 // mix functions 163 k1 *= C1_32; 164 k1 = Integer.rotateLeft(k1, R1_32); 165 k1 *= C2_32; 166 hash ^= k1; 167 } 168 169 return fmix32(length, hash); 170 } 171 172 private static int mix32(int k, int hash) { 173 k *= C1_32; 174 k = Integer.rotateLeft(k, R1_32); 175 k *= C2_32; 176 hash ^= k; 177 return Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 178 } 179 180 private static int fmix32(int length, int hash) { 181 hash ^= length; 182 hash ^= (hash >>> 16); 183 hash *= 0x85ebca6b; 184 hash ^= (hash >>> 13); 185 hash *= 0xc2b2ae35; 186 hash ^= (hash >>> 16); 187 188 return hash; 189 } 190 191 /** 192 * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant. 193 * 194 * @param data - input byte array 195 * @return - hashcode 196 */ 197 public static long hash64(byte[] data) { 198 return hash64(data, 0, data.length, DEFAULT_SEED); 199 } 200 201 public static long hash64(long data) { 202 long hash = DEFAULT_SEED; 203 long k = Long.reverseBytes(data); 204 int length = Long.BYTES; 205 // mix functions 206 k *= C1; 207 k = Long.rotateLeft(k, R1); 208 k *= C2; 209 hash ^= k; 210 hash = Long.rotateLeft(hash, R2) * M + N1; 211 // finalization 212 hash ^= length; 213 hash = fmix64(hash); 214 return hash; 215 } 216 217 public static long hash64(int data) { 218 long k1 = Integer.reverseBytes(data) & (-1L >>> 32); 219 int length = Integer.BYTES; 220 long hash = DEFAULT_SEED; 221 k1 *= C1; 222 k1 = Long.rotateLeft(k1, R1); 223 k1 *= C2; 224 hash ^= k1; 225 // finalization 226 hash ^= length; 227 hash = fmix64(hash); 228 return hash; 229 } 230 231 public static long hash64(short data) { 232 long hash = DEFAULT_SEED; 233 long k1 = 0; 234 k1 ^= ((long) data & 0xff) << 8; 235 k1 ^= ((long)((data & 0xFF00) >> 8) & 0xff); 236 k1 *= C1; 237 k1 = Long.rotateLeft(k1, R1); 238 k1 *= C2; 239 hash ^= k1; 240 241 // finalization 242 hash ^= Short.BYTES; 243 hash = fmix64(hash); 244 return hash; 245 } 246 247 public static long hash64(byte[] data, int offset, int length) { 248 return hash64(data, offset, length, DEFAULT_SEED); 249 } 250 251 /** 252 * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant. 253 * 254 * @param data - input byte array 255 * @param length - length of array 256 * @param seed - seed. (default is 0) 257 * @return - hashcode 258 */ 259 public static long hash64(byte[] data, int offset, int length, int seed) { 260 long hash = seed; 261 final int nblocks = length >> 3; 262 263 // body 264 for (int i = 0; i < nblocks; i++) { 265 final int i8 = i << 3; 266 long k = ((long) data[offset + i8] & 0xff) 267 | (((long) data[offset + i8 + 1] & 0xff) << 8) 268 | (((long) data[offset + i8 + 2] & 0xff) << 16) 269 | (((long) data[offset + i8 + 3] & 0xff) << 24) 270 | (((long) data[offset + i8 + 4] & 0xff) << 32) 271 | (((long) data[offset + i8 + 5] & 0xff) << 40) 272 | (((long) data[offset + i8 + 6] & 0xff) << 48) 273 | (((long) data[offset + i8 + 7] & 0xff) << 56); 274 275 // mix functions 276 k *= C1; 277 k = Long.rotateLeft(k, R1); 278 k *= C2; 279 hash ^= k; 280 hash = Long.rotateLeft(hash, R2) * M + N1; 281 } 282 283 // tail 284 long k1 = 0; 285 int tailStart = nblocks << 3; 286 switch (length - tailStart) { 287 case 7: 288 k1 ^= ((long) data[offset + tailStart + 6] & 0xff) << 48; 289 case 6: 290 k1 ^= ((long) data[offset + tailStart + 5] & 0xff) << 40; 291 case 5: 292 k1 ^= ((long) data[offset + tailStart + 4] & 0xff) << 32; 293 case 4: 294 k1 ^= ((long) data[offset + tailStart + 3] & 0xff) << 24; 295 case 3: 296 k1 ^= ((long) data[offset + tailStart + 2] & 0xff) << 16; 297 case 2: 298 k1 ^= ((long) data[offset + tailStart + 1] & 0xff) << 8; 299 case 1: 300 k1 ^= ((long) data[offset + tailStart] & 0xff); 301 k1 *= C1; 302 k1 = Long.rotateLeft(k1, R1); 303 k1 *= C2; 304 hash ^= k1; 305 } 306 307 // finalization 308 hash ^= length; 309 hash = fmix64(hash); 310 311 return hash; 312 } 313 314 /** 315 * Murmur3 128-bit variant. 316 * 317 * @param data - input byte array 318 * @return - hashcode (2 longs) 319 */ 320 public static long[] hash128(byte[] data) { 321 return hash128(data, 0, data.length, DEFAULT_SEED); 322 } 323 324 /** 325 * Murmur3 128-bit variant. 326 * 327 * @param data - input byte array 328 * @param offset - the first element of array 329 * @param length - length of array 330 * @param seed - seed. (default is 0) 331 * @return - hashcode (2 longs) 332 */ 333 public static long[] hash128(byte[] data, int offset, int length, int seed) { 334 long h1 = seed; 335 long h2 = seed; 336 final int nblocks = length >> 4; 337 338 // body 339 for (int i = 0; i < nblocks; i++) { 340 final int i16 = i << 4; 341 long k1 = ((long) data[offset + i16] & 0xff) 342 | (((long) data[offset + i16 + 1] & 0xff) << 8) 343 | (((long) data[offset + i16 + 2] & 0xff) << 16) 344 | (((long) data[offset + i16 + 3] & 0xff) << 24) 345 | (((long) data[offset + i16 + 4] & 0xff) << 32) 346 | (((long) data[offset + i16 + 5] & 0xff) << 40) 347 | (((long) data[offset + i16 + 6] & 0xff) << 48) 348 | (((long) data[offset + i16 + 7] & 0xff) << 56); 349 350 long k2 = ((long) data[offset + i16 + 8] & 0xff) 351 | (((long) data[offset + i16 + 9] & 0xff) << 8) 352 | (((long) data[offset + i16 + 10] & 0xff) << 16) 353 | (((long) data[offset + i16 + 11] & 0xff) << 24) 354 | (((long) data[offset + i16 + 12] & 0xff) << 32) 355 | (((long) data[offset + i16 + 13] & 0xff) << 40) 356 | (((long) data[offset + i16 + 14] & 0xff) << 48) 357 | (((long) data[offset + i16 + 15] & 0xff) << 56); 358 359 // mix functions for k1 360 k1 *= C1; 361 k1 = Long.rotateLeft(k1, R1); 362 k1 *= C2; 363 h1 ^= k1; 364 h1 = Long.rotateLeft(h1, R2); 365 h1 += h2; 366 h1 = h1 * M + N1; 367 368 // mix functions for k2 369 k2 *= C2; 370 k2 = Long.rotateLeft(k2, R3); 371 k2 *= C1; 372 h2 ^= k2; 373 h2 = Long.rotateLeft(h2, R1); 374 h2 += h1; 375 h2 = h2 * M + N2; 376 } 377 378 // tail 379 long k1 = 0; 380 long k2 = 0; 381 int tailStart = nblocks << 4; 382 switch (length - tailStart) { 383 case 15: 384 k2 ^= (long) (data[offset + tailStart + 14] & 0xff) << 48; 385 case 14: 386 k2 ^= (long) (data[offset + tailStart + 13] & 0xff) << 40; 387 case 13: 388 k2 ^= (long) (data[offset + tailStart + 12] & 0xff) << 32; 389 case 12: 390 k2 ^= (long) (data[offset + tailStart + 11] & 0xff) << 24; 391 case 11: 392 k2 ^= (long) (data[offset + tailStart + 10] & 0xff) << 16; 393 case 10: 394 k2 ^= (long) (data[offset + tailStart + 9] & 0xff) << 8; 395 case 9: 396 k2 ^= (long) (data[offset + tailStart + 8] & 0xff); 397 k2 *= C2; 398 k2 = Long.rotateLeft(k2, R3); 399 k2 *= C1; 400 h2 ^= k2; 401 402 case 8: 403 k1 ^= (long) (data[offset + tailStart + 7] & 0xff) << 56; 404 case 7: 405 k1 ^= (long) (data[offset + tailStart + 6] & 0xff) << 48; 406 case 6: 407 k1 ^= (long) (data[offset + tailStart + 5] & 0xff) << 40; 408 case 5: 409 k1 ^= (long) (data[offset + tailStart + 4] & 0xff) << 32; 410 case 4: 411 k1 ^= (long) (data[offset + tailStart + 3] & 0xff) << 24; 412 case 3: 413 k1 ^= (long) (data[offset + tailStart + 2] & 0xff) << 16; 414 case 2: 415 k1 ^= (long) (data[offset + tailStart + 1] & 0xff) << 8; 416 case 1: 417 k1 ^= (long) (data[offset + tailStart] & 0xff); 418 k1 *= C1; 419 k1 = Long.rotateLeft(k1, R1); 420 k1 *= C2; 421 h1 ^= k1; 422 } 423 424 // finalization 425 h1 ^= length; 426 h2 ^= length; 427 428 h1 += h2; 429 h2 += h1; 430 431 h1 = fmix64(h1); 432 h2 = fmix64(h2); 433 434 h1 += h2; 435 h2 += h1; 436 437 return new long[]{h1, h2}; 438 } 439 440 private static long fmix64(long h) { 441 h ^= (h >>> 33); 442 h *= 0xff51afd7ed558ccdL; 443 h ^= (h >>> 33); 444 h *= 0xc4ceb9fe1a85ec53L; 445 h ^= (h >>> 33); 446 return h; 447 } 448 449 public static class IncrementalHash32 { 450 byte[] tail = new byte[3]; 451 int tailLen; 452 int totalLen; 453 int hash; 454 455 public final void start(int hash) { 456 tailLen = totalLen = 0; 457 this.hash = hash; 458 } 459 460 public final void add(byte[] data, int offset, int length) { 461 if (length == 0) return; 462 totalLen += length; 463 if (tailLen + length < 4) { 464 System.arraycopy(data, offset, tail, tailLen, length); 465 tailLen += length; 466 return; 467 } 468 int offset2 = 0; 469 if (tailLen > 0) { 470 offset2 = (4 - tailLen); 471 int k = -1; 472 switch (tailLen) { 473 case 1: 474 k = orBytes(tail[0], data[offset], data[offset + 1], data[offset + 2]); 475 break; 476 case 2: 477 k = orBytes(tail[0], tail[1], data[offset], data[offset + 1]); 478 break; 479 case 3: 480 k = orBytes(tail[0], tail[1], tail[2], data[offset]); 481 break; 482 default: throw new AssertionError(tailLen); 483 } 484 // mix functions 485 k *= C1_32; 486 k = Integer.rotateLeft(k, R1_32); 487 k *= C2_32; 488 hash ^= k; 489 hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 490 } 491 int length2 = length - offset2; 492 offset += offset2; 493 final int nblocks = length2 >> 2; 494 495 for (int i = 0; i < nblocks; i++) { 496 int i_4 = (i << 2) + offset; 497 int k = orBytes(data[i_4], data[i_4 + 1], data[i_4 + 2], data[i_4 + 3]); 498 499 // mix functions 500 k *= C1_32; 501 k = Integer.rotateLeft(k, R1_32); 502 k *= C2_32; 503 hash ^= k; 504 hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 505 } 506 507 int consumed = (nblocks << 2); 508 tailLen = length2 - consumed; 509 if (consumed == length2) return; 510 System.arraycopy(data, offset + consumed, tail, 0, tailLen); 511 } 512 513 public final int end() { 514 int k1 = 0; 515 switch (tailLen) { 516 case 3: 517 k1 ^= tail[2] << 16; 518 case 2: 519 k1 ^= tail[1] << 8; 520 case 1: 521 k1 ^= tail[0]; 522 523 // mix functions 524 k1 *= C1_32; 525 k1 = Integer.rotateLeft(k1, R1_32); 526 k1 *= C2_32; 527 hash ^= k1; 528 } 529 530 // finalization 531 hash ^= totalLen; 532 hash ^= (hash >>> 16); 533 hash *= 0x85ebca6b; 534 hash ^= (hash >>> 13); 535 hash *= 0xc2b2ae35; 536 hash ^= (hash >>> 16); 537 return hash; 538 } 539 } 540 541 private static int orBytes(byte b1, byte b2, byte b3, byte b4) { 542 return (b1 & 0xff) | ((b2 & 0xff) << 8) | ((b3 & 0xff) << 16) | ((b4 & 0xff) << 24); 543 } 544}