1 /// Polynomial hash for partial rehashing. 2 /// http://stackoverflow.com/a/42112687/21501 3 /// Written by Vladimir Panteleev <vladimir@thecybershadow.net> 4 /// License: Boost Software License, Version 1.0 5 6 module polyhash; 7 8 import std.range.primitives; 9 import std.traits; 10 11 struct PolynomialHash(Value) 12 { 13 Value value; /// The hash value of the hashed string 14 size_t length; /// The length of the hashed string 15 16 // Cycle length == 2^^30 for uint, > 2^^46 for ulong 17 // TODO: find primitive root modulo 2^^32, if one exists 18 enum Value p = Value(269); 19 20 private 21 { 22 /// Precalculated table for (p^^(2^^i)) 23 alias Power2s = Value[size_t.sizeof * 8]; 24 25 static Power2s genTable() 26 { 27 Value[size_t.sizeof * 8] result; 28 Value v = p; 29 foreach (i; 0 .. result.length) 30 { 31 result[i] = v; 32 v *= v; 33 } 34 return result; 35 } 36 37 static if (is(typeof({ enum table = genTable(); }))) 38 static immutable Power2s power2s = genTable(); // Compute at compile-time 39 else 40 { 41 static immutable Power2s power2s; 42 // Compute at run-time (initialization) 43 shared static this() { power2s = genTable(); } 44 } 45 } 46 47 /// Return p^^power (mod q). 48 static Value pPower(size_t power) 49 { 50 Value v = 1; 51 foreach (b; 0 .. power2s.length) 52 if ((size_t(1) << b) & power) 53 v *= power2s[b]; 54 return v; 55 } 56 57 void put(char c) 58 { 59 value *= p; 60 value += Value(c); 61 length++; 62 } 63 64 void put(in char[] s) 65 { 66 foreach (c; s) 67 { 68 value *= p; 69 value += Value(c); 70 } 71 length += s.length; 72 } 73 74 void put(ref typeof(this) hash) 75 { 76 value *= pPower(hash.length); 77 value += hash.value; 78 length += hash.length; 79 } 80 81 static typeof(this) hash(T)(T value) 82 if (is(typeof({ typeof(this) result; .put(result, value); }))) 83 { 84 typeof(this) result; 85 .put(result, value); 86 return result; 87 } 88 89 unittest 90 { 91 assert(hash("").value == 0); 92 assert(hash([hash(""), hash("")]).value == 0); 93 94 // "a" + "" + "b" == "ab" 95 assert(hash([hash("a"), hash(""), hash("b")]) == hash("ab")); 96 97 // "a" + "bc" == "ab" + "c" 98 assert(hash([hash("a"), hash("bc")]) == hash([hash("ab"), hash("c")])); 99 100 // "a" != "b" 101 assert(hash("a") != hash("b")); 102 103 // "ab" != "ba" 104 assert(hash("ab") != hash("ba")); 105 assert(hash([hash("a"), hash("b")]) != hash([hash("b"), hash("a")])); 106 107 // Test overflow 108 assert(hash([ 109 hash("Mary"), 110 hash(" "), 111 hash("had"), 112 hash(" "), 113 hash("a"), 114 hash(" "), 115 hash("little"), 116 hash(" "), 117 hash("lamb"), 118 hash("") 119 ]) == hash("Mary had a little lamb")); 120 } 121 } 122 123 unittest 124 { 125 PolynomialHash!uint uintTest; 126 PolynomialHash!ulong ulongTest; 127 } 128 129 unittest 130 { 131 PolynomialHash!(ModQ!(uint, 4294967291)) modQtest; 132 } 133 134 // **************************************************************************** 135 136 /// Represents a value and performs calculations in modulo q. 137 struct ModQ(T, T q) 138 if (isUnsigned!T) 139 { 140 T value; 141 142 this(T v) 143 { 144 debug assert(v < q); 145 this.value = v; 146 } 147 148 bool opEquals(T operand) const 149 { 150 debug assert(operand < q); 151 return value == operand; 152 } 153 154 void opOpAssign(string op : "+")(typeof(this) operand) 155 { 156 T result = this.value; 157 result += operand.value; 158 if (result >= q || result < this.value || result < operand.value) 159 result -= q; 160 this.value = result; 161 } 162 163 void opOpAssign(string op : "*")(typeof(this) operand) 164 { 165 this.value = longMul(this.value, operand.value).longDiv(q).remainder; 166 } 167 168 T opCast(Q)() const if (is(Q == T)) { return value; } 169 170 // Ensure this type is supported whet it is instantiated, 171 // instead of when the operator overloads are 172 private static void check() { typeof(this) m; m *= typeof(this)(0); } 173 } 174 175 unittest 176 { 177 alias M = ModQ!(ushort, 100); 178 M value; 179 value += M(56); 180 value += M(78); 181 assert(value == 34); 182 } 183 184 unittest 185 { 186 alias M = ModQ!(ushort, 100); 187 M value; 188 value += M(12); 189 value *= M(12); 190 assert(value == 44); 191 } 192 193 // **************************************************************************** 194 195 private: 196 197 import std.traits; 198 199 /// Get the smallest built-in unsigned integer type 200 /// that can store this many bits of data. 201 template TypeForBits(uint bits) 202 { 203 static if (bits <= 8) 204 alias TypeForBits = ubyte; 205 else 206 static if (bits <= 16) 207 alias TypeForBits = ushort; 208 else 209 static if (bits <= 32) 210 alias TypeForBits = uint; 211 else 212 static if (bits <= 64) 213 alias TypeForBits = ulong; 214 else 215 static assert(false, "No integer type big enough for " ~ bits.stringof ~ " bits"); 216 } 217 218 struct LongInt(uint bits, bool signed) 219 { 220 TypeForBits!bits low; 221 static if (signed) 222 Signed!(TypeForBits!bits) high; 223 else 224 TypeForBits!bits high; 225 } 226 227 alias LongInt(T) = LongInt!(T.sizeof * 8, isSigned!T); 228 229 alias Cent = LongInt!long; 230 alias UCent = LongInt!ulong; 231 232 version (X86) 233 version = Intel; 234 else 235 version (X86_64) 236 version = Intel; 237 238 // Hack to work around DMD bug https://issues.dlang.org/show_bug.cgi?id=20677 239 version (Intel) 240 public enum modQSupported = size_t.sizeof == 8; 241 else 242 public enum modQSupported = false; 243 244 version (Intel) 245 { 246 version (DigitalMars) 247 enum x86RegSizePrefix(T) = 248 T.sizeof == 2 ? "" : 249 T.sizeof == 4 ? "E" : 250 T.sizeof == 8 ? "R" : 251 "?"; // force syntax error 252 else 253 { 254 enum x86RegSizePrefix(T) = 255 T.sizeof == 2 ? "" : 256 T.sizeof == 4 ? "e" : 257 T.sizeof == 8 ? "r" : 258 "?"; // force syntax error 259 enum x86SizeOpSuffix(T) = 260 T.sizeof == 2 ? "w" : 261 T.sizeof == 4 ? "l" : 262 T.sizeof == 8 ? "q" : 263 "?"; // force syntax error 264 } 265 266 enum x86SignedOpPrefix(T) = isSigned!T ? "i" : ""; 267 } 268 269 LongInt!T longMul(T)(T a, T b) 270 if (is(T : long) && T.sizeof >= 2) 271 { 272 version (Intel) 273 { 274 version (LDC) 275 { 276 import ldc.llvmasm; 277 auto t = __asmtuple!(T, T)( 278 x86SignedOpPrefix!T~`mul`~x86SizeOpSuffix!T~` $3`, 279 // Technically, the last one should be "rm", but that generates suboptimal code in many cases 280 `={`~x86RegSizePrefix!T~`ax},={`~x86RegSizePrefix!T~`dx},{`~x86RegSizePrefix!T~`ax},r`, 281 a, b 282 ); 283 return typeof(return)(t.v[0], t.v[1]); 284 } 285 else 286 version (GNU) 287 { 288 T low = void, high = void; 289 mixin(` 290 asm 291 { 292 "`~x86SignedOpPrefix!T~`mul`~x86SizeOpSuffix!T~` %3" 293 : "=a" low, "=d" high 294 : "a" a, "rm" b; 295 } 296 `); 297 return typeof(return)(low, high); 298 } 299 else 300 { 301 T low = void, high = void; 302 mixin(` 303 asm 304 { 305 mov `~x86RegSizePrefix!T~`AX, a; 306 `~x86SignedOpPrefix!T~`mul b; 307 mov low, `~x86RegSizePrefix!T~`AX; 308 mov high, `~x86RegSizePrefix!T~`DX; 309 } 310 `); 311 return typeof(return)(low, high); 312 } 313 } 314 else 315 static assert(false, "Not implemented on this architecture"); 316 } 317 318 version (Intel) 319 unittest 320 { 321 assert(longMul(1, 1) == LongInt!int(1, 0)); 322 assert(longMul(1, 2) == LongInt!int(2, 0)); 323 assert(longMul(0x1_0000, 0x1_0000) == LongInt!int(0, 1)); 324 325 assert(longMul(short(1), short(1)) == LongInt!short(1, 0)); 326 assert(longMul(short(0x100), short(0x100)) == LongInt!short(0, 1)); 327 328 assert(longMul(short(1), short(-1)) == LongInt!short(cast(ushort)-1, -1)); 329 assert(longMul(ushort(1), cast(ushort)-1) == LongInt!ushort(cast(ushort)-1, 0)); 330 331 version(X86_64) 332 { 333 assert(longMul(1L, 1L) == LongInt!long(1, 0)); 334 assert(longMul(0x1_0000_0000L, 0x1_0000_0000L) == LongInt!long(0, 1)); 335 } 336 } 337 338 struct DivResult(T) { T quotient, remainder; } 339 340 DivResult!T longDiv(T, L)(L a, T b) 341 if (is(T : long) && T.sizeof >= 2 && is(L == LongInt!T)) 342 { 343 version (Intel) 344 { 345 version (LDC) 346 { 347 import ldc.llvmasm; 348 auto t = __asmtuple!(T, T)( 349 x86SignedOpPrefix!T~`div`~x86SizeOpSuffix!T~` $4`, 350 // Technically, the last one should be "rm", but that generates suboptimal code in many cases 351 `={`~x86RegSizePrefix!T~`ax},={`~x86RegSizePrefix!T~`dx},{`~x86RegSizePrefix!T~`ax},{`~x86RegSizePrefix!T~`dx},r`, 352 a.low, a.high, b 353 ); 354 return typeof(return)(t.v[0], t.v[1]); 355 } 356 else 357 version (GNU) 358 { 359 T low = a.low, high = a.high; 360 T quotient = void; 361 T remainder = void; 362 mixin(` 363 asm 364 { 365 "`~x86SignedOpPrefix!T~`div`~x86SizeOpSuffix!T~` %4" 366 : "=a" quotient, "=d" remainder 367 : "a" low, "d" high, "rm" b; 368 } 369 `); 370 return typeof(return)(quotient, remainder); 371 } 372 else 373 { 374 auto low = a.low; 375 auto high = a.high; 376 T quotient = void; 377 T remainder = void; 378 mixin(` 379 asm 380 { 381 mov `~x86RegSizePrefix!T~`AX, low; 382 mov `~x86RegSizePrefix!T~`DX, high; 383 `~x86SignedOpPrefix!T~`div b; 384 mov quotient, `~x86RegSizePrefix!T~`AX; 385 mov remainder, `~x86RegSizePrefix!T~`DX; 386 } 387 `); 388 return typeof(return)(quotient, remainder); 389 } 390 } 391 else 392 static assert(false, "Not implemented on this architecture"); 393 } 394 395 version (Intel) 396 unittest 397 { 398 assert(longDiv(LongInt!int(1, 0), 1) == DivResult!int(1, 0)); 399 assert(longDiv(LongInt!int(5, 0), 2) == DivResult!int(2, 1)); 400 assert(longDiv(LongInt!int(0, 1), 0x1_0000) == DivResult!int(0x1_0000, 0)); 401 402 assert(longDiv(LongInt!short(1, 0), short(1)) == DivResult!short(1, 0)); 403 assert(longDiv(LongInt!short(0, 1), short(0x100)) == DivResult!short(0x100, 0)); 404 405 assert(longDiv(LongInt!short(cast(ushort)-1, -1), short(-1)) == DivResult!short(1)); 406 assert(longDiv(LongInt!ushort(cast(ushort)-1, 0), cast(ushort)-1) == DivResult!ushort(1)); 407 408 version(X86_64) 409 { 410 assert(longDiv(LongInt!long(1, 0), 1L) == DivResult!long(1)); 411 assert(longDiv(LongInt!long(0, 1), 0x1_0000_0000L) == DivResult!long(0x1_0000_0000)); 412 } 413 }