1 /// Polynomial hash for partial rehashing.
2 /// http://stackoverflow.com/a/42112687/21501
3 /// Written by Vladimir Panteleev <vladimir@thecybershadow.net>
4 /// License: Boost Software License, Version 1.0
5 
6 module polyhash;
7 
8 import std.range.primitives;
9 import std.traits;
10 
11 struct PolynomialHash(Value)
12 {
13 	Value value;   /// The hash value of the hashed string
14 	size_t length; /// The length of the hashed string
15 
16 	// Cycle length == 2^^30 for uint, > 2^^46 for ulong
17 	// TODO: find primitive root modulo 2^^32, if one exists
18 	enum Value p = Value(269);
19 
20 	private
21 	{
22 		/// Precalculated table for (p^^(2^^i))
23 		alias Power2s = Value[size_t.sizeof * 8];
24 
25 		static Power2s genTable()
26 		{
27 			Value[size_t.sizeof * 8] result;
28 			Value v = p;
29 			foreach (i; 0 .. result.length)
30 			{
31 				result[i] = v;
32 				v *= v;
33 			}
34 			return result;
35 		}
36 
37 		static if (is(typeof({ enum table = genTable(); })))
38 			static immutable Power2s power2s = genTable(); // Compute at compile-time
39 		else
40 		{
41 			static immutable Power2s power2s;
42 			// Compute at run-time (initialization)
43 			shared static this() { power2s = genTable(); }
44 		}
45 	}
46 
47 	/// Return p^^power (mod q).
48 	static Value pPower(size_t power)
49 	{
50 		Value v = 1;
51 		foreach (b; 0 .. power2s.length)
52 			if ((size_t(1) << b) & power)
53 				v *= power2s[b];
54 		return v;
55 	}
56 
57 	void put(char c)
58 	{
59 		value *= p;
60 		value += Value(c);
61 		length++;
62 	}
63 
64 	void put(in char[] s)
65 	{
66 		foreach (c; s)
67 		{
68 			value *= p;
69 			value += Value(c);
70 		}
71 		length += s.length;
72 	}
73 
74 	void put(ref typeof(this) hash)
75 	{
76 		value *= pPower(hash.length);
77 		value += hash.value;
78 		length += hash.length;
79 	}
80 
81 	static typeof(this) hash(T)(T value)
82 	if (is(typeof({ typeof(this) result; .put(result, value); })))
83 	{
84 		typeof(this) result;
85 		.put(result, value);
86 		return result;
87 	}
88 
89 	unittest
90 	{
91 		assert(hash("").value == 0);
92 		assert(hash([hash(""), hash("")]).value == 0);
93 
94 		// "a" + "" + "b" == "ab"
95 		assert(hash([hash("a"), hash(""), hash("b")]) == hash("ab"));
96 
97 		// "a" + "bc" == "ab" + "c"
98 		assert(hash([hash("a"), hash("bc")]) == hash([hash("ab"), hash("c")]));
99 
100 		// "a" != "b"
101 		assert(hash("a") != hash("b"));
102 
103 		// "ab" != "ba"
104 		assert(hash("ab") != hash("ba"));
105 		assert(hash([hash("a"), hash("b")]) != hash([hash("b"), hash("a")]));
106 
107 		// Test overflow
108 		assert(hash([
109 			hash("Mary"),
110 			hash(" "),
111 			hash("had"),
112 			hash(" "),
113 			hash("a"),
114 			hash(" "),
115 			hash("little"),
116 			hash(" "),
117 			hash("lamb"),
118 			hash("")
119 		]) == hash("Mary had a little lamb"));
120 	}
121 }
122 
123 unittest
124 {
125 	PolynomialHash!uint uintTest;
126 	PolynomialHash!ulong ulongTest;
127 }
128 
129 unittest
130 {
131 	PolynomialHash!(ModQ!(uint, 4294967291)) modQtest;
132 }
133 
134 // ****************************************************************************
135 
136 /// Represents a value and performs calculations in modulo q.
137 struct ModQ(T, T q)
138 if (isUnsigned!T)
139 {
140 	T value;
141 
142 	this(T v)
143 	{
144 		debug assert(v < q);
145 		this.value = v;
146 	}
147 
148 	bool opEquals(T operand) const
149 	{
150 		debug assert(operand < q);
151 		return value == operand;
152 	}
153 
154 	void opOpAssign(string op : "+")(typeof(this) operand)
155 	{
156 		T result = this.value;
157 		result += operand.value;
158 		if (result >= q || result < this.value || result < operand.value)
159 			result -= q;
160 		this.value = result;
161 	}
162 
163 	void opOpAssign(string op : "*")(typeof(this) operand)
164 	{
165 		this.value = longMul(this.value, operand.value).longDiv(q).remainder;
166 	}
167 
168 	T opCast(Q)() const if (is(Q == T)) { return value; }
169 
170 	// Ensure this type is supported whet it is instantiated,
171 	// instead of when the operator overloads are
172 	private static void check() { typeof(this) m; m *= typeof(this)(0); }
173 }
174 
175 unittest
176 {
177 	alias M = ModQ!(ushort, 100);
178 	M value;
179 	value += M(56);
180 	value += M(78);
181 	assert(value == 34);
182 }
183 
184 unittest
185 {
186 	alias M = ModQ!(ushort, 100);
187 	M value;
188 	value += M(12);
189 	value *= M(12);
190 	assert(value == 44);
191 }
192 
193 // ****************************************************************************
194 
195 private:
196 
197 import std.traits;
198 
199 /// Get the smallest built-in unsigned integer type
200 /// that can store this many bits of data.
201 template TypeForBits(uint bits)
202 {
203 	static if (bits <= 8)
204 		alias TypeForBits = ubyte;
205 	else
206 	static if (bits <= 16)
207 		alias TypeForBits = ushort;
208 	else
209 	static if (bits <= 32)
210 		alias TypeForBits = uint;
211 	else
212 	static if (bits <= 64)
213 		alias TypeForBits = ulong;
214 	else
215 		static assert(false, "No integer type big enough for " ~ bits.stringof ~ " bits");
216 }
217 
218 struct LongInt(uint bits, bool signed)
219 {
220 	TypeForBits!bits low;
221 	static if (signed)
222 		Signed!(TypeForBits!bits) high;
223 	else
224 		TypeForBits!bits high;
225 }
226 
227 alias LongInt(T) = LongInt!(T.sizeof * 8, isSigned!T);
228 
229 alias Cent = LongInt!long;
230 alias UCent = LongInt!ulong;
231 
232 version (X86)
233 	version = Intel;
234 else
235 version (X86_64)
236 	version = Intel;
237 
238 // Hack to work around DMD bug https://issues.dlang.org/show_bug.cgi?id=20677
239 version (Intel)
240 	public enum modQSupported = size_t.sizeof == 8;
241 else
242 	public enum modQSupported = false;
243 
244 version (Intel)
245 {
246 	version (DigitalMars)
247 		enum x86RegSizePrefix(T) =
248 			T.sizeof == 2 ? "" :
249 			T.sizeof == 4 ? "E" :
250 			T.sizeof == 8 ? "R" :
251 			"?"; // force syntax error
252 	else
253 	{
254 		enum x86RegSizePrefix(T) =
255 			T.sizeof == 2 ? "" :
256 			T.sizeof == 4 ? "e" :
257 			T.sizeof == 8 ? "r" :
258 			"?"; // force syntax error
259 		enum x86SizeOpSuffix(T) =
260 			T.sizeof == 2 ? "w" :
261 			T.sizeof == 4 ? "l" :
262 			T.sizeof == 8 ? "q" :
263 			"?"; // force syntax error
264 	}
265 
266 	enum x86SignedOpPrefix(T) = isSigned!T ? "i" : "";
267 }
268 
269 LongInt!T longMul(T)(T a, T b)
270 if (is(T : long) && T.sizeof >= 2)
271 {
272 	version (Intel)
273 	{
274 		version (LDC)
275 		{
276 			import ldc.llvmasm;
277 			auto t = __asmtuple!(T, T)(
278 				x86SignedOpPrefix!T~`mul`~x86SizeOpSuffix!T~` $3`,
279 				// Technically, the last one should be "rm", but that generates suboptimal code in many cases
280 				`={`~x86RegSizePrefix!T~`ax},={`~x86RegSizePrefix!T~`dx},{`~x86RegSizePrefix!T~`ax},r`,
281 				a, b
282 			);
283 			return typeof(return)(t.v[0], t.v[1]);
284 		}
285 		else
286 		version (GNU)
287 		{
288 			T low = void, high = void;
289 			mixin(`
290 				asm
291 				{
292 					"`~x86SignedOpPrefix!T~`mul`~x86SizeOpSuffix!T~` %3"
293 					: "=a" low, "=d" high
294 					: "a" a, "rm" b;
295 				}
296 			`);
297 			return typeof(return)(low, high);
298 		}
299 		else
300 		{
301 			T low = void, high = void;
302 			mixin(`
303 				asm
304 				{
305 					mov `~x86RegSizePrefix!T~`AX, a;
306 					`~x86SignedOpPrefix!T~`mul b;
307 					mov low, `~x86RegSizePrefix!T~`AX;
308 					mov high, `~x86RegSizePrefix!T~`DX;
309 				}
310 			`);
311 			return typeof(return)(low, high);
312 		}
313 	}
314 	else
315 		static assert(false, "Not implemented on this architecture");
316 }
317 
318 version (Intel)
319 unittest
320 {
321 	assert(longMul(1, 1) == LongInt!int(1, 0));
322 	assert(longMul(1, 2) == LongInt!int(2, 0));
323 	assert(longMul(0x1_0000, 0x1_0000) == LongInt!int(0, 1));
324 
325 	assert(longMul(short(1), short(1)) == LongInt!short(1, 0));
326 	assert(longMul(short(0x100), short(0x100)) == LongInt!short(0, 1));
327 
328 	assert(longMul(short(1), short(-1)) == LongInt!short(cast(ushort)-1, -1));
329 	assert(longMul(ushort(1), cast(ushort)-1) == LongInt!ushort(cast(ushort)-1, 0));
330 
331 	version(X86_64)
332 	{
333 		assert(longMul(1L, 1L) == LongInt!long(1, 0));
334 		assert(longMul(0x1_0000_0000L, 0x1_0000_0000L) == LongInt!long(0, 1));
335 	}
336 }
337 
338 struct DivResult(T) { T quotient, remainder; }
339 
340 DivResult!T longDiv(T, L)(L a, T b)
341 if (is(T : long) && T.sizeof >= 2 && is(L == LongInt!T))
342 {
343 	version (Intel)
344 	{
345 		version (LDC)
346 		{
347 			import ldc.llvmasm;
348 			auto t = __asmtuple!(T, T)(
349 				x86SignedOpPrefix!T~`div`~x86SizeOpSuffix!T~` $4`,
350 				// Technically, the last one should be "rm", but that generates suboptimal code in many cases
351 				`={`~x86RegSizePrefix!T~`ax},={`~x86RegSizePrefix!T~`dx},{`~x86RegSizePrefix!T~`ax},{`~x86RegSizePrefix!T~`dx},r`,
352 				a.low, a.high, b
353 			);
354 			return typeof(return)(t.v[0], t.v[1]);
355 		}
356 		else
357 		version (GNU)
358 		{
359 			T low = a.low, high = a.high;
360 			T quotient = void;
361 			T remainder = void;
362 			mixin(`
363 				asm
364 				{
365 					"`~x86SignedOpPrefix!T~`div`~x86SizeOpSuffix!T~` %4"
366 					: "=a" quotient, "=d" remainder
367 					: "a" low, "d" high, "rm" b;
368 				}
369 			`);
370 			return typeof(return)(quotient, remainder);
371 		}
372 		else
373 		{
374 			auto low = a.low;
375 			auto high = a.high;
376 			T quotient = void;
377 			T remainder = void;
378 			mixin(`
379 				asm
380 				{
381 					mov `~x86RegSizePrefix!T~`AX, low;
382 					mov `~x86RegSizePrefix!T~`DX, high;
383 					`~x86SignedOpPrefix!T~`div b;
384 					mov quotient, `~x86RegSizePrefix!T~`AX;
385 					mov remainder, `~x86RegSizePrefix!T~`DX;
386 				}
387 			`);
388 			return typeof(return)(quotient, remainder);
389 		}
390 	}
391 	else
392 		static assert(false, "Not implemented on this architecture");
393 }
394 
395 version (Intel)
396 unittest
397 {
398 	assert(longDiv(LongInt!int(1, 0), 1) == DivResult!int(1, 0));
399 	assert(longDiv(LongInt!int(5, 0), 2) == DivResult!int(2, 1));
400 	assert(longDiv(LongInt!int(0, 1), 0x1_0000) == DivResult!int(0x1_0000, 0));
401 
402 	assert(longDiv(LongInt!short(1, 0), short(1)) == DivResult!short(1, 0));
403 	assert(longDiv(LongInt!short(0, 1), short(0x100)) == DivResult!short(0x100, 0));
404 
405 	assert(longDiv(LongInt!short(cast(ushort)-1, -1), short(-1)) == DivResult!short(1));
406 	assert(longDiv(LongInt!ushort(cast(ushort)-1, 0), cast(ushort)-1) == DivResult!ushort(1));
407 
408 	version(X86_64)
409 	{
410 		assert(longDiv(LongInt!long(1, 0), 1L) == DivResult!long(1));
411 		assert(longDiv(LongInt!long(0, 1), 0x1_0000_0000L) == DivResult!long(0x1_0000_0000));
412 	}
413 }