1 /// DustMite, a general-purpose data reduction tool
2 /// Written by Vladimir Panteleev <vladimir@thecybershadow.net>
3 /// License: Boost Software License, Version 1.0
4 
5 module dustmite;
6 
7 import core.atomic;
8 import core.thread;
9 
10 import std.algorithm;
11 import std.array;
12 import std.ascii;
13 import std.container.rbtree;
14 import std.conv;
15 import std.datetime;
16 import std.datetime.stopwatch : StopWatch;
17 import std.exception;
18 import std.file;
19 import std.getopt;
20 import std.math : nextPow2;
21 import std.path;
22 import std.parallelism : totalCPUs;
23 import std.process;
24 import std.random;
25 import std.range;
26 import std.regex;
27 import std.stdio;
28 import std.string;
29 import std.typecons;
30 
31 import splitter;
32 
33 alias Splitter = splitter.Splitter;
34 
35 // Issue 314 workarounds
36 alias std..string.join join;
37 alias std..string.startsWith startsWith;
38 
39 string dir, resultDir, tester, globalCache;
40 string dirSuffix(string suffix) { return (dir.absolutePath().buildNormalizedPath() ~ "." ~ suffix).relativePath(); }
41 
42 size_t maxBreadth;
43 size_t origDescendants;
44 int tests, maxSteps = -1; bool foundAnything;
45 bool noSave, trace, noRedirect, doDump, whiteout;
46 string strategy = "inbreadth";
47 
48 struct Times { StopWatch total, load, testSave, resultSave, apply, lookaheadApply, lookaheadWaitThread, lookaheadWaitProcess, test, clean, globalCache, misc; }
49 Times times;
50 static this() { times.total.start(); times.misc.start(); }
51 void measure(string what)(scope void delegate() p)
52 {
53 	times.misc.stop(); mixin("times."~what~".start();");
54 	p();
55 	mixin("times."~what~".stop();"); times.misc.start();
56 }
57 
58 struct Reduction
59 {
60 	enum Type { None, Remove, Unwrap, Concat, ReplaceWord, Swap }
61 	Type type;
62 	Entity root;
63 
64 	// Remove / Unwrap / Concat / Swap
65 	const(Address)* address;
66 	// Swap
67 	const(Address)* address2;
68 
69 	// ReplaceWord
70 	string from, to;
71 	size_t index, total;
72 
73 	string toString()
74 	{
75 		string name = .to!string(type);
76 
77 		final switch (type)
78 		{
79 			case Reduction.Type.None:
80 				return name;
81 			case Reduction.Type.ReplaceWord:
82 				return format(`%s [%d/%d: %s -> %s]`, name, index+1, total, from, to);
83 			case Reduction.Type.Remove:
84 			case Reduction.Type.Unwrap:
85 			case Reduction.Type.Concat:
86 			{
87 				auto address = addressToArr(this.address);
88 				string[] segments = new string[address.length];
89 				Entity e = root;
90 				size_t progress;
91 				bool binary = maxBreadth == 2;
92 				foreach (i, a; address)
93 				{
94 					auto p = a;
95 					foreach (c; e.children[0..a])
96 						if (c.dead)
97 							p--;
98 					segments[i] = binary ? text(p) : format("%d/%d", e.children.length-a, e.children.length);
99 					foreach (c; e.children[0..a])
100 						progress += c.descendants;
101 					progress++; // account for this node
102 					e = e.children[a];
103 				}
104 				progress += e.descendants;
105 				auto progressPM = (origDescendants-progress) * 1000 / origDescendants; // per-mille
106 				return format("[%2d.%d%%] %s [%s]", progressPM/10, progressPM%10, name, segments.join(binary ? "" : ", "));
107 			}
108 			case Reduction.Type.Swap:
109 			{
110 				static string addressToString(Entity root, const(Address)* addressPtr)
111 				{
112 					auto address = addressToArr(findEntityEx(root, addressPtr).address);
113 					string[] segments = new string[address.length];
114 					bool binary = maxBreadth == 2;
115 					Entity e = root;
116 					foreach (i, a; address)
117 					{
118 						auto p = a;
119 						foreach (c; e.children[0..a])
120 							if (c.dead)
121 								p--;
122 						segments[i] = binary ? text(p) : format("%d/%d", e.children.length-a, e.children.length);
123 						e = e.children[a];
124 					}
125 					return segments.join(binary ? "" : ", ");
126 				}
127 				return format("[%s] <-> [%s]",
128 					addressToString(root, address),
129 					addressToString(root, address2));
130 			}
131 		}
132 	}
133 }
134 
135 Address rootAddress;
136 
137 auto nullReduction = Reduction(Reduction.Type.None);
138 
139 struct RemoveRule { Regex!char regexp; string shellGlob; bool remove; }
140 
141 int main(string[] args)
142 {
143 	bool force, dumpHtml, showTimes, stripComments, obfuscate, fuzz, keepLength, showHelp, showVersion, noOptimize, inPlace;
144 	string coverageDir;
145 	RemoveRule[] removeRules;
146 	string[] splitRules;
147 	uint lookaheadCount, tabWidth = 8;
148 
149 	args = args.filter!(
150 		(arg)
151 		{
152 			if (arg.startsWith("-j"))
153 			{
154 				arg = arg[2..$];
155 				lookaheadCount = arg.length ? arg.to!uint : totalCPUs;
156 				return false;
157 			}
158 			return true;
159 		}).array();
160 
161 	getopt(args,
162 		"force", &force,
163 		"reduceonly|reduce-only", (string opt, string value) { removeRules ~= RemoveRule(Regex!char.init, value, true); },
164 		"remove"                , (string opt, string value) { removeRules ~= RemoveRule(regex(value, "mg"), null, true); },
165 		"noremove|no-remove"    , (string opt, string value) { removeRules ~= RemoveRule(regex(value, "mg"), null, false); },
166 		"strip-comments", &stripComments,
167 		"whiteout|white-out", &whiteout,
168 		"coverage", &coverageDir,
169 		"obfuscate", &obfuscate,
170 		"fuzz", &fuzz,
171 		"keep-length", &keepLength,
172 		"strategy", &strategy,
173 		"split", &splitRules,
174 		"dump", &doDump,
175 		"dump-html", &dumpHtml,
176 		"times", &showTimes,
177 		"noredirect|no-redirect", &noRedirect,
178 		"cache", &globalCache, // for research
179 		"trace", &trace, // for debugging
180 		"nosave|no-save", &noSave, // for research
181 		"nooptimize|no-optimize", &noOptimize, // for research
182 		"tab-width", &tabWidth,
183 		"max-steps", &maxSteps, // for research / benchmarking
184 		"i|in-place", &inPlace,
185 		"h|help", &showHelp,
186 		"V|version", &showVersion,
187 	);
188 
189 	if (showVersion)
190 	{
191 		version (Dlang_Tools)
192 			enum source = "dlang/tools";
193 		else
194 		version (Dustmite_CustomSource) // Packaging Dustmite separately for a distribution?
195 			enum source = import("source");
196 		else
197 			enum source = "upstream";
198 		writeln("DustMite build ", __DATE__, " (", source, "), built with ", __VENDOR__, " ", __VERSION__);
199 		if (args.length == 1)
200 			return 0;
201 	}
202 
203 	if (showHelp || args.length == 1 || args.length>3)
204 	{
205 		stderr.writef(q"EOS
206 Usage: %s [OPTION]... PATH TESTER
207 PATH should be a directory containing a clean copy of the file-set to reduce.
208 A file path can also be specified. NAME.EXT will be treated like NAME/NAME.EXT.
209 TESTER should be a shell command which returns 0 for a correct reduction,
210 and anything else otherwise.
211 Supported options:
212   --force            Force reduction of unusual files
213   --reduce-only MASK Only reduce paths glob-matching MASK
214                        (may be used multiple times)
215   --remove REGEXP    Only reduce blocks covered by REGEXP
216                        (may be used multiple times)
217   --no-remove REGEXP Do not reduce blocks containing REGEXP
218                        (may be used multiple times)
219   --strip-comments   Attempt to remove comments from source code
220   --white-out        Replace deleted text with spaces to preserve line numbers
221   --coverage DIR     Load .lst files corresponding to source files from DIR
222   --fuzz             Instead of reducing, fuzz the input by performing random
223                        changes until TESTER returns 0
224   --obfuscate        Instead of reducing, obfuscate the input by replacing
225                        words with random substitutions
226   --keep-length      Preserve word length when obfuscating
227   --split MASK:MODE  Parse and reduce files specified by MASK using the given
228                        splitter. Can be repeated. MODE must be one of:
229                        %-(%s, %)
230   --no-redirect      Don't redirect stdout/stderr streams of test command
231   -j[N]              Use N look-ahead processes (%d by default)
232 EOS", args[0], splitterNames, totalCPUs);
233 
234 		if (!showHelp)
235 		{
236 			stderr.write(q"EOS
237   -h, --help         Show this message and some less interesting options
238 EOS");
239 		}
240 		else
241 		{
242 			stderr.write(q"EOS
243   -h, --help         Show this message
244 Less interesting options:
245   -V, --version      Show program version
246   --strategy STRAT   Set strategy (careful/lookback/pingpong/indepth/inbreadth)
247   --dump             Dump parsed tree to DIR.dump file
248   --dump-html        Dump parsed tree to DIR.html file
249   --times            Display verbose spent time breakdown
250   --cache DIR        Use DIR as persistent disk cache
251                        (in addition to memory cache)
252   --trace            Save all attempted reductions to DIR.trace
253   -i, --in-place     Overwrite input with results
254   --no-save          Disable saving in-progress results
255   --no-optimize      Disable tree optimization step
256                        (may be useful with --dump)
257   --max-steps N      Perform no more than N steps when reducing
258   --tab-width N      How many spaces one tab is equivalent to
259                        (for the "indent" split mode)
260 EOS");
261 		}
262 		stderr.write(q"EOS
263 
264 Full documentation can be found on the GitHub wiki:
265   https://github.com/CyberShadow/DustMite/wiki
266 EOS");
267 		return showHelp ? 0 : 64; // EX_USAGE
268 	}
269 
270 	enforce(!(stripComments && coverageDir), "Sorry, --strip-comments is not compatible with --coverage");
271 
272 	dir = args[1];
273 	if (isDirSeparator(dir[$-1]))
274 		dir = dir[0..$-1];
275 
276 	if (args.length>=3)
277 		tester = args[2];
278 
279 	bool isDotName(string fn) { return fn.startsWith(".") && !(fn=="." || fn==".."); }
280 
281 	bool suspiciousFilesFound;
282 	if (!force && isDir(dir))
283 		foreach (string path; dirEntries(dir, SpanMode.breadth))
284 			if (isDotName(baseName(path)) || isDotName(baseName(dirName(path))) || extension(path)==".o" || extension(path)==".obj" || extension(path)==".exe")
285 			{
286 				stderr.writeln("Warning: Suspicious file found: ", path);
287 				suspiciousFilesFound = true;
288 			}
289 	if (suspiciousFilesFound)
290 		stderr.writeln("You should use a clean copy of the source tree.\nIf it was your intention to include this file in the file-set to be reduced,\nyou can use --force to silence this message.");
291 
292 	ParseRule parseSplitRule(string rule)
293 	{
294 		auto p = rule.lastIndexOf(':');
295 		enforce(p > 0, "Invalid parse rule: " ~ rule);
296 		auto pattern = rule[0..p];
297 		auto splitterName = rule[p+1..$];
298 		auto splitterIndex = splitterNames.countUntil(splitterName);
299 		enforce(splitterIndex >= 0, "Unknown splitter: " ~ splitterName);
300 		return ParseRule(pattern, cast(Splitter)splitterIndex);
301 	}
302 
303 	Entity root;
304 
305 	ParseOptions parseOptions;
306 	parseOptions.stripComments = stripComments;
307 	parseOptions.mode = obfuscate ? ParseOptions.Mode.words : ParseOptions.Mode.source;
308 	parseOptions.rules = splitRules.map!parseSplitRule().array();
309 	parseOptions.tabWidth = tabWidth;
310 	measure!"load"({root = loadFiles(dir, parseOptions);});
311 	enforce(root.children.length, "No files in specified directory");
312 
313 	applyNoRemoveMagic(root);
314 	applyNoRemoveRules(root, removeRules);
315 	applyNoRemoveDeps(root);
316 	if (coverageDir)
317 		loadCoverage(root, coverageDir);
318 	if (!obfuscate && !noOptimize)
319 		optimize(root);
320 	maxBreadth = getMaxBreadth(root);
321 	assignID(root);
322 	convertRefs(root);
323 	recalculate(root);
324 	resetProgress(root);
325 
326 	if (doDump)
327 		dumpSet(root, dirSuffix("dump"));
328 	if (dumpHtml)
329 		dumpToHtml(root, dirSuffix("html"));
330 
331 	if (tester is null)
332 	{
333 		writeln("No tester specified, exiting");
334 		return 0;
335 	}
336 
337 	if (inPlace)
338 		resultDir = dir;
339 	else
340 	{
341 		resultDir = dirSuffix("reduced");
342 		if (resultDir.exists)
343 		{
344 			writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#result-directory-already-exists");
345 			throw new Exception("Result directory already exists");
346 		}
347 	}
348 
349 	if (!fuzz)
350 	{
351 		auto nullResult = test(root, [nullReduction]);
352 		if (!nullResult.success)
353 		{
354 			auto testerFile = dir.buildNormalizedPath(tester);
355 			version (Posix)
356 			{
357 				if (testerFile.exists && (testerFile.getAttributes() & octal!111) == 0)
358 					writeln("Hint: test program seems to be a non-executable file, try: chmod +x " ~ testerFile.escapeShellFileName());
359 			}
360 			if (!testerFile.exists && tester.exists)
361 				writeln("Hint: test program path should be relative to the source directory, try " ~
362 					tester.absolutePath.relativePath(dir.absolutePath).escapeShellFileName() ~
363 					" instead of " ~ tester.escapeShellFileName());
364 			if (!noRedirect)
365 				writeln("Hint: use --no-redirect to see test script output");
366 			writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#initial-test-fails");
367 			throw new Exception("Initial test fails: " ~ nullResult.reason);
368 		}
369 	}
370 
371 	lookaheadProcesses = new Lookahead[lookaheadCount];
372 
373 	foundAnything = false;
374 	string resultAdjective;
375 	if (obfuscate)
376 	{
377 		resultAdjective = "obfuscated";
378 		.obfuscate(root, keepLength);
379 	}
380 	else
381 	if (fuzz)
382 	{
383 		resultAdjective = "fuzzed";
384 		.fuzz(root);
385 	}
386 	else
387 	{
388 		resultAdjective = "reduced";
389 		reduce(root);
390 	}
391 
392 	auto duration = times.total.peek();
393 	duration = dur!"msecs"(duration.total!"msecs"); // truncate anything below ms, users aren't interested in that
394 	if (foundAnything)
395 	{
396 		if (!root.dead)
397 		{
398 			if (noSave)
399 				measure!"resultSave"({safeSave(root, resultDir);});
400 			writefln("Done in %s tests and %s; %s version is in %s", tests, duration, resultAdjective, resultDir);
401 		}
402 		else
403 		{
404 			writeln("Hint: read https://github.com/CyberShadow/DustMite/wiki#reduced-to-empty-set");
405 			writefln("Done in %s tests and %s; %s to empty set", tests, duration, resultAdjective);
406 		}
407 	}
408 	else
409 		writefln("Done in %s tests and %s; no reductions found", tests, duration);
410 
411 	if (showTimes)
412 		foreach (i, t; times.tupleof)
413 			writefln("%s: %s", times.tupleof[i].stringof, times.tupleof[i].peek());
414 
415 	return 0;
416 }
417 
418 size_t getMaxBreadth(Entity e)
419 {
420 	size_t breadth = e.children.length;
421 	foreach (child; e.children)
422 	{
423 		auto childBreadth = getMaxBreadth(child);
424 		if (breadth < childBreadth)
425 			breadth = childBreadth;
426 	}
427 	return breadth;
428 }
429 
430 /// An output range which only allocates a new copy on the first write
431 /// that's different from a given original copy.
432 auto cowRange(E)(E[] arr)
433 {
434 	static struct Range
435 	{
436 		void put(ref E item)
437 		{
438 			if (pos != size_t.max)
439 			{
440 				if (pos == arr.length || item != arr[pos])
441 				{
442 					arr = arr[0 .. pos];
443 					pos = size_t.max;
444 					// continue to append (appending to a slice will copy)
445 				}
446 				else
447 				{
448 					pos++;
449 					return;
450 				}
451 			}
452 
453 			arr ~= item;
454 		}
455 
456 		E[] get() { return pos == size_t.max ? arr : arr[0 .. pos]; }
457 
458 	private:
459 		E[] arr;
460 		size_t pos; // if size_t.max, then the copy has occurred
461 	}
462 	return Range(arr);
463 }
464 
465 /// Update computed fields for dirty nodes
466 void recalculate(Entity root)
467 {
468 	// Pass 1 - length + hash (and some other calculated fields)
469 	{
470 		bool pass1(Entity e, Address *addr)
471 		{
472 			if (e.clean)
473 				return false;
474 
475 			auto allDependents = e.allDependents.cowRange();
476 			e.descendants = 1;
477 			e.hash = e.deadHash = EntityHash.init;
478 			e.contents = e.deadContents = null;
479 
480 			void putString(string s)
481 			{
482 				e.hash.put(s);
483 
484 				// There is a circular dependency here.
485 				// Calculating the whited-out string efficiently
486 				// requires the total length of all children,
487 				// which is conveniently included in the hash.
488 				// However, calculating the hash requires knowing
489 				// the whited-out string that will be written.
490 				// Break the cycle by calculating the hash of the
491 				// redundantly whited-out string explicitly here.
492 				if (whiteout)
493 					foreach (c; s)
494 						e.deadHash.put(c.isWhite ? c : ' ');
495 			}
496 
497 			putString(e.filename);
498 			putString(e.head);
499 
500 			void addDependents(R)(R range, bool fresh)
501 			{
502 				if (e.dead)
503 					return;
504 
505 				auto oldDependents = allDependents.get();
506 			dependentLoop:
507 				foreach (const(Address)* d; range)
508 				{
509 					if (!fresh)
510 					{
511 						d = findEntity(root, d).address;
512 						if (!d)
513 							continue; // Gone
514 					}
515 
516 					if (d.startsWith(addr))
517 						continue; // Internal
518 
519 					// Deduplicate
520 					foreach (o; oldDependents)
521 						if (equal(d, o))
522 							continue dependentLoop;
523 
524 					allDependents.put(d);
525 				}
526 			}
527 			if (e.dead)
528 				assert(!e.dependents);
529 			else
530 			{
531 				// Update dependents' addresses
532 				auto dependents = cowRange(e.dependents);
533 				foreach (d; e.dependents)
534 				{
535 					d.address = findEntity(root, d.address).address;
536 					if (d.address)
537 						dependents.put(d);
538 				}
539 				e.dependents = dependents.get();
540 			}
541 			addDependents(e.dependents.map!(d => d.address), true);
542 
543 			foreach (i, c; e.children)
544 			{
545 				bool fresh = pass1(c, addr.child(i));
546 				e.descendants += c.descendants;
547 				e.hash.put(c.hash);
548 				e.deadHash.put(c.deadHash);
549 				addDependents(c.allDependents, fresh);
550 			}
551 			putString(e.tail);
552 
553 			e.allDependents = allDependents.get();
554 
555 			assert(e.deadHash.length == (whiteout ? e.hash.length : 0));
556 
557 			if (e.dead)
558 			{
559 				e.descendants = 0;
560 				// Switch to the "dead" variant of this subtree's hash at this point.
561 				// This is irreversible (in child nodes).
562 				e.hash = e.deadHash;
563 			}
564 
565 			return true;
566 		}
567 		pass1(root, &rootAddress);
568 	}
569 
570 	// --white-out pass - calculate deadContents
571 	if (whiteout)
572 	{
573 		// At the top-most dirty node, start a contiguous buffer
574 		// which contains the concatenation of all child nodes.
575 
576 		char[] buf;
577 		size_t pos;
578 
579 		void putString(string s)
580 		{
581 			foreach (char c; s)
582 			{
583 				if (!isWhite(c))
584 					c = ' ';
585 				buf[pos++] = c;
586 			}
587 		}
588 		void putWhite(string s)
589 		{
590 			foreach (c; s)
591 				buf[pos++] = c;
592 		}
593 
594 		// This needs to run in a second pass because we
595 		// need to know the total length of nodes first.
596 		void passWO(Entity e, bool inFile)
597 		{
598 			if (e.clean)
599 			{
600 				if (buf)
601 					putWhite(e.deadContents);
602 				return;
603 			}
604 
605 			inFile |= e.isFile;
606 
607 			assert(e.hash.length == e.deadHash.length);
608 
609 			bool bufStarted;
610 			// We start a buffer even when outside of a file,
611 			// for efficiency (use one buffer across many files).
612 			if (!buf && e.deadHash.length)
613 			{
614 				buf = new char[e.deadHash.length];
615 				pos = 0;
616 				bufStarted = true;
617 			}
618 
619 			auto start = pos;
620 
621 			putString(e.filename);
622 			putString(e.head);
623 			foreach (c; e.children)
624 				passWO(c, inFile);
625 			putString(e.tail);
626 			assert(e.deadHash.length == e.hash.length);
627 
628 			e.deadContents = cast(string)buf[start .. pos];
629 
630 			if (bufStarted)
631 			{
632 				assert(pos == buf.length);
633 				buf = null;
634 				pos = 0;
635 			}
636 
637 			if (inFile)
638 				e.contents = e.deadContents;
639 		}
640 		passWO(root, false);
641 	}
642 
643 	{
644 		void passFinal(Entity e)
645 		{
646 			if (e.clean)
647 				return;
648 
649 			foreach (c; e.children)
650 				passFinal(c);
651 
652 			e.clean = true;
653 		}
654 		passFinal(root);
655 	}
656 }
657 
658 size_t checkDescendants(Entity e)
659 {
660 	if (e.dead)
661 		return 0;
662 	size_t n = e.dead ? 0 : 1;
663 	foreach (c; e.children)
664 		n += checkDescendants(c);
665 	assert(e.descendants == n, "Wrong descendant count: expected %d, found %d".format(e.descendants, n));
666 	return n;
667 }
668 
669 bool addressDead(Entity root, size_t[] address) // TODO: this function shouldn't exist
670 {
671 	if (root.dead)
672 		return true;
673 	if (!address.length)
674 		return false;
675 	return addressDead(root.children[address[0]], address[1..$]);
676 }
677 
678 
679 struct ReductionIterator
680 {
681 	Strategy strategy;
682 
683 	bool done = false;
684 
685 	Reduction.Type type = Reduction.Type.None;
686 	Entity e;
687 
688 	this(Strategy strategy)
689 	{
690 		this.strategy = strategy;
691 		next(false);
692 	}
693 
694 	this(this)
695 	{
696 		strategy = strategy.dup;
697 	}
698 
699 	@property ref Entity root() { return strategy.root; }
700 	@property Reduction front() { return Reduction(type, root, strategy.front.addressFromArr); }
701 
702 	void nextEntity(bool success) /// Iterate strategy until the next non-dead node
703 	{
704 		strategy.next(success);
705 		while (!strategy.done && root.addressDead(strategy.front))
706 			strategy.next(false);
707 	}
708 
709 	void reset()
710 	{
711 		strategy.reset();
712 		type = Reduction.Type.None;
713 	}
714 
715 	void next(bool success)
716 	{
717 		if (success && type == Reduction.Type.Concat)
718 			reset(); // Significant changes across the tree
719 
720 		while (true)
721 		{
722 			final switch (type)
723 			{
724 				case Reduction.Type.None:
725 					if (strategy.done)
726 					{
727 						done = true;
728 						return;
729 					}
730 
731 					e = root.entityAt(strategy.front);
732 
733 					if (e.noRemove)
734 					{
735 						nextEntity(false);
736 						continue;
737 					}
738 
739 					if (e is root && !root.children.length)
740 					{
741 						nextEntity(false);
742 						continue;
743 					}
744 
745 					// Try next reduction type
746 					type = Reduction.Type.Remove;
747 					return;
748 
749 				case Reduction.Type.Remove:
750 					if (success)
751 					{
752 						// Next node
753 						type = Reduction.Type.None;
754 						nextEntity(true);
755 						continue;
756 					}
757 
758 					// Try next reduction type
759 					type = Reduction.Type.Unwrap;
760 
761 					if (e.head.length && e.tail.length)
762 						return; // Try this
763 					else
764 					{
765 						success = false; // Skip
766 						continue;
767 					}
768 
769 				case Reduction.Type.Unwrap:
770 					if (success)
771 					{
772 						// Next node
773 						type = Reduction.Type.None;
774 						nextEntity(true);
775 						continue;
776 					}
777 
778 					// Try next reduction type
779 					type = Reduction.Type.Concat;
780 
781 					if (e.isFile)
782 						return; // Try this
783 					else
784 					{
785 						success = false; // Skip
786 						continue;
787 					}
788 
789 				case Reduction.Type.Concat:
790 					// Next node
791 					type = Reduction.Type.None;
792 					nextEntity(success);
793 					continue;
794 
795 				case Reduction.Type.ReplaceWord:
796 				case Reduction.Type.Swap:
797 					assert(false);
798 			}
799 		}
800 	}
801 }
802 
803 void resetProgress(Entity root)
804 {
805 	origDescendants = root.descendants;
806 }
807 
808 abstract class Strategy
809 {
810 	Entity root;
811 	uint progressGeneration = 0;
812 	bool done = false;
813 
814 	void copy(Strategy result) const
815 	{
816 		result.root = cast()root;
817 		result.progressGeneration = this.progressGeneration;
818 		result.done = this.done;
819 	}
820 
821 	abstract @property size_t[] front();
822 	abstract void next(bool success);
823 	abstract void reset(); /// Invoked by ReductionIterator for significant tree changes
824 	int getIteration() { return -1; }
825 	int getDepth() { return -1; }
826 
827 	final Strategy dup()
828 	{
829 		auto result = cast(Strategy)this.classinfo.create();
830 		copy(result);
831 		return result;
832 	}
833 }
834 
835 class SimpleStrategy : Strategy
836 {
837 	size_t[] address;
838 
839 	override void copy(Strategy target) const
840 	{
841 		super.copy(target);
842 		auto result = cast(SimpleStrategy)target;
843 		result.address = this.address.dup;
844 	}
845 
846 	override @property size_t[] front()
847 	{
848 		assert(!done, "Done");
849 		return address;
850 	}
851 
852 	override void next(bool success)
853 	{
854 		assert(!done, "Done");
855 	}
856 }
857 
858 class IterativeStrategy : SimpleStrategy
859 {
860 	int iteration = 0;
861 	bool iterationChanged;
862 
863 	override int getIteration() { return iteration; }
864 
865 	override void copy(Strategy target) const
866 	{
867 		super.copy(target);
868 		auto result = cast(IterativeStrategy)target;
869 		result.iteration = this.iteration;
870 		result.iterationChanged = this.iterationChanged;
871 	}
872 
873 	override void next(bool success)
874 	{
875 		super.next(success);
876 		iterationChanged |= success;
877 	}
878 
879 	void nextIteration()
880 	{
881 		assert(iterationChanged, "Starting new iteration after no changes");
882 		reset();
883 	}
884 
885 	override void reset()
886 	{
887 		iteration++;
888 		iterationChanged = false;
889 		address = null;
890 		progressGeneration++;
891 	}
892 }
893 
894 /// Find the first address at the depth of address.length,
895 /// and populate address[] accordingly.
896 /// Return false if no address at that level could be found.
897 bool findAddressAtLevel(size_t[] address, Entity root)
898 {
899 	if (root.dead)
900 		return false;
901 	if (!address.length)
902 		return true;
903 	foreach_reverse (i, child; root.children)
904 	{
905 		if (findAddressAtLevel(address[1..$], child))
906 		{
907 			address[0] = i;
908 			return true;
909 		}
910 	}
911 	return false;
912 }
913 
914 /// Find the next address at the depth of address.length,
915 /// and update address[] accordingly.
916 /// Return false if no more addresses at that level could be found.
917 bool nextAddressInLevel(size_t[] address, Entity root)
918 {
919 	if (!address.length || root.dead)
920 		return false;
921 	if (nextAddressInLevel(address[1..$], root.children[address[0]]))
922 		return true;
923 	if (!address[0])
924 		return false;
925 
926 	foreach_reverse (i; 0..address[0])
927 	{
928 		if (findAddressAtLevel(address[1..$], root.children[i]))
929 		{
930 			address[0] = i;
931 			return true;
932 		}
933 	}
934 	return false;
935 }
936 
937 /// Find the next address, starting from the given one
938 /// (going depth-first). Update address accordingly.
939 /// If descend is false, then skip addresses under the given one.
940 /// Return false if no more addresses could be found.
941 bool nextAddress(ref size_t[] address, Entity root, bool descend)
942 {
943 	if (root.dead)
944 		return false;
945 
946 	if (!address.length)
947 	{
948 		if (descend && root.children.length)
949 		{
950 			address ~= [root.children.length-1];
951 			return true;
952 		}
953 		return false;
954 	}
955 
956 	auto cdr = address[1..$];
957 	if (nextAddress(cdr, root.children[address[0]], descend))
958 	{
959 		address = address[0] ~ cdr;
960 		return true;
961 	}
962 
963 	if (address[0])
964 	{
965 		address = [address[0] - 1];
966 		return true;
967 	}
968 
969 	return false;
970 }
971 
972 class LevelStrategy : IterativeStrategy
973 {
974 	bool levelChanged;
975 	bool invalid;
976 
977 	override int getDepth() { return cast(int)address.length; }
978 
979 	override void copy(Strategy target) const
980 	{
981 		super.copy(target);
982 		auto result = cast(LevelStrategy)target;
983 		result.levelChanged = this.levelChanged;
984 		result.invalid = this.invalid;
985 	}
986 
987 	override void next(bool success)
988 	{
989 		super.next(success);
990 		levelChanged |= success;
991 	}
992 
993 	override void nextIteration()
994 	{
995 		super.nextIteration();
996 		invalid = false;
997 		levelChanged = false;
998 	}
999 
1000 	final bool nextInLevel()
1001 	{
1002 		assert(!invalid, "Choose a level!");
1003 		if (nextAddressInLevel(address, root))
1004 			return true;
1005 		else
1006 		{
1007 			invalid = true;
1008 			return false;
1009 		}
1010 	}
1011 
1012 	final @property size_t currentLevel() const { return address.length; }
1013 
1014 	final bool setLevel(size_t level)
1015 	{
1016 		address.length = level;
1017 		if (findAddressAtLevel(address, root))
1018 		{
1019 			invalid = false;
1020 			levelChanged = false;
1021 			progressGeneration++;
1022 			return true;
1023 		}
1024 		else
1025 			return false;
1026 	}
1027 }
1028 
1029 /// Keep going deeper until we find a successful reduction.
1030 /// When found, finish tests at current depth and restart from top depth (new iteration).
1031 /// If we reach the bottom (depth with no nodes on it), we're done.
1032 final class CarefulStrategy : LevelStrategy
1033 {
1034 	override void next(bool success)
1035 	{
1036 		super.next(success);
1037 
1038 		if (!nextInLevel())
1039 		{
1040 			// End of level
1041 			if (levelChanged)
1042 			{
1043 				nextIteration();
1044 			}
1045 			else
1046 			if (!setLevel(currentLevel + 1))
1047 			{
1048 				if (iterationChanged)
1049 					nextIteration();
1050 				else
1051 					done = true;
1052 			}
1053 		}
1054 	}
1055 }
1056 
1057 /// Keep going deeper until we find a successful reduction.
1058 /// When found, go up a depth level.
1059 /// Keep going up while we find new reductions. Repeat topmost depth level as necessary.
1060 /// Once no new reductions are found at higher depths, jump to the next unvisited depth in this iteration.
1061 /// If we reach the bottom (depth with no nodes on it), start a new iteration.
1062 /// If we finish an iteration without finding any reductions, we're done.
1063 final class LookbackStrategy : LevelStrategy
1064 {
1065 	size_t maxLevel = 0;
1066 
1067 	override void copy(Strategy target) const
1068 	{
1069 		super.copy(target);
1070 		auto result = cast(LookbackStrategy)target;
1071 		result.maxLevel = this.maxLevel;
1072 	}
1073 
1074 	override void nextIteration()
1075 	{
1076 		super.nextIteration();
1077 		maxLevel = 0;
1078 	}
1079 
1080 	override void next(bool success)
1081 	{
1082 		super.next(success);
1083 
1084 		if (!nextInLevel())
1085 		{
1086 			// End of level
1087 			if (levelChanged)
1088 			{
1089 				setLevel(currentLevel ? currentLevel - 1 : 0);
1090 			}
1091 			else
1092 			if (setLevel(maxLevel + 1))
1093 			{
1094 				maxLevel = currentLevel;
1095 			}
1096 			else
1097 			{
1098 				if (iterationChanged)
1099 					nextIteration();
1100 				else
1101 					done = true;
1102 			}
1103 		}
1104 	}
1105 }
1106 
1107 /// Keep going deeper until we find a successful reduction.
1108 /// When found, go up a depth level.
1109 /// Keep going up while we find new reductions. Repeat topmost depth level as necessary.
1110 /// Once no new reductions are found at higher depths, start going downwards again.
1111 /// If we reach the bottom (depth with no nodes on it), start a new iteration.
1112 /// If we finish an iteration without finding any reductions, we're done.
1113 final class PingPongStrategy : LevelStrategy
1114 {
1115 	override void next(bool success)
1116 	{
1117 		super.next(success);
1118 
1119 		if (!nextInLevel())
1120 		{
1121 			// End of level
1122 			if (levelChanged)
1123 			{
1124 				setLevel(currentLevel ? currentLevel - 1 : 0);
1125 			}
1126 			else
1127 			if (!setLevel(currentLevel + 1))
1128 			{
1129 				if (iterationChanged)
1130 					nextIteration();
1131 				else
1132 					done = true;
1133 			}
1134 		}
1135 	}
1136 }
1137 
1138 /// Keep going deeper.
1139 /// If we reach the bottom (depth with no nodes on it), start a new iteration.
1140 /// If we finish an iteration without finding any reductions, we're done.
1141 final class InBreadthStrategy : LevelStrategy
1142 {
1143 	override void next(bool success)
1144 	{
1145 		super.next(success);
1146 
1147 		if (!nextInLevel())
1148 		{
1149 			// End of level
1150 			if (!setLevel(currentLevel + 1))
1151 			{
1152 				if (iterationChanged)
1153 					nextIteration();
1154 				else
1155 					done = true;
1156 			}
1157 		}
1158 	}
1159 }
1160 
1161 /// Look at every entity in the tree.
1162 /// If we can reduce this entity, continue looking at its siblings.
1163 /// Otherwise, recurse and look at its children.
1164 /// End an iteration once we looked at an entire tree.
1165 /// If we finish an iteration without finding any reductions, we're done.
1166 final class InDepthStrategy : IterativeStrategy
1167 {
1168 	final bool nextAddress(bool descend)
1169 	{
1170 		return .nextAddress(address, root, descend);
1171 	}
1172 
1173 	override void next(bool success)
1174 	{
1175 		super.next(success);
1176 
1177 		if (!nextAddress(!success))
1178 		{
1179 			if (iterationChanged)
1180 				nextIteration();
1181 			else
1182 				done = true;
1183 		}
1184 	}
1185 }
1186 
1187 ReductionIterator iter;
1188 
1189 void reduceByStrategy(Strategy strategy)
1190 {
1191 	int lastIteration = -1;
1192 	int lastDepth = -1;
1193 	int lastProgressGeneration = -1;
1194 	int steps = 0;
1195 
1196 	iter = ReductionIterator(strategy);
1197 
1198 	while (!iter.done)
1199 	{
1200 		if (maxSteps >= 0 && steps++ == maxSteps)
1201 			return;
1202 
1203 		if (lastIteration != strategy.getIteration())
1204 		{
1205 			writefln("############### ITERATION %d ################", strategy.getIteration());
1206 			lastIteration = strategy.getIteration();
1207 		}
1208 		if (lastDepth != strategy.getDepth())
1209 		{
1210 			writefln("============= Depth %d =============", strategy.getDepth());
1211 			lastDepth = strategy.getDepth();
1212 		}
1213 		if (lastProgressGeneration != strategy.progressGeneration)
1214 		{
1215 			resetProgress(iter.root);
1216 			lastProgressGeneration = strategy.progressGeneration;
1217 		}
1218 
1219 		auto result = tryReduction(iter.root, iter.front);
1220 
1221 		iter.next(result);
1222 		predictor.put(result);
1223 	}
1224 }
1225 
1226 Strategy createStrategy(string name)
1227 {
1228 	switch (name)
1229 	{
1230 		case "careful":
1231 			return new CarefulStrategy();
1232 		case "lookback":
1233 			return new LookbackStrategy();
1234 		case "pingpong":
1235 			return new PingPongStrategy();
1236 		case "indepth":
1237 			return new InDepthStrategy();
1238 		case "inbreadth":
1239 			return new InBreadthStrategy();
1240 		default:
1241 			throw new Exception("Unknown strategy");
1242 	}
1243 }
1244 
1245 void reduce(ref Entity root)
1246 {
1247 	auto strategy = createStrategy(.strategy);
1248 	strategy.root = root;
1249 	reduceByStrategy(strategy);
1250 	root = strategy.root;
1251 }
1252 
1253 Mt19937 rng;
1254 
1255 void obfuscate(ref Entity root, bool keepLength)
1256 {
1257 	bool[string] wordSet;
1258 	string[] words; // preserve file order
1259 
1260 	foreach (f; root.children)
1261 	{
1262 		foreach (entity; parseToWords(f.filename) ~ f.children)
1263 			if (entity.head.length && !isDigit(entity.head[0]))
1264 				if (entity.head !in wordSet)
1265 				{
1266 					wordSet[entity.head] = true;
1267 					words ~= entity.head;
1268 				}
1269 	}
1270 
1271 	string idgen(size_t length)
1272 	{
1273 		static const first = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; // use caps to avoid collisions with reserved keywords
1274 		static const other = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
1275 
1276 		if (keepLength)
1277 		{
1278 			auto result = new char[length];
1279 			foreach (i, ref c; result)
1280 				c = (i==0 ? first : other)[uniform(0, cast(uint)$, rng)];
1281 
1282 			return assumeUnique(result);
1283 		}
1284 		else
1285 		{
1286 			static int n;
1287 			int index = n++;
1288 
1289 			string result;
1290 			result ~= first[index % $];
1291 			index /= first.length;
1292 
1293 			while (index)
1294 				result ~= other[index % $],
1295 				index /= other.length;
1296 
1297 			return result;
1298 		}
1299 	}
1300 
1301 	auto r = Reduction(Reduction.Type.ReplaceWord);
1302 	r.total = words.length;
1303 	foreach (i, word; words)
1304 	{
1305 		r.index = i;
1306 		r.from = word;
1307 		int tries = 0;
1308 		do
1309 			r.to = idgen(word.length);
1310 		while (r.to in wordSet && tries++ < 10);
1311 		wordSet[r.to] = true;
1312 
1313 		tryReduction(root, r);
1314 	}
1315 }
1316 
1317 void fuzz(ref Entity root)
1318 {
1319 	debug {} else rng.seed(unpredictableSeed);
1320 
1321 	Address*[] allAddresses;
1322 	void collectAddresses(Entity e, Address* addr)
1323 	{
1324 		allAddresses ~= addr;
1325 		foreach (i, c; e.children)
1326 			collectAddresses(c, addr.child(i));
1327 	}
1328 	collectAddresses(root, &rootAddress);
1329 
1330 	while (true)
1331 	{
1332 		import std.math : log2;
1333 		auto newRoot = root;
1334 		auto numReductions = uniform(1, cast(int)log2(allAddresses.length), rng);
1335 		Reduction[] reductions;
1336 		foreach (n; 0 .. numReductions)
1337 		{
1338 			static immutable Reduction.Type[] reductionTypes = [Reduction.Type.Swap];
1339 			auto r = Reduction(reductionTypes[uniform(0, $, rng)], newRoot);
1340 			switch (r.type)
1341 			{
1342 				case Reduction.Type.Swap:
1343 					r.address  = findEntity(newRoot, allAddresses[uniform(0, $, rng)]).address;
1344 					r.address2 = findEntity(newRoot, allAddresses[uniform(0, $, rng)]).address;
1345 					if (r.address.startsWith(r.address2) ||
1346 						r.address2.startsWith(r.address))
1347 						continue;
1348 					break;
1349 				default:
1350 					assert(false);
1351 			}
1352 			newRoot = applyReduction(newRoot, r);
1353 			reductions ~= r;
1354 		}
1355 		if (newRoot is root)
1356 			continue;
1357 
1358 		auto result = test(newRoot, reductions);
1359 		if (result.success)
1360 		{
1361 			foundAnything = true;
1362 			root = newRoot;
1363 			saveResult(root);
1364 			return;
1365 		}
1366 	}
1367 }
1368 
1369 void dump(Writer)(Entity root, Writer writer)
1370 {
1371 	void dumpEntity(bool inFile)(Entity e)
1372 	{
1373 		if (e.dead)
1374 		{
1375 			if (inFile && e.contents.length)
1376 				writer.handleText(e.contents[e.filename.length .. $]);
1377 		}
1378 		else
1379 		if (!inFile && e.isFile)
1380 		{
1381 			writer.handleFile(e.filename);
1382 			foreach (c; e.children)
1383 				dumpEntity!true(c);
1384 		}
1385 		else
1386 		{
1387 			if (inFile && e.head.length) writer.handleText(e.head);
1388 			foreach (c; e.children)
1389 				dumpEntity!inFile(c);
1390 			if (inFile && e.tail.length) writer.handleText(e.tail);
1391 		}
1392 	}
1393 
1394 	dumpEntity!false(root);
1395 }
1396 
1397 static struct FastWriter(Next) /// Accelerates Writer interface by bulking contiguous strings
1398 {
1399 	Next next;
1400 	immutable(char)* start, end;
1401 	void finish()
1402 	{
1403 		if (start != end)
1404 			next.handleText(start[0 .. end - start]);
1405 		start = end = null;
1406 	}
1407 	void handleFile(string s)
1408 	{
1409 		finish();
1410 		next.handleFile(s);
1411 	}
1412 	void handleText(string s)
1413 	{
1414 		if (s.ptr != end)
1415 		{
1416 			finish();
1417 			start = s.ptr;
1418 		}
1419 		end = s.ptr + s.length;
1420 	}
1421 	~this() { finish(); }
1422 }
1423 
1424 void save(Entity root, string savedir)
1425 {
1426 	safeDelete(savedir);
1427 	safeMkdir(savedir);
1428 
1429 	static struct DiskWriter
1430 	{
1431 		string dir;
1432 
1433 		File o;
1434 		typeof(o.lockingBinaryWriter()) binaryWriter;
1435 
1436 		void handleFile(string fn)
1437 		{
1438 			static Appender!(char[]) pathBuf;
1439 			pathBuf.clear();
1440 			pathBuf.put(dir.chainPath(fn));
1441 			auto path = pathBuf.data;
1442 			if (!exists(dirName(path)))
1443 				safeMkdir(dirName(path));
1444 
1445 			o.open(cast(string)path, "wb");
1446 			binaryWriter = o.lockingBinaryWriter;
1447 		}
1448 
1449 		void handleText(string s)
1450 		{
1451 			binaryWriter.put(s);
1452 		}
1453 	}
1454 	FastWriter!DiskWriter writer;
1455 	writer.next.dir = savedir;
1456 	dump(root, &writer);
1457 	writer.finish();
1458 }
1459 
1460 Entity entityAt(Entity root, size_t[] address) // TODO: replace uses with findEntity and remove
1461 {
1462 	Entity e = root;
1463 	foreach (a; address)
1464 		e = e.children[a];
1465 	return e;
1466 }
1467 
1468 Address* addressFromArr(size_t[] address) // TODO: replace uses with findEntity and remove
1469 {
1470 	Address* a = &rootAddress;
1471 	foreach (index; address)
1472 		a = a.child(index);
1473 	return a;
1474 }
1475 
1476 size_t[] addressToArr(const(Address)* address)
1477 {
1478 	auto result = new size_t[address.depth];
1479 	while (address.parent)
1480 	{
1481 		result[address.depth - 1] = address.index;
1482 		address = address.parent;
1483 	}
1484 	return result;
1485 }
1486 
1487 /// Return true if these two addresses are the same
1488 /// (they point to the same node).
1489 bool equal(const(Address)* a, const(Address)* b)
1490 {
1491 	if (a is b)
1492 		return true;
1493 	if (a.depth != b.depth)
1494 		return false;
1495 	if (a.index != b.index)
1496 		return false;
1497 	assert(a.parent && b.parent); // If we are at the root node, then the address check should have passed
1498 	return equal(a.parent, b.parent);
1499 }
1500 alias equal = std.algorithm.comparison.equal;
1501 
1502 /// Returns true if the `haystack` address starts with the `needle` address,
1503 /// i.e. the entity that needle points at is a child of the entity that haystack points at.
1504 bool startsWith(const(Address)* haystack, const(Address)* needle)
1505 {
1506 	if (haystack.depth < needle.depth)
1507 		return false;
1508 	while (haystack.depth > needle.depth)
1509 		haystack = haystack.parent;
1510 	return equal(haystack, needle);
1511 }
1512 
1513 /// Try specified reduction. If it succeeds, apply it permanently and save intermediate result.
1514 bool tryReduction(ref Entity root, Reduction r)
1515 {
1516 	Entity newRoot;
1517 	measure!"apply"({ newRoot = root.applyReduction(r); });
1518 	if (newRoot is root)
1519 	{
1520 		assert(r.type != Reduction.Type.None);
1521 		writeln(r, " => N/A");
1522 		return false;
1523 	}
1524 	if (test(newRoot, [r]).success)
1525 	{
1526 		foundAnything = true;
1527 		root = newRoot;
1528 		saveResult(root);
1529 		return true;
1530 	}
1531 	return false;
1532 }
1533 
1534 /// Apply a reduction to this tree, and return the resulting tree.
1535 /// The original tree remains unchanged.
1536 /// Copies only modified parts of the tree, and whatever references them.
1537 Entity applyReductionImpl(Entity origRoot, ref Reduction r)
1538 {
1539 	Entity root = origRoot;
1540 
1541 	debug static ubyte[] treeBytes(Entity e) { return (cast(ubyte*)e)[0 .. __traits(classInstanceSize, Entity)] ~ cast(ubyte[])e.children ~ e.children.map!treeBytes.join; }
1542 	debug auto origBytes = treeBytes(origRoot);
1543 	scope(exit) debug assert(origBytes == treeBytes(origRoot), "Original tree was changed!");
1544 	scope(success) debug if (root !is origRoot) assert(treeBytes(root) != origBytes, "Tree was unchanged");
1545 
1546 	debug void checkClean(Entity e) { assert(e.clean, "Found dirty node before/after reduction"); foreach (c; e.children) checkClean(c); }
1547 	debug checkClean(root);
1548 	scope(success) debug checkClean(root);
1549 
1550 	static struct EditResult
1551 	{
1552 		Entity entity; /// Entity at address. Never null.
1553 		bool dead;     /// Entity or one of its parents is dead.
1554 	}
1555 	EditResult editImpl(const(Address)* addr)
1556 	{
1557 		Entity* pEntity;
1558 		bool dead;
1559 		if (addr.parent)
1560 		{
1561 			auto result = editImpl(addr.parent);
1562 			auto parent = result.entity;
1563 			pEntity = &parent.children[addr.index];
1564 			dead = result.dead;
1565 		}
1566 		else
1567 		{
1568 			pEntity = &root;
1569 			dead = false;
1570 		}
1571 
1572 		auto oldEntity = *pEntity;
1573 
1574 		if (oldEntity.redirect)
1575 		{
1576 			assert(oldEntity.dead);
1577 			return editImpl(oldEntity.redirect);
1578 		}
1579 
1580 		dead |= oldEntity.dead;
1581 
1582 		// We can avoid copying the entity if it (or a parent) is dead, because edit()
1583 		// will not allow such an entity to be returned back to applyReduction.
1584 		if (!oldEntity.clean || dead)
1585 			return EditResult(oldEntity, dead);
1586 
1587 		auto newEntity = oldEntity.dup();
1588 		newEntity.clean = false;
1589 		*pEntity = newEntity;
1590 
1591 		return EditResult(newEntity, dead);
1592 	}
1593 	Entity edit(const(Address)* addr) /// Returns a writable copy of the entity at the given Address
1594 	{
1595 		auto r = editImpl(addr);
1596 		return r.dead ? null : r.entity;
1597 	}
1598 
1599 	final switch (r.type)
1600 	{
1601 		case Reduction.Type.None:
1602 			break;
1603 
1604 		case Reduction.Type.ReplaceWord:
1605 			foreach (i; 0 .. root.children.length)
1606 			{
1607 				auto fa = rootAddress.children[i];
1608 				auto f = edit(fa);
1609 				f.filename = applyReductionToPath(f.filename, r);
1610 				foreach (j, const word; f.children)
1611 					if (word.head == r.from)
1612 						edit(fa.children[j]).head = r.to;
1613 			}
1614 			break;
1615 		case Reduction.Type.Remove:
1616 		{
1617 			assert(!findEntity(root, r.address).entity.dead, "Trying to remove a tombstone");
1618 			void remove(const(Address)* address)
1619 			{
1620 				auto n = edit(address);
1621 				if (!n)
1622 					return; // This dependency was removed by something else
1623 				n.dead = true; // Mark as dead early, so that we don't waste time processing dependencies under this node
1624 				foreach (dep; n.allDependents)
1625 					remove(dep);
1626 				n.kill(); // Convert to tombstone
1627 			}
1628 			remove(r.address);
1629 			break;
1630 		}
1631 		case Reduction.Type.Unwrap:
1632 		{
1633 			assert(!findEntity(root, r.address).entity.dead, "Trying to unwrap a tombstone");
1634 			bool changed;
1635 			with (edit(r.address))
1636 				foreach (value; [&head, &tail])
1637 				{
1638 					string newValue = whiteout
1639 						? cast(string)((*value).representation.map!(c => isWhite(c) ? char(c) : ' ').array)
1640 						: null;
1641 					changed |= *value != newValue;
1642 					*value = newValue;
1643 				}
1644 			if (!changed)
1645 				root = origRoot;
1646 			break;
1647 		}
1648 		case Reduction.Type.Concat:
1649 		{
1650 			// Move all nodes from all files to a single file (the target).
1651 			// Leave behind redirects.
1652 
1653 			size_t numFiles;
1654 			Entity[] allData;
1655 			Entity[Entity] tombstones; // Map from moved entity to its tombstone
1656 
1657 			// Collect the nodes to move, and leave behind tombstones.
1658 
1659 			void collect(Entity e, Address* addr)
1660 			{
1661 				if (e.dead)
1662 					return;
1663 				if (e.isFile)
1664 				{
1665 					// Skip noRemove files, except when they are the target
1666 					// (in which case they will keep their contents after the reduction).
1667 					if (e.noRemove && !equal(addr, r.address))
1668 						return;
1669 
1670 					if (!e.children.canFind!(c => !c.dead))
1671 						return; // File is empty (already concat'd?)
1672 
1673 					numFiles++;
1674 					allData ~= e.children;
1675 					auto f = edit(addr);
1676 					f.children = new Entity[e.children.length];
1677 					foreach (i; 0 .. e.children.length)
1678 					{
1679 						auto tombstone = new Entity;
1680 						tombstone.kill();
1681 						f.children[i] = tombstone;
1682 						tombstones[e.children[i]] = tombstone;
1683 					}
1684 				}
1685 				else
1686 					foreach (i, c; e.children)
1687 						collect(c, addr.child(i));
1688 			}
1689 
1690 			collect(root, &rootAddress);
1691 
1692 			// Fail the reduction if there are less than two files to concatenate.
1693 			if (numFiles < 2)
1694 			{
1695 				root = origRoot;
1696 				break;
1697 			}
1698 
1699 			auto n = edit(r.address);
1700 
1701 			auto temp = new Entity;
1702 			temp.children = allData;
1703 			temp.optimizeUntil!((Entity e) => e in tombstones);
1704 
1705 			// The optimize function rearranges nodes in a tree,
1706 			// so we need to do a recursive scan to find their new location.
1707 			void makeRedirects(Address* address, Entity e)
1708 			{
1709 				if (auto p = e in tombstones)
1710 					p.redirect = address; // Patch the tombstone to point to the node's new location.
1711 				else
1712 				{
1713 					assert(!e.clean); // This node was created by optimize(), it can't be clean
1714 					foreach (i, child; e.children)
1715 						makeRedirects(address.child(i), child);
1716 				}
1717 			}
1718 			foreach (i, child; temp.children)
1719 				makeRedirects(r.address.child(n.children.length + i), child);
1720 
1721 			n.children ~= temp.children;
1722 
1723 			break;
1724 		}
1725 		case Reduction.Type.Swap:
1726 		{
1727 			// Cannot swap child and parent.
1728 			assert(
1729 				!r.address.startsWith(r.address2) &&
1730 				!r.address2.startsWith(r.address),
1731 				"Invalid swap");
1732 			// Corollary: neither address may be the root address.
1733 
1734 			// Cannot swap siblings (special case).
1735 			if (equal(r.address.parent, r.address2.parent))
1736 				break;
1737 
1738 			static struct SwapSite
1739 			{
1740 				Entity source;       /// Entity currently at this site's address
1741 				Entity* target;      /// Where to place the other site's entity
1742 				Address* newAddress; /// Address of target
1743 				Entity tombstone;    /// Redirect to the other site's swap target
1744 			}
1745 
1746 			SwapSite prepareSwap(const(Address)* address)
1747 			{
1748 				auto p = edit(address.parent);
1749 				assert(address.index < p.children.length);
1750 
1751 				SwapSite result;
1752 				// Duplicate children.
1753 				// Replace the first half with redirects to the second half.
1754 				// The second half is the same as the old children,
1755 				// except with the target node replaced with the swap target.
1756 				auto children = new Entity[p.children.length * 2];
1757 				// First half:
1758 				foreach (i; 0 .. p.children.length)
1759 				{
1760 					auto tombstone = new Entity;
1761 					tombstone.kill();
1762 					if (i == address.index)
1763 						result.tombstone = tombstone;
1764 					else
1765 						tombstone.redirect = address.parent.child(p.children.length + i);
1766 					children[i] = tombstone;
1767 				}
1768 				// Second half:
1769 				foreach (i; 0 .. p.children.length)
1770 				{
1771 					if (i == address.index)
1772 					{
1773 						result.source = p.children[i];
1774 						result.target = &children[p.children.length + i];
1775 						result.newAddress = address.parent.child(p.children.length + i);
1776 					}
1777 					else
1778 						children[p.children.length + i] = p.children[i];
1779 				}
1780 				p.children = children;
1781 				return result;
1782 			}
1783 
1784 			auto site1 = prepareSwap(r.address);
1785 			auto site2 = prepareSwap(r.address2);
1786 
1787 			void finalizeSwap(ref SwapSite thisSite, ref SwapSite otherSite)
1788 			{
1789 				assert(otherSite.source);
1790 				*thisSite.target = otherSite.source;
1791 				thisSite.tombstone.redirect = otherSite.newAddress;
1792 			}
1793 
1794 			finalizeSwap(site1, site2);
1795 			finalizeSwap(site2, site1);
1796 			break;
1797 		}
1798 	}
1799 
1800 	if (root !is origRoot)
1801 		assert(!root.clean);
1802 
1803 	recalculate(root); // Recalculate cumulative information for the part of the tree that we edited
1804 
1805 	debug checkDescendants(root);
1806 
1807 	return root;
1808 }
1809 
1810 /// Polyfill for object.require
1811 static if (!__traits(hasMember, object, "require"))
1812 ref V require(K, V)(ref V[K] aa, K key, lazy V value = V.init)
1813 {
1814 	auto p = key in aa;
1815 	if (p)
1816 		return *p;
1817 	return aa[key] = value;
1818 }
1819 
1820 // std.functional.memoize evicts old entries after a hash collision.
1821 // We have much to gain by evicting in strictly chronological order.
1822 struct RoundRobinCache(K, V)
1823 {
1824 	V[K] lookup;
1825 	K[] keys;
1826 	size_t pos;
1827 
1828 	void requireSize(size_t size)
1829 	{
1830 		if (keys.length >= size)
1831 			return;
1832 		T roundUpToPowerOfTwo(T)(T x) { return nextPow2(x-1); }
1833 		keys.length = roundUpToPowerOfTwo(size);
1834 	}
1835 
1836 	ref V get(ref K key, lazy V value)
1837 	{
1838 		return lookup.require(key,
1839 			{
1840 				lookup.remove(keys[pos]);
1841 				keys[pos++] = key;
1842 				if (pos == keys.length)
1843 					pos = 0;
1844 				return value;
1845 			}());
1846 	}
1847 }
1848 alias ReductionCacheKey = Tuple!(Entity, q{origRoot}, Reduction, q{r});
1849 RoundRobinCache!(ReductionCacheKey, Entity) reductionCache;
1850 
1851 Entity applyReduction(Entity origRoot, ref Reduction r)
1852 {
1853 	if (lookaheadProcesses.length)
1854 	{
1855 		if (!reductionCache.keys)
1856 			reductionCache.requireSize(1 + lookaheadProcesses.length);
1857 
1858 		auto cacheKey = ReductionCacheKey(origRoot, r);
1859 		return reductionCache.get(cacheKey, applyReductionImpl(origRoot, r));
1860 	}
1861 	else
1862 		return applyReductionImpl(origRoot, r);
1863 }
1864 
1865 string applyReductionToPath(string path, Reduction reduction)
1866 {
1867 	if (reduction.type == Reduction.Type.ReplaceWord)
1868 	{
1869 		Entity[] words = parseToWords(path);
1870 		string result;
1871 		foreach (i, word; words)
1872 		{
1873 			if (i > 0 && i == words.length-1 && words[i-1].tail.endsWith("."))
1874 				result ~= word.head; // skip extension
1875 			else
1876 			if (word.head == reduction.from)
1877 				result ~= reduction.to;
1878 			else
1879 				result ~= word.head;
1880 			result ~= word.tail;
1881 		}
1882 		return result;
1883 	}
1884 	return path;
1885 }
1886 
1887 void autoRetry(scope void delegate() fun, lazy const(char)[] operation)
1888 {
1889 	while (true)
1890 		try
1891 		{
1892 			fun();
1893 			return;
1894 		}
1895 		catch (Exception e)
1896 		{
1897 			writeln("Error while attempting to " ~ operation ~ ": " ~ e.msg);
1898 			import core.thread;
1899 			Thread.sleep(dur!"seconds"(1));
1900 			writeln("Retrying...");
1901 		}
1902 }
1903 
1904 void deleteAny(string path)
1905 {
1906 	if (exists(path))
1907 	{
1908 		if (isDir(path))
1909 			rmdirRecurse(path);
1910 		else
1911 			remove(path);
1912 	}
1913 
1914 	// The ugliest hacks, only for the ugliest operating system
1915 	version (Windows)
1916 	{
1917 		/// Alternative way to check for file existence
1918 		/// Files marked for deletion act as inexistant, but still prevent creation and appear in directory listings
1919 		bool exists2(string path)
1920 		{
1921 			return !dirEntries(dirName(path), baseName(path), SpanMode.shallow).empty;
1922 		}
1923 
1924 		enforce(!exists(path) && !exists2(path), "Path still exists"); // Windows only marks locked directories for deletion
1925 	}
1926 }
1927 
1928 void safeDelete(string path) { autoRetry({deleteAny(path);}, "delete " ~ path); }
1929 void safeRename(string src, string dst) { autoRetry({rename(src, dst);}, "rename " ~ src ~ " to " ~ dst); }
1930 void safeMkdir(in char[] path) { autoRetry({mkdirRecurse(path);}, "mkdir " ~ path); }
1931 
1932 void safeReplace(string path, void delegate(string path) creator)
1933 {
1934 	auto tmpPath = path ~ ".inprogress";
1935 	if (exists(tmpPath)) safeDelete(tmpPath);
1936 	auto oldPath = path ~ ".old";
1937 	if (exists(oldPath)) safeDelete(oldPath);
1938 
1939 	{
1940 		scope(failure) safeDelete(tmpPath);
1941 		creator(tmpPath);
1942 	}
1943 
1944 	if (exists(path)) safeRename(path, oldPath);
1945 	safeRename(tmpPath, path);
1946 	if (exists(oldPath)) safeDelete(oldPath);
1947 }
1948 
1949 
1950 void safeSave(Entity root, string savedir) { safeReplace(savedir, path => save(root, path)); }
1951 
1952 void saveResult(Entity root)
1953 {
1954 	if (!noSave)
1955 		measure!"resultSave"({safeSave(root, resultDir);});
1956 }
1957 
1958 struct Lookahead
1959 {
1960 	Thread thread;
1961 	shared Pid pid;
1962 	string testdir;
1963 	EntityHash digest;
1964 }
1965 Lookahead[] lookaheadProcesses;
1966 
1967 TestResult[EntityHash] lookaheadResults;
1968 
1969 struct AccumulatingPredictor(double exp)
1970 {
1971 	double r = 0.5;
1972 
1973 	void put(bool outcome)
1974 	{
1975 		r = (1 - exp) * r + exp * outcome;
1976 	}
1977 
1978 	double predict()
1979 	{
1980 		return r;
1981 	}
1982 }
1983 // Parameters found through empirical testing (gradient descent)
1984 alias Predictor = AccumulatingPredictor!(0.01);
1985 Predictor predictor;
1986 
1987 version (Windows)
1988 	enum nullFileName = "nul";
1989 else
1990 	enum nullFileName = "/dev/null";
1991 
1992 bool[EntityHash] cache;
1993 
1994 struct TestResult
1995 {
1996 	bool success;
1997 
1998 	enum Source : ubyte
1999 	{
2000 		none,
2001 		tester,
2002 		lookahead,
2003 		diskCache,
2004 		ramCache,
2005 	}
2006 	Source source;
2007 
2008 	int status;
2009 	string reason()
2010 	{
2011 		final switch (source)
2012 		{
2013 			case Source.none:
2014 				assert(false);
2015 			case Source.tester:
2016 				return format("Test script %(%s%) exited with exit code %d (%s)",
2017 					[tester], status, (success ? "success" : "failure"));
2018 			case Source.lookahead:
2019 				return format("Test script %(%s%) (in lookahead) exited with exit code %d (%s)",
2020 					[tester], status, (success ? "success" : "failure"));
2021 			case Source.diskCache:
2022 				return "Test result was cached on disk as " ~ (success ? "success" : "failure");
2023 			case Source.ramCache:
2024 				return "Test result was cached in memory as " ~ (success ? "success" : "failure");
2025 		}
2026 	}
2027 }
2028 
2029 TestResult test(
2030 	Entity root,            /// New root, with reduction already applied
2031 	Reduction[] reductions, /// For display purposes only
2032 )
2033 {
2034 	writef("%-(%s, %) => ", reductions); stdout.flush();
2035 
2036 	EntityHash digest = root.hash;
2037 
2038 	TestResult ramCached(lazy TestResult fallback)
2039 	{
2040 		auto cacheResult = digest in cache;
2041 		if (cacheResult)
2042 		{
2043 			// Note: as far as I can see, a cache hit for a positive reduction is not possible (except, perhaps, for a no-op reduction)
2044 			writeln(*cacheResult ? "Yes" : "No", " (cached)");
2045 			return TestResult(*cacheResult, TestResult.Source.ramCache);
2046 		}
2047 		auto result = fallback;
2048 		cache[digest] = result.success;
2049 		return result;
2050 	}
2051 
2052 	TestResult diskCached(lazy TestResult fallback)
2053 	{
2054 		tests++;
2055 
2056 		if (globalCache)
2057 		{
2058 			if (!exists(globalCache)) mkdirRecurse(globalCache);
2059 			string cacheBase = absolutePath(buildPath(globalCache, format("%016X", cast(ulong)digest.value))) ~ "-";
2060 			bool found;
2061 
2062 			measure!"globalCache"({ found = exists(cacheBase~"0"); });
2063 			if (found)
2064 			{
2065 				writeln("No (disk cache)");
2066 				return TestResult(false, TestResult.Source.diskCache);
2067 			}
2068 			measure!"globalCache"({ found = exists(cacheBase~"1"); });
2069 			if (found)
2070 			{
2071 				writeln("Yes (disk cache)");
2072 				return TestResult(true, TestResult.Source.diskCache);
2073 			}
2074 			auto result = fallback;
2075 			measure!"globalCache"({ autoRetry({ std.file.write(cacheBase ~ (result.success ? "1" : "0"), ""); }, "save result to disk cache"); });
2076 			return result;
2077 		}
2078 		else
2079 			return fallback;
2080 	}
2081 
2082 	TestResult lookahead(lazy TestResult fallback)
2083 	{
2084 		if (iter.strategy)
2085 		{
2086 			// Handle existing lookahead jobs
2087 
2088 			TestResult reap(ref Lookahead process, int status)
2089 			{
2090 				scope(success) process = Lookahead.init;
2091 				safeDelete(process.testdir);
2092 				if (process.thread)
2093 					process.thread.join(/*rethrow:*/true);
2094 				return lookaheadResults[process.digest] = TestResult(status == 0, TestResult.Source.lookahead, status);
2095 			}
2096 
2097 			foreach (ref process; lookaheadProcesses)
2098 				if (process.thread)
2099 				{
2100 					debug (DETERMINISTIC_LOOKAHEAD)
2101 					{
2102 						process.thread.join(/*rethrow:*/true);
2103 						process.thread = null;
2104 					}
2105 
2106 					auto pid = cast()atomicLoad(process.pid);
2107 					if (pid)
2108 					{
2109 						debug (DETERMINISTIC_LOOKAHEAD)
2110 							reap(process, pid.wait());
2111 						else
2112 						{
2113 							auto waitResult = pid.tryWait();
2114 							if (waitResult.terminated)
2115 								reap(process, waitResult.status);
2116 						}
2117 					}
2118 				}
2119 
2120 			static struct PredictedState
2121 			{
2122 				double probability;
2123 				ReductionIterator iter;
2124 				Predictor predictor;
2125 			}
2126 
2127 			auto initialState = new PredictedState(1.0, iter, predictor);
2128 			alias PredictionTree = RedBlackTree!(PredictedState*, (a, b) => a.probability > b.probability, true);
2129 			auto predictionTree = new PredictionTree((&initialState)[0..1]);
2130 
2131 			// Start new lookahead jobs
2132 
2133 			size_t numSteps;
2134 
2135 			foreach (ref process; lookaheadProcesses)
2136 				while (!process.thread && !predictionTree.empty)
2137 				{
2138 					auto state = predictionTree.front;
2139 					predictionTree.removeFront();
2140 
2141 				retryIter:
2142 					if (state.iter.done)
2143 						continue;
2144 					reductionCache.requireSize(lookaheadProcesses.length + ++numSteps);
2145 					auto reduction = state.iter.front;
2146 					Entity newRoot;
2147 					measure!"lookaheadApply"({ newRoot = state.iter.root.applyReduction(reduction); });
2148 					if (newRoot is state.iter.root)
2149 					{
2150 						state.iter.next(false);
2151 						goto retryIter; // inapplicable reduction
2152 					}
2153 
2154 					auto digest = newRoot.hash;
2155 
2156 					double prediction;
2157 					if (digest in cache || digest in lookaheadResults || lookaheadProcesses[].canFind!(p => p.thread && p.digest == digest))
2158 					{
2159 						if (digest in cache)
2160 							prediction = cache[digest] ? 1 : 0;
2161 						else
2162 						if (digest in lookaheadResults)
2163 							prediction = lookaheadResults[digest].success ? 1 : 0;
2164 						else
2165 							prediction = state.predictor.predict();
2166 					}
2167 					else
2168 					{
2169 						process.digest = digest;
2170 
2171 						static int counter;
2172 						process.testdir = dirSuffix("lookahead.%d".format(counter++));
2173 
2174 						// Saving and process creation are expensive.
2175 						// Don't block the main thread, use a worker thread instead.
2176 						static void runThread(Entity newRoot, ref Lookahead process, string tester)
2177 						{
2178 							process.thread = new Thread({
2179 								save(newRoot, process.testdir);
2180 
2181 								auto nul = File(nullFileName, "w+");
2182 								auto pid = spawnShell(tester, nul, nul, nul, null, Config.none, process.testdir);
2183 								atomicStore(process.pid, cast(shared)pid);
2184 							});
2185 							process.thread.start();
2186 						}
2187 						runThread(newRoot, process, tester);
2188 
2189 						prediction = state.predictor.predict();
2190 					}
2191 
2192 					foreach (outcome; 0 .. 2)
2193 					{
2194 						auto probability = outcome ? prediction : 1 - prediction;
2195 						if (probability == 0)
2196 							continue; // no chance
2197 						probability *= state.probability; // accumulate
2198 						auto nextState = new PredictedState(probability, state.iter, state.predictor);
2199 						if (outcome)
2200 							nextState.iter.root = newRoot;
2201 						nextState.iter.next(!!outcome);
2202 						nextState.predictor.put(!!outcome);
2203 						predictionTree.insert(nextState);
2204 					}
2205 				}
2206 
2207 			// Find a result for the current test.
2208 
2209 			auto plookaheadResult = digest in lookaheadResults;
2210 			if (plookaheadResult)
2211 			{
2212 				writeln(plookaheadResult.success ? "Yes" : "No", " (lookahead)");
2213 				return *plookaheadResult;
2214 			}
2215 
2216 			foreach (ref process; lookaheadProcesses)
2217 			{
2218 				if (process.thread && process.digest == digest)
2219 				{
2220 					// Current test is already being tested in the background, wait for its result.
2221 
2222 					// Join the thread first, to guarantee that there is a pid
2223 					measure!"lookaheadWaitThread"({ process.thread.join(/*rethrow:*/true); });
2224 					process.thread = null;
2225 
2226 					auto pid = cast()atomicLoad(process.pid);
2227 					int exitCode;
2228 					measure!"lookaheadWaitProcess"({ exitCode = pid.wait(); });
2229 
2230 					auto result = reap(process, exitCode);
2231 					writeln(result.success ? "Yes" : "No", " (lookahead-wait)");
2232 					return result;
2233 				}
2234 			}
2235 		}
2236 
2237 		return fallback;
2238 	}
2239 
2240 	TestResult doTest()
2241 	{
2242 		string testdir = dirSuffix("test");
2243 		measure!"testSave"({save(root, testdir);}); scope(exit) measure!"clean"({safeDelete(testdir);});
2244 
2245 		Pid pid;
2246 		if (noRedirect)
2247 			pid = spawnShell(tester, null, Config.none, testdir);
2248 		else
2249 		{
2250 			auto nul = File(nullFileName, "w+");
2251 			pid = spawnShell(tester, nul, nul, nul, null, Config.none, testdir);
2252 		}
2253 
2254 		int status;
2255 		measure!"test"({status = pid.wait();});
2256 		auto result = TestResult(status == 0, TestResult.Source.tester, status);
2257 		writeln(result.success ? "Yes" : "No");
2258 		return result;
2259 	}
2260 
2261 	auto result = ramCached(diskCached(lookahead(doTest())));
2262 	if (trace) saveTrace(root, reductions, dirSuffix("trace"), result.success);
2263 	return result;
2264 }
2265 
2266 void saveTrace(Entity root, Reduction[] reductions, string dir, bool result)
2267 {
2268 	if (!exists(dir)) mkdir(dir);
2269 	static size_t count;
2270 	string countStr = format("%08d-%(#%08d-%|%)%d",
2271 		count++,
2272 		reductions
2273 			.map!(reduction => reduction.address ? findEntityEx(root, reduction.address).entity : null)
2274 			.map!(target => target ? target.id : 0),
2275 		result ? 1 : 0);
2276 	auto traceDir = buildPath(dir, countStr);
2277 	save(root, traceDir);
2278 	if (doDump && result)
2279 		dumpSet(root, traceDir ~ ".dump");
2280 }
2281 
2282 void applyNoRemoveMagic(Entity root)
2283 {
2284 	enum MAGIC_PREFIX = "DustMiteNoRemove";
2285 	enum MAGIC_START = MAGIC_PREFIX ~ "Start";
2286 	enum MAGIC_STOP  = MAGIC_PREFIX ~ "Stop";
2287 
2288 	bool state = false;
2289 
2290 	bool scanString(string s)
2291 	{
2292 		if (s.length == 0)
2293 			return false;
2294 		if (s.canFind(MAGIC_START))
2295 			state = true;
2296 		if (s.canFind(MAGIC_STOP))
2297 			state = false;
2298 		return state;
2299 	}
2300 
2301 	bool scan(Entity e)
2302 	{
2303 		bool removeThis;
2304 		removeThis  = scanString(e.head);
2305 		foreach (c; e.children)
2306 			removeThis |= scan(c);
2307 		removeThis |= scanString(e.tail);
2308 		e.noRemove |= removeThis;
2309 		return removeThis;
2310 	}
2311 
2312 	scan(root);
2313 }
2314 
2315 void applyNoRemoveRules(Entity root, RemoveRule[] removeRules)
2316 {
2317 	if (!removeRules.length)
2318 		return;
2319 
2320 	// By default, for content not covered by any of the specified
2321 	// rules, do the opposite of the first rule.
2322 	// I.e., if the default rule is "remove only", then by default
2323 	// don't remove anything except what's specified by the rule.
2324 	bool defaultRemove = !removeRules.front.remove;
2325 
2326 	auto files = root.isFile ? [root] : root.children;
2327 
2328 	foreach (f; files)
2329 	{
2330 		assert(f.isFile);
2331 
2332 		// Check file name
2333 		bool removeFile = defaultRemove;
2334 		foreach (rule; removeRules)
2335 		{
2336 			if (
2337 				(rule.shellGlob && f.filename.globMatch(rule.shellGlob))
2338 			||
2339 				(rule.regexp !is Regex!char.init && f.filename.match(rule.regexp))
2340 			)
2341 				removeFile = rule.remove;
2342 		}
2343 
2344 		auto removeChar = new bool[f.contents.length];
2345 		removeChar[] = removeFile;
2346 
2347 		foreach (rule; removeRules)
2348 			if (rule.regexp !is Regex!char.init)
2349 				foreach (m; f.contents.matchAll(rule.regexp))
2350 				{
2351 					auto start = m.hit.ptr - f.contents.ptr;
2352 					auto end = start + m.hit.length;
2353 					removeChar[start .. end] = rule.remove;
2354 				}
2355 
2356 		bool scanString(string s)
2357 		{
2358 			if (!s.length)
2359 				return true;
2360 			auto start = s.ptr - f.contents.ptr;
2361 			auto end = start + s.length;
2362 			return removeChar[start .. end].all;
2363 		}
2364 
2365 		bool scan(Entity e)
2366 		{
2367 			bool remove = true;
2368 			remove &= scanString(e.head);
2369 			foreach (c; e.children)
2370 				remove &= scan(c);
2371 			remove &= scanString(e.tail);
2372 			if (!remove)
2373 				e.noRemove = root.noRemove = true;
2374 			return remove;
2375 		}
2376 
2377 		scan(f);
2378 	}
2379 }
2380 
2381 void applyNoRemoveDeps(Entity root)
2382 {
2383 	static bool isNoRemove(Entity e)
2384 	{
2385 		if (e.noRemove)
2386 			return true;
2387 		foreach (dependent; e.dependents)
2388 			if (isNoRemove(dependent.entity))
2389 				return true;
2390 		return false;
2391 	}
2392 
2393 	// Propagate upwards
2394 	static bool fill(Entity e)
2395 	{
2396 		e.noRemove |= isNoRemove(e);
2397 		foreach (c; e.children)
2398 			e.noRemove |= fill(c);
2399 		return e.noRemove;
2400 	}
2401 
2402 	fill(root);
2403 }
2404 
2405 void loadCoverage(Entity root, string dir)
2406 {
2407 	void scanFile(Entity f)
2408 	{
2409 		auto fn = buildPath(dir, setExtension(baseName(f.filename), "lst"));
2410 		if (!exists(fn))
2411 			return;
2412 		writeln("Loading coverage file ", fn);
2413 
2414 		static bool covered(string line)
2415 		{
2416 			enforce(line.length >= 8 && line[7]=='|', "Invalid syntax in coverage file");
2417 			line = line[0..7];
2418 			return line != "0000000" && line != "       ";
2419 		}
2420 
2421 		auto lines = map!covered(splitLines(readText(fn))[0..$-1]);
2422 		uint line = 0;
2423 
2424 		bool coverString(string s)
2425 		{
2426 			bool result;
2427 			foreach (char c; s)
2428 			{
2429 				result |= lines[line];
2430 				if (c == '\n')
2431 					line++;
2432 			}
2433 			return result;
2434 		}
2435 
2436 		bool cover(ref Entity e)
2437 		{
2438 			bool result;
2439 			result |= coverString(e.head);
2440 			foreach (ref c; e.children)
2441 				result |= cover(c);
2442 			result |= coverString(e.tail);
2443 
2444 			e.noRemove |= result;
2445 			return result;
2446 		}
2447 
2448 		foreach (ref c; f.children)
2449 			f.noRemove |= cover(c);
2450 	}
2451 
2452 	void scanFiles(Entity e)
2453 	{
2454 		if (e.isFile)
2455 			scanFile(e);
2456 		else
2457 			foreach (c; e.children)
2458 				scanFiles(c);
2459 	}
2460 
2461 	scanFiles(root);
2462 }
2463 
2464 void assignID(Entity e)
2465 {
2466 	static int counter;
2467 	e.id = ++counter;
2468 	foreach (c; e.children)
2469 		assignID(c);
2470 }
2471 
2472 void convertRefs(Entity root)
2473 {
2474 	Address*[int] addresses;
2475 
2476 	void collectAddresses(Entity e, Address* address)
2477 	{
2478 		assert(e.id !in addresses);
2479 		addresses[e.id] = address;
2480 
2481 		assert(address.children.length == 0);
2482 		foreach (i, c; e.children)
2483 		{
2484 			auto childAddress = new Address(address, i, address.depth + 1);
2485 			address.children ~= childAddress;
2486 			collectAddresses(c, childAddress);
2487 		}
2488 		assert(address.children.length == e.children.length);
2489 	}
2490 	collectAddresses(root, &rootAddress);
2491 
2492 	void convertRef(ref EntityRef r)
2493 	{
2494 		assert(r.entity && !r.address);
2495 		r.address = addresses[r.entity.id];
2496 		r.entity = null;
2497 	}
2498 
2499 	void convertRefs(Entity e)
2500 	{
2501 		foreach (ref r; e.dependents)
2502 			convertRef(r);
2503 		foreach (c; e.children)
2504 			convertRefs(c);
2505 	}
2506 	convertRefs(root);
2507 }
2508 
2509 struct FindResult
2510 {
2511 	Entity entity;          /// null if gone
2512 	const Address* address; /// the "real" (no redirects) address, null if gone
2513 }
2514 
2515 FindResult findEntity(Entity root, const(Address)* addr)
2516 {
2517 	auto result = findEntityEx(root, addr);
2518 	if (result.dead)
2519 		return FindResult(null, null);
2520 	return FindResult(result.entity, result.address);
2521 }
2522 
2523 struct FindResultEx
2524 {
2525 	Entity entity;          /// never null
2526 	const Address* address; /// never null
2527 	bool dead;              /// a dead node has been traversed to get here
2528 }
2529 
2530 static FindResultEx findEntityEx(Entity root, const(Address)* addr)
2531 {
2532 	if (!addr.parent) // root
2533 		return FindResultEx(root, addr, root.dead);
2534 
2535 	auto r = findEntityEx(root, addr.parent);
2536 	auto e = r.entity.children[addr.index];
2537 	if (e.redirect)
2538 	{
2539 		assert(e.dead);
2540 		return findEntityEx(root, e.redirect); // shed the "dead" flag here
2541 	}
2542 
2543 	addr = r.address.child(addr.index); // apply redirects in parents
2544 	return FindResultEx(e, addr, r.dead || e.dead); // accumulate the "dead" flag
2545 }
2546 
2547 struct AddressRange
2548 {
2549 	const(Address)*address;
2550 	bool empty() { return !address.parent; }
2551 	size_t front() { return address.index; }
2552 	void popFront() { address = address.parent; }
2553 }
2554 
2555 void dumpSet(Entity root, string fn)
2556 {
2557 	auto f = File(fn, "wb");
2558 
2559 	string printable(string s) { return s is null ? "null" : `"` ~ s.replace("\\", `\\`).replace("\"", `\"`).replace("\r", `\r`).replace("\n", `\n`) ~ `"`; }
2560 	string printableFN(string s) { return "/*** " ~ s ~ " ***/"; }
2561 
2562 	int[][int] dependencies;
2563 	bool[int] redirects;
2564 	void scanDependencies(Entity e)
2565 	{
2566 		foreach (d; e.dependents)
2567 		{
2568 			auto dependent = findEntityEx(root, d.address).entity;
2569 			if (dependent)
2570 				dependencies[dependent.id] ~= e.id;
2571 		}
2572 		foreach (c; e.children)
2573 			scanDependencies(c);
2574 		if (e.redirect)
2575 		{
2576 			auto target = findEntityEx(root, e.redirect).entity;
2577 			if (target)
2578 				redirects[target.id] = true;
2579 		}
2580 	}
2581 	scanDependencies(root);
2582 
2583 	void print(Entity e, int depth)
2584 	{
2585 		auto prefix = replicate("  ", depth);
2586 
2587 		// if (!fileLevel) { f.writeln(prefix, "[ ... ]"); continue; }
2588 
2589 		f.write(
2590 			prefix,
2591 			"[",
2592 			e.noRemove ? "!" : "",
2593 			e.dead ? "X" : "",
2594 		);
2595 		if (e.children.length == 0)
2596 		{
2597 			f.write(
2598 				" ",
2599 				e.redirect ? "-> " ~ text(findEntityEx(root, e.redirect).entity.id) ~ " " : "",
2600 				e.isFile ? e.filename ? printableFN(e.filename) ~ " " : null : e.head ? printable(e.head) ~ " " : null,
2601 				e.tail ? printable(e.tail) ~ " " : null,
2602 				e.comment ? "/* " ~ e.comment ~ " */ " : null,
2603 				"]"
2604 			);
2605 		}
2606 		else
2607 		{
2608 			f.writeln(e.comment ? " // " ~ e.comment : null);
2609 			if (e.isFile) f.writeln(prefix, "  ", printableFN(e.filename));
2610 			if (e.head) f.writeln(prefix, "  ", printable(e.head));
2611 			foreach (c; e.children)
2612 				print(c, depth+1);
2613 			if (e.tail) f.writeln(prefix, "  ", printable(e.tail));
2614 			f.write(prefix, "]");
2615 		}
2616 		if (e.dependents.length || e.id in redirects || trace)
2617 			f.write(" =", e.id);
2618 		if (e.id in dependencies)
2619 		{
2620 			f.write(" =>");
2621 			foreach (d; dependencies[e.id])
2622 				f.write(" ", d);
2623 		}
2624 		f.writeln();
2625 	}
2626 
2627 	print(root, 0);
2628 
2629 	f.close();
2630 }
2631 
2632 void dumpToHtml(Entity root, string fn)
2633 {
2634 	auto buf = appender!string();
2635 
2636 	void dumpText(string s)
2637 	{
2638 		foreach (c; s)
2639 			switch (c)
2640 			{
2641 				case '<':
2642 					buf.put("&lt;");
2643 					break;
2644 				case '>':
2645 					buf.put("&gt;");
2646 					break;
2647 				case '&':
2648 					buf.put("&amp;");
2649 					break;
2650 				default:
2651 					buf.put(c);
2652 			}
2653 	}
2654 
2655 	void dump(Entity e)
2656 	{
2657 		if (e.isFile)
2658 		{
2659 			buf.put("<h1>");
2660 			dumpText(e.filename);
2661 			buf.put("</h1><pre>");
2662 			foreach (c; e.children)
2663 				dump(c);
2664 			buf.put("</pre>");
2665 		}
2666 		else
2667 		{
2668 			buf.put("<span>");
2669 			dumpText(e.head);
2670 			foreach (c; e.children)
2671 				dump(c);
2672 			dumpText(e.tail);
2673 			buf.put("</span>");
2674 		}
2675 	}
2676 
2677 	buf.put(q"EOT
2678 <style> pre span:hover { outline: 1px solid rgba(0,0,0,0.2); background-color: rgba(100,100,100,0.1	); } </style>
2679 EOT");
2680 
2681 	dump(root);
2682 
2683 	std.file.write(fn, buf.data());
2684 }
2685 
2686 // void dumpText(string fn, ref Reduction r = nullReduction)
2687 // {
2688 // 	auto f = File(fn, "wt");
2689 // 	dump(root, r, (string) {}, &f.write!string);
2690 // 	f.close();
2691 // }
2692 
2693 version(testsuite)
2694 shared static this()
2695 {
2696 	import core.runtime;
2697 	"../cov".mkdir.collectException();
2698 	dmd_coverDestPath("../cov");
2699 	dmd_coverSetMerge(true);
2700 }