//written by Stanley Dodds (that's me!) //I can probably supply a full proof of correctness if needed, but I have nothing written down right now - so currently, source: dude, just trust me (or try reading my code) //Of course, you can compare up to n = 19 with previous results and see that it's definitely doing something right using System.Diagnostics; internal class Program { const int n = 17;//number of polycube cells. Need n >= 4 if single threading, or n >= filterDepth >= 5 if multithreading (I think) const int filterDepth = 12; const int threads = 5; private static void Main() { Stopwatch sw = Stopwatch.StartNew(); long count = 0; Task[] tasks = new Task[threads];//please don't judge my multithreading - this was basically just my first attempt, to see if it helped int i = 4 * (n - filterDepth) - 2;//this is the maximum left stack length, which is the value being filtered to separate work for (int j = 0; j < tasks.Length; j++) { int filter = i--;//copy of i, since lambda expression captures the variable tasks[j] = Task.Factory.StartNew(() => CountExtensionsSubset(filter)); } while (i >= 0) { int j = 0; while (!tasks[j].IsCompleted) { j++; j %= tasks.Length; Thread.Sleep(50); } count += tasks[j].Result; int filter = i--;//copy of i, since lambda expression captures the variable tasks[j] = Task.Factory.StartNew(() => CountExtensionsSubset(filter)); } count += tasks.Sum(t => t.Result); Console.WriteLine("{0:N0} polycubes with {1} cells\nTook {2:%d} day(s), {2:hh\\:mm\\:ss\\.f} h:m:s", count, n, sw.Elapsed); Console.ReadKey(); } private static long CountExtensionsSubset(int filter) { unsafe { const int X = (n + 5) / 4 * ((n + 5) / 4 * 3 - 2);//set X n const int Y = X + 1;//trivial choice is X = 1, Y = n, Z = n * n. A simple reduction is X = 1, Y = n, Z = n * (n / 2) + (n + 1) / 2 const int Z = X + (n + 5) / 4 * 3;//minimising Z is memory efficient. Unclear if this noticably affects performance. Z ~ 3/16 n^2 is the best I can find for arbitrary n byte* byteBoard = stackalloc byte[(n + 2) * Z];//could use ints or shorts as offsets to save memory, but it's faster to directly store the pointers to avoid adding pointer offsets at every lookup byte** refStack = stackalloc byte*[(n - 2) * 4];//total length of the two stacks is at most 4n-9. One stack grows from the left, the other stack grows from the right *refStack = byteBoard += Z;//seeded with first index of the byte board as the only allowed extension for (byte* i = byteBoard + (n + 1) * Z; --i != byteBoard;) *i = 255;//the first Z + 1 bytes are disallowed extensions; first Z are less than the minimum, last 1 due to edge case of initial polycube having no neighbours Console.WriteLine($"started task {filter}"); long count = CountExtensions(n, refStack + 1, refStack + (n - 2) * 4); Console.WriteLine($"finished task {filter} with subcount {count:N0}"); return count; long CountExtensions(int depth, byte** stackTop1, byte** stackTop2) { long count = 0; byte** stackTopOriginal = stackTop1; while (stackTop1 != refStack) { byte* index = *--stackTop1; byte** stackTopInner = stackTop1; if (++*(index - X) == 0) *stackTopInner++ = index - X; if (++*(index - Y) == 0) *stackTopInner++ = index - Y; if (++*(index - Z) == 0) *stackTopInner++ = index - Z; if (++*(index + X) == 0) *stackTopInner++ = index + X; if (++*(index + Y) == 0) *stackTopInner++ = index + Y; if (++*(index + Z) == 0) *stackTopInner++ = index + Z; //if (depth == 3) //{ // count += (stackTopInner - refStack) * (stackTopInner - refStack - 1) >> 1;//length choose 2 ways to choose final two extensions, excluding new extensions, covered separately // while (stackTopInner != refStack) // { // byte* i = *--stackTopInner;//counts the terms equal to 255 by abusing byte overflow and the automatic int conversion when performing addition. Any not equal to 255 will be too small to overflow. Constant added should be between 5 and ~200 // count += *(i - X) + *(i - Y) + *(i - Z) + *(i + X) + *(i + Y) + *(i + Z) + 128 >> 8;//is there a faster way to count the number of bytes equal to a particular value (which is an extremum of a small range)? // } //} if (depth == 4) count += CountFinalExtensions(stackTopInner); else if (depth != filterDepth || stackTop1 - refStack == filter) count += CountExtensions(depth - 1, stackTopInner, stackTop2); --*(index - X); --*(index - Y); --*(index - Z); --*(index + X); --*(index + Y); --*(index + Z); *--stackTop2 = index;//doing this push before the recursion would add one extra unnecessary element to the stack at each level of recursion } while (stackTop1 != stackTopOriginal) *stackTop1++ = *stackTop2++; return count; } int CountFinalExtensions(byte** stackTop) { const int X2 = X + X, Y2 = Y + Y, Z2 = Z + Z, sYX = Y + X, sZX = Z + X, sZY = Z + Y, dYX = Y - X, dZX = Z - X, dZY = Z - Y; int count = 0; while (stackTop != refStack) { byte* i = *--stackTop; int length = (int)(stackTop - refStack); int subcount = 128;//128 is small enough that this can never overflow to 256 in an unintended case if (++*(i - X) == 0) { length++; subcount += *(i - X2) + *(i - sYX) + *(i - sZX) + *(i + dYX) + *(i + dZX); } if (++*(i - Y) == 0) { length++; subcount += *(i - Y2) + *(i - sYX) + *(i - sZY) + *(i - dYX) + *(i + dZY); } if (++*(i - Z) == 0) { length++; subcount += *(i - Z2) + *(i - sZX) + *(i - sZY) + *(i - dZX) + *(i - dZY); } if (++*(i + X) == 0) { length++; subcount += *(i + X2) + *(i + sYX) + *(i + sZX) + *(i - dYX) + *(i - dZX); } if (++*(i + Y) == 0) { length++; subcount += *(i + Y2) + *(i + sYX) + *(i + sZY) + *(i + dYX) + *(i - dZY); } if (++*(i + Z) == 0) { length++; subcount += *(i + Z2) + *(i + sZX) + *(i + sZY) + *(i + dZX) + *(i + dZY); } count += (subcount >> 8) + (length * (length - 1) >> 1); //int a = ((*(i - X))++) >> 7; //int b = ((*(i - Y))++) >> 7; //int c = ((*(i - Z))++) >> 7; //int d = ((*(i + X))++) >> 7; //int e = ((*(i + Y))++) >> 7; //int f = ((*(i + Z))++) >> 7; //int length = (int)(stackTop - refStack) + a + b + c + d + e + f; //count += (128 + a * *(i - X2) + b * *(i - Y2) + c * *(i - Z2) + d * *(i + X2) + e * *(i + Y2) + f * *(i + Z2) // + (d + e) * *(i + sYX) + (d + f) * *(i + sZX) + (e + f) * *(i + sZY) // + (a + b) * *(i - sYX) + (a + c) * *(i - sZX) + (b + c) * *(i - sZY) // + (a + e) * *(i + dYX) + (a + f) * *(i + dZX) + (b + f) * *(i + dZY) // + (b + d) * *(i - dYX) + (c + d) * *(i - dZX) + (c + e) * *(i - dZY) >> 8) + (length * (length - 1) >> 1); byte** stackTopTemp = stackTop; while (stackTopTemp != refStack) { byte* j = *--stackTopTemp;//counts the terms equal to 255 by abusing byte overflow and the automatic int conversion when performing addition. Any not equal to 255 will be too small to overflow. Constant added should be between 5 and ~200 count += *(j - X) + *(j - Y) + *(j - Z) + *(j + X) + *(j + Y) + *(j + Z) + 128 >> 8;//is there a faster way to count the number of bytes equal to a particular value (which is an extremum of a small range)? } --*(i - X); --*(i - Y); --*(i - Z); --*(i + X); --*(i + Y); --*(i + Z); } return count; } } } }