/* this program computes values for https://oeis.org/A001931 it is an excerpt (the 2nd portion) of the main program that computes the values for https://oeis.org/A000162 (except for trivially small values of N) this is based on a rust port of Stanley Dodds's algorithm, whose original C# code is available at: https://oeis.org/A000162/a000162.cs.txt and: https://oeis.org/A001931/a001931.cs.txt rust port by Phil Thompson in January 2024, along with changes: - allow the work to be divided into many more thread tasks by changing FILTER_TASK relative to N - allow use of previously-computed counts at "checkpoints" (to resume a long-running computation) - allow specifying more threads than worker tasks - print timestamps (elapsed time) for most messages ----- change FILTER_DEPTH to control to how many thread tasks we create: tasks for FILTER_DEPTH=5: N = 11 -> 23502 N = 12 -> 162913 N = 13 -> 1152870 tasks for FILTER_DEPTH=7: N = 11 -> 534 N = 12 -> 3481 N = 13 -> 23502 */ use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; use std::time::Duration; use std::time::Instant; use std::thread; use std::collections::BTreeMap; // this import needed depending on rust toolchain? or edition? (e.g. on Amazon Linux) //use std::convert::TryInto; const N: usize = 15; // number of polycube cells const FILTER_DEPTH: usize = 11; // keep smaller than N to use multiple threads (lower=more shorter thread tasks) const THREADS: usize = 8; // output checkpoint count+finished tasks info no more often than this many seconds const CHECKPOINT_SECONDS: f32 = 5.0; const PLACEHOLDER_REF_STACK_OFFSET: isize = -123456; // if this is changed, the task hashes will change and any previous checkpoints will no longer work // if false, only checkpoints will be printed to stdout const SHOW_ALL_TASK_RESULTS: bool = false; const X: i32 = (N as i32 + 5) / 4 * ((N as i32 + 5) / 4 * 3 - 2); // set X n const Y: i32 = X + 1; // trivial choice is X = 1, Y = n, Z = n * n. A simple reduction is X = 1, Y = n, Z = n * (n / 2) + (n + 1) / 2 const Z: i32 = X + (N as i32 + 5) / 4 * 3; // minimising Z is memory efficient. Unclear if this noticably affects performance. Z ~ 3/16 n^2 is the best I can find for arbitrary n const X2: usize = (X + X) as usize; const Y2: usize = (Y + Y) as usize; const Z2: usize = (Z + Z) as usize; const SYX: usize = (Y + X) as usize; const SZX: usize = (Z + X) as usize; const SZY: usize = (Z + Y) as usize; const DYX: usize = (Y - X) as usize; const DZX: usize = (Z - X) as usize; const DZY: usize = (Z - Y) as usize; const BYTE_BOARD_LEN: usize = (N + 2) * (Z as usize); const REF_STACK_LEN: usize = (N - 2) * 4; // comment these out if wanting to re-calculate with the same N and FILTER_DEPTH const FIXED_PORTION_CHECKPOINTS: [(usize, usize, u64, u128); 5] = [ // N, FILTER_DEPTH, task hash, cumulative fixed polycubes count (16, 12, 9317956767031721395, 622727856628), (16, 12, 15320539189854766585, 1039500728908), (16, 12, 16863857699850425004, 1233957548477), // (92.13%) (17, 12, 10306999600203458141, 6036447860508), (17, 12, 14726552232942904259, 8794072714458), // (79.23%) ]; // for smaller values of N, we can greatly speed up computation by // lowering the sleep time used when waiting for threads to finish // for larger values of N, we can check less often and waste fewer // cycles in the main thread const N_SLEEP_MILLIS: u64 = if N > 19 { N as u64 * 12 } else if N > 17 { N as u64 * 10 } else if N > 12 { N as u64 * 2 } else { N as u64 + 5 }; pub struct ThreadTask { pub depth: usize, pub stack_top_inner_offset: isize, pub stack_top_2_offset: isize, pub ref_stack_offset: isize, pub byte_board: [u8; BYTE_BOARD_LEN], // can't actually store the pointers here, instead, need to // store offsets (isize) to re-create this pub ref_stack: [isize; REF_STACK_LEN] } impl Hash for ThreadTask { fn hash(&self, hasher: &mut H) { self.depth.hash(hasher); self.stack_top_inner_offset.hash(hasher); self.stack_top_2_offset.hash(hasher); self.ref_stack_offset.hash(hasher); self.byte_board.hash(hasher); self.ref_stack.hash(hasher); } } fn get_task_hash(thread_task: &ThreadTask) -> u64 { let mut hasher = DefaultHasher::new(); thread_task.hash(&mut hasher); return hasher.finish(); } fn seconds_to_dur(s: f64) -> String { let days = (s / 86400.0).floor(); let hours = ((s - (days * 86400.0)) / 3600.0).floor(); let minutes = ((s - (days * 86400.0) - (hours * 3600.0)) / 60.0).floor(); let seconds = s - (days * 86400.0) - (hours * 3600.0) - (minutes * 60.0); return format!("{:0>3}d:{:0>2}h:{:0>2}m:{:0>6.3}s", days, hours, minutes, seconds); } fn print_w_time(start_time: Instant, message: String) { println!("[{}] {message}", seconds_to_dur(start_time.elapsed().as_secs_f64())); } // thanks to https://stackoverflow.com/a/67834588/259456 fn integer_with_thousands_separator(num: u128) -> String { return num.to_string() .as_bytes() .rchunks(3) .rev() .map(std::str::from_utf8) .collect::, _>>() .unwrap() .join(","); } fn main() { let overall_start_time = Instant::now(); print_w_time(overall_start_time, "Counting fixed polycubes via extensions...".to_string()); let n_sleep = Duration::from_millis(N_SLEEP_MILLIS); println!("THREADS={THREADS}, N={N}, n_sleep_millis={N_SLEEP_MILLIS}"); let sw = Instant::now(); let mut subcount: u128 = 0; let total_tasks: usize; // using a BTreeMap so we can pop tasks off the front, in order by hash let mut tasks: BTreeMap; let ordered_task_hashes: Vec; let mut completed_task_counts: BTreeMap = BTreeMap::new(); let mut last_checkpoint_task_hash: Option = None; let mut last_checkpoint_time = Instant::now(); let mut last_checkpoint_total: u128 = 0; let mut completed_tasks: usize = 0; // find the highest available checkpoint for N+FILTER_DEPTH, if any for checkpoint in FIXED_PORTION_CHECKPOINTS { if checkpoint.0 != N || checkpoint.1 != FILTER_DEPTH { continue; } completed_task_counts.insert(checkpoint.2, checkpoint.3); if last_checkpoint_task_hash.is_none() || checkpoint.2 > last_checkpoint_task_hash.unwrap() { last_checkpoint_task_hash = Some(checkpoint.2); last_checkpoint_total = checkpoint.3; subcount = last_checkpoint_total; } } if last_checkpoint_task_hash.is_some() { println!("using highest matching checkpoint hash {} with count {}", last_checkpoint_task_hash.unwrap(), last_checkpoint_total); } // initialize the BTreeMap of hash-ordered thread tasks unsafe { let start_time = Instant::now(); let mut byte_board_arr: [u8; BYTE_BOARD_LEN] = [0; BYTE_BOARD_LEN]; let mut byte_board = byte_board_arr.as_mut_ptr(); let ref_stack_placeholder = byte_board.offset(PLACEHOLDER_REF_STACK_OFFSET); let mut ref_stack_arr: [*mut u8; REF_STACK_LEN] = [ref_stack_placeholder; REF_STACK_LEN]; let ref_stack = ref_stack_arr.as_mut_ptr(); byte_board = byte_board.offset(Z as isize); // seeded with first index of the byte board as the only allowed extension *ref_stack = byte_board; let mut i = byte_board.offset((N as isize + 1) * Z as isize); i = i.sub(1); // the first Z + 1 bytes are disallowed extensions; first Z are less than the minimum, last 1 due to edge case of initial polycube having no neighbours while i != byte_board { *i = 255; i = i.sub(1); } print_w_time(overall_start_time, format!("started initializing tasks, N={N}, FILTER_DEPTH={FILTER_DEPTH}")); // a vec of stacks, and offsets for stack_top_1 // for each entry in the vec, with a separate thread, run count_extensions_subset_final let count_and_tasks: (u128, BTreeMap) = count_extensions_subset_inner(N as usize, ref_stack.add(1), ref_stack.add((N - 2) * 4 as usize), ref_stack, &byte_board_arr, &ref_stack_arr); if last_checkpoint_task_hash.is_some() { if count_and_tasks.0 > 0 { println!("??? the initialization itself returns a count ???"); } } else { subcount += count_and_tasks.0; } tasks = count_and_tasks.1; total_tasks = tasks.len(); // create a copy of all task hashes, in order ordered_task_hashes = tasks.keys().cloned().collect(); let time_elapsed = start_time.elapsed().as_secs_f64(); print_w_time(overall_start_time, format!("finished initializing {} tasks, N={N}, FILTER_DEPTH={FILTER_DEPTH}, took {}", tasks.len(), seconds_to_dur(time_elapsed))); // remove completed tasks until we find the checkpoint match last_checkpoint_task_hash { Some(checkpoint_hash) => { if !tasks.contains_key(&checkpoint_hash) { println!("the checkpoint hash {checkpoint_hash} is not found among the tasks"); last_checkpoint_task_hash = None; last_checkpoint_total = 0; subcount = 0; } else { loop { let popped = tasks.pop_first(); if popped.is_none() { break; } completed_tasks += 1; if popped.unwrap().0 == checkpoint_hash { break; } } println!("found {completed_tasks} completed tasks including the checkpoint"); } } None => {} } } let mut thread_handles = Vec::new(); // spawn the desired number of threads while thread_handles.len() < THREADS { let task = tasks.pop_first(); if task.is_none() { break; } let (_task_hash, task) = task.unwrap(); let handle = thread::spawn(move || count_extensions_subset_outer(task, completed_tasks, overall_start_time)); thread_handles.push(handle); } let mut handle_index_to_replace: Option = None; // replace completed threads with new ones until there are none left while completed_tasks < total_tasks { let mut j: usize = 0; while j < thread_handles.len() { if thread_handles[j].is_finished() { handle_index_to_replace = Some(j); break; } j += 1; } if handle_index_to_replace.is_none() { thread::sleep(n_sleep); continue; } let (task_count, task_hash) = thread_handles.remove(handle_index_to_replace.take().unwrap()).join().unwrap(); completed_task_counts.insert(task_hash, task_count); subcount += task_count; completed_tasks += 1; if SHOW_ALL_TASK_RESULTS { print_w_time(overall_start_time, format!("tasks remaining: [{total_tasks}-{completed_tasks}={}]", total_tasks-completed_tasks)); } // if enough time has passed, see if we can print a new checkpoint if completed_tasks > 0 && last_checkpoint_time.elapsed().as_secs_f32() > CHECKPOINT_SECONDS { last_checkpoint_time = Instant::now(); let mut new_checkpoint_task_hash: Option = None; let mut new_checkpoint_total = last_checkpoint_total; // if there was no previous checkpoint, we need to start at the 0th task let mut slice_lower: usize = 0; // if there was a previous checkpoint, start checking at the next task after that if last_checkpoint_task_hash.is_some() { match ordered_task_hashes.binary_search(&last_checkpoint_task_hash.unwrap()) { Ok(hash_index) => { slice_lower = hash_index + 1 } Err(_) => { println!("somehow the last_checkpoint_task_hash was not found"); } } } // walk through all tasks to find where the next incomplete one is for ordered_task_hash in &ordered_task_hashes[slice_lower .. ordered_task_hashes.len()] { match completed_task_counts.get(&ordered_task_hash) { Some(count) => { new_checkpoint_task_hash = Some(*ordered_task_hash); new_checkpoint_total += count; } None => { break; } } } if new_checkpoint_task_hash.is_some() { if last_checkpoint_task_hash.is_none() || new_checkpoint_task_hash.unwrap() > last_checkpoint_task_hash.unwrap() { last_checkpoint_task_hash = new_checkpoint_task_hash; last_checkpoint_total = new_checkpoint_total; let new_checkpoint_nth_task: usize = ordered_task_hashes.binary_search(&last_checkpoint_task_hash.unwrap()).unwrap() + 1; let new_checkpoint_pct = (new_checkpoint_nth_task as f32 * 100.0) / (total_tasks as f32); print_w_time(overall_start_time, format!("N={N}, FILTER_DEPTH={FILTER_DEPTH} checkpoint at task [{}], {new_checkpoint_nth_task} of {total_tasks} ({:.2}%), cumulative fixed polycubes count={new_checkpoint_total}", last_checkpoint_task_hash.unwrap(), new_checkpoint_pct)); } } } loop { let task = tasks.pop_first(); if task.is_none() { break; } let (_task_hash, task) = task.unwrap(); let handle = thread::spawn(move || count_extensions_subset_outer(task, completed_tasks, overall_start_time)); thread_handles.push(handle); break; } } println!("\n\n{:} polycubes with {} cells (number of polycubes fixed by trivial symmetry)\nTook {}", integer_with_thousands_separator(subcount), N, seconds_to_dur(sw.elapsed().as_secs_f64())); } // rebuild state from ThreadTask, then continue execution with count_extensions_subset_inner() fn count_extensions_subset_outer(task: ThreadTask, task_num: usize, overall_start_time: Instant) -> (u128, u64) { unsafe { let start_time = Instant::now(); let mut byte_board_arr: [u8; BYTE_BOARD_LEN] = task.byte_board; let byte_board = byte_board_arr.as_mut_ptr(); let ref_stack_placeholder = byte_board.offset(PLACEHOLDER_REF_STACK_OFFSET); let mut ref_stack_arr: [*mut u8; REF_STACK_LEN] = [ref_stack_placeholder; REF_STACK_LEN]; // restore the ref stack using offsets for i in 0..REF_STACK_LEN { ref_stack_arr[i] = byte_board.offset(task.ref_stack[i]); } let depth = task.depth; let ref_stack = ref_stack_arr.as_mut_ptr().offset(task.ref_stack_offset); let stack_top_inner = ref_stack.offset(task.stack_top_inner_offset); let stack_top_2 = ref_stack.offset(task.stack_top_2_offset); if SHOW_ALL_TASK_RESULTS { print_w_time(overall_start_time, format!("started count_extensions_subset_outer() with task_number={}, N={N}, FILTER_DEPTH={FILTER_DEPTH}", task_num)); } let count_and_tasks: (u128, BTreeMap) = count_extensions_subset_inner(depth, stack_top_inner, stack_top_2, ref_stack, &byte_board_arr, &ref_stack_arr); if SHOW_ALL_TASK_RESULTS { let time_elapsed = start_time.elapsed().as_secs_f64(); print_w_time(overall_start_time, format!("finished count_extensions_subset_outer() with task_number={}, N={N}, FILTER_DEPTH={FILTER_DEPTH}, took {} with count={}", task_num, seconds_to_dur(time_elapsed), count_and_tasks.0)); } return (count_and_tasks.0, get_task_hash(&task)); } } fn copy_ref_stack_as_offsets(byte_board_arr: &[u8; BYTE_BOARD_LEN], ref_stack_arr: &[*mut u8; REF_STACK_LEN]) -> [isize; REF_STACK_LEN] { let mut ref_stack_copy: [isize; REF_STACK_LEN] = [0; REF_STACK_LEN]; let byte_board = (*byte_board_arr).as_ptr(); unsafe { for i in 0..REF_STACK_LEN { // !!!!! !!!!! !!!!! !!!!! !!!!! !!!!! !!!!! // very odd... need to keep the below junk // instead of just assigning the value, or // else task hashes end up changed ... this // doesn't really affect running time, since // this only runs for the short thread tasks // initializaion, so may as well leave it // !!!!! !!!!! !!!!! !!!!! !!!!! !!!!! !!!!! let mut o = ref_stack_arr[i].offset_from(byte_board); if o < -255 && o != PLACEHOLDER_REF_STACK_OFFSET { println!("have a isize offset for a ref_stack entry (at index {i}) of {o}"); o = PLACEHOLDER_REF_STACK_OFFSET; } ref_stack_copy[i] = o; } } return ref_stack_copy; } fn count_extensions_subset_inner( depth: usize, mut stack_top_1: *mut *mut u8, mut stack_top_2: *mut *mut u8, ref_stack: *mut *mut u8, byte_board_arr: &[u8; BYTE_BOARD_LEN], ref_stack_arr: &[*mut u8; REF_STACK_LEN]) -> (u128, BTreeMap) { unsafe { let mut count: u128 = 0; let mut tasks: BTreeMap = BTreeMap::new(); let stack_top_original = stack_top_1; while stack_top_1 != ref_stack { stack_top_1 = stack_top_1.sub(1); let index = *stack_top_1; let mut stack_top_inner = stack_top_1; let incr = index.sub(X as usize); *incr = (*incr).wrapping_add(1); // thanks to https://stackoverflow.com/a/70776258/259456 if *incr == 0 { *stack_top_inner = index.sub(X as usize); stack_top_inner = stack_top_inner.add(1); } let incr = index.sub(Y as usize); *incr = (*incr).wrapping_add(1); if *incr == 0 { *stack_top_inner = index.sub(Y as usize); stack_top_inner = stack_top_inner.add(1); } let incr = index.sub(Z as usize); *incr = (*incr).wrapping_add(1); if *incr == 0 { *stack_top_inner = index.sub(Z as usize); stack_top_inner = stack_top_inner.add(1); } let incr = index.add(X as usize); *incr = (*incr).wrapping_add(1); if *incr == 0 { *stack_top_inner = index.add(X as usize); stack_top_inner = stack_top_inner.add(1); } let incr = index.add(Y as usize); *incr = (*incr).wrapping_add(1); if *incr == 0 { *stack_top_inner = index.add(Y as usize); stack_top_inner = stack_top_inner.add(1); } let incr = index.add(Z as usize); *incr = (*incr).wrapping_add(1); if *incr == 0 { *stack_top_inner = index.add(Z as usize); stack_top_inner = stack_top_inner.add(1); } if depth == 4 { count += count_extensions_subset_final(stack_top_inner, ref_stack); } else if depth != FILTER_DEPTH { let mut deeper = count_extensions_subset_inner(depth - 1, stack_top_inner, stack_top_2, ref_stack, byte_board_arr, ref_stack_arr); count += deeper.0; tasks.append(&mut deeper.1); // at depth == FILTER_DEPTH, create a copy of the state to be later resumed by a thread } else { let task = ThreadTask { depth: depth - 1, stack_top_inner_offset: stack_top_inner.offset_from(ref_stack), stack_top_2_offset: stack_top_2.offset_from(ref_stack), ref_stack_offset: ref_stack.offset_from(ref_stack_arr.as_ptr()), byte_board: byte_board_arr.clone(), ref_stack: copy_ref_stack_as_offsets(byte_board_arr, ref_stack_arr) }; tasks.insert(get_task_hash(&task), task); } *index.sub(X as usize) = (*index.sub(X as usize)).wrapping_sub(1); *index.sub(Y as usize) = (*index.sub(Y as usize)).wrapping_sub(1); *index.sub(Z as usize) = (*index.sub(Z as usize)).wrapping_sub(1); *index.add(X as usize) = (*index.add(X as usize)).wrapping_sub(1); *index.add(Y as usize) = (*index.add(Y as usize)).wrapping_sub(1); *index.add(Z as usize) = (*index.add(Z as usize)).wrapping_sub(1); stack_top_2 = stack_top_2.sub(1); *stack_top_2 = index; // doing this push before the recursion would add one extra unnecessary element to the stack at each level of recursion } while stack_top_1 != stack_top_original { *stack_top_1 = *stack_top_2; stack_top_1 = stack_top_1.add(1); stack_top_2 = stack_top_2.add(1); } return (count, tasks); } } fn count_extensions_subset_final(mut stack_top: *mut *mut u8, ref_stack: *mut *mut u8) -> u128 { unsafe { let length = stack_top.offset_from(ref_stack) as i128; let mut count: i128 = (length * (length - 1) * (length - 2) / 6).try_into().unwrap(); let mut stack_top_temp = stack_top; while stack_top_temp != ref_stack { stack_top_temp = stack_top_temp.sub(1); let i = *stack_top_temp; let mut neighbours: i128 = 0; let mut subcount: usize = 128; let incr = i.sub(X as usize); if *incr > 127 { neighbours += 1; subcount += *i.sub(X2) as usize + *i.sub(SYX) as usize + *i.sub(SZX) as usize + *i.add(DYX) as usize + *i.add(DZX) as usize; *incr -= 1; count += *incr as i128; } let incr = i.sub(Y as usize); if *incr > 127 { neighbours += 1; subcount += *i.sub(Y2) as usize + *i.sub(SYX) as usize + *i.sub(SZY) as usize + *i.sub(DYX) as usize + *i.add(DZY) as usize; *incr -= 1; count += *incr as i128; } let incr = i.sub(Z as usize); if *incr > 127 { neighbours += 1; subcount += *i.sub(Z2) as usize + *i.sub(SZX) as usize + *i.sub(SZY) as usize + *i.sub(DZX) as usize + *i.sub(DZY) as usize; *incr -= 1; count += *incr as i128; } let incr = i.add(X as usize); if *incr > 127 { neighbours += 1; subcount += *i.add(X2) as usize + *i.add(SYX) as usize + *i.add(SZX) as usize + *i.sub(DYX) as usize + *i.sub(DZX) as usize; *incr -= 1; count += *incr as i128; } let incr = i.add(Y as usize); if *incr > 127 { neighbours += 1; subcount += *i.add(Y2) as usize + *i.add(SYX) as usize + *i.add(SZY) as usize + *i.add(DYX) as usize + *i.sub(DZY) as usize; *incr -= 1; count += *incr as i128; } let incr = i.add(Z as usize); if *incr > 127 { neighbours += 1; subcount += *i.add(Z2) as usize + *i.add(SZX) as usize + *i.add(SZY) as usize + *i.add(DZX) as usize + *i.add(DZY) as usize; *incr -= 1; count += *incr as i128; } // some of these values are sometimes negative count += ((subcount >> 8) as i128) + ((neighbours * (neighbours + (length << 1) - 511)) >> 1); } while stack_top != ref_stack { stack_top = stack_top.sub(1); let i = *stack_top; let incr = i.sub(X as usize); *incr |= *incr >> 4; let incr = i.sub(Y as usize); *incr |= *incr >> 4; let incr = i.sub(Z as usize); *incr |= *incr >> 4; let incr = i.add(X as usize); *incr |= *incr >> 4; let incr = i.add(Y as usize); *incr |= *incr >> 4; let incr = i.add(Z as usize); *incr |= *incr >> 4; } return count as u128; } }