#!/usr/bin/perl -w # Create b-file for A257291. # Kevin Ryde, June 2020. # # Usage: perl a257291-bfile.pl 1000 # or perl a257291-bfile.pl foma 10000 # # Input file: b000040.txt # Output file: b257291.txt # When foma method, temp files: b257291-tempfile.foma # b257291-tempfile.out # # Prime numbers (in decimal) are read from b000040.txt as from A000040. # A000040 includes a bigger a-file too if want to go further. # Any equivalent source of primes would suit. # # There are two methods: some naive pure perl, or generate a script for foma # and parse its output. Command line option "perl" or "foma" chooses the # method, and then a number is the maximum n to go up to (inclusive). use 5.006; use strict; use warnings; use autodie ':all'; # including die on failed system() $|=1; # uncomment this to run the ### lines # use Smart::Comments; my $want_num_primes = 500; my $primes_filename = 'b000040.txt'; my $output_bfile_filename = 'b257291.txt'; my $foma_script_filename = 'b257291-tempfile.foma'; my $foma_out_filename = 'b257291-tempfile.out'; my $foma_program = 'foma'; my $method; foreach my $arg (@ARGV) { if ($arg =~ /^[0-9]+$/) { # number $want_num_primes = $arg; } else { # method if (defined $method) { die "duplicate method choice: $method and $arg"; } $method = $arg; } } if (! defined $method) { $method = 'perl'; } print "Create $output_bfile_filename n=1 to n=$want_num_primes using $method ...\n"; my $start_time = time(); { open my $primes, '<', $primes_filename; END { close $primes; } sub nextprime { while (defined(my $line = readline $primes)) { next if $line =~ /^\s*(#|\s*$)/; # comments and blanks $line =~ /^(\d+)\s+(\d+)/ or die "oops, unrecognised line from $primes_filename: \"$line\""; my $n = $1; my $prime = $2; return ($n, $prime); } return (); # end of file } } open my $bfile, '>', $output_bfile_filename; if ($method eq 'perl') { # Perl Method # ----------- # # count_states_by_arrays($aref) takes an arrayref $aref of strings and # returns the number of states for a complete DFA to match these strings. # # A set of strings is a state in the DFA. # $seen{$key} >= 1 means the set of strings of $key has been seen. # $key is the strings joined with commas like # # "0,1,01,11" # 4 strings # ",1,11,001,011" # 5 strings including the empty string # "" # empty string only # # The empty string, if present, is first. If it's the only string in # $aref then $key is the empty string. The set of no strings is not # entered in %seen so no need to distinguish that from one empty string. # # At a set of strings, those strings starting with 0 are taken and the 0 # is removed. Similarly those strings starting 1. The initial strings in # $aref are in ascending numerical order so that the set of suffix strings # resulting from one prefix is in the same order as the suffix strings # from a different prefix. This order is by ascending length, and among # equal lengths by ascending lexicographic. # # This approach is naive, but adequate easily up to about n=1000, and more # if you don't mind waiting. sub count_states_by_arrays { my @pending = @_; my %seen; # excludes empty set and the initial all-strings set while (@pending) { ### %seen ### @pending my %new_arrays; # key = 0 or 1 stripped; value = new arrayref foreach my $str (@{pop @pending}) { if (length($str)) { push @{$new_arrays{substr($str,0,1)}}, substr($str,1); } } foreach my $aref (values %new_arrays) { my $key = join(',', @$aref); ### $key unless ($seen{$key}++) { push @pending, $aref; } } } ### final: %seen return scalar(keys %seen) + 2; } my @array; # of primes as strings in binary while (my ($n,$prime) = nextprime()) { last if $n > $want_num_primes; push @array, sprintf "%b", $prime; my $num_states = count_states_by_arrays(\@array); print $bfile "$n $num_states\n"; } } elsif ($method eq 'foma') { # Foma Method # ----------- # # A big foma script file b257291-tempfile.foma is created. Bit strings # for the primes use letters "a" and "b" instead of bits 0 and 1 since # foma's input syntax uses 0 for the empty regex. The script maintains a # current state machine matching all bit strings up to prime(n), then # unions in the next string prime(n+1), and so on. # # The state counts in A257291 are a "complete" DFA, meaning every state # has a destination for both bits 0 and 1. This includes a "non-accepting # sink" state where anything not a prefix of one of the relevant primes # goes. Foma's minimization normally omits a non-accepting sink but # command "complete net" adds it back. # # The output from foma is parsed to get its state counts. An "echo" from # the script indicates what "n" we're up-to in the output. This parse # will be highly dependent on foma's output format. The code was created # for foma version 0.9.18. # # Foma is quite fast so a run for a 10000 primes b-file should take # somewhere around 1 minute even on a nowadays modest CPU. A run for # 100000 primes (using the a-file from A000040 for primes say) is feasible # in a few hours run. # # Foma includes a set-of-strings -> DFA constructor (command "read text"). # That would suit the state count for a particular isolated n, but going # incrementally is much faster for successive values. { open my $foma_script, '>', $foma_script_filename; my @bit_to_letter = ('a','b'); # letters a,b for bits 0,1 while (my ($n,$prime) = nextprime()) { last if $n > $want_num_primes; my $re = sprintf '%b', $prime; # in binary $re =~ s/./ $bit_to_letter[$&]/g; # as letters a,b and space between print $foma_script "regex $re;\n"; if ($n > 1) { print $foma_script "union net\n"; } print $foma_script "complete net\n"; print $foma_script "echo Prime $n, decimal $prime = string $re\n"; print $foma_script "echo\n"; print $foma_script "\n"; } close $foma_script; } { my $command = "$foma_program -f $foma_script_filename >$foma_out_filename"; print "Run: $command\n"; system $command; } { my $num_states = 0; open my $from_foma, '<', $foma_out_filename; while (defined(my $line = readline $from_foma)) { # foma size output line like: # 2.4 kB. 47 states, 141 arcs, Cyclic. if ($line =~ /(\d+) states/) { $num_states = $1; # echo'ed line in the script, after each completed state machine } elsif ($line =~ /Prime (\d+)/) { my $n = $1; defined $num_states or die "oops, unrecognised foma output"; print $bfile "$n $num_states\n"; undef $num_states; } } close $from_foma; } } else { die "unrecognised method: \"$method\""; } close $bfile; my $elapsed_time = time() - $start_time; print "Elapsed time $elapsed_time seconds, $output_bfile_filename size ", -s $output_bfile_filename," bytes\n"; exit 0