#!/usr/bin/perl eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if $running_under_some_shell; $bin = '/usr/bin'; # $RCSfile: s2p.SH,v $$Revision: 4.0.1.2 $$Date: 92/06/08 17:26:31 $ # # $Log: s2p.SH,v $ # Revision 4.0.1.2 92/06/08 17:26:31 lwall # patch20: s2p didn't output portable startup code # patch20: added ... as variant on .. # patch20: s2p didn't translate s/pat/\&/ or s/pat/\$/ or s/pat/\\1/ right # # Revision 4.0.1.1 91/06/07 12:19:18 lwall # patch4: s2p now handles embedded newlines better and optimizes common idioms # # Revision 4.0 91/03/20 01:57:59 lwall # 4.0 baseline. # # $indent = 4; $shiftwidth = 4; $l = '{'; $r = '}'; while ($ARGV[0] =~ /^-/) { $_ = shift; last if /^--/; if (/^-D/) { $debug++; open(BODY,'>-'); next; } if (/^-n/) { $assumen++; next; } if (/^-p/) { $assumep++; next; } die "I don't recognize this switch: $_\n"; } unless ($debug) { open(BODY,">/tmp/sperl$$") || &Die("Can't open temp file: $!\n"); } if (!$assumen && !$assumep) { print BODY &q(<<'EOT'); : while ($ARGV[0] =~ /^-/) { : $_ = shift; : last if /^--/; : if (/^-n/) { : $nflag++; : next; : } : die "I don't recognize this switch: $_\\n"; : } : EOT } print BODY &q(<<'EOT'); : #ifdef PRINTIT : #ifdef ASSUMEP : $printit++; : #else : $printit++ unless $nflag; : #endif : #endif : <><> : $\ = "\n"; # automatically add newline on print : <><> : #ifdef TOPLABEL : LINE: : while (chop($_ = <>)) { : #else : LINE: : while (<>) { : chop; : #endif EOT LINE: while (<>) { # Wipe out surrounding whitespace. s/[ \t]*(.*)\n$/$1/; # Perhaps it's a label/comment. if (/^:/) { s/^:[ \t]*//; $label = &make_label($_); if ($. == 1) { $toplabel = $label; if (/^(top|(re)?start|redo|begin(ning)|again|input)$/i) { $_ = <>; redo LINE; # Never referenced, so delete it if not a comment. } } $_ = "$label:"; if ($lastlinewaslabel++) { $indent += 4; print BODY &tab, ";\n"; $indent -= 4; } if ($indent >= 2) { $indent -= 2; $indmod = 2; } next; } else { $lastlinewaslabel = ''; } # Look for one or two address clauses $addr1 = ''; $addr2 = ''; if (s/^([0-9]+)//) { $addr1 = "$1"; $addr1 = "\$. == $addr1" unless /^,/; } elsif (s/^\$//) { $addr1 = 'eof()'; } elsif (s|^/||) { $addr1 = &fetchpat('/'); } if (s/^,//) { if (s/^([0-9]+)//) { $addr2 = "$1"; } elsif (s/^\$//) { $addr2 = "eof()"; } elsif (s|^/||) { $addr2 = &fetchpat('/'); } else { &Die("Invalid second address at line $.\n"); } if ($addr2 =~ /^\d+$/) { $addr1 .= "..$addr2"; } else { $addr1 .= "...$addr2"; } } # Now we check for metacommands {, }, and ! and worry # about indentation. s/^[ \t]+//; # a { to keep vi happy if ($_ eq '}') { $indent -= 4; next; } if (s/^!//) { $if = 'unless'; $else = "$r else $l\n"; } else { $if = 'if'; $else = ''; } if (s/^{//) { # a } to keep vi happy $indmod = 4; $redo = $_; $_ = ''; $rmaybe = ''; } else { $rmaybe = "\n$r"; if ($addr2 || $addr1) { $space = ' ' x $shiftwidth; } else { $space = ''; } $_ = &transmogrify(); } # See if we can optimize to modifier form. if ($addr1) { if ($_ !~ /[\n{}]/ && $rmaybe && !$change && $_ !~ / if / && $_ !~ / unless /) { s/;$/ $if $addr1;/; $_ = substr($_,$shiftwidth,1000); } else { $_ = "$if ($addr1) $l\n$change$_$rmaybe"; } $change = ''; next LINE; } } continue { @lines = split(/\n/,$_); for (@lines) { unless (s/^ *<<--//) { print BODY &tab; } print BODY $_, "\n"; } $indent += $indmod; $indmod = 0; if ($redo) { $_ = $redo; $redo = ''; redo LINE; } } if ($lastlinewaslabel++) { $indent += 4; print BODY &tab, ";\n"; $indent -= 4; } if ($appendseen || $tseen || !$assumen) { $printit++ if $dseen || (!$assumen && !$assumep); print BODY &q(<<'EOT'); : #ifdef SAWNEXT : } : continue { : #endif : #ifdef PRINTIT : #ifdef DSEEN : #ifdef ASSUMEP : print if $printit++; : #else : if ($printit) : { print; } : else : { $printit++ unless $nflag; } : #endif : #else : print if $printit; : #endif : #else : print; : #endif : #ifdef TSEEN : $tflag = 0; : #endif : #ifdef APPENDSEEN : if ($atext) { chop $atext; print $atext; $atext = ''; } : #endif EOT print BODY &q(<<'EOT'); : } EOT } close BODY; unless ($debug) { open(HEAD,">/tmp/sperl2$$.c") || &Die("Can't open temp file 2: $!\n"); print HEAD "#define PRINTIT\n" if $printit; print HEAD "#define APPENDSEEN\n" if $appendseen; print HEAD "#define TSEEN\n" if $tseen; print HEAD "#define DSEEN\n" if $dseen; print HEAD "#define ASSUMEN\n" if $assumen; print HEAD "#define ASSUMEP\n" if $assumep; print HEAD "#define TOPLABEL\n" if $toplabel; print HEAD "#define SAWNEXT\n" if $sawnext; if ($opens) {print HEAD "$opens\n";} open(BODY,"/tmp/sperl$$") || &Die("Can't reopen temp file: $!\n"); while () { print HEAD $_; } close HEAD; print &q(<<"EOT"); : #!$bin/perl : eval 'exec $bin/perl -S \$0 \${1+"\$@"}' : if \$running_under_some_shell; : EOT open(BODY,"cc -E /tmp/sperl2$$.c |") || &Die("Can't reopen temp file: $!\n"); while () { /^# [0-9]/ && next; /^[ \t]*$/ && next; s/^<><>//; print; } } &Cleanup; exit; sub Cleanup { chdir "/tmp"; unlink "sperl$$", "sperl2$$", "sperl2$$.c"; } sub Die { &Cleanup; die $_[0]; } sub tab { "\t" x ($indent / 8) . ' ' x ($indent % 8); } sub make_filehandle { local($_) = $_[0]; local($fname) = $_; if (!$seen{$fname}) { $_ = "FH_" . $_ if /^\d/; s/[^a-zA-Z0-9]/_/g; s/^_*//; $_ = "\U$_"; if ($fhseen{$_}) { for ($tmp = "a"; $fhseen{"$_$tmp"}; $a++) {} $_ .= $tmp; } $fhseen{$_} = 1; $opens .= &q(<<"EOT"); : open($_, '>$fname') || die "Can't create $fname: \$!"; EOT $seen{$fname} = $_; } $seen{$fname}; } sub make_label { local($label) = @_; $label =~ s/[^a-zA-Z0-9]/_/g; if ($label =~ /^[0-9_]/) { $label = 'L' . $label; } $label = substr($label,0,8); # Could be a reserved word, so capitalize it. substr($label,0,1) =~ y/a-z/A-Z/ if $label =~ /^[a-z]/; $label; } sub transmogrify { { # case if (/^d/) { $dseen++; chop($_ = &q(<<'EOT')); : <<--#ifdef PRINTIT : $printit = 0; : <<--#endif : next LINE; EOT $sawnext++; next; } if (/^n/) { chop($_ = &q(<<'EOT')); : <<--#ifdef PRINTIT : <<--#ifdef DSEEN : <<--#ifdef ASSUMEP : print if $printit++; : <<--#else : if ($printit) : { print; } : else : { $printit++ unless $nflag; } : <<--#endif : <<--#else : print if $printit; : <<--#endif : <<--#else : print; : <<--#endif : <<--#ifdef APPENDSEEN : if ($atext) {chop $atext; print $atext; $atext = '';} : <<--#endif : $_ = <>; : chop; : <<--#ifdef TSEEN : $tflag = 0; : <<--#endif EOT next; } if (/^a/) { $appendseen++; $command = $space . "\$atext .= <<'End_Of_Text';\n<<--"; $lastline = 0; while (<>) { s/^[ \t]*//; s/^[\\]//; unless (s|\\$||) { $lastline = 1;} s/^([ \t]*\n)/<><>$1/; $command .= $_; $command .= '<<--'; last if $lastline; } $_ = $command . "End_Of_Text"; last; } if (/^[ic]/) { if (/^c/) { $change = 1; } $addr1 = 1 if $addr1 eq ''; $addr1 = '$iter = (' . $addr1 . ')'; $command = $space . " if (\$iter == 1) { print <<'End_Of_Text'; }\n<<--"; $lastline = 0; while (<>) { s/^[ \t]*//; s/^[\\]//; unless (s/\\$//) { $lastline = 1;} s/'/\\'/g; s/^([ \t]*\n)/<><>$1/; $command .= $_; $command .= '<<--'; last if $lastline; } $_ = $command . "End_Of_Text"; if ($change) { $dseen++; $change = "$_\n"; chop($_ = &q(<<"EOT")); : <<--#ifdef PRINTIT : $space\$printit = 0; : <<--#endif : ${space}next LINE; EOT $sawnext++; } last; } if (/^s/) { $delim = substr($_,1,1); $len = length($_); $repl = $end = 0; $inbracket = 0; for ($i = 2; $i < $len; $i++) { $c = substr($_,$i,1); if ($c eq $delim) { if ($inbracket) { substr($_, $i, 0) = '\\'; $i++; $len++; } else { if ($repl) { $end = $i; last; } else { $repl = $i; } } } elsif ($c eq '\\') { $i++; if ($i >= $len) { $_ .= 'n'; $_ .= <>; $len = length($_); $_ = substr($_,0,--$len); } elsif (substr($_,$i,1) =~ /^[n]$/) { ; } elsif (!$repl && substr($_,$i,1) =~ /^[(){}\w]$/) { $i--; $len--; substr($_, $i, 1) = ''; } elsif (!$repl && substr($_,$i,1) =~ /^[<>]$/) { substr($_,$i,1) = 'b'; } elsif ($repl && substr($_,$i,1) =~ /^\d$/) { substr($_,$i-1,1) = '$'; } } elsif ($c eq '&' && $repl) { substr($_, $i, 0) = '$'; $i++; $len++; } elsif ($c eq '$' && $repl) { substr($_, $i, 0) = '\\'; $i++; $len++; } elsif ($c eq '[' && !$repl) { $i++ if substr($_,$i,1) eq '^'; $i++ if substr($_,$i,1) eq ']'; $inbracket = 1; } elsif ($c eq ']') { $inbracket = 0; } elsif ($c eq "\t") { substr($_, $i, 1) = '\\t'; $i++; $len++; } elsif (!$repl && index("()+",$c) >= 0) { substr($_, $i, 0) = '\\'; $i++; $len++; } } &Die("Malformed substitution at line $.\n") unless $end; $pat = substr($_, 0, $repl + 1); $repl = substr($_, $repl+1, $end-$repl-1); $end = substr($_, $end + 1, 1000); &simplify($pat); $dol = '$'; $subst = "$pat$repl$delim"; $cmd = ''; while ($end) { if ($end =~ s/^g//) { $subst .= 'g'; next; } if ($end =~ s/^p//) { $cmd .= ' && (print)'; next; } if ($end =~ s/^w[ \t]*//) { $fh = &make_filehandle($end); $cmd .= " && (print $fh \$_)"; $end = ''; next; } &Die("Unrecognized substitution command". "($end) at line $.\n"); } chop ($_ = &q(<<"EOT")); : <<--#ifdef TSEEN : $subst && \$tflag++$cmd; : <<--#else : $subst$cmd; : <<--#endif EOT next; } if (/^p/) { $_ = 'print;'; next; } if (/^w/) { s/^w[ \t]*//; $fh = &make_filehandle($_); $_ = "print $fh \$_;"; next; } if (/^r/) { $appendseen++; s/^r[ \t]*//; $file = $_; $_ = "\$atext .= `cat $file 2>/dev/null`;"; next; } if (/^P/) { $_ = 'print $1 if /^(.*)/;'; next; } if (/^D/) { chop($_ = &q(<<'EOT')); : s/^.*\n?//; : redo LINE if $_; : next LINE; EOT $sawnext++; next; } if (/^N/) { chop($_ = &q(<<'EOT')); : $_ .= "\n"; : $len1 = length; : $_ .= <>; : chop if $len1 < length; : <<--#ifdef TSEEN : $tflag = 0; : <<--#endif EOT next; } if (/^h/) { $_ = '$hold = $_;'; next; } if (/^H/) { $_ = '$hold .= "\n"; $hold .= $_;'; next; } if (/^g/) { $_ = '$_ = $hold;'; next; } if (/^G/) { $_ = '$_ .= "\n"; $_ .= $hold;'; next; } if (/^x/) { $_ = '($_, $hold) = ($hold, $_);'; next; } if (/^b$/) { $_ = 'next LINE;'; $sawnext++; next; } if (/^b/) { s/^b[ \t]*//; $lab = &make_label($_); if ($lab eq $toplabel) { $_ = 'redo LINE;'; } else { $_ = "goto $lab;"; } next; } if (/^t$/) { $_ = 'next LINE if $tflag;'; $sawnext++; $tseen++; next; } if (/^t/) { s/^t[ \t]*//; $lab = &make_label($_); $_ = q/if ($tflag) {$tflag = 0; /; if ($lab eq $toplabel) { $_ .= 'redo LINE;}'; } else { $_ .= "goto $lab;}"; } $tseen++; next; } if (/^y/) { s/abcdefghijklmnopqrstuvwxyz/a-z/g; s/ABCDEFGHIJKLMNOPQRSTUVWXYZ/A-Z/g; s/abcdef/a-f/g; s/ABCDEF/A-F/g; s/0123456789/0-9/g; s/01234567/0-7/g; $_ .= ';'; } if (/^=/) { $_ = 'print $.;'; next; } if (/^q/) { chop($_ = &q(<<'EOT')); : close(ARGV); : @ARGV = (); : next LINE; EOT $sawnext++; next; } } continue { if ($space) { s/^/$space/; s/(\n)(.)/$1$space$2/g; } last; } $_; } sub fetchpat { local($outer) = @_; local($addr) = $outer; local($inbracket); local($prefix,$delim,$ch); # Process pattern one potential delimiter at a time. DELIM: while (s#^([^\]+(|)[\\/]*)([]+(|)[\\/])##) { $prefix = $1; $delim = $2; if ($delim eq '\\') { s/(.)//; $ch = $1; $delim = '' if $ch =~ /^[(){}A-Za-mo-z]$/; $ch = 'b' if $ch =~ /^[<>]$/; $delim .= $ch; } elsif ($delim eq '[') { $inbracket = 1; s/^\^// && ($delim .= '^'); s/^]// && ($delim .= ']'); } elsif ($delim eq ']') { $inbracket = 0; } elsif ($inbracket || $delim ne $outer) { $delim = '\\' . $delim; } $addr .= $prefix; $addr .= $delim; if ($delim eq $outer && !$inbracket) { last DELIM; } } $addr =~ s/\t/\\t/g; &simplify($addr); $addr; } sub q { local($string) = @_; local($*) = 1; $string =~ s/^:\t?//g; $string; } sub simplify { $_[0] =~ s/_a-za-z0-9/\\w/ig; $_[0] =~ s/a-z_a-z0-9/\\w/ig; $_[0] =~ s/a-za-z_0-9/\\w/ig; $_[0] =~ s/a-za-z0-9_/\\w/ig; $_[0] =~ s/_0-9a-za-z/\\w/ig; $_[0] =~ s/0-9_a-za-z/\\w/ig; $_[0] =~ s/0-9a-z_a-z/\\w/ig; $_[0] =~ s/0-9a-za-z_/\\w/ig; $_[0] =~ s/\[\\w\]/\\w/g; $_[0] =~ s/\[^\\w\]/\\W/g; $_[0] =~ s/\[0-9\]/\\d/g; $_[0] =~ s/\[^0-9\]/\\D/g; $_[0] =~ s/\\d\\d\*/\\d+/g; $_[0] =~ s/\\D\\D\*/\\D+/g; $_[0] =~ s/\\w\\w\*/\\w+/g; $_[0] =~ s/\\t\\t\*/\\t+/g; $_[0] =~ s/(\[.[^]]*\])\1\*/$1+/g; $_[0] =~ s/([\w\s!@#%^&-=,:;'"])\1\*/$1+/g; }