taco@elvenkind.com wrote:
Hi Wim,
Let me know if you need perl help, ok?
Taco
Hi Taco, I am doing regular expression (regex) and I am trying to process my Latex files (a lot) to automate the translation to Context primitives. I am partially successfully but it is a complex job, so some of the Latex/Tex primitives I convert by hand. But thanks for your kind offer. Attached is my perl script, but don't expect a lot from it or clean or optimized code, I wrote it in 2 hours. Feel free to share it with anyone, if you think others may benefit. Kind regards Wim

#
# Author  : Wim Neimeijer, The Netherlands
#
# File    : convert.pl
#
# Date    : 25 March 2006
#
# Purpose : Process a Latex script and try to convert
#         : my Latex commands into Context ones.
#
# Coding  : no standards
#
# Level   : N/A
#
# Warning : Use at your own risk, certainly not foolproof
#         : and quite minimal in its workings.
#
#
#!/usr/bin/perl
#
# Usage   : convert.pl [pattern ...]
#         : Every argument is expanded with glob(); without arguments
#         : "*.tex" is used.  The converted text of all files is written
#         : to STDOUT.  A small debug log of the \supacc rewrites goes to
#         : "tmp.log" in the current directory.

use strict;
use warnings;
use File::Basename;    # kept from the original script; no longer needed below

# ConTeXt lines that open / close the replacement for a LaTeX math
# environment.  The starred and unstarred variants share the same lines.
my %FORMULA_OPENERS = (
    equation => ['\\startformula'],
    eqnarray => [ '\\startformula', '\\startalign[n=3]' ],
    align    => [ '\\startformula', '\\startalign[n=3]' ],
);
my %FORMULA_CLOSERS = (
    equation => ['\\stopformula'],
    eqnarray => [ '\\stopalign', '\\stopformula' ],
    align    => [ '\\stopalign', '\\stopformula' ],
);

# Lines starting with any of these are not translated; they are kept in the
# output but commented out with a leading "%".
my $COMMENT_OUT_RE = qr/
    ^ (?: \\begin\{
        | (?i: \\end )
        | \\renewcommand
        | \\label\{
        | \\centering
        | \\rput
        | \\caption
        | \\ps
        | [{}]
      )
/x;

# Return the label name when $text is a line starting with a complete
# "\label{name}", otherwise undef (also for an undefined $text, i.e. when
# there is no following line).
sub label_of {
    my ($text) = @_;
    if ( defined $text && $text =~ /^\\label\{([^}]*)\}/ ) {
        return $1;
    }
    return;
}

# Return the text between the first "{" of $text and the next "}" after it.
# Returns the empty string when $text is undefined or has no such pair, so
# the caller can interpolate the result unconditionally.
sub first_braced {
    my ($text) = @_;
    if ( defined $text && $text =~ /\{([^}]*)\}/ ) {
        return $1;
    }
    return '';
}

# Translate a list of LaTeX source lines into ConTeXt lines.
#
#   $lines  - array reference of chomped input lines (not modified)
#   $log_fh - optional file handle; the before/after text of every line
#             handled by the \supacc rule is written to it
#
# Returns the list of output lines (without trailing newlines).  One input
# line may yield zero, one or several output lines, and some rules consume
# the lines that follow the current one.
sub convert_lines {
    my ( $lines, $log_fh ) = @_;

    my @out;
    my $i = 0;
    while ( $i < @{$lines} ) {
        my $line     = $lines->[$i];
        my $next     = $lines->[ $i + 1 ];    # undef on the last line
        my $consumed = 1;                     # input lines used by this rule

        if ( $line =~ /^\\(chapter|section)\{/ ) {
            # \chapter{..} / \section{..} become \chapter[ref]{..}; the
            # reference comes from a \label{..} on the next line, which is
            # then swallowed.  Without a label the brackets stay empty.
            my $command = $1;
            my $label   = label_of($next);
            $consumed = 2 if defined $label;
            my $opening = '\\' . $command . '['
                . ( defined $label ? $label : '' ) . ']{';
            ( my $converted = $line ) =~ s/\\\Q$command\E\{/$opening/;
            push @out, $converted;
        }
        elsif ( $line =~ /^\\begin\{(equation|eqnarray|align)\}/ ) {
            # Numbered math: \placeformula[ref] (keeping whatever followed
            # \begin{..} on the line), then the opening ConTeXt lines.
            my $env   = $1;
            my $label = label_of($next);
            $consumed = 2 if defined $label;
            my $placeformula = '\\placeformula['
                . ( defined $label ? $label : '' ) . ']';
            ( my $converted = $line ) =~ s/^\\begin\{\Q$env\E\}/$placeformula/;
            push @out, $converted, @{ $FORMULA_OPENERS{$env} };
        }
        elsif ( $line =~ /^\\begin\{(equation|eqnarray|align)\*\}/ ) {
            # Unnumbered math: only the opening lines; the rest of the
            # input line is discarded.
            push @out, @{ $FORMULA_OPENERS{$1} };
        }
        elsif ( $line =~ /^\\end\{(equation|eqnarray|align)\*?\}/ ) {
            push @out, @{ $FORMULA_CLOSERS{$1} };
        }
        elsif ( $line =~ /^\\begin\{figure\}/ ) {
            # Only figures of exactly five lines are translated:
            #   \begin{figure} / <any> / <line A> / <line B> / \end{figure}
            # The first braced group of line B and of line A become the
            # 2nd and 3rd argument of \myfigure (presumably the \caption
            # and \includegraphics lines -- the script does not check).
            my $closing = $lines->[ $i + 4 ];
            if ( defined $closing && $closing =~ /^\\end\{figure\}/ ) {
                my $second_arg = first_braced( $lines->[ $i + 3 ] );
                my $third_arg  = first_braced( $lines->[ $i + 2 ] );
                push @out,
                    '\\myfigure{}{' . $second_arg . '}{' . $third_arg . '}';
                $consumed = 5;
            }
            else {
                # Any other layout is left for manual conversion, like the
                # other untranslated \begin{..} lines.
                push @out, '%' . $line;
            }
        }
        elsif ( $line =~ /\\begin\{enumerate\}/ ) {
            push @out, '\\startitemize[n][stopper=)]';
        }
        elsif ( $line =~ /\\end\{enumerate\}/ ) {
            push @out, '\\stopitemize';
        }
        elsif ( $line =~ /\\begin\{itemize\}/ ) {
            push @out, '\\startitemize[a]';
        }
        elsif ( $line =~ /\\end\{itemize\}/ ) {
            push @out, '\\stopitemize';
        }
        elsif ( $line =~ /\\ref\{.*?eqn.*?\}/ ) {
            # Every \ref{..eqn..} becomes \informula[..eqn..]; other \ref's
            # on the line are left alone.
            ( my $converted = $line )
                =~ s/\\ref\{([^}]*eqn[^}]*)\}/\\informula[$1]/g;
            push @out, $converted;
        }
        elsif ( $line =~ /supacc/ ) {
            # \supacc{x} becomes x{}'
            print {$log_fh} "original line = $line\n" if defined $log_fh;
            ( my $converted = $line ) =~ s/\\supacc\{(.+?)\}/$1\{\}'/g;
            print {$log_fh} "changed line = $converted\n" if defined $log_fh;
            push @out, $converted;
        }
        elsif ( $line =~ $COMMENT_OUT_RE ) {
            push @out, '%' . $line;
        }
        else {
            push @out, $line;
        }

        $i += $consumed;
    }
    return @out;
}

# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

my @patterns  = @ARGV ? @ARGV : ('*.tex');
my @tex_files = map { glob($_) } @patterns;

for my $source_file (@tex_files) {
    open( my $source_fh, '<', $source_file )
        or die "$0 - cannot open $source_file: $!";
    my @source_lines = <$source_fh>;
    close($source_fh);

    # NOTE: the log is reopened (and therefore truncated) for every input
    # file, so after a run it only describes the last file processed.
    my $log_file = 'tmp.log';
    open( my $log_fh, '>', $log_file )
        or die "$0 - cannot open $log_file: $!";

    # Remove comment lines first, then strip the line endings.
    my @lines = grep { !/^%/ } @source_lines;
    chomp @lines;

    # NOTE: empty lines are not printed, so blank lines of the input do
    # not appear in the output either.
    for my $converted ( convert_lines( \@lines, $log_fh ) ) {
        next if $converted eq '';
        print $converted . "\n";
    }

    close($log_fh) or die "$0 - cannot close $log_file: $!";
}