#!/usr/local/bin/perl
'di';
'ig00';
# This variable can be set manually or by the installation script
# to point to the DIRECTORY where the latex2html files can be found.

$LATEX2HTMLDIR='/usr/local/latex2html-95.1';# Inserted by installation script

# LaTeX2HTML by Nikos Drakos
#
# ****************************************************************
# LaTeX To HTML Translation **************************************
# ****************************************************************
# LaTeX2HTML is a Perl program that translates LaTeX source
# files into HTML (HyperText Markup Language). For each source
# file given as an argument the translator will create a
# directory containing the corresponding HTML files.
#
# The man page for this program is included at the end of this file
# and can be viewed using
#     %nroff -man latex2html
#
# For more information on this program and some examples of its
# capabilities see
#     http://cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/latex2html.html
#
# Written by Nikos Drakos, July 1993.
#
# Address: Computer Based Learning Unit
#          University of Leeds
#          Leeds, LS2 9JT
#
# Copyright (c) 1993. All rights reserved.
#
# See general license below.
#
# ****************************************************************
# General License Agreement and Lack of Warranty *****************
# ****************************************************************
#
# This software is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY. The author(s) do not accept responsibility
# to anyone for the consequences of using it or for whether it serves
# any particular purpose or works at all. No warranty is made about
# the software or its performance.
#
# Use and copying of this software and the preparation of derivative
# works based on this software are permitted, so long as the following
# conditions are met:
#   o  The copyright notice and this entire notice are included intact
#      and prominently carried on all copies and supporting documentation.
#   o  No fees or compensation are charged for use, copies, or
#      access to this software. You may charge a nominal
#      distribution fee for the physical act of transferring a
#      copy, but you may not charge for the program itself.
#   o  If you modify this software, you must cause the modified
#      file(s) to carry prominent notices (a Change Log)
#      describing the changes, who made the changes, and the date
#      of those changes.
#   o  Any work distributed or published that in whole or in part
#      contains or is a derivative of this software or any part
#      thereof is subject to the terms of this agreement. The
#      aggregation of another unrelated program with this software
#      or its derivative on a volume of storage or distribution
#      medium does not bring the other program under the scope
#      of these terms.
#
# This software is made available AS IS, and is distributed without
# warranty of any kind, either expressed or implied.
#
# In no event will the author(s) or their institutions be liable to you
# for damages, including lost profits, lost monies, or other special,
# incidental or consequential damages arising out of or in connection
# with the use or inability to use (including but not limited to loss of
# data or data being rendered inaccurate or losses sustained by third
# parties or a failure of the program to operate as documented) the
# program, even if you have been advised of the possibility of such
# damages, or for any claim by any other party, whether in an action of
# contract, negligence, or other tortious action.
#
# Please send bug reports, comments, questions and suggestions to
# nikos@cbl.leeds.ac.uk. We would also appreciate receiving any changes
# or improvements you may make.
#
############################# System Parameters ##########################
#
# The $TEXEXPAND, $LATEX, and $DVIPS variables
# should be set in $HOME/.latex2html-init

$TEXEXPAND = 'texexpand';
$LATEX = 'latex';
$DVIPS = 'dvips';

$TEX2HTMLVERSION = '95.1 (Fri Jan 20 1995)';
$TEX2HTMLADDRESS = "http://cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/latex2html.html";
$AUTHORADDRESS = "http://cbl.leeds.ac.uk/nikos/personal.html";

# Allow init files in the user's home directory to be found by require.
push(@INC,$ENV{'HOME'});

$| = 1; # flush stdout with every print -- gives better feedback during
        # long computations

######################### Command Line Argument Defaults ########################

# Destination directory
$DESTDIR = '.';

# Determines the level at which it will stop splitting sections
# into individual files (e.g. $MAX_SPLIT_DEPTH = 0 will generate a single
# HTML document, $MAX_SPLIT_DEPTH = 1 will split individual parts, etc.)
$MAX_SPLIT_DEPTH = 8;

# Determines how much of the document structure to provide links to, from
# each node. A value of 0 will show NO links to child nodes, a value of
# 1 will show only the immediate child nodes (as in Texinfo), etc. A value
# at least as big as $MAX_SPLIT_DEPTH will produce a table of contents for
# the tree structure rooted at any given node.
$MAX_LINK_DEPTH = 4;

# Determines whether LaTeX should be invoked to process unknown environments.
# It is useful to set it to 1 in order to get a quick draft of the basic
# structure of a document without any fancy figures, tables etc.
$NOLATEX = 0;

# Determines whether any generated images will be inlined or not.
$EXTERNAL_IMAGES = 0;

# If this flag is set, external postscript images are used rather than GIF's.
$PS_IMAGES = 0;

# Determines whether the navigation and cross-reference marks are iconic
# or textual. In ascii mode the output of the translator can be used on
# character based browsers which do not support inlined images
# (the <IMG> tag).
# Setting this variable also sets the $EXTERNAL_IMAGES.
$ASCII_MODE = 0;

# The document title.
$default_title = 'No Title';
$TITLE = $default_title;

# Determines whether to include navigation links.
$NO_NAVIGATION = 0;

# Determines whether the navigation links should be at the top or the bottom
# of each page. The default is at the top.
$TOP_NAVIGATION = 1;

# Requests navigation links at the bottom of each page. Off by default
# (the default position is the top).
$BOTTOM_NAVIGATION = 0;

# Puts navigation links at the top of each page. If the page has more than
# $WORDS_IN_PAGE it also puts one at the bottom.
$AUTO_NAVIGATION = 1;
$WORDS_IN_PAGE = 200;

# Puts a link to the index in the navigation panel if there is one.
$INDEX_IN_NAVIGATION = 1;

# Puts a link to the table of contents in the navigation panel if there is one.
$CONTENTS_IN_NAVIGATION = 1;

# Puts a link to the next logical page in the navigation panel if there is one.
# Next page will visit any subsections instead of the next section i.e.
# it allows a depth first visiting order.
$NEXT_PAGE_IN_NAVIGATION = 1;

# Puts a link to the previous logical page in the navigation panel if there is one.
$PREVIOUS_PAGE_IN_NAVIGATION = 1;

# Information page
# This causes a new part to be added to the document with information about
# the source file, the translator etc.
$INFO = 1;

# Separator between the body of text in a page and the child links
# NOTE(review): this literal currently holds only newlines; it looks as if
# HTML markup was stripped from it at some point -- confirm against a
# pristine copy of the distribution before changing it.
$CHILDLINE = "

\n";

# Names of style-files and tex inputs that the translator should *not* look at
# (because they contain macrology which is too complex for it to handle).
# Commands defined by these styles and used in environments which don't
# go to tex (i.e., outside of math, figures, tables, etc.) need to be
# handled in some other way --- preferably by means of a &process_to_bitmap
# routine analogous to &ignore (n.b. &process_to_bitmap doesn't exist yet).
# For instance, the \psfig command defined by either of the following style
# files is already special-cased below.
$DONT_INCLUDE = "";

# Image recycling
$REUSE = 0;  # If 1 it causes images to be reused
             # and switches off the interactive session

# When this is 1, the section numbers are shown. The section numbers should
# then match those that would have been produced by LaTeX.
# The correct section numbers are obtained from the $FILE.aux file generated
# by LaTeX.
# Hiding the section numbers encourages
# use of particular sections as standalone documents. In this case the
# cross reference to a section is shown using the default symbol rather
# than the section number.
$SHOW_SECTION_NUMBERS = 0;

# This is the line width measured in pixels and it is used to right justify
# equations and equation arrays;
$LINE_WIDTH = 450;

# This number will determine the size of the equations, special characters,
# and anything which will be converted into an inlined image
# *except* "image generating environments" such as "figure", "table"
# or "minipage".
# Effective values are those greater than 0.
# Sensible values are between 0.1 - 4.
$MATH_SCALE_FACTOR = 1.6;

# This number will determine the size of
# image generating environments such as "figure", "table" or "minipage".
# Effective values are those greater than 0.
# Sensible values are between 0.1 - 4.
$FIGURE_SCALE_FACTOR = 1.6;

# When $NO_IMAGES is set LaTeX2HTML will not attempt to produce any inlined images
# The missing images can be generated "off-line" by restarting LaTeX2HTML
# after setting $IMAGES_ONLY (see below);
$NO_IMAGES = 0;

# When $IMAGES_ONLY is set, LaTeX2HTML will only try to convert the inlined images
# in the file "images.tex" which should have been generated automatically during
# previous runs. This is very useful for correcting "bad LaTeX" in this file.
# Warning: With Mosaic you will need to "Flush Image Cache" before you can see
# the newly generated images. To do that select the appropriate option in the
# "Options" drop down menu bar.
$IMAGES_ONLY = 0;

# When this is set, the generated HTML files will be placed in the
# current directory. If set to 0 the default behaviour is to create (or reuse)
# another file directory.
$NO_SUBDIR = 0;

# This affects ONLY the way accents are processed
$default_language = 'english';

# No arguments!!
(&usage && die "No files to process!\n") unless @ARGV;

# Set $HOME to the environment variable just in case someone tries to use it!
$HOME = $ENV{'HOME'};

# Author address
@address_data = &address_data;

# Trap errors if getcwd.pl is not found in the library
eval "require 'getcwd.pl'";

# Define GETCWD if we can't find it in the Perl library.
# The output of `pwd` ends in a newline; it is stripped here so that the
# result can be compared with $ENV{'HOME'} and passed to chdir.
eval 'sub getcwd { local($dir); $dir = `pwd`; $dir =~ s/\n+$//; $dir }'
    unless (defined &getcwd);

# Read latex2html.config
require("$LATEX2HTMLDIR/latex2html.config") if
    ((-f "$LATEX2HTMLDIR/latex2html.config") ||
     die "LaTeX2HTML has not been installed correctly:".
         "\nCould not find file $LATEX2HTMLDIR/latex2html.config\n");

# Read .latex2html-init file if one is found
require("$ENV{'HOME'}/.latex2html-init") if (-f "$ENV{'HOME'}/.latex2html-init");

# Read .latex2html-init file if one is found in current directory
# (skipped when the current directory is $HOME, which was handled above)
require("./.latex2html-init") if
    ( (! (&getcwd eq $ENV{'HOME'} )) &&
      (-f "./.latex2html-init"));

$ADDRESS = "$address_data[0]\n$address_data[1]" unless $ADDRESS;

# Process switches
$argv = join(' ',@ARGV); # Save the command line arguments
while ($ARGV[0] =~ /^-/) {
    $_ = shift;
    if (/^-split$/) {
        $_ = shift;
        ((($MAX_SPLIT_DEPTH) = /^(\d+)$/) ||
         print("Unrecognised value for -split: $_\n") && &usage && die);
    }
    elsif (/^-link$/) {
        $_ = shift;
        ((($MAX_LINK_DEPTH) = /^(\d+)$/) ||
         print("Unrecognised value for -link: $_\n") && &usage && die);
    }
    elsif (/^-nolatex$/) {
        $NOLATEX = 1;
    }
    elsif (/^-external_images$/) {
        $EXTERNAL_IMAGES = 1;
    }
    elsif (/^-ascii_mode$/) {
        $ASCII_MODE = 1;
        $EXTERNAL_IMAGES = 1;
    }
    elsif (/^-ps_images$/) {
        $PS_IMAGES = 1;
        $EXTERNAL_IMAGES = 1;
    }
    elsif (/^-no_navigation$/) {
        $NO_NAVIGATION = 1;
    }
    elsif (/^-top_navigation$/) {
        $TOP_NAVIGATION = 1;
    }
    elsif (/^-bottom_navigation$/) {
        $BOTTOM_NAVIGATION = 1;
    }
    elsif (/^-auto_navigation$/) {
        $AUTO_NAVIGATION = 1;
    }
    elsif (/^-index_in_navigation$/) {
        $INDEX_IN_NAVIGATION = 1;
    }
    elsif (/^-contents_in_navigation$/) {
        $CONTENTS_IN_NAVIGATION = 1;
    }
    elsif (/^-next_page_in_navigation$/) {
        $NEXT_PAGE_IN_NAVIGATION = 1;
    }
    elsif (/^-previous_page_in_navigation$/) {
        $PREVIOUS_PAGE_IN_NAVIGATION = 1;
    }
    elsif (/^-t$/) {
        $_ = shift;
        ((($TITLE) = /^(.+)$/) ||
         print("No title for -t? $_\n") && &usage && die);
    }
    elsif (/^-dir$/) {
        $_ = shift;
        ((($DESTDIR) = /^(.+)$/) ||
         print("No directory for -d? $_\n") && &usage && die);
        ($DESTDIR) = &get_full_path($DESTDIR);
    }
    elsif (/^-address$/) {
        $ADDRESS = shift;
    }
    elsif (/^-no_subdir$/) {
        $NO_SUBDIR = 1;
    }
    elsif (/^-info$/) {
        $_ = shift;
        ((($INFO) = /^(.+)$/) ||
         print("No string for -info: Will not generate information page.\n") );
    }
    elsif (/^-dont_include/) {
        $DONT_INCLUDE .= ':' . shift;
    }
    elsif (/^-reuse/) {
        $REUSE = 1;
    }
    elsif (/^-no_reuse/) {
        $REUSE = 0;
    }
    elsif (/^-no_images/) {
        $NO_IMAGES = 1;
    }
    elsif (/^-images_only/) {
        $IMAGES_ONLY = 1;
    }
    elsif (/^-show_section_numbers/) {
        $SHOW_SECTION_NUMBERS = 1;
    }
    elsif (/^-init_file/) {
        $init_file = shift;
        require($init_file) if (-f $init_file);
    }
    elsif (/^-h(elp)?$/) {
        &usage;
    }
    else {
        &usage;
        die "Unrecognised switch: $_\n";
    }
}

# A split depth of 0 means a single HTML document: no navigation panels,
# no separate footnote node, no splitting.
$NO_NAVIGATION = 1 unless $MAX_SPLIT_DEPTH;
$NO_FOOTNODE = 1 unless $MAX_SPLIT_DEPTH;
$NO_SPLIT = 1 unless $MAX_SPLIT_DEPTH;

# The mapping from numbers to accents.
# These are required to process the \accent command, which is found in
# tables of contents whenever there is an accented character in a
# caption or section title.  Processing the \accent command makes
# $encoded_*_number work properly (see &extract_captions) with
# captions that contain accented characters.
# I got the numbers from the plain.tex file, version 3.141.

# Missing entries should be looked up by a native speaker.
# Have a look at generate_accent_commands and $iso_latin1_character_map.

%accent_type = (
    '18', 'grave',      # \`
    '19', 'acute',      # \'
    '20', '',           # \v (I don't know)
    '21', '',           # \u (I don't know)
    '22', 'overscore',  # \= (I'm not sure)
    '94', 'circ',       # \^
    '95', '',           # \. (I don't know)
    '7D', '',           # \H (I don't know)
    '7d', '',           # \H (I don't know)
    '7E', 'tilde',      # \~
    '7e', 'tilde',      # \~
    '7F', 'uml',        # \"
    '7f', 'uml',        # \"
);

&driver;        # Process each file ...
sub driver {
    # Top-level loop: translates every file named in @ARGV in turn.
    # For each file it runs texexpand into the destination directory,
    # changes into that directory, performs the translation (or only the
    # off-line image generation when $IMAGES_ONLY is set), reports any
    # accumulated warnings and returns to the original directory.
    local($FILE, $texfilepath, $orig_cwd, %unknown_commands, $bbl_cnt);
    $orig_cwd = &getcwd;
    &initialise;                # Initialise some global variables
    &ascii_mode if $ASCII_MODE; # Must come after initialization
    &titles_language($TITLES_LANGUAGE);
    foreach $FILE (@ARGV) {
        local($bbl_nr) = 1;
        local($global_page_num) = (0); # The number of reused images and those in images.tex
        local($new_page_num) = (0);    # The number of images in images.tex
        local($pid, $sections_rx, $sections_no_delim_rx, $outermost_level,
              %cached_env_img, %id_map, %latex_body, $latex_body,
              %symbolic_labels, %latex_labels, %encoded_section_number,
              %verbatim, %new_command, %new_environment, $preamble);
        # Give each input file its own copy of $DESTDIR so that the
        # per-file subdirectory appended below does not accumulate
        # (dest/file1/file2/...) when several files are processed.
        local($DESTDIR) = $DESTDIR;
        ($texfilepath, $FILE) = &get_full_path($FILE);
        if (-f "$texfilepath/$FILE") {
            print "This is LaTeX2HTML Version $TEX2HTMLVERSION by Nikos Drakos, \nComputer Based Learning Unit, University of Leeds.\n\n";
            # Tell texexpand which files we *don't* want to look at.
            $ENV{'TEXE_DONT_INCLUDE'} = $DONT_INCLUDE;
            # Strip a literal ".tex" suffix only (the dot must be escaped,
            # otherwise names such as "vortex" would be truncated).
            $FILE =~ s/\.tex$//;
            $DESTDIR = $DESTDIR . "/". $FILE unless $NO_SUBDIR;
            print "OPENING $texfilepath/$FILE.tex \n";
            next unless &new_dir($DESTDIR);
            &deal_with_texinputs($texfilepath, $DESTDIR);
            # This needs $DESTDIR to have been created ...
            system("$TEXEXPAND -auto_exclude -save_styles $DESTDIR/TMP_styles $texfilepath/$FILE.tex > $DESTDIR/TMP_$FILE")
                && print "Error: $!\n";
            chdir($DESTDIR) || die "$!\n";
            $SIG{'INT'} = 'handler';
            &open_dbm_database;
            if ($IMAGES_ONLY) {
                &make_off_line_images}
            else {
                &rename_image_files;
                &load_style_file_translations;
                &make_language_rx;
                &make_raw_arg_cmd_rx;
                &translate_titles;
                print "\nReading ...";
                &slurp_input_and_partition_and_pre_process("TMP_$FILE");
                &add_preamble_head;
                # Create a regular expressions
                &set_depth_levels;
                &make_sections_rx;
                &make_order_sensitive_rx;
                &add_document_info_page if $INFO;
                &add_bbl_and_idx_dummy_commands;
                &translate;     # Destructive!
            }
            &cleanup;
            print "\n\n*********** WARNINGS *********** \n$global{'warnings'}"
                if ($global{'warnings'} || $NO_IMAGES || $IMAGES_ONLY);
            &image_cache_message if ($NO_IMAGES || $IMAGES_ONLY);
            &image_message if ($global{'warnings'} =~ /Failed to convert/io);
            chdir($orig_cwd);# Go back to the source directory
        }
        else {
            print "Cannot read $texfilepath/$FILE \n";}
    }
    print "\nUnknown commands: ". join(" ",keys %unknown_commands)
        if %unknown_commands;
    print "\nDone.\n";
    $_;
}

sub open_dbm_database {
    # These are DBM (unix DataBase Management) arrays which are actually
    # stored in external files. They are used for communication between
    # the main process and forked child processes;
    dbmopen(%verb, "TMP_verb",0755);
    dbmopen(%verb_delim, "TMP_verb_delim",0755);
    dbmopen(%expanded,"TMP_expanded",0755);
    # Holds max_id, verb_counter, verbatim_counter, warnings
    dbmopen(%global, "TMP_global",0755);
    # These next two are used during off-line image conversion
    # %new_id_map maps image id's to page_numbers of the images in images.tex
    # %image_params maps image_ids to conversion parameters for that image
    dbmopen(%new_id_map, ".ID_MAP",0755);
    dbmopen(%img_params, ".IMG_PARAMS",0755);
    dbmopen(%orig_name_map, ".ORIG_MAP",0755);

    # Force max_id to a number (0 when it has not been set yet).
    $global{'max_id'} = ($global{'max_id'} | 0);
    # Reload the tables that are kept in plain TMP_* files (see write_mydb).
    &read_mydb(*verbatim, "verbatim");
    &read_mydb(*new_command, "new_command");
    &read_mydb(*new_environment, "new_environment");
    $preamble = &read_mydb(*preamble, "preamble");
}

sub close_dbm_database {
    # Closes every DBM array opened by open_dbm_database and discards
    # the in-memory copies.
    dbmclose(%verb); undef %verb;
    dbmclose(%verb_delim); undef %verb_delim;
    dbmclose(%expanded); undef %expanded;
    dbmclose(%global); undef %global;
    dbmclose(%new_id_map); undef %new_id_map;
    dbmclose(%img_params); undef %img_params;
    dbmclose(%orig_name_map); undef %orig_name_map;
}

sub clear_images_dbm_database {
    # %new_id_map will be used by the off-line image conversion process
    undef %new_id_map;
    undef %img_params;
    undef %orig_name_map;
    dbmopen(%new_id_map, ".ID_MAP",0755);
    dbmopen(%img_params, ".IMG_PARAMS",0755);
    dbmopen(%orig_name_map, ".ORIG_MAP",0755);
}

# These three subroutines should have been handled within the DBM database.
# Unfortunately if the contents of an array are more than ~900 (system
# dependent) chars long then dbm cannot handle it and gives error messages.
# So here we save and then read the contents explicitly.

# Appends one ($key, contents) record to the file TMP_$db.
sub write_mydb {
    local($db, $key, $_) = @_;
    open(DB,">>TMP_$db");
    print DB join('', "\n$mydb_mark","#", $key, "#", $_);
    close DB;
}

# Overwrites the file TMP_$db with the given string.
sub write_mydb_simple {
    local($db, $_) = @_;
    open(DB,">TMP_$db");
    print DB $_;
    close DB;
}

# Assumes the existence of a file TMP_verbatim which contains
# sequences of verbatim counters and verbatim contents.
# Loads the records of TMP_$name into the associative array passed as a
# typeglob and returns the raw file contents (nothing if the file is absent).
sub read_mydb {
    local(*db,$name) = @_;
    local($_,@tmp,$i,$tmp1,$tmp2);
    return unless (-f "TMP_$name");
    $_ = `cat TMP_$name`;
    $| = 1;
    @tmp = split(/\n$mydb_mark#([^#]*)#/);
    $i = 1;     # Ignore the first element at 0
    while ($i < scalar(@tmp)) {
        $tmp1 = $tmp[$i];
        $tmp2 = $tmp[++$i];
        $db{$tmp1} = <<"_END_OF_STRING_";
$tmp2
_END_OF_STRING_
        ++$i;
    };
    undef @tmp;
    $_;
}

# Reads in a latex generated file (e.g. .bbl or .aux)
# It returns success or failure
# ****** and binds $_ in the caller as a side-effect ******
sub process_ext_file {
    local($ext) = @_;
    local($found) = 0;
    print "\nReading $FILE.$ext ...";
    local($file) = &fulltexpath("$FILE.$ext");
    # Warn (deferred) when the source is newer than the generated file.
    $global{'warnings'} .=
        "\n$FILE.tex is newer than $FILE.$ext: Please rerun latex" .
            (($ext =~ /bbl/) ? " and bibtex.\n" : ".\n")
            if ( ($found = (-f $file)) &&
                 &newer(&fulltexpath("$FILE.tex"), $file));
    if ( $found ) {
        &slurp_input($file);
        &pre_process;
        &substitute_meta_cmds if /$meta_cmd_rx/;
        &wrap_shorthand_environments;
        $_ = &translate_commands(&translate_environments($_));
    };
    $found;
}

# Appends the source directory and "." to the TEXINPUTS environment variable.
sub deal_with_texinputs {
    local($source) = @_;
    $ENV{'TEXINPUTS'} = join(":", $ENV{'TEXINPUTS'}, $source, ".",":");
}

# Appends a dummy top-level section to $_ that stands for the
# information page.
sub add_document_info_page {
    # Uses $outermost_level
    # Nasty race conditions if the next two are done in parallel
    local($X) = ++$global{'max_id'};
    local($Y) = ++$global{'max_id'};
    $_ = join('', $_, "\\$outermost_level$O$X$C $O$Y$C $info_title$O$Y$C $O$X$C \n \\textohtml_info_page");
}

# For each style file name in TMP_styles (generated by texexpand) look for a
# perl file in $LATEX2HTMLDIR/styles and load it.
sub load_style_file_translations {
    local($_, $file);
    # TMP_styles is written by texexpand (-save_styles) into the
    # destination directory, which is the current directory by now.
    open(STYLES, "<TMP_styles");
    while (<STYLES>) {
        s/\s//g;
        $file = $_;
        # A translation next to the source file takes precedence over
        # the one in $LATEX2HTMLSTYLES.
        if (-f ($_ = "$texfilepath/$file.perl")) {
            print "\nLoading $_...\n";
            require ($_)}
        elsif (-f ($_ = "$LATEX2HTMLSTYLES/$file.perl")) {
            print "\nLoading $_...\n";
            require($_)}
    }
    close(STYLES);
}

################## Weird Special case ##################

# The new texexpand can be told to leave in \input and \include
# commands which contain code that the translator should simply pass
# to latex, such as the psfig stuff.  These should still be seen by
# TeX, so we add them to the preamble ...

sub do_include_lines {
    while (s/$include_line_rx//o) {
        local($include_line) = &revert_to_raw_tex($&);
        &add_to_preamble ('include', $include_line);
    }
}

########################## Preprocessing ############################

# The \verb declaration and the verbatim environment contain simulated
# typed text and should not be processed. Characters such as $,\,{,and }
# lose their special meanings and should not be considered when marking
# brackets etc. To achieve this \verb declarations and the contents of
# verbatim environments are replaced by markers.
# At the end the original text is put back into the document.
# The markers for verb and verbatim are different so that these commands
# can be restored to what the raw input was just in case they need to
# be passed to latex.

sub pre_process {
    # Modifies $_;
    &replace_html_special_chars;
    s/\\\\/\\\\ /go;            # Makes it unnecessary to look for escaped cmds
    local($next, $esc_del);
    &normalize_language_changes;
    # Replace the contents of each verbatim-like environment by a marker
    # and save the contents through write_mydb.
    while (/\\begin$verbatim_env_rx/o) {
        local($before, $contents, $after, $env);
        ($before, $after, $env) = ($`, $', $1);
        if ($after =~ /\s*\\end{$env[*]?}/) { # Must NOT use the s///o option!!!
            ($contents, $after) = ($`, $');
            $contents =~ s/^\n*//;
            $contents = &revert_to_raw_tex($contents) if ($env =~ /rawhtml/i);
            #&warn_if_too_long(*contents,*env);
            #$verbatim{++$global{'verbatim_counter'}} = $contents;
            &write_mydb("verbatim", ++$global{'verbatim_counter'}, $contents);
            $after = join("",$verbatim_mark,$env,$global{'verbatim_counter'},$after);}
        else {
            print "Cannot find \\end{$env}\n";
        }
        $_ = join("",$before,$after);
    }
    # Now do the \verb declarations
    while ((($del) = /\\verb[*]?(.)/)) {
        $esc_del = &escape_rx_chars($del);
        last unless s/\\verb[*]?[$esc_del]([^$esc_del]*)[$esc_del]/$verb{++$global{'verb_counter'}}=$1; $verb_delim{$global{'verb_counter'}}=$del;join('',$verb_mark,$global{'verb_counter'},$verb_mark)/e;
    }
    # NOTE(review): $* is the Perl 4 multi-line switch; it is deprecated
    # in Perl 5 in favour of the /m modifier -- confirm the target Perl
    # version before migrating.
    $* = 1;                     # Multiline matching ON
    s/^%.*\n|([^\\]%).*\n/$1\n/go; # Remove Comments but not % which may be meaningful
    $* = 0;                     # Multiline matching OFF
    &mark_string;
}

# Records a deferred warning (and prints one) when the string passed as
# *str is longer than 900 characters.
sub warn_if_too_long {
    local(*str,*type) = @_;
    if (length($str) > 900) {
        local($tmp) = &get_first_words($str, 7);
        $global{'warnings'} .= "A $type environment is too long and may have disappeared\n" .
            "(causing \"dbm\" errors). Try separating it into smaller pieces.\n" .
            "Potential DBM error:\n$tmp\n\n";
        print "\nPotential DBM error >>>: \n$tmp\n<<<\n";
    }
}

#################### Marking Matching Brackets ######################

# Reads the entire input file and performs pre_processing operations
# on it before returning it as a single string. The pre_processing is
# done on separate chunks of the input file by separate Unix processes
# as determined by LaTeX \input commands, in order to reduce the memory
# requirements of LaTeX2HTML.
sub slurp_input_and_partition_and_pre_process {
    local($file) = @_;
    local(%string, @files, $pos);
    local ($count) = 1;
    open(SINPUT,"<$file");
    while (<SINPUT>) {
        if (/TEXEXPAND: INCLUDED FILE MARKER (\S*)/) {
            # Forking seems to screw up the rest of the input stream
            # We save the current position ...
            $pos = tell SINPUT;
            &write_string_out($count);
            # ... so that we can return to it
            seek(SINPUT, $pos, 0);
            print STDERR "\nDoing $1";
            ++$count}
        else {
            $string{'STRING'} .= $_}
    }
    &write_string_out($count);
    # Collect the pieces written by write_string_out ("TMP-part<N>")
    # and concatenate them into $_.
    @files = sort file_sort (<TMP-part*>);
    foreach $file (@files) {
        $_ .= `cat $file`;}
}

# Pre-processes the text accumulated in $string{'STRING'} in a child
# process and writes the result to the file "TMP-part$count".
sub write_string_out {
    local($count) = @_;
    local($ppid) = 'TMP';
    local($pid);
    # All open unflushed streams are inherited by the child. If this is
    # not set then the parent will *not* wait
    $| = 1;
    # fork returns 0 to the child and PID to the parent
    &close_dbm_database;
    $pid = fork;
    # An undefined result means the fork failed; without this check the
    # parent would run the child branch below and exit silently.
    die "Cannot fork: $!\n" unless defined($pid);
    unless ($pid) {
        local($_);
        &open_dbm_database;
        $_ = delete $string{'STRING'};
        # Replace verbatim environments etc.
        &pre_process;
        # Handle newcommand, newenvironment ...
        &substitute_meta_cmds;
        open(OUTPUT, ">$ppid-part$count");
        print OUTPUT $_;
        close(OUTPUT);
        &write_mydb_simple("preamble", $preamble);
        &close_dbm_database;
        exit 0;
    };
    waitpid($pid,0);
    &open_dbm_database;
    delete $string{'STRING'};
}

# Reads the entire input file into a
# single string.
sub slurp_input {
    # Reads the whole of $file and leaves its contents in $_
    # (the caller's $_ is set as a side effect).
    local($file) = @_;
    local(%string);
    open(INPUT,"<$file");
    while (<INPUT>) {
        $string{'STRING'} .= $_};
    $_ = delete $string{'STRING'}; # Blow it away and return the result
}

# Returns the entry of %html_specials for the given character, or the
# character itself when the table has no (true) entry for it.
# Note: uses the global variables $x and $y.
sub special {
    ($x) = @_;
    $y = $html_specials{$x};
    ($y ? $y : $x)}

# Same as &special, but looks the argument up in %html_specials_inv.
sub special_inv {
    ($x) = @_;
    $y = $html_specials_inv{$x};
    ($y ? $y : $x)}

# Mark each matching opening and closing bracket with a unique id.
sub mark_string {
    # Modifies $_ in the caller;
    # NOTE(review): $* is the Perl 4 multi-line switch; it is deprecated
    # in Perl 5 in favour of the /m modifier -- confirm the target Perl
    # version before migrating.
    $* = 1;     # Multiline matching ON
    # Hide escaped braces (\{ and \}) so that they are not paired up below.
    s/^\\{|([^\\])\\{/$1tex2html_escaped_opening_bracket/go;
    s/^\\}|([^\\])\\}/$1tex2html_escaped_closing_bracket/go;
    $* = 0;     # Multiline matching OFF
    # Repeatedly replace innermost {...} pairs until none are left.
    for (;;) {  # Infinite loop
        last unless s/{([^{}]*)}/join("",$O,++$global{'max_id'},$C,$1,$O,$global{'max_id'},$C)/geo;
    }
    # Put the escaped braces back.
    s/tex2html_escaped_opening_bracket/\\{/go;
    s/tex2html_escaped_closing_bracket/\\}/go;
}

sub replace_html_special_chars {
    # Replaces html special characters with markers unless preceded by "\"
    $* = 1;     # Multiline matching ON
    s/([^\\])(<|>|&|\")/&special($1).&special($2)/geo;
    # MUST DO IT AGAIN JUST IN CASE THERE ARE CONSECUTIVE HTML SPECIALS
    s/([^\\])(<|>|&|\")/&special($1).&special($2)/geo;
    s/^(<|>|&|\")/&special($1)/geo;
    $* = 0;     # Multiline matching OFF
}

# The bibliography and the index should be treated as separate sections
# in their own HTML files. The \bibliography{} command acts as a sectioning command
# that has the desired effect. But when the bibliography is constructed
# manually using the thebibliography environment, or when using the
# theindex environment it is not possible to use the normal sectioning
# mechanism. This subroutine inserts a \bibliography{} or a dummy
# \tex2htmlindex command just before the appropriate environments
# to force sectioning.
sub add_bbl_and_idx_dummy_commands {
    # Modifies $_: inserts \bibliography / \tex2htmlindex commands in
    # front of thebibliography / theindex environments and \printindex.
    local($id);
    # Count the thebibliography environments (the text is left unchanged).
    s/([\\]begin\s*$O\d+$C\s*thebibliography)/$bbl_cnt++; $1/eg;
    #print STDERR "\nthebibliography: $bbl_cnt\n";
    #if ($bbl_cnt == 1) {
        s/([\\]begin\s*$O\d+$C\s*thebibliography)/do { $id = ++$global{'max_id'}; "\\bibliography$O$id$C$O$id$C $1"}/geo;
    #}
    s/([\\]begin\s*$O\d+$C\s*theindex)/\\tex2htmlindex $1/o;
    s/[\\]printindex/\\tex2htmlindex /o;
    &lib_add_bbl_and_idx_dummy_commands() if defined(&lib_add_bbl_and_idx_dummy_commands);
}

# Uses and modifies $default_language
# Splits the argument at language-change commands and passes every chunk
# to the translation function registered for the language in force.
sub convert_iso_latin_chars {
    local($_) = @_;
    local($next_language, $pattern, $before, $after, $funct);
    if (/$language_rx/o) {
        ($next_language, $pattern, $before, $after) = (($1||$2), $&, $`, $');
        $before = &convert_iso_latin_chars($before);
        $default_language = $next_language;
        $_ = join($pattern, $before, &convert_iso_latin_chars($after));
    }
    else {
        $funct = $language_translations{$default_language};
        (defined(&$funct) ? $_ = &$funct($_) :
         do {$global{'warnings'} .=
                 "\nCould not find translation function for $default_language.\n\n"
                     unless $global{'warnings'} =~ /$default_language/})
    }
    $_;
}

# May need to add something here later
sub english_translation {
    $_[0];
}

# This replaces \setlanguage{\language} with \languageTeX
# This makes the identification of language chunks easier.
sub normalize_language_changes {
    s/$setlanguage_rx/\\$1TeX/go;
}

# Splits $_ at sectioning commands, translates every section and writes
# it to its output file, then generates images and post-processes.
sub translate {
    &tokenize($sections_no_delim_rx); # Inserts space after a sectioning command
    &normalize_sections;        # Deal with the *-form of sectioning commands
    # Split the input into sections
    local(@sections) = split(/$sections_no_delim_rx /, $_);
    local($sections) = int(scalar(@sections) / 2);
    # Initialises $curr_sec_id to a list of 0's equal to
    # the number of sectioning commands.
    local(@curr_sec_id) = split(' ', &make_first_key);
    local($i, $current_depth) = (0,0);
    local($curr_sec) = ($FILE);
    local(%section_info, %toc_section_info, $CURRENT_FILE, %cite_info, %ref_files);
    # These filenames may be set when translating the corresponding commands.
    local($tocfile, $loffile, $lotfile, $footfile, $citefile, $idxfile,
          $figure_captions, $table_captions, $footnotes, $citations, %index,
          $t_title, $t_author, $t_date);
    &process_aux_file
        if (/\\ref/o || /\\caption/o || ($SHOW_SECTION_NUMBERS));
    print "\nTranslating ...";
    # @sections alternates between section text (even indices) and the
    # sectioning command that follows it (odd indices).
    while ($i <= @sections) {
        $_ = $sections[$i];
        s/^[\s]*//;             # Remove initial blank lines
        # The section command was removed when splitting ...
        s/^/\\$curr_sec / if ($i > 0); # ... so put it back
        if ($current_depth < $MAX_SPLIT_DEPTH) {
            $CURRENT_FILE = &make_name($curr_sec, join('_',@curr_sec_id));
            open(OUTPUT, ">$CURRENT_FILE") || die "Cannot open $DESTDIR/$FILE $!";
        };
        &remove_document_env;
        &wrap_shorthand_environments;
        print $i/2 . "/$sections...";
        # Must do this early ... It also sets $TITLE
        &process_command($sections_rx, *_) if /$sections_rx/;
        $_ = &translate_commands(&translate_environments($_));
        print OUTPUT $_;
        # Associate each id with the depth, the filename and the title
        $TITLE = $CURRENT_FILE unless ($TITLE);
        $toc_section_info{join(' ',@curr_sec_id)} =
            "$current_depth$delim$CURRENT_FILE$delim$TITLE";
        $section_info{join(' ',@curr_sec_id)} =
            "$current_depth$delim$CURRENT_FILE$delim$TITLE"
                if ($current_depth < $MAX_SPLIT_DEPTH);
        ++$i;
        # Get the depth of the current section;
        $curr_sec = $sections[$i];
        $current_depth = $section_commands{$curr_sec};
        @curr_sec_id = &new_level($current_depth, @curr_sec_id);
        $TITLE = '';
        ++$i;
    }
    $_ = undef;
    $_ = &make_footnotes if $footnotes;
    print OUTPUT;
    close OUTPUT;
    &make_image_file;
    &make_images;
    # Link sections, add head/body/address do cross-refs etc
    &post_process;
    &save_labels_in_file if %ref_files;
    &save_image_cache_in_file if %cached_env_img;
}

############################ Processing Environments ##########################

sub wrap_shorthand_environments {
    # This wraps a dummy environment around environments that do not use
    # the begin-end convention. The wrapper will force them to be
    # evaluated by Latex rather than them being translated.
    # Wrap a dummy environment around matching $$s.
#    s/^\$\$|([^\\])\$\$/{$1.&next_wrapper('tex2html_double_dollar')}/ge;
    # Wrap a dummy environment around matching $s.
#    s/^\$|([^\\])\$/{$1.&next_wrapper('$')}/ge;
#    s/tex2html_double_dollar/\$\$/go;
    # Do \(s and \[s
    $* = 1;     # Multiline matching ON
    s/(^\\[(])|([^\\])(\\[(])/{$2.&make_wrapper(1).$1.$3}/geo;
    s/(^\\[)]|[^\\]\\[)])/{$1.&make_wrapper(0)}/geo;
    s/(^\\[[])|([^\\])(\\[[])/{$2.&make_any_wrapper(1,"displaymath")}/geo;
    s/(^\\[\]])|([^\\])(\\[\]])/{$2.&make_any_wrapper(0,"displaymath")}/geo;
    $* = 0;     # Multiline matching OFF
    $double_dol_rx = '(^|[^\\\\])\\$\\$';
    $single_dol_rx = '(^|[^\\\\])\\$';
    $_ = &wrap_math_environment;
    $_ = &wrap_raw_arg_cmds;
}

sub wrap_math_environment {
    # This wraps math-type environments
    # The trick here is that the opening brace is the same as the close,
    # but they *can* still nest, in cases like this:
    #
    # $ outer stuff ... \hbox{ ... $ inner stuff $ ... } ... $
    #
    # Note that the inner pair of $'s is nested within a group.  So, to
    # handle these cases correctly, we need to make sure that the outer
    # brace-level is the same as the inner. --- rst
    #tex2html_wrap
    # And yet another problem:  there is a scungy local idiom to do
    # this:  $\_$ for a boldfaced underscore.  xmosaic can't display the
    # resulting itty-bitty bitmap, for some reason; even if it could, it
    # would probably come out as an overbar because of the floating-
    # baseline problem.  So, we have to special case this.  --- rst again.
    local ($processed_text, $before, $end_rx, $delim);
    local ($underscore_match_rx) = "^\\s*\\\\\\_\\s*\\\$";
    local ($wrapper);
    while (/$single_dol_rx/) {
        $processed_text .= $`.$1;
        $_ = $';
        $wrapper = "tex2html_wrap_inline";
        $end_rx = $single_dol_rx; # Default, unless we begin with $$.
        $delim = "\$";
        if (/^\$/ && (! $`)) {
            s/^\$//;
            $end_rx = $double_dol_rx;
            $delim = "";        # Cannot say "\$\$" inside displaymath
            $wrapper = "displaymath";
        }
        elsif (/$underscore_match_rx/ && (! $`)) {
            # Special case for $\_$ ...
            s/$underscore_match_rx//;
            $processed_text .= '\\_';
            next;
        }
        # Have an opening $ or $$.  Find matching close, at same bracket level
        $processed_text .= &make_any_wrapper(1,$wrapper).$delim;
        while (/$end_rx/) {
            # Forget the $$ if we are going to replace it with "displaymath"
            $before = $` . (($wrapper eq "displaymath")? "$1" : $&);
            $processed_text .= $before;
            $_ = $';
            # Found dollar sign inside open subgroup ... now see if it's
            # at the same brace-level ...
            local ($losing, $br_rx) = (0, '');
            while ($before =~ /$begin_cmd_rx/) {
                $br_rx = &make_end_cmd_rx($1);
                $before = $';
                if ($before =~ /$br_rx/) {
                    $before = $';
                }
                else {
                    $losing = 1;
                    last;
                }
            }
            last unless $losing;
            # It wasn't ... find the matching close brace farther on; then
            # keep going.
            /$br_rx/;
            $processed_text .= $`.$&;
            $_ = $';
        }
        # Got to the end.  Whew!
        $processed_text .= &make_any_wrapper(0,$wrapper);
    }
    $processed_text . $_;
}

# Repeatedly finds the first \begin in the argument, locates the matching
# \end and hands the contents to process_environment. Returns the
# translated text.
sub translate_environments {
    local ($_) = @_;
    local($tmp);
    #print "\nTranslating environments ...";
    for (;;) {
        last unless (/$begin_env_rx/o);
        local ($contents, $before, $br_id, $env, $after, $pattern);
        # $1 : br_id (at the beginning)
        # $2 : environment
        ($before, $br_id, $env, $after, $pattern) = ($`, $1, $2, $', $&);
        $contents = undef;
        # Sets $contents and modifies $after
        if (&find_end_env($env,*contents,*after)) {
            &process_command($counters_rx, *before)
                if ($before =~ /$counters_rx/);
            # This may modify $before and $after
            &extract_captions($env) if ($env =~ /(figure)|(table)/o);
            # Modifies $contents
            $contents = &translate_environments($contents)
                if (&defined_env($env) && (! ($env =~ /latexonly/o)) &&
                    (! $raw_arg_cmds{$env}));
            &process_environment($env, $br_id);
            undef $_;
            $_ = join("", $before, $contents, $after)}
        ### Evan Welsh added the next 24 lines ##
        elsif (&defined_env($env)) {
            # If I specify a function for the environment then it
            # calls it with the contents truncated at the next section.
            # It assumes I know what I'm doing and doesn't give a
            # deferred warning.
            &extract_captions($env) if ($env =~ /(figure)|(table)/);
            $contents = $after;
            $contents = &process_environment($env, $br_id, $contents);
            $_ = join("", $before, $contents);
        }
        elsif ($ignored{$env}) {
            # If I specify that the environment should be ignored then
            # it is but I get a deferred warning.
            $_ = join("", $before, $contents, $after);
            $global{'warnings'} .= "\n\\end{$env} not found (ignored).\n";
        }
        elsif ($raw_arg_cmds{$env}) {
            # If I specify that the environment should be passed to tex
            # then it is with the environment truncated at the next
            # section and I get a deferred warning.
            &extract_captions($env) if ($env =~ /(figure)|(table)/);
            $contents = $after;
            $contents = &process_environment($env, $br_id, $contents);
            $_ = join("", $before, $contents);
            $global{'warnings'} .= "\n\\end{$env} not found (truncated at next section boundary).\n";}
        else {
            # Drop the unmatched \begin so that the loop can make progress.
            $pattern = &escape_rx_chars($pattern);
            s/$pattern//;
            print "Cannot find \\end{$env}\n";
        }
    }
    $tmp = $_; undef $_;
    &process_command($counters_rx, *tmp) if ($tmp =~ /$counters_rx/);
    $_ = $tmp; undef $tmp;
    $_
}

# Looks in *rest for the \end that matches an already consumed \begin of
# $env, allowing for nested environments. On success the text before it is
# left in *ref_contents, the text after it in *rest, and 1 is returned;
# otherwise *rest is restored, *ref_contents is emptied and 0 is returned.
sub find_end_env {
    local ($env, *ref_contents, *rest) = @_;
    local ($be_rx) = &make_begin_end_env_rx ($env);
    local ($count) = 1;
    while ($rest =~ /$be_rx/) {
        $ref_contents .= $`;
        if ($1 eq "begin") { ++$count } else { --$count };
        $rest = $';
        last if $count == 0;
        $ref_contents .= $&;
    }
    if ($count != 0) {
        $rest = join('', $ref_contents, $rest);
        $ref_contents = "";
        return(0)}
    else { return(1)}
}

# MODIFIES $contents
sub process_environment {
    local($env, $id) = @_;
    local($env_sub) = ("do_env_$env");
    if (&defined_env($env)) {
        print ".";
        $contents = &$env_sub($contents);
    }
    elsif (&special_env) {      # &special_env modifies $contents
    }
    elsif ($ignore{$env}) { "" }
    else {
        # Generate picture
        $contents = &process_undefined_environment($env, $id, $contents);
        $env_sub = "post_latex_$env_sub"; # i.e. post_latex_do_env_ENV
        ( defined &$env_sub ? $contents = &$env_sub($contents) :
         do {$contents = join('',"

",$contents,"

") unless ($env =~ /tex2html_wrap/o)}); }; } # The $<$, $>$, $|$ and $=>$, etc strings are replaced with their textual # equivalents instead of passing them on to latex for processing in math-mode. # This will not be necessary when the mechanism for passing environments # to Latex is improved. # RETURNS SUCCESS OR FAILURE sub special_env { # Modifies $contents in its caller $contents =~ s/^\$(\s*($html_specials_inv_rx|[\da-z<>|=\-+,\s])+\s*)\$/$1<\/b>/igo; } sub defined_env { local($env) = @_; local($env_sub) = ("do_env_$env"); # The test using declarations should not be necessary but 'defined' # doesn't seem to recognise subroutines generated dynamically using 'eval'. # Remember that each entry in $declarations generates a dynamic prodedure ... ((defined &$env_sub) || ($declarations{$env})); } sub process_undefined_environment { local($env, $id, $contents) = @_; local($name,$cached,$raw_contents,$uucontents) = ("$env$id"); $contents = "% latex2html id marker $id\n$contents" if $contents =~ /$order_sensitive_rx/; $contents = "\\begin{$env}$contents\\end{$env}"; $latex_body{$name} = $contents; $uucontents = &encode($contents); if ($NOLATEX) { $id_map{$name} = "[$name]";} elsif ($cached = $cached_env_img{$uucontents}) { ++$global_page_num; ($cached =~ s/(img\d+)\.gif/img$global_page_num.gif/o); rename("$1.old", "img$global_page_num.gif"); $id_map{$name} = $cached; #!1$orig_name_map{$new_id_map{$name}} = $cached; $cached_env_img{$uucontents} = $cached; } else { &clear_images_dbm_database unless $new_page_num; $new_id_map{$name} = $id_map{$name} = ++$global_page_num . "#" . 
++$new_page_num; $orig_name_map{$id_map{$name}} = $name; $img_params{$name} = join('#', &extract_parameters($contents)); $raw_contents = &revert_to_raw_tex($contents); if ($env =~ /inline/) { $raw_contents = &make_box("$name", $raw_contents); } $raw_contents =~ s/\\pagebreak/\\\\/go; $latex_body .= "{\\newpage\n\\clearpage\n\\samepage $raw_contents\n}\n\n"; } &do_labels($contents,"$image_mark#$name#"); # Anchor the labels and put a marker in the text; } sub make_box { local($id,$contents) = @_; local($start) = '\\setbox\\sizebox=\\hbox{'; local($end) = "}\\lthtmltypeout{latex2htmlSize :$id: \\the\\ht\\sizebox"; $end .= "::\\the\\dp\\sizebox.}\\box\\sizebox"; $start . $contents . $end; } sub make_image_file { do { print "\nWriting image file ...\n"; open(ENV,">images.tex") || die "Cannot open $name.tex $!\n"; print ENV &make_latex($latex_body); print ENV "\n"; close ENV; ©_file($FILE, "bbl"); ©_file($FILE, "aux"); } if ((%latex_body) && ($latex_body =~ /newpage/)); } sub make_off_line_images { local($name, $page_num); do { system("$LATEX images.tex"); print "\nGenerating postscript images using dvips ...\n"; &process_log_file("images.log"); # Get eqn size info system("$DVIPS -M -S 1 -i -o $$\_image images.dvi") && print "Error: $!\n"; open(IMAGE, "echo $$\_image* | tr -s ' \t\r\f' '\\012\\012\\012\\012'|"); while () {chop; rename($_, "$_.ps") if /\d\d\d$/}; } if ((!$NOLATEX) && (-f "images.tex")); while ( ($name, $page_num) = each %new_id_map) { # Extract the page, convert and save it &extract_image($page_num,$orig_name_map{$page_num}); } } # Generate images for unknown environments, equations etc, and replace # the markers in the main text with them. 
# - $cached_env_img maps encoded contents to image URL's
# - $id_map maps $env$id to page numbers in the generated latex file and after
#   the images are generated, maps page numbers to image URL's
# - $page_map maps page_numbers to image URL's (temporary map);
# Uses global variables $id_map and $cached_env_img,
# $new_page_num and $latex_body
#
# Runs LaTeX and dvips on images.tex (when there is anything to typeset),
# converts every page that %id_map still refers to by page number into an
# image, updates the image cache, and finally calls &cleanup.
sub make_images {
    local($name, $contents, $raw_contents, $uucontents, $page_num,
          %page_map, $img);

    # It is necessary to run LaTeX this early because we need the log file
    # which contains information used to determine equation alignment
    if ($latex_body =~ /newpage/) {
        print "\n";
        system("$LATEX images.tex");
        &process_log_file("images.log");        # Get eqn size info
    }

    if ($NO_IMAGES) {
        system("cp $LATEX2HTMLDIR/icons/image.gif .")
            if (-e "$LATEX2HTMLDIR/icons/image.gif");
    }
    elsif ((!$NOLATEX) && ($latex_body =~ /newpage/)) {
        print "\nGenerating postscript images using dvips ...\n";
        system("$DVIPS -M -S 1 -i -o $$\_image images.dvi")
            && print "Error: $!\n";
        # dvips -i writes one file per page with a three-digit suffix;
        # list them one per line and give each a .ps extension.
        if (open(IMAGE, "echo $$\_image* | tr -s ' \t\r\f' '\\012\\012\\012\\012'|")) {
            while (<IMAGE>) {
                chop;
                rename($_, "$_.ps") if /\d\d\d$/;
            }
            close(IMAGE);       # reap the pipe instead of leaking it
        }
        else {
            print "Error: $!\n";
        }
    }

    while (($name, $page_num) = each %id_map) {
        $contents = $latex_body{$name};
        if ($page_num =~ /^\d+\#\d+$/) {        # If it is a page number
            # Extract the page, convert and save it,
            # unless we've just done it
            unless ($img = $page_map{$page_num}) {
                $img = &extract_image($page_num, $orig_name_map{$page_num});
                $uucontents = &encode($contents);       # Arrggh
                if (! ($contents =~ /$order_sensitive_rx/)) {
                    $cached_env_img{$uucontents} = $img;
                }
                else {
                    # Blow it away so it is not saved for next time
                    delete $cached_env_img{$uucontents};
                }
                $page_map{$page_num} = $img;
            }
            $id_map{$name} = $img;
        }
        else {
            # it is already available from previous runs
            $img = $page_num;
        }
    }

    # A PostScript page beyond the last one we asked for means that some
    # image spilled over onto an extra page.
    $global{'warnings'} .=
        "\nOne of the images is more than one page long.\n".
        "This may cause the rest of the images to get out of sync.\n\n"
            if (-f sprintf("%s%.3d%s", "$$\_image", ++$new_page_num, ".ps"));
    &cleanup;
}

# Reads the LaTeX log file $logfile and records, for every
# "latex2htmlSize :NAME: HEIGHTpt::DEPTHpt" line found in it, the height
# in $height{NAME} and the depth in $depth{NAME}.
# Dies if the log file cannot be opened.
sub process_log_file {
    local($logfile) = @_;
    local($name);
    open(LOG, "$logfile") || die "Cannot find logfile $logfile";
    while (<LOG>) {
        next unless /latex2htmlSize/;
        # Only trust $1 after a successful match; a failed match would
        # otherwise leave the previous capture in place.
        next unless /:([^:]*):/;
        $name = $1;
        $height{$name} = $1 if /: ([0-9.]*)pt/;
        $depth{$name}  = $1 if /::([0-9.]*)pt/;
    }
    close(LOG);
}

# Uses $img_params
#
# Turns one page of the dvips output into an image file.
#   $page_num - "GLOBAL#NEW": GLOBAL names the image file (imgGLOBAL.*),
#               NEW is the page number within images.tex
#   $name     - environment name followed by its id (e.g. figure12)
# Returns the result of &embed_image for the generated image.
sub extract_image {
    local($page_num,$name) = @_;
    local($scale, $external, $thumbnail, $map, $psimage,
          $global_num, $new_num);
    # $global_num identifies this image in the original source file
    # $new_num identifies this image in images.tex
    ($global_num, $new_num) = split("#", $page_num);
    $name =~ s/\*//;
    local($env,$basename,$img) = ($name,"img$global_num");
    $env =~ s/\d+$//;
    $psname = sprintf("%s%.3d", "$$\_image", $new_num);

    if ($EXTERNAL_IMAGES && $PS_IMAGES) {
        $img = "$basename.ps";
        system("cp $psname.ps $img");
    }
    else {
        $img = "$basename.gif";
        ($scale, $external, $thumbnail, $map, $psimage) =
            split('#', $img_params{$name});
        if ($NO_IMAGES) {
            symlink("image.gif", $img);
            if ($thumbnail) {
                symlink("image.gif", "T$img");
                $thumbnail = "T$img";
            }
        }
        else {
            if (($name =~ /figure/) || $psimage || $scale || $thumbnail) {
                # The parameters went through join/split, so a missing
                # scale comes back as '' rather than undef; test for both.
                $scale = $FIGURE_SCALE_FACTOR
                    unless (defined $scale && $scale ne '');
                &convert_image("$psname.ps", $img, $scale, "");
                if ($thumbnail) {
                    # $thumbnail contains the reduction factor
                    &convert_image("$psname.ps", "T$img", $thumbnail, "");
                    $thumbnail = "T$img";
                }
            }
            else {
                &convert_image("$psname.ps", $img, $MATH_SCALE_FACTOR, 1);
            }
            if ($name =~ /(equation|eqnarray|displaymath)/) {
                &right_justify($basename, $name);
            }
            if ($name =~ /inline/ && $depth{$name} != 0) {
                &top_justify($basename, $name);
            }
            # "! $env =~ /figure/" negated $env before matching and so was
            # never true; !~ expresses the intended "not a figure" test.
            if ($TRANSPARENT_FIGURES || ($env !~ /figure/o)) {
                &make_transparent($img);
                &make_transparent("T$img") if $thumbnail;
            }
            $global{'warnings'} .= "\nFailed to convert image $psname.ps"
                if ! -r $img;
        }
    }
    &embed_image($img, $name, $external, $thumbnail, $map);
}

# Removes the \htmlimage command (matched by $htmlimage_rx, whose second
# group holds the option string) from a copy of $contents and parses the
# options.
# Returns the list ($scale, $external, $thumbnail, $map, $psimage), where
# $psimage is set when $contents mentions a ".ps" file.
sub extract_parameters {
    local($contents) = @_;
    local($_, $scale, $external, $thumbnail, $map, $psimage);
    $psimage++ if ($contents =~ /\.ps/);
    # The replacement code stores the option string in $_ for the
    # matches below.
    $contents =~ s/$htmlimage_rx/$_ = $2;''/ego;
    $contents =~ s/\s//g;       # Remove spaces
    ($scale) = /scale=([\.\d]*)/;
    $external = /external/;
    ($thumbnail) = /thumbnail=([\.\d]*)/;
    ($map) = /map=([^\s,]+)/;
    ($scale, $external, $thumbnail, $map, $psimage);
}

# Converts the PostScript file $in_img into $out_img by running $PSTOGIF.
# A -depth option is added when $depth is true and a -scale option when
# $scale is greater than zero.  Any previous $out_img is removed first.
sub convert_image {
    local($in_img, $out_img, $scale, $depth) = @_;
    local($paperopt) = ( ($ENV{'PSTOPPM'} =~ /pstoppm\.ps/)
                         ? " -papersize $PAPERSIZE" : undef);
    # Build each optional switch explicitly so that a false value never
    # leaks into the command line.
    # (The -depth switch will not work with pstoppm3.ps.)
    local($depthopt) = ($depth ? " -depth $depth " : "");
    local($scaleopt) = (($scale > 0) ? " -scale $scale " : "");
    unlink $out_img;
    system("$PSTOGIF " . $depthopt . $paperopt . $scaleopt .
           " -out $out_img $in_img")
        && print "Error while converting image: $!\n";
}

# Makes the white colour of the GIF file $img transparent, using $GIFTRANS
# if it is set or else the netpbm tools if $NETPBM is set.  Goes through
# the scratch file TMP_tmp.gif.
sub make_transparent {
    local($img) = @_;
    if ($GIFTRANS) {
        # Making the white color transparent - this may not
        # always be a good idea...
        system("$GIFTRANS -t '#ffffff' $img > TMP_tmp.gif")
            && do {print "Could not make $img transparent: $!\n"; return};
        # Make the background color gray anyway for broken browsers
        system("$GIFTRANS -B '#bfbfbf' TMP_tmp.gif > $img");
    }
    elsif ($NETPBM) {
        system("$GIFTOPPM $img | $PPMTOGIF -trans '#ffffff' > TMP_tmp.gif")
            && do {print "Could not make $img transparent: $!\n"; return};
        rename("TMP_tmp.gif", $img);
    }
}

# This takes a filename (a GIF of an equation or equation array) and
# prepends enough whitespace so that it is right justified.
# It uses the global variable $LINE_WIDTH and the pbmplus routines
# giftoppm (to convert to ppm), pnmfile (to ask for its dimensions),
# pnmtile (to replicate a blank PBM of 1 pixel), pnmcat
# (to prepend the replicated blank bitmap) and ppmtogif
# (to convert the result back to GIF).
sub right_justify {
    # $basename - image file name without the .gif extension
    # $env      - environment name; displaymath images are centred
    #             (padded by half the free width) instead of right justified
    local($basename, $env) = @_;
    local($_, $img_width, $justification_width) = (0,0);
    system("$GIFTOPPM $basename.gif > $basename.ppm") && print "Error: $!\n";
    $_ = `$PNMFILE $basename.ppm`;
    s/([\d]+) by/$img_width = $1/eo;
    if ($img_width < $LINE_WIDTH) {
        $justification_width = ($LINE_WIDTH - $img_width);
        # pnmtile needs a whole number of pixels
        $justification_width = int($justification_width / 2)
            if $env =~ /displaymath/;
        system("$PNMTILE $justification_width 1 $BLANKPBM |$PNMCAT -l - $basename.ppm|$PPMTOGIF - > $basename.gif")
            && print "Error: $!\n";
    }
    # Remove the scratch file whether or not any padding was added.
    unlink "$basename.ppm";
}

# Pads the inline image $basename.gif with blank rows so that its baseline
# lines up with the surrounding text.  The amount of padding is computed
# from the height and depth that process_log_file recorded for $env in
# %height and %depth.
sub top_justify {
    local($basename, $env) = @_;
    local($_, $img_height, $h, $d, $adjust) = (0,0);
    system("$GIFTOPPM $basename.gif > $basename.ppm") && print "Error: $!\n";
    $_ = `$PNMFILE $basename.ppm`;
    s/by ([\d]+)/$img_height = $1/eo;
    # Look the sizes up under our own parameter rather than under the
    # caller's $name.
    $h = $height{$env};
    $d = $depth{$env};
    if (($h + $d) > 0) {        # no size information: nothing to compute
        $adjust = ($h - $d) * $img_height / ($h + $d);
        $adjust = int($adjust + 0.99);
        # Only a positive number of rows can be added.
        if ($adjust > 0) {
            system("$PNMTILE 1 $adjust $BLANKPBM |$PNMCAT -tb $basename.ppm -|$PPMTOGIF - > $basename.gif")
                && print "Error: $!\n";
        }
    }
    unlink "$basename.ppm";
}

sub process_in_latex {
    # This is just a wrapper for process_undefined_environment.
    # $_[0] = contents
    # Each call takes the next id from $global{'max_id'}.
    $global{'max_id'}++;
    &process_undefined_environment('tex2html_wrap',$global{'max_id'},$_[0]);
}

# Copies the auxiliary file "$FILE.$ext" (located through &fulltexpath)
# to "images.$ext" if it exists.
# NOTE(review): the first argument is overwritten before use; the path is
# always built from the global $FILE.
sub copy_file {
    local($file, $ext) = @_;
    $file = &fulltexpath("$FILE.$ext");
    system("cp $file images.$ext") if (-e $file);
}

# Renames every *.gif file in the current directory to *.old.
sub rename_image_files {
    local($_, $old_name);
    foreach $old_name (<*.gif>) {
        $_ = $old_name;
        s/\.gif$/\.old/o;
        rename($old_name, $_);
    }
}

############################ Processing Commands ##########################

# Translates the commands in the text passed as argument, innermost
# bracket group first, and returns the translated text.
sub translate_commands {
    local ($_) = @_;
    # Declared once here: a "local" inside the loop would push a new set
    # of saved values on every iteration.
    local($before, $contents, $br_id, $after, $pattern, $end_cmd_rx);
    #print "\nTranslating commands ...";
    &replace_strange_accents;
    for (;;) {                  # For each opening bracket ...
        last unless (/$begin_cmd_rx/o);
        ($before, $br_id, $after, $pattern) = ($`, $1, $', $&);
        $end_cmd_rx = &make_end_cmd_rx($br_id);
        if ($after =~ /$end_cmd_rx/) {
            # ... find the matching closing one
            ($contents, $after) = ($`, $');
            undef $_;
            $contents = &translate_commands($contents)
                if ($contents =~ /$match_br_rx/o);
            # Modifies $contents
            &process_command($single_cmd_rx,*contents)
                if ($contents =~ /\\/o);
            # THIS MARKS THE OPEN-CLOSE DELIMITERS AS PROCESSED
            $_ = join("", $before,"$OP$br_id$CP", $contents,"$OP$br_id$CP",
                      $after);
        }
        else {
            # Drop the unmatched opening bracket so the loop terminates.
            $pattern = &escape_rx_chars($pattern);
            s/$pattern//;
            print "\nCannot find matching bracket for $br_id";
        }
    }
    # Now do any top level commands that are not inside any brackets
    # MODIFIES $_
    &process_command($single_cmd_rx,*_);
}

# Modifies $contents
#
# Processes every command matched by $cmd_rx in the variable passed as a
# typeglob (second argument) and returns the result, which is also left
# in that variable.  For each command, in order of preference:
#  - call do_cmd_COMMAND if such a subroutine is defined;
#  - wrap the rest of the text in the tag given by %declarations;
#  - skip it if it is in %ignore;
#  - treat \theCOUNTER through &do_cmd_thecounter;
#  - otherwise count it in %unknown_commands.
sub process_command {
    local ($cmd_rx, *ref_contents) = @_;
    local($ref_before, $cmd, $after, $cmd_sub, $cmd_trans);
    local (@open_font_tags);
    $ref_contents = &convert_iso_latin_chars($ref_contents);
    for (;;) {                  # Do NOT use the o option
        last unless ($ref_contents =~ /$cmd_rx/ );
        ($ref_before, $cmd, $after) = ($`, $1, "$2$'");
        print(".");
        $cmd = &normalize($cmd);
        $after =~ s/^[ ]+/ /o;  # Collapse all spaces that follow a command
        ($cmd_sub, $cmd_trans) = ("do_cmd_$cmd", $declarations{$cmd});
        if (defined &$cmd_sub) {
            # $ref_before may also be modified ...
            $after = &$cmd_sub($after, @open_font_tags);
        }
        elsif ($cmd_trans) {    # One to one transform
            # The declaration applies to the rest of the group, so the
            # closing tag goes at the very end.
            $after = "<$cmd_trans>" . $after . "</$cmd_trans>";
            push(@open_font_tags, $cmd) if ($cmd =~ /$fontchange_rx/o);
        }
        elsif ($ignore{$cmd}) { # Ignored command
            print ".";
        }
        elsif ($cmd =~ /^the(.+)$/) {   # Counter
            $counter = $1;
            $after = &do_cmd_thecounter($after);
        }
        else {
            # Do not add if reading an auxiliary file
            ++$unknown_commands{$cmd} unless $AUX_FILE;
        }
        $ref_contents = join('', $ref_before, $after);
    }
    $ref_contents;
}

####################### Processing Meta Commands ############################
# This is a specialised version of process_command above.
# The special commands (newcommand, newenvironment etc.)
# must be processed before translating their arguments,
# and before we cut up the document into sections
# (there might be sectioning commands in the new definitions etc.).
# \newtheorem commands are treated during normal processing by
# generating code for the environments they define.
#
# Works on $_ in two passes: first every meta command is removed from the
# text (its definition goes to the preamble and its body is stored by the
# matching get_body_COMMAND routine), then the new commands and
# environments are expanded wherever they are used.
sub substitute_meta_cmds {
    local ($next_def);
    local ($cmd, $argn, $body, $before, $after, $new_cmd_rx, $new_env_rx);
    local ($cmd_sub);
    &tokenize($meta_cmd_rx);    # Inserts a space after meta commands ...
    print "\nProcessing macros ...";
    while (/$meta_cmd_rx /o) {  # ... and uses the space
        # Save the pieces of the match before throwing the text away.
        ($before, $cmd, $after) = ($`, $1, $');
        undef $_;
        print ".";
        $next_def = "\n\\$cmd";
        $cmd_sub = "get_body_$cmd";
        $_ = join('',$before, &$cmd_sub(*after));
        &add_to_preamble($cmd, $next_def);
    }
    # All the definitions have now moved to the $preamble and their bodies
    # are stored in %new_command and %new_environment
    #
    # Now substitute the new commands and environments:
    # (must do them all together because of cross definitions)
    ($new_cmd_rx, $new_env_rx) = (&make_new_cmd_rx, &make_new_env_rx('begin'));
    # Test the hashes themselves: an "each" in this condition would use up
    # the first entry, which the loops below would then never see.
    if (%new_command || %new_environment) {
        while (($cmd, $code) = each %new_command) {
            if (! $expanded{"CMD$cmd"}++) {
                $new_command{$cmd} = &expand_code($code);
                &write_mydb("new_command", $cmd, $new_command{$cmd});
            }
        }
        while (($cmd, $code) = each %new_environment) {
            if (! $expanded{"ENV$cmd"}++) {
                $new_environment{$cmd} = &expand_code($code);
                &write_mydb("new_environment", $cmd, $new_environment{$cmd});
            }
        }
        &tokenize($new_cmd_rx); # Inserts a space after the new commands ...
        print "+";
        if ($new_cmd_rx) {
            while (/$new_cmd_rx /o
                   && (($before, $cmd, $after) = ($`, $1, $'))) {
                print ".";
                $_ = join('',$before, &substitute_newcmd);
            }
        }
        if ($new_env_rx) {
            while (/$new_env_rx/o
                   && (($before, $cmd, $after) = ($`, $2, $'))) {
                print ".";
                $_ = join('',$before, &substitute_newenv);
            }
        }
    }
}

# Expands, inside the definition body passed as argument, every use of
# another new command or environment, and returns the expanded body.
sub expand_code {
    local($_) = @_;
    # Uses $new_cmd_rx and $new_env_rx set in the caller
    if ($new_cmd_rx eq "0") {
        $new_cmd_rx = "<<{this cant possibly match}>>";
    }
    if ($new_env_rx eq "0") {
        $new_env_rx = "<<{this cant possibly match}>>";
    }
    local($cmd, $before, $after);
    &tokenize($new_cmd_rx);     # Inserts a space after the new commands ...
    while ( ($new_cmd_rx && /$new_cmd_rx /) ||
            ($new_env_rx && /$new_env_rx/)) {
        # $new_cmd_rx binds $1 and $new_env_rx binds $2 ...
        $cmd = ($2 ? $2 : $1);
        ($before, $after) = ($`, $');
        if ($new_command{$cmd}) {       # We have a command
            $_ = join('',$before, &substitute_newcmd);
        }
        elsif ($new_environment{$cmd}) {
            $_ = join('',$before, &substitute_newenv);
        }
        &tokenize($new_cmd_rx); # Must do it for any newly inserted code
    }
    $_;
}

# Removes the definition from the input string, adds to the preamble
# and stores the body in %new_command;
# The input string is passed as a typeglob; what is left of it is returned.
sub get_body_newcommand {
    local(*_) = @_;
    local($argn,$cmd,$body,$tmp);
    $cmd = &get_next(1);        # Get command name
    $cmd =~ s/^\s*\\//;
    $argn = &get_next(0);       # Get optional no. of args
    $argn = 0 unless $argn;
    # Get the body of the code and store it with the name and number of args
    # UNLESS THE COMMAND IS ALREADY DEFINED
    # (This is the mechanism with which raw html can be ignored in a Latex
    # document but be recognised as such by the translator).
    $body = &get_next(1);
    $tmp = "do_cmd_$cmd";
    $new_command{$cmd} = join(':!:',$argn,$body) unless (defined &$tmp);
    undef $body;
    $_;
}

# Like get_body_newcommand above, but for simple raw TeX \defs
# The body is stored only when $is_simple_def is true after the call to
# &get_next(3).
sub get_body_def {
    local(*_) = @_;
    local($argn,$cmd,$body,$is_simple_def,$tmp);
    $cmd = &get_next(2);
    $cmd =~ s/^\s*\\//;
    $argn = &get_next(3);
    $argn = 0 unless $argn;
    $body = &get_next(1);
    $tmp = "do_cmd_$cmd";
    if ($is_simple_def && !defined (&$tmp)) {
        $new_command{$cmd} = join(':!:',$argn,$body);
    }
    undef $body;
    $_;
}

# Removes the definition from the input string, adds to the preamble
# and stores the body in %new_environment;
sub get_body_newenvironment {
    local(*_) = @_;
    local($argn,$env,$begin,$end,$tmp);
    $env = &get_next(1);        # Get the environment name
    $env =~ s/^\s*\\//;
    $argn = &get_next(0);       # Get optional no. of args
    $argn = 0 unless $argn;
    # Get the body of the code and store it with the name and number of args
    # UNLESS THE COMMAND IS ALREADY DEFINED (see get_body_newcommand)
    $tmp = "do_env_$env";
    $begin = &get_next(1);
    $end = &get_next(1);
    $new_environment{$env} = join(':!:', $argn, $begin, $end)
        unless defined &$tmp;
    $_;
}

# \renewcommand is handled exactly like \newcommand.
sub get_body_renewcommand {
    &get_body_newcommand($_[0]);
}

# \renewenvironment is handled exactly like \newenvironment.
sub get_body_renewenvironment {
    &get_body_newenvironment($_[0]);
}

# Expands one use of the new command $cmd: takes its arguments from the
# front of $after, substitutes them for #1, #2, ... in the stored body and
# returns the expanded body followed by what is left of $after.
sub substitute_newcmd {
    # Modifies $cmd and $after in the caller
    # Get the body from the new_command array
    local($argn, $_) = split(/:!:/, $new_command{$cmd});
    local($arg);
    foreach $i (1..$argn) {
        $arg = $undef_mark;
        $after =~ s/$next_pair_rx/$arg = $2;''/eo;      # Get the next argument
        # Next argument may not be in braces - get next character - ARGG!
        $after =~ s/\s*(.)/$arg = $1;''/eo if ($arg eq $undef_mark);
        s/\#$i/$arg/g;          # Substitute the arguments in the body
    }
    # Make the body unique (give unique id's to the brackets),
    # translate, and return it
    $_ = &revert_to_raw_tex($_);
    &pre_process;
    join('',$_,$after);
}

# Expands one use of the new environment $cmd: substitutes the arguments
# into the \begin part, replaces the matching \end in $after with the \end
# part, and returns the \begin part followed by the modified $after.
sub substitute_newenv {
    # Modifies $cmd and $after in the caller
    # Get the body from the new_environment array
    local($argn, $begdef, $enddef) = split(/:!:/, $new_environment{$cmd});
    local($arg,$new_def_rx);
    # Note that latex allows argument substitution only in the
    # \begin part of the new definition
    local($_) = $begdef;
    foreach $i (1..$argn) {
        $after =~ s/$next_pair_rx/$arg = $2;''/eo;      # Get the next argument
        s/\#$i/$arg/g;          # Substitute the arguments in the body
    }
    # Make the body unique (Give unique id's to the brackets),
    # translate, and return it
    $_ = &revert_to_raw_tex($_);
    &pre_process;               # Make unique
    $begdef = $_;
    # Now substitute the \end part:
    $_ = &revert_to_raw_tex($enddef);
    &pre_process;               # Make unique
    $enddef = $_;
    $new_def_rx = &make_end_env_rx($cmd);
    $after =~ s/$new_def_rx/$enddef/;
    join('',$begdef,$after);
}

# Instead of substituting as with newcommand and newenvironment,
# or generating code to handle each new theorem environment,
# it now does nothing. This forces theorem environments to be passed
# to latex. Although it would be possible to handle theorem
# formatting in HTML as it was done previously it is impossible
# to keep the theorem counters in step with other counters (e.g. equations)
# to which only latex has access. Sad...
sub get_body_newtheorem { local(*_) = @_; # Just chop off the arguments and append to $next_def &get_next(1); &get_next(0); &get_next(1); &get_next(0); $_; } # Modifies $_ in the caller and as a side-effect it modifies $next_def # which is local to substitute_meta_cmds sub get_next { local($what) = @_; local($next, $pat, $tmp); if ($what == 1) { ($next, $tmp, $pat) = &get_next_argument;} elsif ($what == 2) { ($next, $pat) = &get_next_tex_cmd;} elsif ($what == 3) { ($next, $pat) = &get_next_def_arg;} else { ($next, $pat) = &get_next_optional_argument;} $next_def .= &revert_to_raw_tex($pat) if $pat; $next =~ s/(^\s*)|(\s*$)//g; $next; } # The following get_next_ ARE ALL DESTRUCTIVE. sub get_next_argument { local($next, $br_id, $pat); s/$next_pair_rx/$br_id=$1;$next=$2;$pat=$&;''/eo; ($next, $br_id, $pat); } sub get_next_pair_or_char_pr { local($next, $br_id, $pat, $epat); if ( (/^\s*([\w])/o && (! $`))) { ($next, $pat) = ($1, $&) } elsif ( /$next_pair_pr_rx/o && (! $`)) { ($next, $br_id, $pat) = ($2, $1, $&) }; $epat = &escape_rx_chars($pat); s/$epat// if $pat; ($next, $br_id, $pat); } sub get_next_optional_argument { local($next, $pat); s/$optional_arg_rx/$next=$1;$pat=$&;''/eo if (/\s*[[]/ && (! $`)); # if the first character is a [ # (/^[]/ does not work because it may match the beginning of ANY line s/^\s*\[\]//g unless $pat; # This is not picked by $optional_arg_rx ($next, $pat); } sub get_next_tex_cmd { local($next, $pat); s/$single_cmd_rx/$next = $1; $pat=$&; ''/eo; ($next, $pat); } sub get_next_def_arg { local($next, $pat); # Sets is_simple_def for caller. Start by turning it off, then # turn it on if we find one of the "simple" patterns. # This has got to be hit-or-miss to an extent, given the # thoroughly incestuous relationship between the TeX macroprocessor # ('mouth') and typesetting back-end ('stomach'). Anything which # even does catcode hacking is going to lose BAD. s/^\s*//o; # Remove whitespace $is_simple_def = 0; # no arguments if (/^$O/ && (! 
$`)) { $next=0; $pat=''; $is_simple_def=1; $O } # 'simple' arguments if (! $is_simple_def && /$tex_def_arg_rx/o && (! $`)) { s/$tex_def_arg_rx/$next=$1; $pat=$&; $is_simple_def=1; $O/eo; } # MESSY arguments if (! $is_simple_def) { print "Arguments to $cmd are too complex ...\n"; print "It will not be processed unless used in another environment\n"; print "which is passed to LaTeX whole for processing.\n"; s/^[^<]*(<[^<]+)*< $depth) {$_ = 0 ;}; $i++; 0; }, @curr_sec_id); @curr_sec_id; } #sub make_head_and_body { # local($title) = @_; # "\n" . # "\n" . # "\n$title\n" . # "\n\n" . # &meta_information($title) . # "\n"; #} sub make_head_and_body { local($title) = @_; "\n$title\n\n

\n" . "Next:
\n" . "Previous:
\n" . "Table of Contents --- Search --- PS reprint


\n" . "Astronomical Data Analysis Software and Systems V
\n" . "ASP Conference Series, Vol. 101, 1996
\n" . "George H. Jacoby and Jeannette Barnes, eds.

\n" . "

$title

"; } sub make_address { local($_) = $ADDRESS; # ($_ ? "

\n$_\n
" : "") . "\n\n"; "Next:
\n" . "Previous:
\n" . "Contents --- Search --- PS reprint
\n" . ($_ ? "
\n$_\n
" : "") . "\n\n"; } sub encode_title { local($_) = @_; $_ = &encode($_); while (/(<[^<>]*>)/o) {s/$1//g}; # Remove HTML tags s/#[^#]*#//g; # Remove #-delimited markers $_; } # Encodes the contents of enviroments that are passed to latex. The code # is then used as key to a hash table pointing to the URL of the resulting # picture. sub encode { local($_) = @_; for (;;) { # Remove invocation-specific stuff last unless s/begin|end|<<\d+>>|tex2html_|wrap//go; } #$_ = pack("u*", $_); # uuencode s/\/|\\//g; # remove funnies may cause problems in a hash key s/\s*|\n//g; # Remove spaces and newlines $_; } ##################### Hypertext Section Links ######################## sub post_process { # Put hyperlinks between sections, add HTML headers and addresses, # do cross references and citations. # Uses the %section_info array created in sub translate. # Binds the global variables # $PREVIOUS, $PREVIOUS_TITLE # $NEXT, $NEXT_TITLE # $UP, $UP_TITLE # $CONTENTS # $INDEX # $NEXT_GROUP, $NEXT_GROUP_TITLE # $PREVIOUS_GROUP, $PREVIOUS_GROUP_TITLE # Converting to and from lists and strings is very inefficient. # Maybe proper lists of lists should be used (or wait for Perl5?) 
local($_, $key, $depth, $file, $title, $header, @link, @old_link, $navigation, %done, @keys, @tmp_keys, $flag, $child_links); @tmp_keys = @keys = sort numerically keys %section_info; print "\nDoing section links ..."; while (@tmp_keys) { $key = shift @tmp_keys; print "."; ($depth, $file, $title) = split($delim,$section_info{$key}); unless ($done{$file}) { $PREVIOUS = $PREVIOUS_TITLE = $NEXT = $NEXT_TITLE = $UP = $UP_TITLE = $CONTENTS = $INDEX = $NEXT_GROUP = $NEXT_GROUP_TITLE = $PREVIOUS_GROUP = $PREVIOUS_GROUP_TITLE = $_ = $navigation = undef; @link = split(' ',$key); ($PREVIOUS, $PREVIOUS_TITLE) = &add_link($previous_page_visible_mark,$file,@old_link); @old_link = @link; $link[$depth]++; ($NEXT_GROUP, $NEXT_GROUP_TITLE) = &add_link($next_visible_mark, $file, @link); $link[$depth]--;$link[$depth]--; ($PREVIOUS_GROUP, $PREVIOUS_GROUP_TITLE) = &add_link($previous_visible_mark, $file,@link); $link[$depth] = 0; ($UP, $UP_TITLE) = &add_link($up_visible_mark, $file, @link); @link = split(' ',$tmp_keys[0]); ($NEXT, $NEXT_TITLE) = &add_link($next_page_visible_mark, $file,@link); $CONTENTS = &add_special_link($contents_visible_mark, $tocfile, $file) if $CONTENTS_IN_NAVIGATION; $INDEX = &add_special_link($index_visible_mark, $idxfile, $file) if $INDEX_IN_NAVIGATION; $navigation = &navigation_panel unless $NO_NAVIGATION; $_ = &make_head_and_body($title); $header = join(' ', $_); $header = join(' ', $header, $navigation) if $TOP_NAVIGATION; rename($file, "TMP.$file"); open(INPUT, "$file") || die "Cannot open file $file $!"; &slurp_input("TMP.$file"); $child_links = &add_child_links(0,$depth, $key, @keys); $flag = (($BOTTOM_NAVIGATION || &auto_navigation) && $navigation); $_ = join('', $_, $CHILDLINE) if $child_links; $_ = join('', $header, $_, $child_links); $_ = join('', $_, $navigation) if ($flag); $flag = 0; $_ = join('', $_, $CHILDLINE) unless $flag; &remove_markers; print OUTFILE $_; print OUTFILE &make_address; close OUTFILE; $done{$file}++; &cleanup; } } } sub add_link 
{ # Returns a pair (iconic link, textual link) local($icon, $current_file, @link) = @_; local($dummy, $file, $title) = split($delim,$section_info{join(' ',@link)}); if ($title) { $title = &get_first_words($title, $WORDS_IN_NAVIGATION_PANEL_TITLES); if ($file eq $current_file) { return (&make_href($EXTERNAL_UP_LINK, $icon), &make_href($EXTERNAL_UP_LINK, "$EXTERNAL_UP_TITLE")) if ($EXTERNAL_UP_LINK && $EXTERNAL_UP_TITLE)} else { return (&make_href($file, $icon), &make_href($file, "$title")) } } (&inactive_img($icon), ""); } sub add_special_link { local($icon, $file, $current_file) = @_; (($file && ($file ne $current_file)) ? &make_href($file, $icon) : undef) } sub remove_markers { &remove_general_markers; &text_cleanup; # Must NOT clean the ~'s out of the navigation icons (in panel or text), # and must not interfere with verbatim-like environments &remove_sensitive_markers; } sub remove_general_markers { s/$lof_mark/