#!/bin/ksh
#
# $Header: /afs/northstar/ufac/richard/projects/class-web-builder/RCS/buildhtml,v 1.17 2005/03/19 05:15:48 richard Exp $
#
# Replace some metatag shortcuts with HTML tags, or
# redefine some standard tags with what we'd really
# like them to do.
#
# Arguments are expected to be 1 or more .src files, which are processed and left in
# the corresponding .html file.  If no file names are specified, work with stdin/stdout.
# If an argument is a .html, but the corresponding .src file exists, that will be referenced
# instead.  This allows simplification of the Makefiles - only one list of files is needed.
#
# Use the slide.order file to determine the section ordering and previous/next links
# for the navigation buttons.
#
# If source files are named, and contain DETAIL metatags, we also create .detail.html files
# and appropriate links to them.
#
# Not much error checking yet - assumes the slide.order file exists.
# All sed commands use # as regex delimiters since there are so many "/" in the variables
#
# This script is used in conjunction with buildslidelist and buildframeset
#
# 2001/10/26 Richard Brittain, Dartmouth College.
#
# 2003/10/27 RB Added support for a man-page tag translation - like the code tag, but with
#               a link to online man pages (for introducing new commands).
#               (The literal tag names are missing from this copy of the comment.)
#               Changed some of the sed script regex characters to !
#               since # may appear in the replacement strings (as colour specification)
#               Added creation of named anchors for all H[1-3] tags, for use by a more
#               sophisticated indexer.
#               Added buildhtml.conf file option
#
# 2004/04/22 RB Allow for .shtml as well as .html
# 2004/10/19 RB Added option to customize DETAIL tags with fixed entry and exit codes.
#               Apply these to both screen and printable format files
# 2005/02/03 RB Added css code to make page breaks for each new section in the printed output
#               Omit the slide number at top right of each slide - repeat of navbar just above.
# 2005/02/09 RB Rewrite to use a single navigation plus content page and no more frameset
#               and nav files.  Frameless version works much better.
# 2005/03/10 RB Moved the "top" anchor from the very top to the start of the real content - makes
#               class presentations flow much smoother.  Nav buttons at bottom now point to
#               the #top anchor, while the top ones just point to the bare page.
#               Allow for case-independent HTML tags in the input files, and create only
#               lower case tags in output.
# 2005/03/17 RB Make intro page a variable - not hard-coded "welcome.html"
#
# NOTE(review): in the copy of this file that was reviewed, most HTML string literals and
# several tag-matching regexes are empty or truncated (e.g. sed expressions of the form
# 's!!...!g', print '' statements, defaults consisting only of newlines).  They are kept
# here exactly as found and flagged individually; they presumably need to be restored
# from the RCS master before this script can produce correct output - TODO confirm.

# Solaris has two versions of grep - make sure we get the right one by tweaking $PATH
PATH=/usr/xpg4/bin:$PATH; export PATH

# Make sure we get these only from the .conf file
unset author keywords description

# Read in the config file setting various optional features.  Look only in the current directory
# This can preset several variables used later in this script.
[[ -r ./buildhtml.conf ]] && . ./buildhtml.conf

edit_file() {
  # Filter one page from stdin to stdout.  Let the caller redirect these as needed.
  # Uses global variable $mtime (set by caller), plus $print, $nometa, $doctype,
  # $intro, $author, $keywords, $description, $detail_start and $detail_end.
  # Uses preset environment variables for the HTML tag replacements, or hardwired defaults
  # defined here.
  # $1 is the name of the source file
  # $2 is a file ID (page number), to be placed somewhere on the page by these
  #    editing operations.  The string will cause trouble if it confuses the 'sed' parsing.
  # $3 is a filename for the "previous" link (empty: no navigation links are generated)
  # $4 is a filename for the "next" link
  # $5 is a page title
  # Note: the working variables below are plain assignments, so they are visible
  # to the rest of the script after the call.
  src=$1
  fid=$2
  prevf=$3
  nextf=$4
  title=$5

  # HTML replacements
  # Look for variables optionally set in buildhtml.conf, or in pre-existing
  # environment variables.
  # code_start, code_end, body_fn, h1_start, h1_end, leftarrow, rightarrow, uparrow
  # man_url, vspace, h2_start, h2_end, h3_start, h3_end

  # => Example Code
  # NOTE(review): the default values are empty in this copy.
  code_start="${code_start:-}"
  code_end="${code_end:-}"

  # URL to use for online man pages.  Pattern \1 is the tag content string
  # If not defined, process as for normal code tags
  if [[ -z "$man_url" ]]; then
    mancode=$code_start\\1$code_end
  else
    # NOTE(review): the surrounding link markup (which presumably used $man_url) is
    # empty in this copy, so both branches currently yield the same value.
    mancode=''$code_start\\1$code_end''
  fi

  # Default font (screen only)
  # Use a larger font than normal since mostly this is for class display
  body_fn="${body_fn:-size=+1}"

  # Body start - default top material
  # try to open a new window.
  leftarrow=${leftarrow:-left.gif}
  rightarrow=${rightarrow:-right.gif}
  uparrow=${uparrow:-up.gif}

  # Header generation.
  # Each "\\<newline>" pair puts an escaped newline into the sed replacement text.
  # NOTE(review): the markup between the escaped newlines is empty in this copy.
  bodystart=''"\\
"''"\\
"

  # Heading redefinitions
  # NOTE(review): these defaults contain only bare newlines in this copy; an unescaped
  # newline inside a sed replacement is a syntax error, so set them in buildhtml.conf
  # or restore the original markup.
  h1_start="${h1_start:-

}"
  h1_end="${h1_end:-

}"
  h2_start="${h2_start:-

}"
  h2_end="${h2_end:-

}"
  h3_start="${h3_start:-

}"
  h3_end="${h3_end:-

}"

  # Vertical space at end of each page
  vspace=${vspace:-"
"}

  # Header and Footer navigation links
  if [[ -n "$prevf" ]]; then
    # Header generation - top navigation links.
    # Note the complicated quoting and newlines are to get escaped newlines
    # into the substitute string for sed, so that the generated HTML is a bit easier to read.
    # Add in a page title and navigation links.
    # Skip the TARGET="mainplusnav" - framed pages will load in the current frame,
    # and frameless pages will not
    # NOTE(review): only the visible text of the links survives in this copy.
    topnavlinks=''"\\
"' '"\\
"' '"\\
"' '"\\
"' '"\\
"' '"\\
"'
Text-onlyTable of Contents (frame/'"\\
"'no frame)
'"$fid $title"''"\\
"' Previous'"\\
"'Top '"\\
"' Next'"\\
"'
'"\\
"''"\\
"

    # Footer generation.
    # For Frameless pages, skip the TARGET=mainplusnav in the nav links
    botnavlinks="\\
"'

'"\\
"'Previous '"\\
"'Top '"\\
"'Next'"\\
"
    # Alternate navigation links with text instead of icons.
    # botnavlinks='Previous Top Next'
  else
    topnavlinks=
    botnavlinks=
  fi

  # More footer lines - include a call to the table of contents (slide list) for
  # frames/noframes versions plus the printable version
  lastmod="\\
"''"\\
"''"\\
"''"\\
"''"\\
"''"\\
"''"\\
"'
'"$src"'\ \ last modified '"$mtime"'IntroductionTable of Contents
(frame/no frame)
Printable
(single file)
\© Dartmouth College
'"\\
"

  if [[ $print = 0 ]]; then
    # process for screen formatting, unless -p was specified
    # => Insert background image and default font size.  Insert vertical spacer at end of file
    #    Insert page number (file ID) and navigation links immediately after BODY
    #    Place named-anchor tags around the content of all heading tags.  Leading and
    #    trailing whitespace is stripped from the tag content, as it breaks the anchor action.
    #    DETAIL metatags are handled separately
    # NOTE(review): the literal header tags printed below are empty in this copy.
    print -R "$doctype"
    print ''
    print ''
    print ''
    [[ $nometa = 0 && -n "$author" ]] && print ''
    [[ $nometa = 0 && -n "$keywords" ]] && print ''
    [[ $nometa = 0 && -n "$description" ]] && print ''
    # Let the TITLE come through as written in the source file, but delete the
    # opening HTML and HEAD tags
    # NOTE(review): expressions with an empty regex ('s!!...') have lost their
    # tag pattern in this copy.
    sed \
      -e 's#<[hH][tT][mM][lL]>##' \
      -e 's#<[hH][eE][aA][dD]>##' \
      -e 's!<[cC][oO][dD][eE]>!'"$code_start"'!g' \
      -e 's!!'"$code_end"'!g' \
      -e 's! *\([^<]*\) *!'"$mancode"'!g' \
      -e 's!^<[bB][oO][dD][yY].*$!'"$bodystart""$topnavlinks"'!' \
      -e 's!^!'"$vspace""$botnavlinks"'


'"$lastmod"'
!' \
      -e 's!<[hH]\([1-5]\)\([^>]*\)>[ ]*\(.*[^ ]\)[ ]*!\3!' \
      -e 's!<[hH]1>!'"$h1_start"'!g' \
      -e 's!!'"$h1_end"'!g' \
      -e 's!<[hH]2>!'"$h2_start"'!g' \
      -e 's!!'"$h2_end"'!g' \
      -e 's!<[hH]3>!'"$h3_start"'!g' \
      -e 's!!'"$h3_end"'!g'
  else
    # For printable version, use a smaller font and skip spacing at end of pages.
    # Skip background images, and all HTML/BODY tags - we'll supply separate ones
    # at start and end.
    # Skip the internal anchors on heading tags.
    # Replace DETAIL metatags with the detail_start and detail_end codes, if any.
    # Insert "newpage" code at start of each file, unless the $src is the introduction - quick
    # hack to get pagination right at start of printed notes
    # NOTE(review): $class is assigned here but not referenced by any expression
    # that survives in this copy.
    case $src in
      ${intro%.*}.*) class= ;;
      *) class='class="newpage"' ;;
    esac
    # The file ID is spliced in quoted so that it cannot be word-split into
    # separate sed arguments.
    sed \
      -e 's!<[hH][tT][mM][lL]>.*$!!' \
      -e 's!.*$!!' \
      -e 's!<[hH][eE][aA][dD]>.*$!!' \
      -e 's!.*$!!' \
      -e 's!<[tT][iI][tT][lL][eE].*$!!' \
      -e "s!<[cC][oO][dD][eE]>!$code_start!g" \
      -e "s!!$code_end!g" \
      -e 's! *\([^<]*\) *!'"$mancode"'!g' \
      -e 's!^<[bB][oO][dD][yY].*$!

'"$fid"'

!' \
      -e 's!!!' \
      -e 's!<[hH]1>!'"$h1_start"'!g' \
      -e 's!!'"$h1_end"'!g' \
      -e 's!<[hH]2>!'"$h2_start"'!g' \
      -e 's!!'"$h2_end"'!g' \
      -e 's!<[hH]3>!'"$h3_start"'!g' \
      -e 's!!'"$h3_end"'!g' \
      -e 's##'"$detail_start"'#g' \
      -e 's##'"$detail_end"'#g'
  fi
}

create_detail() {
  # Create a "detail" file by processing the DETAIL metatags.
  # We operate on the HTML file already created, so that we don't need to worry
  # about all the other translations.
  # The non-blank characters following DETAIL- in the tag are used to create anchors
  # for the links between the two versions.
  # $1 is the name of the .[s]html file we need to process.
  # We will replace $1 with a new version, and create a .detail.[s]html file to go with it.
  # Uses global $html (the output suffix) and $detail_start / $detail_end.
  src=$1

  # Look for variables optionally set in buildhtml.conf, or in pre-existing
  # environment variables.
  # detail_start, detail_end

  # Link for "detail" page.  The "default" page hides the detail - turns them
  # into an HTML comment, and inserts a link to the page with the details included.
  # For the printable version, we ignore the detail metatags, which leaves the content
  # in the document.
  # A side effect of this is that detail_start and detail_end tags don't appear in the
  # printable copy
  detail=${src%.$html}.detail.$html
  # NOTE(review): the link markup around the visible text is empty in this copy.
  moredetail="\\
"''"\\
"'More detail'"\\
"
  lessdetail="\\
"''"\\
"'Less detail'"\\
"

  # First create the modified "standard" file, turning the DETAIL content into comments
  # and inserting a link.  The result goes to a temporary file named after our PID.
  # NOTE(review): the DETAIL tag patterns are missing from the expressions below.
  sed < "$src" > "$src.$$" \
    -e 's##'"$moredetail"'# -->#'

  # Now create the "detail" file
  sed < "$src" > "$detail" \
    -e 's#\(\)#'"$lessdetail"'\1'"$detail_start"'#' \
    -e 's#\(\)#'"$detail_end"'\1#'
  ## -e 's#\(\)#'"$lessdetail"'\1#'

  # overwrite the original input file.
  mv "$src.$$" "$src"
}

print_start() {
  # print HTML header fluff - this will be stripped out of all component files.
  # Arguments expected are:
  # 1 = the title of the combined document.
  # 2 = base URL (optional)
  # Since we use server side include here, could suck in custom preface for a given class
  # NOTE(review): the literal tags in the print statements are empty in this copy.
  print "$doctype"
  print ""
  print ""
  print ""
  [[ $nometa = 0 && -n "$author" ]] && print ''
  [[ $nometa = 0 && -n "$keywords" ]] && print ''
  [[ $nometa = 0 && -n "$description" ]] && print ''
  print "$1"
  # Some CSS magic to allow us to force pagebreaks in the printed output
  print ""
  print ""
  print ""
  # Leave body at default browser font.  Components will be resized relative to this.
  print ""
  print "

$1

"
  print "

Course Handout: (last update )

"
  print "
"
  [[ ! -z $2 ]] && print "These notes may be found at $2. The online version has many links to additional"\
    "information and may be more up to date than the printed notes"
  print "
"
}

print_end() {
  # print HTML trailer fluff - this has been stripped out of all the component files.
  # Arguments expected are:
  # 1 = the title of the combined document.
  # 2 = base URL (optional)

  # Turn the title into something we can use for a download counterID:
  # runs of spaces become "_", anything outside [A-Za-z0-9_] is dropped.
  # NOTE(review): $counterid is not referenced by any statement that survives in this copy.
  counterid=$(print "$1" | sed -es'/ \{1,\}/_/g' -es'/[^A-Za-z0-9_]//g')
  print "


"
  print "$1: Course Handout
"
  # This assumes the simplecounter CGI is available on the system used to deliver the page.
  # Version with visible counter:
  # print 'Download count [ ]  '
  # Version with invisible counter
  print ''
  print ""
  print "(last update   )  ©Dartmouth College"
  [[ ! -z $2 ]] && print "    $2"
  print ""
  print ""
  print ""
  print ""
}

# Control starts here
integer i maxslide
print=0
stream=0
nometa=0
baseurl=
# NOTE(review): the default DOCTYPE strings are empty in this copy.
doctype=${doctype:-""}
frametype=${frametype:-""}
html=html

# Option -p = format for printer-optimized HTML (default is screen)
# Option -s = output to stdout, even if named files are given to us.  Don't create the .html files.
# Option -b = specify a BASE URL
# Option -n = no META tags in header (author, keywords etc.)
# Option -S = create .shtml instead of .html as output (file contains SSI)
while getopts npsSb: o ; do
  case $o in
    n) nometa=1;;
    p) print=1;;
    s) stream=1;;
    S) html=shtml;;
    b) baseurl=$OPTARG;;
  esac
done
# Explicit arithmetic expansion, so this does not depend on the shell treating
# the operand of shift as an expression.
shift $((OPTIND-1))

# Get the intro page name from line 1 of slide.order
intro=$(sed -n -e '1s/^[ 0-9]*//p' slide.order)

# Get the "class" title from the TITLE of the intro page
# We'll use this in the printable version
if [[ -r ${intro%.*}.src ]] ; then
  # NOTE(review): this pipeline is truncated in this copy (the TITLE pattern, the
  # input file and the sed stage appear to be missing); kept as found.
  ctitle=$(grep -i '##' -e 's#.*##')
else
  # Oops - well just use a generic title
  ctitle="Class Handout"
fi

# Set up the bodytag variable which is used in a couple of functions
# image or colour.  Default is white, special value "none"
# omits the tag and lets the browser default be used
body_bg="${body_bg:-WHITE}"
case $body_bg in
  (*.gif|*.jpg)
    # Assume it is an image
    bodytag="background=$body_bg"
    ;;
  none)
    # We don't want any background
    bodytag=
    ;;
  *)
    # Assume it is a colour
    bodytag="bgcolor=$body_bg"
    ;;
esac

if [[ $# -gt 0 ]] ; then
  # There are arguments - loop over all the .src files and turn them into .html
  # This code assumes the filenames have the form *.src
  # The '*' part is extracted and used as the file ID tag.

  # Print the headers, for the printable version
  [[ $print = 1 ]] && print_start "$ctitle" "$baseurl"

  # maxslide is the number of the last slide in the sequence
  maxslide=$(tail -1 slide.order|awk '{print $1}')

  for srcfile in "$@"; do
    # If we were given .[s]html names, but the .src files existed, assume we meant the .src
    # The suffix we want is stored in $html for later use
    case $srcfile in
      *.html)
        html=html
        newfile=${srcfile%.$html}.src
        [[ -r $newfile ]] && srcfile=$newfile
        ;;
      *.shtml)
        html=shtml
        newfile=${srcfile%.$html}.src
        [[ -r $newfile ]] && srcfile=$newfile
        ;;
    esac

    # strip off the .{src|html} to get a base name - use that to look up the
    # file numbering from slide.order, or just use it directly as a fileID
    case $srcfile in
      *.src) base=${srcfile%.src} ;;
      # This used to read "$srcfile%.$html}" (missing "${"), which left the suffix
      # and a stray "%...}" in the base name instead of stripping the suffix.
      *) base=${srcfile%.$html} ;;
    esac

    # The slide.order file is [spaces]NN[spaces]filename
    # The counting starts at 0, with the introduction page.
    # $fidno is the number of the file in the slide.order list.
    # $fid is the "fileID" we will use to label the pages.
    fidno=$(grep " $base\." slide.order | awk '{print $1}')
    fid=$fidno
    # If that didn't work, just use $base
    [[ -z $fid ]] && fid=$base
    # We don't want "page numbers" for the intro and slide list - they look silly
    # so special case it here
    case $fid in
      ${intro%.*}.*) fid= ;;
      (*slide_list*|0) fid= ;;
    esac
    # Finally, if fid is non-null, put it in ()
    [[ ! -z $fid ]] && fid="($fid)"

    # Get the modified time for $srcfile, for use in the footers.
    # ("mtime" is an external command expected on $PATH; it is not defined in this file.)
    mtime=$(mtime "$srcfile")

    # Make the outfile by replacing .src with .$html.  If the given filename
    # wasn't a .src file, this will just append .$html.  We need this for the prev/next links
    # even if we are writing to stdout.
    outfile=${srcfile%.src}.$html

    # Get the "Previous" and "Next" files from the slide.order list for the navigation buttons
    # $i is an integer, so null evaluates to 0
    if [[ -z "$fidno" ]]; then
      # We weren't in the slide.order file, so skip the navigation buttons
      prev=
      next=
    else
      # look up the previous and next files for the nav buttons
      prev=$outfile
      next=$outfile
      i=$fidno
      [[ $i -gt 0 ]] && prev=$(egrep '^ *'$((i-1))' ' slide.order|awk '{print $2}')
      [[ $i -lt $maxslide ]] && next=$(egrep '^ *'$((i+1))' ' slide.order|awk '{print $2}')
      # for the multiframe model, the next/previous links are not the actual .html files,
      # but the framesets
      # prev=${prev%.*html}.frameset.html
      # next=${next%.*html}.frameset.html
    fi

    # NOTE(review): truncated in this copy in the same way as the ctitle pipeline above.
    ptitle=$(grep -i '##' -e 's#.*##')

    # Call the editing function on this file, with I/O redirected as needed
    # $fid, $prev and $next may all be null, so quote them: unquoted, empty values
    # vanish and the title would slide into the "previous" slot, producing
    # navigation links for files that are not in slide.order.
    if [[ $stream = 1 ]]; then
      edit_file < "$srcfile" "$srcfile" "$fid" "$prev" "$next" "$ptitle"
      # If we are writing to stdout, we skip the "detail" file
    else
      edit_file < "$srcfile" > "$outfile" "$srcfile" "$fid" "$prev" "$next" "$ptitle"

      # If the srcfile contains DETAIL metatags, create the *.detail.html file
      # Note that edit_file() leaves DETAIL tags alone, since we need to treat them
      # differently for the "standard" file and the "detail" file.
      # NOTE(review): the DETAIL pattern is empty in this copy, so this test matches
      # any non-empty file.
      if grep -q '' "$srcfile" ; then
        create_detail "$outfile"
      fi
      # Multiframe version needed functions to create *.frameset.html and *.nav.html files
      # per named source file.  See previous version for that code.
    fi
  done
  [[ $print = 1 ]] && print_end "$ctitle" "$baseurl"
else
  # There are no arguments - do the same thing to stdin, write to stdout.  No FileID
  mtime=$(date '+%d/%m/%Y')
  [[ $print = 1 ]] && print_start "Class Notes" "$baseurl"
  edit_file
  [[ $print = 1 ]] && print_end "Class Notes" "$baseurl"
fi
exit 0