#!/bin/ksh
# Read a ksh file, given as argument, and generate an HTML rendering of it to stdout
# (with no file argument the script text is read from stdin).
# Uses sed to prepare the HTML file from the shell script.  The sed scripts are complex
# and hard to maintain, but repetitive, so we generate them in scratch files.  This is
# inefficient if this script is called multiple times, so we cache them in /tmp and only
# regenerate them when needed.
# This is just text editing, not command parsing, so it can't handle all situations.
#
# It works on Irix and TRU64.  Solaris has 3 versions of sed available.
# /usr/xpg4/bin/sed and /usr/ucb/sed:
#   - need to have '^' '$' outside of a \(..\) RE in order to match EOL -- taken
#     literally inside \( ..\)
# /usr/bin/sed
#   - fails with "line too long"
#
# Using \<..\> to isolate words doesn't seem to work (IRIX, SOLARIS /usr/ucb/sed)
# Using [..]+, [..]* doesn't work in all sed.  [..]\{1,\} and [..]\{0,\} work properly.
#
# Sed will only accept about 200 commands in one file, so we generate multiple passes
#
# First version 2001/11/14 Richard Brittain, Dartmouth College
#
# 2003/10/16 RB Rewrite to use dynamically generated sed scripts
# 2005/03/10 RB Added -i flag to create an HTML include file - no headers etc.
#               Change generated HTML to lower case.
#
# NOTE(review): the copy this was restored from had its line structure collapsed and
# every HTML tag/entity stripped from the strings.  The markup below is a minimal
# reconstruction (one <font color=...> wrapper per colour category, <i>/<b> where the
# comments in this file ask for them) -- confirm against a pristine copy if one exists.

# Make sure we get the XPG4 version of sed if we are on Solaris
PATH=/usr/xpg4/bin:$PATH; export PATH

htinc=
# Option -i = format for HTML include file (default is stand-alone HTML)
while getopts i o ; do
   case $o in
   i)  htinc=1;;
   \?) print -u2 "usage: $0 [-i] [file]"
       exit 2;;
   esac
done
shift $((OPTIND - 1))
scriptname=$1

# Colours for HTML output
sh_keywords=DARKBLUE
sh_comments=DARKGREEN
sh_commandsubs=PURPLE
sh_vars=BLACK
sh_lineno=GRAY
default_colour=DARKRED

seddir=/tmp/${USER:-$LOGNAME}/ksh2html
needscripts=0
# -O: only trust a cache directory we own; sed scripts found there get executed.
if [[ -d $seddir && -w $seddir && -O $seddir ]] ; then
   # Directory already exists and is writeable.  Every generated script must be
   # present and newer than this program, otherwise regenerate the whole set.
   for part in prelim pass1 pass2 pass3 pass4 final ; do
      if [[ ! -r $seddir/$part.sed || ! $0 -ot $seddir/$part.sed ]]; then
         needscripts=1
      fi
   done
   if [[ $needscripts -eq 0 ]]; then
      print -u2 "$0: using previously generated HTML generation scripts"
   fi
elif [[ ! -d $seddir ]]; then
   if ( umask 077 ; mkdir -p "$seddir" ) ; then
      # We need to regenerate files
      needscripts=1
   else
      print -u2 "$0: Unable to create $seddir for editing scripts"
      exit 1
   fi
else
   print -u2 "$0: Unable to access $seddir for editing scripts"
   exit 1
fi

if [[ $needscripts -eq 1 ]]; then
   print -u2 "$0: generating HTML editing scripts"
   # The here-documents use placeholder names (sh_vars etc.) which the outer sed
   # replaces with the configured colours.  '#' is the s-command delimiter so that
   # the '/' in closing tags needs no escaping.
   # NOTE(review): the [ ] classes match spaces only; add a literal tab inside them
   # if tab-indented input must be recognised.
   sed -e "s/sh_comments/$sh_comments/g" \
       -e "s/sh_commandsubs/$sh_commandsubs/g" \
       -e "s/sh_vars/$sh_vars/g" \
       -e "s/sh_keywords/$sh_keywords/g" > "$seddir/prelim.sed" <<"EndOfPrelim"
# first Protect all things that look like HTML tags - unfortunately that means I/O redirection too
s#&#\&amp;#g
s#<#\&lt;#g
s#>#\&gt;#g
# Detect comment lines and shuffle them off to the end so we don't interpret the contents
/^[ ]*\#/ b comment
# Terminating comments, following active code, are left so that we can interpret the code first.
# Variable references $var and ${var}
# Ideally we'd not include ones in 'quoted strings'
s#\(\$[a-zA-Z0-9_*@]\{1,\}\)#<font color="sh_vars">\1</font>#g
s#\(\${[a-zA-Z0-9_]\{1,\}.*}\)#<font color="sh_vars">\1</font>#g
# Command Substitution
s#\$(\([^(].*\))#<font color="sh_commandsubs">\$(\1)</font>#g
s#`\(.*\)`#<font color="sh_commandsubs">`\1`</font>#g
# Variable assignment.  Only look for first thing on line (not really correct, but avoids getting
# similar things in quoted strings)
s#^\([ ]*\)\([A-Za-z_][A-Za-z0-9_]*=\)#\1<font color="sh_vars">\2</font>#
# Arithmetic operations $((..))
s#\(\$((.*))\)#<font color="sh_commandsubs">\1</font>#g
: comment
# Comment lines are italicised and coloured
# Multiple comment characters are additionally bolded
s#^\([ ]\{0,\}\)\(\#\#.*\)$#\1<font color="sh_comments"><i><b>\2</b></i></font>#
s#^\([ ]\{0,\}\)\(\#.*\)$#\1<font color="sh_comments"><i>\2</i></font>#
EndOfPrelim

   sed -e "s/sh_comments/$sh_comments/g" > "$seddir/final.sed" <<"EndOfFinal"
# Last pass - get only the end-of-line comments, possibly after other translations on the line
s#\([ ]\{1,\}\)\(\#\#.*\)$#\1<font color="sh_comments"><i><b>\2</b></i></font>#
s#\([ ]\{1,\}\)\(\#.*\)$#\1<font color="sh_comments"><i>\2</i></font>#
EndOfFinal

   # Use open filedescriptors instead of multiple open/append/close
   exec 3>"$seddir/pass1.sed" 4>"$seddir/pass2.sed" 5>"$seddir/pass3.sed" 6>"$seddir/pass4.sed"
   kw_open="<font color=\"$sh_keywords\">"
   kw_close='</font>'
   for shellcode in \
      if then elif else fi case esac for do done while until exec eval readonly typeset \
      integer unalias unset trap kill wait getopts let bg read test \\\[ exit cd export \
      alias break return echo print pwd shift times ulimit umask jobs fc : continue function
   do
      # ksh intrinsics - any location, Front anchor to ^ or ];&| with optional leading spaces
      # End anchor to space or $
      # One rule per keyword per pass keeps each file under sed's command limit.
      print -r -u3 's#^\([ ]\{0,\}\)\('"$shellcode"'[ ;]\)#\1'"$kw_open"'\2'"$kw_close"'#'       # pass1.sed
      print -r -u4 's#^\([ ]\{0,\}\)\('"$shellcode"'\)$#\1'"$kw_open"'\2'"$kw_close"'#'          # pass2.sed
      print -r -u5 's#\([];&|][ ]\{0,\}\)\('"$shellcode"' \)#\1'"$kw_open"'\2'"$kw_close"'#g'    # pass3.sed
      print -r -u6 's#\([];&|][ ]\{0,\}\)\('"$shellcode"'\)$#\1'"$kw_open"'\2'"$kw_close"'#g'    # pass4.sed
   done
   # A bare "3>&-" with no command does not close the descriptor in this shell;
   # exec does, so the scripts are complete on disk before sed reads them below.
   exec 3>&- 4>&- 5>&- 6>&-
fi

if [[ -z "$htinc" ]]; then
   # generate stand-alone HTML with headers
   # The file name is escaped because it is copied verbatim into the markup.
   title=$(print -r -- "$scriptname" | sed -e 's#&#\&amp;#g' -e 's#<#\&lt;#g' -e 's#>#\&gt;#g')
   print "<html>"
   print -r -- "<head><title>$title</title></head>"
   print "<body>"
fi
print "<pre><font color=\"$default_colour\">"

# ${scriptname:+...} drops the operand entirely when no file was given, so sed
# reads stdin in that case instead of failing on an empty file name.
# The line-number colour is spliced into the awk program text rather than passed
# with -v, which older awk implementations do not accept.
sed -f "$seddir/prelim.sed" ${scriptname:+"$scriptname"} |
   sed -f "$seddir/pass1.sed" |
   sed -f "$seddir/pass2.sed" |
   sed -f "$seddir/pass3.sed" |
   sed -f "$seddir/pass4.sed" |
   sed -f "$seddir/final.sed" |
   awk '{printf "<font color=\"'"$sh_lineno"'\">%4d:</font> %s\n",NR,$0}'

print "</font></pre>"
if [[ -z "$htinc" ]]; then
   print "</body>"
   print "</html>"
fi
exit 0