Example script: ksh2html.ksh


   1: #!/bin/ksh
   2: # Read a ksh file, given as argument, and generate an HTML rendering of it to stdout
   3: # Uses sed to prepare the HTML file from the shell script.  The sed scripts are complex
   4: # and hard to maintain, but repetitive, so we generate them in scratch files.  This is inefficient if
   5: # this script is called multiple times, so we cache them in /tmp and only regenerate them when
   6: # needed.
   7: 
   8: # This is just text editing, not command parsing, so it can't handle all situations.
   9: # 
  10: # It works on Irix and TRU64.  Solaris has 3 versions of sed available.  
  11: # /usr/xpg4/bin/sed and /usr/ucb/sed:
  12: #   - need to have '^' '$' outside of a \(..\) RE in order to match EOL -- taken literally inside \( ..\)
  13: # /usr/bin/sed 
  14: #   - fails with "line too long"
  15: #
  16: # Using \<..\> to isolate words doesn't seem to work (IRIX, SOLARIS /usr/ucb/sed)
  17: # Using [..]+, [..]* dont work in all sed.  [..]\{1,\} and [..]\{0,\} work properly.
  18: #
  19: # Sed will only accept about 200 commands in one file, so we generate multiple passes
  20: #
  21: # First version  2001/11/14  Richard Brittain,  Dartmouth College
  22: #
  23: # 2003/10/16 RB Rewrite to use dynamically generated sed scripts
  24: # 2005/03/10 RB Added -i flag to create an HTML include file - no headers etc.
  25: #               Change generated HTML to lower case.
  26: 
  27: # Make sure we get the XPG4 version of sed if we are on Solaris
  28: PATH=/usr/xpg4/bin:$PATH; export PATH
  29: 
  30: htinc=
  31: # Option -i = format for HTML include file (default is stand-alone HTML)
  32: while getopts i o ; do
  33:    case $o in
  34:    i) htinc=1;;
  35:    esac
  36: done
  37: shift $OPTIND-1
  38: 
  39: scriptname=$1
  40: 
  41: # Colours for HTML output
  42: sh_keywords=DARKBLUE 
  43: sh_comments=DARKGREEN
  44: sh_commandsubs=PURPLE
  45: sh_vars=BLACK
  46: sh_lineno=GRAY
  47: default_colour=DARKRED
  48: 
  49: seddir=/tmp/$USER/ksh2html
  50: needscripts=0
  51: if [[ -d $seddir && -w $seddir ]] ; then
  52:    # Directory already exists and is writeable
  53:    if [[ -r $seddir/pass1.sed && $0 -ot $seddir/pass1.sed ]]; then
  54:       print -u2 "$0: using previously generated HTML generation scripts"
  55:    else
  56:       needscripts=1
  57:    fi
  58: elif [[ ! -d $seddir ]]; then
  59:    if mkdir -p  $seddir ; then
  60:       # We need to regenerate files
  61:       needscripts=1
  62:    else
  63:       print -u2 "$0: Unable to create $seddir for editing scripts"
  64:       exit 1
  65:    fi
  66: else
  67:    print -u2 "$0: Unable to access $seddir for editing scripts"
  68:    exit 1
  69: fi
  70: 
  71: if [[ $needscripts -eq 1 ]]; then
  72:    print -u2 "$0: generating HTML editing scripts"
  73:    sed -e "s/sh_comments/$sh_comments/g" \
  74:              -e "s/sh_commandsubs/$sh_commandsubs/g" \
  75:              -e "s/sh_vars/$sh_vars/g" \
  76:              -e "s/sh_keywords/$sh_keywords/g"   > $seddir/prelim.sed \
  77:      <<"EndOfPrelim"
  78: # first Protect all things that look like HTML tags - unfortunately that means I/O redirection too
  79: s#&#\&amp;#g
  80: s#<#\&lt;#g
  81: s#>#\&gt;#g
  82: 
  83: # Detect comment lines and shuffle them off to the end so we don't interpret the contents
  84: /^[     ]*\#/ b comment
  85: # Terminating comments, following active code, are left so that we can interpret the code first.
  86: 
  87: # Variable references $var and ${var}
  88: # Ideally we'd not include ones in 'quoted strings'
  89: s#\(\$[a-zA-Z0-9_*@]\{1,\}\)#<font color=sh_vars>\1</font>#g
  90: s#\(\${[a-zA-Z0-9_]\{1,\}.*}\)#<font color=sh_vars>\1</font>#g
  91: 
  92: # Command Substitution 
  93: s#\$(\([^(].*\))#\$(<font color=sh_commandsubs>\1</font>)#g
  94: s#`\(.*\)`#`<font color=sh_commandsubs>\1</font>`#g
  95: 
  96: # Variable assignment.  Only look for first thing on line (not really correct, but avoids getting
  97: # similar things in quoted strings)
  98: s#^\([  ]*\)\([A-Za-z_][A-Za-z0-9_]*=\)#\1<font color=sh_vars>\2</font>#
  99: 
 100: # Arithmetic operations $((..)) 
 101: s#\(\$((.*))\)#<font color=sh_keywords>\1</font>#g
 102: 
 103: : comment
 104: # Comment lines are italicised and coloured
 105: # Multiple comment characters are additionally bolded
 106: s#^\([  ]\{0,\}\)\(\#\#.*\)$#\1<font color=sh_comments><i><b>\2</b></i></font>#
 107: s#^\([  ]\{0,\}\)\(\#.*\)$#\1<font color=sh_comments><i>\2</i></font>#
 108: 
 109: EndOfPrelim
 110: 
 111:    sed -e "s/sh_comments/$sh_comments/g" > $seddir/final.sed <<"EndOfFinal"
 112: # Last pass - get only the end-of-line comments, possibly after other translations on the line
 113: s#\([  ]\{1,\}\)\(\#\#.*\)$#\1<font color=sh_comments><i><b>\2</b></i></font>#
 114: s#\([  ]\{1,\}\)\(\#.*\)$#\1<font color=sh_comments><i>\2</i></font>#
 115: 
 116: EndOfFinal
 117: 
 118:    # Use open filedescriptors instead of multiple open/append/close
 119:    exec 3>$seddir/pass1.sed 4>$seddir/pass2.sed 5>$seddir/pass3.sed 6>$seddir/pass4.sed
 120:    for shellcode in \
 121:     if then elif else fi case esac for do done while until exec eval readonly typeset \
 122:     integer unalias unset trap kill wait getopts let bg read test \\\[ exit cd export \
 123:     alias break return echo print pwd shift times ulimit umask jobs fc : continue function
 124:      do
 125:      # ksh intrinsics - any location, Front anchor to ^ or ];&| with optional leading spaces
 126:      #                                End anchor to space or $
 127:   
 128:      print -r -u3 's#^\([  ]\{0,\}\)\('$shellcode'[ ;]\)#\1<font color='$sh_keywords'>\2</font>#'    #  >>$seddir/pass1.sed
 129:      print -r -u4 's#^\([  ]\{0,\}\)\('$shellcode'\)$#\1<font color='$sh_keywords'>\2</font>#'       #  >>$seddir/pass2.sed
 130:      print -r -u5 's#\([];&|][  ]\{0,\}\)\('$shellcode' \)#\1<font color='$sh_keywords'>\2</font>#g' #  >>$seddir/pass3.sed
 131:      print -r -u6 's#\([];&|][  ]\{0,\}\)\('$shellcode'\)$#\1<font color='$sh_keywords'>\2</font>#g' #  >>$seddir/pass4.sed
 132: 
 133:   done
 134:   3>&- ; 4>&- ; 5>&- ; 6>&- 
 135: fi
 136: 
 137: if [[ -z "$htinc" ]]; then
 138:    # generate stand-alone HTML with headers
 139:    print "<html>"
 140:    print "<head>"
 141:    print "<title>Script: $scriptname</title>"
 142:    print "</head>"
 143:    print "<body>"
 144:    print "<h2>Example script: $scriptname</h2>"
 145: fi
 146: print "<pre><font color=$default_colour>"
 147: sed -f $seddir/prelim.sed $scriptname |\
 148:   sed -f $seddir/pass1.sed |\
 149:   sed -f $seddir/pass2.sed |\
 150:   sed -f $seddir/pass3.sed |\
 151:   sed -f $seddir/pass4.sed |\
 152:   sed -f $seddir/final.sed |\
 153:   awk '{printf "<font color='$sh_lineno'>%4d:</font> %s\n",NR,$0}'
 154: print "</font></pre>"
 155: if [[ -z "$htinc" ]]; then
 156:    print "</body>"
 157:    print "</html>"
 158: fi
 159: exit 0



  last modified 02/04/2009 Introduction Table of Contents
(frame/no frame)
Printable
(single file)
© Dartmouth College