HTML

Below is the code I wrote to construct the tree structure of this website, i.e. to get the navigation bars at the top and left of the page generated automatically.

(* Define functions to produce html pages with a standardised layout *)

(* 
   At the top of the each page the title heading spans the width of the page.  
   Below this each page consists of a 2 column table with a thin vertical 
   navbar in the left column that shows the main categories. 
   At the righthand side  of the righthand column there is a 
   vertical subnavbar that shows subcategory links (and the parent 
   page link if it is not in the main navbar).  
   To the left of this navbar is the page content.  

   This file makes use of the planar tree (Ptree) module to
   determine the paths to the pages in the hierachy.
*)

(* **************** Type definitions ****************************** *)
(* 
   Type: pagedesc - Page description
   Important attributes for a page stored in a heirachical structure are: 
   1.  Name of file page is stored in (eg news.html)
         String
   2.  Title of page (eg Matt McDonnell's News Links)
         String
   3.  Heading at top of page (eg News)
         String
   4.  Stylesheet (eg matt.css)
         String
   5.  Path to other info for the header eg javascript
   6.  Path to where page data is stored (eg data/news.dat)
         String
   7.  Parent page
         String
*)
(* NB: in first iteration there was a subpage field as well, that stored the 
  names of nodes linked to this node.  I've decided to instead store this data
  in the tree connectivity *)
(* NB 2: back to including the hierachy in the page description, now by
         storing the name of the parent page (root node is its own parent) *)  

type pagedesc = {name: string; title: string; head: string;
                 style: string; headerinfo: string; 
                 content: string; parent: string};;

(* 
   Type: fullpagedesc - stores page description and location details
   Create a "Full Page Description" type, consisting of the page description
   of the page defined previously, the path to the page and the list of links
   in the navbar of the page.  (Don't store the raw html of the navbar at
   this stage as we want the freedom to be able to do horizontal or vertical
   navbars, instead store (name, loc, navtext)) 
*)

type fullpagedesc = {page : pagedesc; loc: string; 
                     nav: (string * string * string) list};;

(* **************** End of type definitions ********************** *)

(* **************** Higher order functions  ********************** *)
(* Some useful higher order functions:
   (1) compose - function composition
   (2) identity - acts as the base element for function composition
                  eg if recursively folding compose over a list of functions
                     identity is the base element (cf 0, [])
   (3) complement - given two lists a and b returns the list of elements
                    in b and not a (this may be the opposite to the normal 
                    definition of complement - check this!) 
   (4) union - Union of a single list, removing repeated entries
               (almost certainly this should be named differently
                since union usually acts on two lists - find out
                what correct terminology is and rename function)       *)
let compose f g x = f(g x);;
let id x = x;;
let complement alst blst = List.fold_right
  (fun e acc->if not(List.mem e alst) then e::acc else acc) blst [];;
let rec union l = match l with 
| [] -> []
| x :: xs -> [x] @ (union (List.filter (fun y->y<>x) xs));;
(* **************** End of Higher order functions *************** *)

(* ************** File input functions and text processing ******* *)
(* Function to convert a string array into a page description record *)
let make_page arr = {name=arr.(0); title=arr.(1); head=arr.(2);
                     style=arr.(3); headerinfo=arr.(4); 
                     content=arr.(5); parent=arr.(6)};;

(* Easiest way to enter description of the webpage is in a single plain text
   file of the node descriptions, with an extra index number field for 
   ease of constructing the connectivity tree.
   Therefore we need to write a function to parse a description file into
   (a) a list of page (node) descriptions
   (b) a list of connectivity pairs 
   Make the decision that fields of the page description are separated by
   a : character, as this is unlikely to crop up in a title etc *)

(* Function to read a file into a string list, each line (excluding \n char) 
   is an element of the list *)
let readfile fname =
    let cin = open_in fname in 
    let rec aux acc = try aux ((input_line cin)::acc)
    with End_of_file -> close_in cin; List.rev acc
in aux [];;

(* Split a string into a list of substrings based on a delimiter character *)
let split c str = 
  let rec aux s acc = 
    try  let ind=String.index s c in
         aux (String.sub s (ind+1) ((String.length s) - ind -1 )) 
              ((String.sub s 0 ind)::acc)       
    with Not_found -> List.rev (s::acc) 
  in aux str [];;

let read_pages fname =
  let split_line = split ':' in
  let cin = open_in fname in
  let rec read_file () = 
    try
      let data=make_page(Array.of_list (split_line (input_line cin))) in
      data :: (read_file ())
    with End_of_file -> close_in cin; []
  in read_file ();;
(* ******** End of File input functions and text processing ******* *) 

let make_page_node_list plst = List.map (fun x->(x.parent,x.name)) plst;;

let make_page_name_tree plst=List.fold_left 
  (fun t (p,c)->Ptree.insert p c t) 
   (Ptree.create ()) (make_page_node_list plst);;
(*
let node_children (Node(a,lst)) = 
  a::(List.rev (List.fold_left (fun acc (Node(x,l))->x::acc) [] lst));;
*)   
(* Function to return the directory path to each page.  The root node is
   excluded from the path. To actually make the directories it is
   necessary to use the Unix library Unix.mkdir function.  
   For interactive testing this means a new top level must be constructed 
   in order to access the Unix library - unixtop *)
let pagedirs pagenamet =  
   List.map (fun y->List.fold_left (fun acc x->acc^x^"/") "" y) 
            (List.map List.tl (Ptree.paths pagenamet));;

(* Function to construct (name, directory location) pairs from a 
   pagename tree.  Then use List.assoc name in future functions
   to get the directory location associated with each name. *)
let make_page_name_dir pagenamet = 
  let dirs = pagedirs pagenamet in
  let names =  List.map (fun x->List.hd(List.rev x))(Ptree.paths pagenamet) in
  List.combine names dirs;;

(* Function to get the names of the children of a named node *)
let get_child pglst str = 
  List.fold_right (fun (x,y) acc ->y :: acc)
  (List.filter (fun (x,y)->x = str) (make_page_node_list pglst)) [];;

(* Function to take a list of page descriptions and return a list of 
   triples of (page name, parent name, list of children names).
   Next step is to convert this triple into a navbar. *)
let get_children pglst =
  List.map (fun x->(x.name,x.parent,get_child pglst x.name)) pglst;;

(* Elements of the navbar consist of the parent node and the children.
   Produce a (name, navbarelements) list from get_children *)
let make_navlist pglst = 
   List.map (fun (n,p,c)->if n=p then (n,c) else (n,p::c)) 
            (get_children pglst);;

let make_full_desc pglst = 
  let pntree = make_page_name_tree pglst in (* creates tree structure 
                                               of pages *)
  let namlst = List.map (fun x->x.name) pglst in  
  let nplst = List.combine namlst pglst in
  let navtext s = (List.assoc s nplst).head in
  let pndir  = make_page_name_dir pntree in (* creates list of (name,loc) *)
  let childlist = make_navlist pglst in (* create list of (name, children) *)
  let nav_list = 
    List.map (fun (x,clst)->(x,
        (List.map (fun y->(y,List.assoc y pndir,navtext y)) clst)
    )) childlist in
  List.map (fun x->{page=List.assoc x nplst; loc=List.assoc x pndir;
                    nav=List.assoc x nav_list}) namlst;;

(* ************************ HTML functions ************************** *)
(* Wrap a HTML tag around a string *)
let wrap tag x = 
    (* start tag may have additional arguments eg href that we want
       to strip from the end tag, so we need to extract the part of
       the tag up to the first space *)
    let init = 
      try 
        let ind=String.index tag ' ' in
        String.sub tag 0 ind
      with Not_found -> tag in
    let stag = "<"^tag^">" in
    let etag = "</"^init^">" in
    stag ^ x ^ etag;;

(* Wrap a hyperlink around a string *) 
let wraploc dirstr = wrap ("a href=\"" ^ dirstr ^ "\"");; 

(* Wrap a class and hyperlink around a string *)
let wrapclassloc cl dirstr = wrap ("a class=\"" ^ cl ^ "\" " ^
                                      "href=\"" ^ dirstr ^ "\"");; 

(* Wrap multiple layers around text *)
let composewrap wraplst = List.fold_right (fun t acc->compose (wrap t) acc) 
                                   wraplst id;;

(* Function to construct the navbar from a list of wrappers (see below)
   and the list of (name, location, navbar text).
   The wrapper is the html tag used in the table eg ["td"] for
   a horizontal navbar, ["tr";"td"] for a vertical navbar,
   ["tr"; "td name=\"(css style name)\""] etc *)
let make_navbar txtclass wraplst navlist =
  let wrapfun = List.fold_right (fun t acc->compose (wrap t) acc) 
                                   wraplst id in  
  wrap "table valign=\"top\"" ("\n"^
  (List.fold_right 
   (fun (n,l,t) acc->(wrapfun 
          (wrapclassloc txtclass (l^n^".html") t))^"\n"^acc) 
   navlist ""));;

(* Function to construct a site map page *)

let make_site_map fpglst =
  let pglst = List.map (fun x-> x.page) fpglst in 
  let pntree = make_page_name_tree pglst in
  let namlst = List.map (fun x->x.name) pglst in
  let namfpglst = List.combine namlst fpglst in
  let aux pgname lst =   
    let fpg = (List.assoc pgname namfpglst) in
    let pg = fpg.page in 
    let lststr = "\n" ^ (wraploc (fpg.loc ^ pg.name ^ ".html") pg.head) in
      lststr ^ "\n" ^ (wrap "ul" 
                (List.fold_right (fun x acc-> (wrap "li"  x)^acc) lst "")) in
  let cout=open_out "data/sitemap.dat" in
  begin 
    Printf.fprintf cout "%s" (Ptree.fold aux pntree);
    close_out cout
  end;;

(* Function to take a full page description and
   (1) Create the directory structure
   (2) Open the output file for writing
   (3) write the header HTML including breaking the page into a two
       column table
   (4) Write the navbar to the file (* Future version:  option for rh navbar *)
   (5) Attempt to read the page data file: if successful write it to the
       output file, else write nothing to output file and go to next step
   (6) Write footer and close file
*)

(* Layout of each page consists of a title at the top of the page,
   navbar on the lefthand side that doesn't change from page to page
   (children of the root node), navbar at the top of the righthand column
   that links to the subpages. *)
let make_final_html layout base mainnavlst fullpg =
  let pg = fullpg.page in 
  let pgdir = fullpg.loc in
  let pgname= pg.name in
  let headerinfo =
    try List.fold_right (fun x acc->x ^ "\n" ^ acc) (readfile pg.headerinfo) ""
    with Sys_error s-> "" | Not_found -> ""
  in
  let header = wrap "head"("\n"^
                (wrap "title" pg.title)^ "\n" ^
                ("<base href=\""^base^"\">\n")^
                ("<link rel=\"stylesheet\" href=\"" ^ pg.style ^ "\" " ^
                 "type=\"text/css\">\n") ^ 
                (headerinfo ^ "\n")) in
  let mainnav =make_navbar "bignav" ["tr";"td class=\"bignav\""] mainnavlst in
  let navbar = make_navbar "smallnav" ["td class=\"smallnav\""] 
       (complement mainnavlst fullpg.nav) in
  let content=
    try List.fold_right (fun x acc->x ^ "\n" ^ acc) (readfile pg.content) ""
    with Sys_error s-> "" | Not_found -> ""
  in
  let nav_and_content = if layout="top" then (
         (composewrap ["tr align=center";"td valign=\"top\""] 
                             navbar) ^ "\n" ^
         (composewrap ["tr";"td valign=\"top\""] content) ^ "\n" )
    else ( wrap "tr" (
         (composewrap ["td valign=\"top\""] content) ^ "\n" ^
         (composewrap ["td valign=\"top\""] 
             (make_navbar "smallnav" ["tr";"td class=\"smallnav\""] 
             (complement mainnavlst fullpg.nav))) ^ "\n"
          ))
   in 
  let doctypestr = 
    "<DOCTYPE HTML PUBLIC  \"-//W3C//DTD HTML 3.2 Final//EN\">" in 
  doctypestr ^ "\n" ^
   (wrap "html" 
     ("\n\n" ^ header ^ "\n\n" ^ 
       (wrap "body" (
         (wrap "h1 class=\"big\"" pg.head) ^ "\n" ^ 
         (wrap "table width=\"100%\" border=0 align=left cellpadding=0 cellspacing=0" 
           (
             "\n" ^ (wrap "td valign=\"top\"" ("\n" ^ mainnav ^ "\n")) ^
             "\n" ^ (wrap "td valign=\"top\"" 
               (wrap "table valign=\"top\"" nav_and_content)
             )
             ^ "\n"
           ) ^ "\n"        
          )
       ) ^ "\n"
     )
   ));;

let make_final_page base mainnavlst fullpage = 
  let mask=Unix.umask 0 in
  let fname="output/" ^ fullpage.loc ^ fullpage.page.name ^ ".html" in
  let cout = open_out fname in
  Printf.fprintf cout "%s" (make_final_html "right" base mainnavlst fullpage);
  close_out cout;
  Unix.chmod fname 0o644;
  ignore (Unix.umask mask);
  ();;

(* Construct the website directory structure from the page name tree *)
let make_dirstruct pagenamet = 
  let dirs = pagedirs pagenamet in
  let mask = Unix.umask 0 in
  begin
    Unix.mkdir "output" 0o755;
    List.iter (fun str->if str="output/" then () else Unix.mkdir str 0o755) 
              (List.map (fun str->"output/"^str) dirs);
    ignore(Unix.umask mask)
  end;;

(* Print out the directory structure to a file *)
let print_dirstruct fplst =
  let dirlst= List.map (fun x->x.loc) fplst in
  let cout = open_out "output/dirlst.txt" in
  begin
    List.iter (fun x->output_string cout (x^"\n")) (List.tl dirlst);
    flush cout;
    close_out cout
  end;;

(* The final function to make the entire website *)
let make_website base pfname =
  let plst = read_pages pfname in  
  let fplst = make_full_desc plst in 
  let sitemap = make_site_map fplst in
  let pagelst=List.map (fun x->x.page) fplst in
  let mainnavlst = (List.hd fplst).nav in
  let pntree =make_page_name_tree pagelst in
  begin
    ignore(make_dirstruct pntree);
    print_dirstruct fplst;
    List.iter (make_final_page base mainnavlst) fplst;
  end;;

(* Put all of the output into an output directory that is a child of the
   current directory.  For testing purposes set the baseref of each page to
   this directory. *)
let baseref="file://localhost" ^ (Unix.getcwd ()) ^ "/output/";;
let webbaseref="http://nodens.physics.ox.ac.uk/~mcdonnell/";;
(* ************* End of HTML functions ********************* ****** *)

(* ************* Functions for calling from command line ********** *)
(* entry point *)
let usage () =
  print_string "Usage: htmltree baseref pagedescname\n";
  exit 2;;

let main () =
  if Array.length Sys.argv <> 3 then usage () else
  begin
    let baseref=Sys.argv.(1) in
    let pdname =Sys.argv.(2) in
    make_website baseref pdname 
  end;;

if !Sys.interactive then () else main ();;
(* ************* End of command line functions ******************** *)