Below is the code I wrote to construct the tree structure of this website, i.e. to get the navigation bars at the top and left of the page generated automatically.
(* Define functions to produce html pages with a standardised layout *)
(*
At the top of the each page the title heading spans the width of the page.
Below this each page consists of a 2 column table with a thin vertical
navbar in the left column that shows the main categories.
At the righthand side of the righthand column there is a
vertical subnavbar that shows subcategory links (and the parent
page link if it is not in the main navbar).
To the left of this navbar is the page content.
This file makes use of the planar tree (Ptree) module to
determine the paths to the pages in the hierachy.
*)
(* **************** Type definitions ****************************** *)
(*
Type: pagedesc - Page description
Important attributes for a page stored in a heirachical structure are:
1. Name of file page is stored in (eg news.html)
String
2. Title of page (eg Matt McDonnell's News Links)
String
3. Heading at top of page (eg News)
String
4. Stylesheet (eg matt.css)
String
5. Path to other info for the header eg javascript
6. Path to where page data is stored (eg data/news.dat)
String
7. Parent page
String
*)
(* NB: in first iteration there was a subpage field as well, that stored the
names of nodes linked to this node. I've decided to instead store this data
in the tree connectivity *)
(* NB 2: back to including the hierachy in the page description, now by
storing the name of the parent page (root node is its own parent) *)
type pagedesc = {name: string; title: string; head: string;
style: string; headerinfo: string;
content: string; parent: string};;
(*
Type: fullpagedesc - stores page description and location details
Create a "Full Page Description" type, consisting of the page description
of the page defined previously, the path to the page and the list of links
in the navbar of the page. (Don't store the raw html of the navbar at
this stage as we want the freedom to be able to do horizontal or vertical
navbars, instead store (name, loc, navtext))
*)
type fullpagedesc = {page : pagedesc; loc: string;
nav: (string * string * string) list};;
(* **************** End of type definitions ********************** *)
(* **************** Higher order functions ********************** *)
(* Some useful higher order functions:
(1) compose - function composition
(2) identity - acts as the base element for function composition
eg if recursively folding compose over a list of functions
identity is the base element (cf 0, [])
(3) complement - given two lists a and b returns the list of elements
in b and not a (this may be the opposite to the normal
definition of complement - check this!)
(4) union - Union of a single list, removing repeated entries
(almost certainly this should be named differently
since union usually acts on two lists - find out
what correct terminology is and rename function) *)
let compose f g x = f(g x);;
let id x = x;;
let complement alst blst = List.fold_right
(fun e acc->if not(List.mem e alst) then e::acc else acc) blst [];;
let rec union l = match l with
| [] -> []
| x :: xs -> [x] @ (union (List.filter (fun y->y<>x) xs));;
(* **************** End of Higher order functions *************** *)
(* ************** File input functions and text processing ******* *)
(* Function to convert a string array into a page description record *)
let make_page arr = {name=arr.(0); title=arr.(1); head=arr.(2);
style=arr.(3); headerinfo=arr.(4);
content=arr.(5); parent=arr.(6)};;
(* Easiest way to enter description of the webpage is in a single plain text
file of the node descriptions, with an extra index number field for
ease of constructing the connectivity tree.
Therefore we need to write a function to parse a description file into
(a) a list of page (node) descriptions
(b) a list of connectivity pairs
Make the decision that fields of the page description are separated by
a : character, as this is unlikely to crop up in a title etc *)
(* Function to read a file into a string list, each line (excluding \n char)
is an element of the list *)
let readfile fname =
let cin = open_in fname in
let rec aux acc = try aux ((input_line cin)::acc)
with End_of_file -> close_in cin; List.rev acc
in aux [];;
(* Split a string into a list of substrings based on a delimiter character *)
let split c str =
let rec aux s acc =
try let ind=String.index s c in
aux (String.sub s (ind+1) ((String.length s) - ind -1 ))
((String.sub s 0 ind)::acc)
with Not_found -> List.rev (s::acc)
in aux str [];;
let read_pages fname =
let split_line = split ':' in
let cin = open_in fname in
let rec read_file () =
try
let data=make_page(Array.of_list (split_line (input_line cin))) in
data :: (read_file ())
with End_of_file -> close_in cin; []
in read_file ();;
(* ******** End of File input functions and text processing ******* *)
let make_page_node_list plst = List.map (fun x->(x.parent,x.name)) plst;;
let make_page_name_tree plst=List.fold_left
(fun t (p,c)->Ptree.insert p c t)
(Ptree.create ()) (make_page_node_list plst);;
(*
let node_children (Node(a,lst)) =
a::(List.rev (List.fold_left (fun acc (Node(x,l))->x::acc) [] lst));;
*)
(* Function to return the directory path to each page. The root node is
excluded from the path. To actually make the directories it is
necessary to use the Unix library Unix.mkdir function.
For interactive testing this means a new top level must be constructed
in order to access the Unix library - unixtop *)
let pagedirs pagenamet =
List.map (fun y->List.fold_left (fun acc x->acc^x^"/") "" y)
(List.map List.tl (Ptree.paths pagenamet));;
(* Function to construct (name, directory location) pairs from a
pagename tree. Then use List.assoc name in future functions
to get the directory location associated with each name. *)
let make_page_name_dir pagenamet =
let dirs = pagedirs pagenamet in
let names = List.map (fun x->List.hd(List.rev x))(Ptree.paths pagenamet) in
List.combine names dirs;;
(* Function to get the names of the children of a named node *)
let get_child pglst str =
List.fold_right (fun (x,y) acc ->y :: acc)
(List.filter (fun (x,y)->x = str) (make_page_node_list pglst)) [];;
(* Function to take a list of page descriptions and return a list of
triples of (page name, parent name, list of children names).
Next step is to convert this triple into a navbar. *)
let get_children pglst =
List.map (fun x->(x.name,x.parent,get_child pglst x.name)) pglst;;
(* Elements of the navbar consist of the parent node and the children.
Produce a (name, navbarelements) list from get_children *)
let make_navlist pglst =
List.map (fun (n,p,c)->if n=p then (n,c) else (n,p::c))
(get_children pglst);;
let make_full_desc pglst =
let pntree = make_page_name_tree pglst in (* creates tree structure
of pages *)
let namlst = List.map (fun x->x.name) pglst in
let nplst = List.combine namlst pglst in
let navtext s = (List.assoc s nplst).head in
let pndir = make_page_name_dir pntree in (* creates list of (name,loc) *)
let childlist = make_navlist pglst in (* create list of (name, children) *)
let nav_list =
List.map (fun (x,clst)->(x,
(List.map (fun y->(y,List.assoc y pndir,navtext y)) clst)
)) childlist in
List.map (fun x->{page=List.assoc x nplst; loc=List.assoc x pndir;
nav=List.assoc x nav_list}) namlst;;
(* ************************ HTML functions ************************** *)
(* Wrap a HTML tag around a string *)
let wrap tag x =
(* start tag may have additional arguments eg href that we want
to strip from the end tag, so we need to extract the part of
the tag up to the first space *)
let init =
try
let ind=String.index tag ' ' in
String.sub tag 0 ind
with Not_found -> tag in
let stag = "<"^tag^">" in
let etag = "</"^init^">" in
stag ^ x ^ etag;;
(* Wrap a hyperlink around a string *)
let wraploc dirstr = wrap ("a href=\"" ^ dirstr ^ "\"");;
(* Wrap a class and hyperlink around a string *)
let wrapclassloc cl dirstr = wrap ("a class=\"" ^ cl ^ "\" " ^
"href=\"" ^ dirstr ^ "\"");;
(* Wrap multiple layers around text *)
let composewrap wraplst = List.fold_right (fun t acc->compose (wrap t) acc)
wraplst id;;
(* Function to construct the navbar from a list of wrappers (see below)
and the list of (name, location, navbar text).
The wrapper is the html tag used in the table eg ["td"] for
a horizontal navbar, ["tr";"td"] for a vertical navbar,
["tr"; "td name=\"(css style name)\""] etc *)
let make_navbar txtclass wraplst navlist =
let wrapfun = List.fold_right (fun t acc->compose (wrap t) acc)
wraplst id in
wrap "table valign=\"top\"" ("\n"^
(List.fold_right
(fun (n,l,t) acc->(wrapfun
(wrapclassloc txtclass (l^n^".html") t))^"\n"^acc)
navlist ""));;
(* Function to construct a site map page *)
let make_site_map fpglst =
let pglst = List.map (fun x-> x.page) fpglst in
let pntree = make_page_name_tree pglst in
let namlst = List.map (fun x->x.name) pglst in
let namfpglst = List.combine namlst fpglst in
let aux pgname lst =
let fpg = (List.assoc pgname namfpglst) in
let pg = fpg.page in
let lststr = "\n" ^ (wraploc (fpg.loc ^ pg.name ^ ".html") pg.head) in
lststr ^ "\n" ^ (wrap "ul"
(List.fold_right (fun x acc-> (wrap "li" x)^acc) lst "")) in
let cout=open_out "data/sitemap.dat" in
begin
Printf.fprintf cout "%s" (Ptree.fold aux pntree);
close_out cout
end;;
(* Function to take a full page description and
(1) Create the directory structure
(2) Open the output file for writing
(3) write the header HTML including breaking the page into a two
column table
(4) Write the navbar to the file (* Future version: option for rh navbar *)
(5) Attempt to read the page data file: if successful write it to the
output file, else write nothing to output file and go to next step
(6) Write footer and close file
*)
(* Layout of each page consists of a title at the top of the page,
navbar on the lefthand side that doesn't change from page to page
(children of the root node), navbar at the top of the righthand column
that links to the subpages. *)
let make_final_html layout base mainnavlst fullpg =
let pg = fullpg.page in
let pgdir = fullpg.loc in
let pgname= pg.name in
let headerinfo =
try List.fold_right (fun x acc->x ^ "\n" ^ acc) (readfile pg.headerinfo) ""
with Sys_error s-> "" | Not_found -> ""
in
let header = wrap "head"("\n"^
(wrap "title" pg.title)^ "\n" ^
("<base href=\""^base^"\">\n")^
("<link rel=\"stylesheet\" href=\"" ^ pg.style ^ "\" " ^
"type=\"text/css\">\n") ^
(headerinfo ^ "\n")) in
let mainnav =make_navbar "bignav" ["tr";"td class=\"bignav\""] mainnavlst in
let navbar = make_navbar "smallnav" ["td class=\"smallnav\""]
(complement mainnavlst fullpg.nav) in
let content=
try List.fold_right (fun x acc->x ^ "\n" ^ acc) (readfile pg.content) ""
with Sys_error s-> "" | Not_found -> ""
in
let nav_and_content = if layout="top" then (
(composewrap ["tr align=center";"td valign=\"top\""]
navbar) ^ "\n" ^
(composewrap ["tr";"td valign=\"top\""] content) ^ "\n" )
else ( wrap "tr" (
(composewrap ["td valign=\"top\""] content) ^ "\n" ^
(composewrap ["td valign=\"top\""]
(make_navbar "smallnav" ["tr";"td class=\"smallnav\""]
(complement mainnavlst fullpg.nav))) ^ "\n"
))
in
let doctypestr =
"<DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">" in
doctypestr ^ "\n" ^
(wrap "html"
("\n\n" ^ header ^ "\n\n" ^
(wrap "body" (
(wrap "h1 class=\"big\"" pg.head) ^ "\n" ^
(wrap "table width=\"100%\" border=0 align=left cellpadding=0 cellspacing=0"
(
"\n" ^ (wrap "td valign=\"top\"" ("\n" ^ mainnav ^ "\n")) ^
"\n" ^ (wrap "td valign=\"top\""
(wrap "table valign=\"top\"" nav_and_content)
)
^ "\n"
) ^ "\n"
)
) ^ "\n"
)
));;
let make_final_page base mainnavlst fullpage =
let mask=Unix.umask 0 in
let fname="output/" ^ fullpage.loc ^ fullpage.page.name ^ ".html" in
let cout = open_out fname in
Printf.fprintf cout "%s" (make_final_html "right" base mainnavlst fullpage);
close_out cout;
Unix.chmod fname 0o644;
ignore (Unix.umask mask);
();;
(* Construct the website directory structure from the page name tree *)
let make_dirstruct pagenamet =
let dirs = pagedirs pagenamet in
let mask = Unix.umask 0 in
begin
Unix.mkdir "output" 0o755;
List.iter (fun str->if str="output/" then () else Unix.mkdir str 0o755)
(List.map (fun str->"output/"^str) dirs);
ignore(Unix.umask mask)
end;;
(* Print out the directory structure to a file *)
let print_dirstruct fplst =
let dirlst= List.map (fun x->x.loc) fplst in
let cout = open_out "output/dirlst.txt" in
begin
List.iter (fun x->output_string cout (x^"\n")) (List.tl dirlst);
flush cout;
close_out cout
end;;
(* The final function to make the entire website *)
let make_website base pfname =
let plst = read_pages pfname in
let fplst = make_full_desc plst in
let sitemap = make_site_map fplst in
let pagelst=List.map (fun x->x.page) fplst in
let mainnavlst = (List.hd fplst).nav in
let pntree =make_page_name_tree pagelst in
begin
ignore(make_dirstruct pntree);
print_dirstruct fplst;
List.iter (make_final_page base mainnavlst) fplst;
end;;
(* Put all of the output into an output directory that is a child of the
current directory. For testing purposes set the baseref of each page to
this directory. *)
let baseref="file://localhost" ^ (Unix.getcwd ()) ^ "/output/";;
let webbaseref="http://nodens.physics.ox.ac.uk/~mcdonnell/";;
(* ************* End of HTML functions ********************* ****** *)
(* ************* Functions for calling from command line ********** *)
(* entry point *)
let usage () =
print_string "Usage: htmltree baseref pagedescname\n";
exit 2;;
let main () =
if Array.length Sys.argv <> 3 then usage () else
begin
let baseref=Sys.argv.(1) in
let pdname =Sys.argv.(2) in
make_website baseref pdname
end;;
if !Sys.interactive then () else main ();;
(* ************* End of command line functions ******************** *)