Matt's Blog

OCaml for CGI: URL encoding and decoding

Sat May 5 19:59:03 EST 2007

I want to try using OCaml for the Common Gateway Interface (CGI). There are a number of libraries out there that do this, but as an exercise I want to have a shot at it myself first. The first task is to write functions that escape and unescape characters in a URI. Checking [RFC 2396] shows that, aside from a limited set of unreserved characters, everything else needs to be percent-encoded as a % sign followed by two hexadecimal digits (e.g. %20 = space, %25 = %) if it is to be passed as a query string parameter in the URI, or as POST data.

(* [inrange x low high] is true when [x] lies between [low] and [high],
   both bounds included. *)
let inrange x low high = low <= x && x <= high

(* ASCII upper-case letter test. *)
let isupper = function
  | 'A' .. 'Z' -> true
  | _ -> false

(* ASCII lower-case letter test. *)
let islower = function
  | 'a' .. 'z' -> true
  | _ -> false

(* ASCII decimal digit test. *)
let isnum = function
  | '0' .. '9' -> true
  | _ -> false

(* True when [c] belongs to at least one of the three classes above. *)
let isalphanum c =
  List.exists (fun in_class -> in_class c) [ isupper; islower; isnum ]
(* True for the nine punctuation characters that may appear unescaped:
   the "mark" set of RFC 2396, section 2.3. *)
let ismarker c = String.contains "-_.!~*'()" c
(* A character is unreserved, and so never needs escaping, when it is
   alphanumeric or one of the mark characters. *)
let unreserved c = if isalphanum c then true else ismarker c

(* Build the one-character string holding [c]. *)
let char_to_str c = String.make 1 c

(* Escape a character: an unreserved character is returned unchanged as a
   one-character string; any other character becomes a percent sign
   followed by exactly two hexadecimal digits of its character code
   (for example ' ' gives "%20" and '\n' gives "%0a").

   The zero padding matters: a percent escape is always two digits wide,
   and unescape consumes two digits after every percent sign, so a
   one-digit form such as "%a" would not survive a round trip. *)
let escape_char c =
  if unreserved c then char_to_str c
  else Printf.sprintf "%%%02x" (Char.code c)

(* Escape a whole string: every character of [s] is passed through
   escape_char and the pieces are concatenated in order.

   The result is accumulated in a Buffer rather than with repeated [^],
   which would re-copy the partial result for every input character and
   make the function quadratic in the length of [s]. *)
let escape s =
  (* The input length is only a starting capacity; the buffer grows when
     characters expand to three-character escapes. *)
  let buf = Buffer.create (String.length s) in
  String.iter (fun c -> Buffer.add_string buf (escape_char c)) s;
  Buffer.contents buf

(* Unescape a string: walk through [s] from left to right.  A percent
   sign followed by two hexadecimal digits (either case) is replaced by
   the character with that code; every other character is copied to the
   output unchanged.

   Malformed escapes are passed through literally instead of raising: a
   percent sign that is not followed by two hex digits, including one
   sitting in the last two positions of [s], is copied as an ordinary
   character.  The input usually comes from an untrusted request, so a
   stray percent sign must not abort decoding.

   The output is accumulated in a Buffer so the running time is linear in
   the length of [s]. *)
let unescape s =
  (* Numeric value of a single hexadecimal digit, or None. *)
  let hex_value = function
    | '0' .. '9' as c -> Some (Char.code c - Char.code '0')
    | 'a' .. 'f' as c -> Some (Char.code c - Char.code 'a' + 10)
    | 'A' .. 'F' as c -> Some (Char.code c - Char.code 'A' + 10)
    | _ -> None
  in
  let strlen = String.length s in
  (* The decoded string is never longer than the input. *)
  let buf = Buffer.create strlen in
  (* Some c when a well-formed escape for c starts at index k. *)
  let decode_at k =
    if s.[k] <> '%' || k + 2 >= strlen then None
    else
      match hex_value s.[k + 1], hex_value s.[k + 2] with
      | Some hi, Some lo -> Some (Char.chr ((hi lsl 4) lor lo))
      | _ -> None
  in
  let rec aux k =
    if k < strlen then begin
      match decode_at k with
      | Some c ->
        Buffer.add_char buf c;
        aux (k + 3)
      | None ->
        Buffer.add_char buf s.[k];
        aux (k + 1)
    end
  in
  aux 0;
  Buffer.contents buf

[ocaml]

[permalink]

code (31)

erlang (6)
ideas (24)
lisp (1)
me (15)
notes (6)
ocaml (5)
physics (46)
qo (7)
unix (7)
vim (4)