Ocaml for CGI. URL encoding and decoding
Sat May 5 19:59:03 EST 2007
I want to try using OCaml for the Common Gateway Interface (CGI). There are a number of libraries out there that do this, but as an exercise I want to have a shot at it myself first. The first task is to write functions that escape and unescape characters in a URI. Checking [RFC 2396] shows that, aside from a limited set of unreserved characters, everything else needs to be encoded as a percent sign followed by two hexadecimal digits (for example %20 for a space and %25 for %) if it is to be passed as a query string parameter in the URI, or as POST data.
(* [inrange x low high] holds exactly when [x] lies in the closed interval
   from [low] to [high], compared with OCaml's polymorphic ordering. *)
let inrange x low high = low <= x && x <= high
(* True for the ASCII uppercase letters 'A' through 'Z', false otherwise. *)
let isupper c =
  match c with
  | 'A' .. 'Z' -> true
  | _ -> false
(* True for the ASCII lowercase letters 'a' through 'z', false otherwise. *)
let islower c =
  match c with
  | 'a' .. 'z' -> true
  | _ -> false
(* True for the ASCII decimal digits '0' through '9', false otherwise. *)
let isnum c =
  match c with
  | '0' .. '9' -> true
  | _ -> false
(* True when [c] satisfies at least one of [isupper], [islower] or [isnum];
   the predicates are tried in that order and the first match wins. *)
let isalphanum c = List.exists (fun accepts -> accepts c) [ isupper; islower; isnum ]
(* True when [c] is one of the nine punctuation marks
   - _ . ! ~ * ' ( )
   and false for every other character. *)
let ismarker c = String.contains "-_.!~*'()" c
(* A character is unreserved when it is alphanumeric ([isalphanum]) or one
   of the punctuation marks accepted by [ismarker]. *)
let unreserved c =
  if isalphanum c then true
  else ismarker c
(* Build the one-character string that consists of just [c]. *)
let char_to_str c = String.make 1 c
(* Escape a character: if the character is on the unreserved list then
   just create a string of length one; otherwise emit a percent sign
   followed by exactly two lowercase hexadecimal digits of its code.
   The "02" width in the format is essential: with a plain "%x",
   characters whose code is below 16 (tab, newline, ...) would come out
   as "%9" or "%a", which is not a valid two-digit escape and cannot be
   decoded again. *)
let escape_char c =
  if unreserved c then char_to_str c
  else Printf.sprintf "%%%02x" (Char.code c)
(* Escape a string: apply [escape_char] to every character of [s], left to
   right, and return the concatenation of the results.
   The output is accumulated in a Buffer rather than with [^], because
   repeated [^] copies the whole accumulator at every step and makes the
   function quadratic in the length of the input. *)
let escape s =
  let buf = Buffer.create (String.length s) in
  String.iter (fun c -> Buffer.add_string buf (escape_char c)) s;
  Buffer.contents buf
(* Unescape a string: scan [s] from left to right.  A percent sign that is
   followed by two hexadecimal digits (either case) is replaced by the
   character with that code; every other character is copied to the
   output unchanged.
   A percent sign that is NOT followed by two hexadecimal digits -- for
   instance at the very end of the string ("100%"), or in "%zz" -- is
   copied through literally, so the function never raises on malformed
   input.
   The scan is tail-recursive and writes into a Buffer, so long inputs
   neither grow the stack nor trigger quadratic string copying. *)
let unescape s =
  let strlen = String.length s in
  let buf = Buffer.create strlen in
  (* Numeric value of one hexadecimal digit, or None for anything else. *)
  let hex_value = function
    | '0' .. '9' as d -> Some (Char.code d - Char.code '0')
    | 'a' .. 'f' as d -> Some (Char.code d - Char.code 'a' + 10)
    | 'A' .. 'F' as d -> Some (Char.code d - Char.code 'A' + 10)
    | _ -> None
  in
  let rec aux k =
    if k >= strlen then Buffer.contents buf
    else
      let decoded =
        (* [k + 2 < strlen] guarantees both digit positions exist. *)
        if s.[k] = '%' && k + 2 < strlen then
          match hex_value s.[k + 1], hex_value s.[k + 2] with
          | Some hi, Some lo -> Some (Char.chr ((hi * 16) + lo))
          | _ -> None
        else None
      in
      match decoded with
      | Some c ->
        Buffer.add_char buf c;
        aux (k + 3)
      | None ->
        Buffer.add_char buf s.[k];
        aux (k + 1)
  in
  aux 0
[ocaml]