Ocaml for CGI. URL encoding and decoding
Sat May 5 19:59:03 EST 2007
I want to try to use OCaml for Common Gateway Interface (CGI) programming. There are a number of libraries out there to do this, but as an exercise I want to have a shot myself first. The first task is to write functions that escape and unescape characters in a URI. Checking [RFC 2396] shows that, aside from a limited set of unreserved characters, everything else needs to be percent-encoded (for example, %20 for a space and %25 for a literal %) if it is to be passed as a query-string parameter in the URI, or as POST data.
(** [inrange x low high] is [true] when [low <= x <= high] (both bounds
    inclusive), using polymorphic comparison. *)
let inrange x low high = x >= low && x <= high

(** ASCII upper-case letter test. *)
let isupper c = inrange c 'A' 'Z'

(** ASCII lower-case letter test. *)
let islower c = inrange c 'a' 'z'

(** ASCII decimal digit test. *)
let isnum c = inrange c '0' '9'

(** ASCII letter-or-digit test. *)
let isalphanum c = isupper c || islower c || isnum c

(** [ismarker c] is [true] for the punctuation characters that are allowed
    to appear unescaped alongside the alphanumerics. *)
let ismarker = function
  | '-' | '_' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' -> true
  | _ -> false

(** [unreserved c] is [true] when [c] may appear in the output of [escape]
    without being percent-encoded. *)
let unreserved c = isalphanum c || ismarker c

(** [char_to_str c] is the one-character string containing [c]. *)
let char_to_str = String.make 1

(** [escape_char c] is [c] itself (as a string) when [c] is unreserved,
    otherwise a percent sign followed by exactly two hexadecimal digits.

    The width/zero-pad in the format matters: without it, bytes below 16
    would produce a single digit (newline would become "%a"), which is not
    a valid escape and cannot be read back by [unescape]. Upper-case digits
    are emitted; [unescape] accepts either case. *)
let escape_char c =
  if unreserved c then char_to_str c
  else Printf.sprintf "%%%02X" (Char.code c)

(** [escape s] percent-encodes every byte of [s] that is not unreserved.
    The result is accumulated in a [Buffer] so the cost is linear in the
    length of [s]. *)
let escape s =
  let buf = Buffer.create (String.length s) in
  String.iter (fun c -> Buffer.add_string buf (escape_char c)) s;
  Buffer.contents buf

(** [hex_digit_value c] is [Some v] with [v] in 0..15 when [c] is a
    hexadecimal digit (either case), and [None] otherwise. Helper for
    [unescape]. *)
let hex_digit_value = function
  | '0' .. '9' as c -> Some (Char.code c - Char.code '0')
  | 'a' .. 'f' as c -> Some (Char.code c - Char.code 'a' + 10)
  | 'A' .. 'F' as c -> Some (Char.code c - Char.code 'A' + 10)
  | _ -> None

(** [unescape s] replaces every well-formed escape (a percent sign followed
    by two hexadecimal digits) in [s] with the byte it denotes and copies
    every other character through unchanged.

    A percent sign that is not followed by two hexadecimal digits (because
    the string ends too early or the following characters are not hex) is
    copied through literally instead of raising, so [unescape "100%"] is
    "100%". The function never raises on any input string. *)
let unescape s =
  let strlen = String.length s in
  let buf = Buffer.create strlen in
  let rec aux k =
    if k < strlen then (
      let c = s.[k] in
      let decoded =
        (* Both digits must exist (k + 2 is the index of the second one)
           and both must be valid hex for this to count as an escape. *)
        if c = '%' && k + 2 < strlen then (
          match (hex_digit_value s.[k + 1], hex_digit_value s.[k + 2]) with
          | Some hi, Some lo -> Some (Char.chr ((hi lsl 4) lor lo))
          | _ -> None)
        else None
      in
      match decoded with
      | Some d ->
          Buffer.add_char buf d;
          aux (k + 3)
      | None ->
          Buffer.add_char buf c;
          aux (k + 1))
  in
  aux 0;
  Buffer.contents buf