X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=scripts%2Fwikilib.ml;h=081385fbf885d2970efafc4d36943c6eea826540;hb=eeb304015c65ccf593a77058d5db5f5a3e3b45d5;hp=da60d681095654fd2b0b0fe126d58c0a7eb526f9;hpb=11b93485a29771f4d826c50d9efc6d3607dfa50f;p=cocanwiki.git diff --git a/scripts/wikilib.ml b/scripts/wikilib.ml index da60d68..081385f 100644 --- a/scripts/wikilib.ml +++ b/scripts/wikilib.ml @@ -1,7 +1,7 @@ (* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. - * $Id: wikilib.ml,v 1.3 2004/09/09 12:21:22 rich Exp $ + * $Id: wikilib.ml,v 1.7 2004/09/25 16:05:03 rich Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,6 +27,8 @@ open Printf open ExtString +open Cocanwiki_strings + (* Generate a URL for a new page with the given title. This code checks * if the URL already exists in the database and can return one of several * errors. @@ -40,8 +42,9 @@ let nontrivial_re = Pcre.regexp ~flags:[`CASELESS] "[a-z0-9]" let generate_url_of_title (dbh : Dbi.connection) hostid title = (* Create a suitable URL from this title. *) - let url = String.map (function '\000' .. ' ' | '<' | '>' | '&' | '"' -> '_' - | c -> Char.lowercase c) title in + let url = + String.map (function '\000' .. ' ' | '<' | '>' | '&' | '"' | '+' -> '_' + | c -> Char.lowercase c) title in (* Check URL is not too trivial. *) if not (Pcre.pmatch ~rex:nontrivial_re url) then @@ -346,8 +349,8 @@ let _markup_paragraph dbh hostid text = escape_html first :: "<" :: elem :: ">" :: loop (rest, elem :: stack) | FoundLink (first, link, rest) -> - (* pop everything off the stack first *) - escape_html first :: loop ("", stack) @ link :: loop (rest, []) + (* link *) + escape_html first :: link :: loop (rest, stack) ) in @@ -381,6 +384,202 @@ let markup_pre lines = String.concat "\n" (List.map Cgi_escape.escape_html lines) ^ "\n\n" +(* Validate HTML permitted in between ... markers. + * Note that what we support is a very limited but strict subset of XHTML + * 1.0. Actually, that's not true. We should really use an XML parser + * and a proper DTD here to ensure elements only appear in the correct + * context ... + *) +let split_tags_re = Pcre.regexp ~flags:[`DOTALL] "<.*?>|[^<]+" + +let open_attr_re = Pcre.regexp "^<([a-z]+)\\s*([^>]*?)(/?)>$" +let close_attr_re = Pcre.regexp "^$" + +let allowed_elements = + let basic = [ + "p", []; + "ul", []; "ol", []; "li", []; + "pre", []; "blockquote", ["cite"]; + "strong", []; "em", []; "dfn", []; "code", []; "samp", []; "kbd", []; + "var", []; "cite", []; "sup", []; "sub", []; "q", []; + "abbr", []; "acronym", []; + "b", []; "i", []; + "div", []; "span", []; + "br", []; + ] in + let headers = [ "h3", []; "h4", []; "h5", []; "h6", [] ] in + let links = [ "a", ["href"] ] in + let images = [ "img", ["src"; "alt"; "width"; "height"; "longdesc"] ] in + + let forms = [ + "form", [ "method"; "action"; "enctype" ]; + "input", [ "name"; "value"; "type"; "size"; "maxlength" ]; + "textarea", [ "name"; "rows"; "cols" ]; + ] in + + let tables = [ + "table", []; "tr", []; + "th", [ "colspan"; "rowspan" ]; "td", [ "colspan"; "rowspan" ]; + "thead", []; "tbody", [] + ] in + + basic @ headers @ links @ images @ forms @ tables + +let standard_tags = [ "title"; "lang"; "class"; "id" ] + +(* Parse a list of tags like: + * name="value" name="value with space" + * into an assoc list. The tricky bit is that there may be + * spaces within the quoted strings. + *) +let parse_tags str = + if str = "" then [] (* Very common case. *) + else ( + let len = String.length str in + + let fail () = invalid_arg ("bad tags near: " ^ truncate 20 str) in + let get_alphas i = + let b = Buffer.create 100 in + let rec loop i = + if i < len && isalpha str.[i] then ( + Buffer.add_char b str.[i]; + loop (i+1) + ) else + Buffer.contents b, i + in + loop i + in + let get_to_next_quote i = + let b = Buffer.create 100 in + let rec loop i = + if i < len && str.[i] <> '"' then ( + Buffer.add_char b str.[i]; + loop (i+1) + ) else + Buffer.contents b, (i+1) + in + loop i + in + + let r = ref [] in + let rec loop i = + if i >= len then !r + else ( + let c = str.[i] in + if isspace c then loop (i+1) + else if isalpha c then ( + let name, i = get_alphas i in + if String.length str > i && str.[i] = '=' && str.[i+1] = '"' then ( + let value, i = get_to_next_quote (i+2) in + r := (name, value) :: !r; + loop i + ) + else fail () + ) + else fail () + ) + in + loop 0 + ) + +type valid_t = VText of string + | VOpen of string * (string * string) list + | VClose of string + +let validate html = + (* Split into attrs and non-attrs. We end up with a list like this: + * [ "