X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=scripts%2Fwikilib.ml;h=77ba13a9882cf0e582994512f5065871bccf43c1;hb=799d629c5af725596497b7df2055db6ac6da91b7;hp=2599f0e2d21be03b83e5268b35559a93e98b14c9;hpb=3062d573a7617339324c23cdd4755f8f04956b92;p=cocanwiki.git diff --git a/scripts/wikilib.ml b/scripts/wikilib.ml index 2599f0e..77ba13a 100644 --- a/scripts/wikilib.ml +++ b/scripts/wikilib.ml @@ -1,7 +1,22 @@ -(* Library of functions useful for people implementing a Wiki. +(* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. - * $Id: wikilib.ml,v 1.1 2004/09/07 10:14:09 rich Exp $ + * $Id: wikilib.ml,v 1.6 2004/09/17 15:09:48 rich Exp $ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. *) open Apache @@ -12,7 +27,7 @@ open Printf open ExtString -open Merjisforwiki +open Cocanwiki_strings (* Generate a URL for a new page with the given title. This code checks * if the URL already exists in the database and can return one of several @@ -333,8 +348,8 @@ let _markup_paragraph dbh hostid text = escape_html first :: "<" :: elem :: ">" :: loop (rest, elem :: stack) | FoundLink (first, link, rest) -> - (* pop everything off the stack first *) - escape_html first :: loop ("", stack) @ link :: loop (rest, []) + (* link *) + escape_html first :: link :: loop (rest, stack) ) in @@ -368,6 +383,202 @@ let markup_pre lines = String.concat "\n" (List.map Cgi_escape.escape_html lines) ^ "\n\n" +(* Validate HTML permitted in between ... markers. + * Note that what we support is a very limited but strict subset of XHTML + * 1.0. Actually, that's not true. We should really use an XML parser + * and a proper DTD here to ensure elements only appear in the correct + * context ... + *) +let split_tags_re = Pcre.regexp ~flags:[`DOTALL] "<.*?>|[^<]+" + +let open_attr_re = Pcre.regexp "^<([a-z]+)\\s*([^>]*?)(/?)>$" +let close_attr_re = Pcre.regexp "^$" + +let allowed_elements = + let basic = [ + "p", []; + "ul", []; "ol", []; "li", []; + "pre", []; "blockquote", ["cite"]; + "strong", []; "em", []; "dfn", []; "code", []; "samp", []; "kbd", []; + "var", []; "cite", []; "sup", []; "sub", []; "q", []; + "abbr", []; "acronym", []; + "b", []; "i", []; + "div", []; "span", []; + "br", []; + ] in + let headers = [ "h3", []; "h4", []; "h5", []; "h6", [] ] in + let links = [ "a", ["href"] ] in + let images = [ "img", ["src"; "alt"; "width"; "height"; "longdesc"] ] in + + let forms = [ + "form", [ "method"; "action"; "enctype" ]; + "input", [ "name"; "value"; "type"; "size"; "maxlength" ]; + "textarea", [ "name"; "rows"; "cols" ]; + ] in + + let tables = [ + "table", []; "tr", []; + "th", [ "colspan"; "rowspan" ]; "td", [ "colspan"; "rowspan" ]; + "thead", []; "tbody", [] + ] in + + basic @ headers @ links @ images @ forms @ tables + +let standard_tags = [ "title"; "lang"; "class"; "id" ] + +(* Parse a list of tags like: + * name="value" name="value with space" + * into an assoc list. The tricky bit is that there may be + * spaces within the quoted strings. + *) +let parse_tags str = + if str = "" then [] (* Very common case. *) + else ( + let len = String.length str in + + let fail () = invalid_arg ("bad tags near: " ^ truncate 20 str) in + let get_alphas i = + let b = Buffer.create 100 in + let rec loop i = + if i < len && isalpha str.[i] then ( + Buffer.add_char b str.[i]; + loop (i+1) + ) else + Buffer.contents b, i + in + loop i + in + let get_to_next_quote i = + let b = Buffer.create 100 in + let rec loop i = + if i < len && str.[i] <> '"' then ( + Buffer.add_char b str.[i]; + loop (i+1) + ) else + Buffer.contents b, (i+1) + in + loop i + in + + let r = ref [] in + let rec loop i = + if i >= len then !r + else ( + let c = str.[i] in + if isspace c then loop (i+1) + else if isalpha c then ( + let name, i = get_alphas i in + if String.length str > i && str.[i] = '=' && str.[i+1] = '"' then ( + let value, i = get_to_next_quote (i+2) in + r := (name, value) :: !r; + loop i + ) + else fail () + ) + else fail () + ) + in + loop 0 + ) + +type valid_t = VText of string + | VOpen of string * (string * string) list + | VClose of string + +let validate html = + (* Split into attrs and non-attrs. We end up with a list like this: + * [ "