(* COCANWIKI - a wiki written in Objective CAML.
* Written by Richard W.M. Jones <rich@merjis.com>.
* Copyright (C) 2004 Merjis Ltd.
- * $Id: wikilib.ml,v 1.5 2006/03/27 16:43:44 rich Exp $
+ * $Id: wikilib.ml,v 1.10 2006/08/17 08:03:47 rich Exp $
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
open ExtString
open Cocanwiki_strings
+open Cocanwiki_extensions
(* Generate a URL for a new page with the given title. This code checks
* if the URL already exists in the database and can return one of several
| GenURL_BadURL
| GenURL_Duplicate of string
-let nontrivial_re = Pcre.regexp ~flags:[`CASELESS] "[a-z0-9]"
+let trivial str =
+ let len = String.length str in
+ if len < 1 then true
+ else (
+ let rec loop i =
+ if i >= len then true
+ else (
+ let c = UTF8.look str i in
+ if iswebsafe c then false
+ else loop (UTF8.next str i)
+ )
+ in
+ loop 0
+ )
-let generate_url_of_title dbh hostid title =
- (* Create a suitable URL from this title. *)
+let generate_url_of_title r dbh hostid title =
+ (* Create a suitable URL from this title.
+ * This version happens to be UTF-8 safe.
+ *)
let url =
String.map (function
| '\000' .. ' ' | '<' | '>' | '&' | '"'
| '+' | '#' | '%' | '?'
-> '_'
- | c -> Char.lowercase c) title in
+ | ('A' .. 'Z' as c) -> Char.lowercase c
+ | c -> c) title in
(* Check URL is not too trivial. *)
- if not (Pcre.pmatch ~rex:nontrivial_re url) then
+ if trivial url then
GenURL_TooShort
(* URL cannot begin with '_'. *)
else if url.[0] = '_' then
GenURL_BadURL
(* Titles which begin or end with spaces are probably mistakes. *)
- else if isspace title.[0] || isspace title.[String.length title - 1] then
+ else if isspace (UTF8.get title 0)
+ || isspace (UTF8.look title (UTF8.last title)) then
GenURL_BadURL
else (
(* Check that the URL doesn't already exist in the database. If it does
(* This matches any markup. *)
let markup_re =
+ (* A link, like [[...]]. *)
let link = "\\[\\[\\s*(?:.+?)\\s*(?:\\|.+?\\s*)?\\]\\]" in
- let tag = "</?(?:b|i|strong|em|code|tt|sup|sub|nowiki|big|small|strike|s|br)>" in
- Pcre.regexp ("(.*?)((?:" ^ link ^ ")|(?:" ^ tag ^ "))(.*)")
+ (* A restricted HTML element, like <b> or </b>. *)
+ let tag =
+ "</?(?:b|i|strong|em|code|tt|sup|sub|nowiki|big|small|strike|s|br)>" in
+ (* An external function call, like {{call}} or {{call:arg}}. *)
+ let func = "{{(?:\\w+)(?::.*?)?}}" in
+ (* Combined. *)
+ Pcre.regexp ("(.*?)((?:" ^ link ^ ")|(?:" ^ tag ^ ")|(?:" ^ func ^ "))(.*)")
(* This matches links only, and should be compatible with the link contained
* in the above regexp.
let url_re = Pcre.regexp "^[a-z]+://"
let mailto_re = Pcre.regexp "^mailto:"
+(* This matches external function calls only, and should be compatible
+ * with the link contained in the above regexp.
+ *)
+let func_re = Pcre.regexp "{{(\\w+)(?::(.*?))?}}"
+
(* Links. *)
-let markup_link dbh hostid link =
+let markup_link r dbh hostid link =
let subs = Pcre.exec ~rex:link_re link in
let url = Pcre.get_substring subs 1 in
escape_html text ^ "</a>"
)
+let markup_function r dbh hostid str =
+ let subs = Pcre.exec ~rex:func_re str in
+ let function_name = Pcre.get_substring subs 1 in
+ let function_arg =
+ try Some (Pcre.get_substring subs 2) with Not_found -> None in
+
+ (* Look to see if there is a registered external function
+ * with that name.
+ *)
+ try
+ let fn = List.assoc function_name !external_functions in
+
+ (* Call the external function and return the result. *)
+ fn r dbh hostid function_arg
+
+ with
+ Not_found ->
+ str (* Not found - return the original string. *)
+
type find_t = FoundNothing
| FoundOpen of string * string * string
| FoundClose of string * string * string * string
| FoundLink of string * string * string
+ | FoundCall of string * string * string
-let _markup_paragraph dbh hostid text =
+let _markup_paragraph r dbh hostid text =
let find_earliest_markup text =
let convert_b_and_i elem =
if elem = "b" then "strong"
let rest = Pcre.get_substring subs 3 in
if String.length markup > 2 &&
markup.[0] = '[' && markup.[1] = '[' then (
- let link = markup_link dbh hostid markup in
+ let link = markup_link r dbh hostid markup in
FoundLink (first, link, rest)
)
else if String.length markup > 2 &&
let elem = convert_b_and_i elem in
FoundOpen (first, elem, rest)
)
+ else if String.length markup > 2 &&
+ markup.[0] = '{' && markup.[1] = '{' then (
+ let call = markup_function r dbh hostid markup in
+ FoundCall (first, call, rest)
+ )
else
failwith ("bad regexp: markup is '" ^ markup ^ "'");
with
(* close tags ignored *)
escape_html first :: "</" :: escape_html elem :: ">" ::
loop (rest, [])
- | FoundOpen (first, elem, rest) when elem = "nowiki" ->
+ | FoundOpen (first, "nowiki", rest) ->
(* handle <nowiki> specially ... *)
- escape_html first :: loop (rest, elem :: [])
- | FoundOpen (first, elem, rest) when elem = "br" ->
+ escape_html first :: loop (rest, "nowiki" :: [])
+ | FoundOpen (first, "br", rest) ->
(* handle <br> specially ... *)
escape_html first :: "<br/>" :: loop (rest, [])
| FoundOpen (first, elem, rest) ->
escape_html first :: "<" :: elem :: ">" :: loop (rest, [elem])
| FoundLink (first, link, rest) ->
escape_html first :: link :: loop (rest, [])
+ | FoundCall (first, link, rest) ->
+ escape_html first :: link :: loop (rest, [])
)
| text, ((x :: xs) as stack) ->
| FoundClose (first, elem, rest, elem_rest) ->
(* non-matching close tag *)
escape_html first :: "</" :: x :: ">" :: loop (elem_rest, xs)
- | FoundOpen (first, elem, rest) when elem = "nowiki" ->
+ | FoundOpen (first, "nowiki", rest) ->
(* handle <nowiki> specially ... *)
- escape_html first :: loop (rest, elem :: stack)
- | FoundOpen (first, elem, rest) when elem = "br" ->
+ escape_html first :: loop (rest, "nowiki" :: stack)
+ | FoundOpen (first, "br", rest) ->
(* handle <br> specially ... *)
escape_html first :: "<br/>" :: loop (rest, stack)
| FoundOpen (first, elem, rest) ->
| FoundLink (first, link, rest) ->
(* link *)
escape_html first :: link :: loop (rest, stack)
+ | FoundCall (first, link, rest) ->
+ (* external function *)
+ escape_html first :: link :: loop (rest, stack)
)
in
(*prerr_endline ("after loop = " ^ text);*)
text
-let markup_paragraph ~first_para dbh hostid text =
+let markup_paragraph ~first_para r dbh hostid text =
let p = if first_para then "<p class=\"first_para\">" else "<p>" in
- p ^ _markup_paragraph dbh hostid text ^ "</p>"
+ p ^ _markup_paragraph r dbh hostid text ^ "</p>"
-let markup_heading dbh hostid level text =
- let text = _markup_paragraph dbh hostid text in
+let markup_heading r dbh hostid level text =
+ let text = _markup_paragraph r dbh hostid text in
sprintf "<h%d>%s</h%d>" level text level
-let markup_ul dbh hostid lines =
+let markup_ul r dbh hostid lines =
"<ul><li>" ^
String.concat "</li>\n<li>"
- (List.map (fun t -> _markup_paragraph dbh hostid t) lines) ^
+ (List.map (fun t -> _markup_paragraph r dbh hostid t) lines) ^
"</li></ul>"
-let markup_ol dbh hostid lines =
+let markup_ol r dbh hostid lines =
"<ol><li>" ^
String.concat "</li>\n<li>"
- (List.map (fun t -> _markup_paragraph dbh hostid t) lines) ^
+ (List.map (fun t -> _markup_paragraph r dbh hostid t) lines) ^
"</li></ol>"
let markup_pre lines =
else (
let len = String.length str in
- let fail () = invalid_arg ("bad tags near: " ^ truncate 20 str) in
- let get_alphas i =
- let b = Buffer.create 100 in
+ let fail reason =
+ invalid_arg ("bad tags near: " ^ truncate 20 str ^ ": " ^ reason)
+ in
+
+ let get_alnums i =
+ let b = UTF8.Buf.create 100 in
let rec loop i =
- if i < len && isalpha str.[i] then (
- Buffer.add_char b str.[i];
- loop (i+1)
- ) else
- Buffer.contents b, i
+ if i >= len then UTF8.Buf.contents b, i
+ else (
+ let c = UTF8.look str i in
+ if iswesternalnum c then (
+ UTF8.Buf.add_char b c;
+ loop (i+1)
+ )
+ else UTF8.Buf.contents b, i
+ )
in
loop i
in
let get_to_next_quote i =
- let b = Buffer.create 100 in
+ let b = UTF8.Buf.create 100 in
let rec loop i =
- if i < len && str.[i] <> '"' then (
- Buffer.add_char b str.[i];
- loop (i+1)
- ) else
- Buffer.contents b, (i+1)
+ if i >= len then fail "no close quote"
+ else (
+ let c = UTF8.look str i in
+ if UChar.code c <> 34 (* quote char *) then (
+ UTF8.Buf.add_char b c;
+ loop (UTF8.next str i)
+ ) else
+ UTF8.Buf.contents b, UTF8.next str i
+ )
in
loop i
in
let rec loop i =
if i >= len then !r
else (
- let c = str.[i] in
- if isspace c then loop (i+1)
- else if isalpha c then (
- let name, i = get_alphas i in
- if String.length str > i && str.[i] = '=' && str.[i+1] = '"' then (
+ let c = UTF8.look str i in
+ if isspace c then loop (UTF8.next str i)
+ else if iswesternalpha c then (
+ let name, i = get_alnums i in
+ if i+1 < len && str.[i] = '=' && str.[i+1] = '"' then (
let value, i = get_to_next_quote (i+2) in
r := (name, value) :: !r;
loop i
)
- else fail ()
+ else fail "must have tag=\"value\""
)
- else fail ()
+ else fail "tag name is not alphabetical"
)
in
loop 0
let preformatted_re = Pcre.regexp "^ (.*)"
let html_open_re = Pcre.regexp "^<html>\\s*$"
let html_close_re = Pcre.regexp "^</html>\\s*$"
+let macro_re = Pcre.regexp "^{{(\\w+)}}\\s*$"
-let xhtml_of_content dbh hostid text =
+let xhtml_of_content r dbh hostid text =
(* Split the text into lines. *)
let lines = Pcre.split ~rex:split_lines_re text in
+ (* Do macro expansion before anything else, because macros could
+ * contain <html> sections, etc.
+ *)
+ let is_macro line =
+ try
+ let subs = Pcre.exec ~rex:macro_re line in
+ let name = Pcre.get_substring subs 1 in
+ let rows = PGSQL(dbh) "select 1 from macros
+ where hostid = $hostid and name = $name" in
+ (match rows with
+ | [] -> false (* Not an actual macro name from the database. *)
+ | [_] -> true (* Is an actual macro name. *)
+ | _ -> assert false (* Uniqueness should stop this from happening. *)
+ )
+ with
+ Not_found -> false
+ in
+ let expand_macro line =
+ try
+ let subs = Pcre.exec ~rex:macro_re line in
+ let name = Pcre.get_substring subs 1 in
+ let content =
+ List.hd (
+ PGSQL(dbh) "select content from macros
+ where hostid = $hostid and name = $name"
+ ) in
+ (* Split the content into lines of text. *)
+ let lines = Pcre.split ~rex:split_lines_re content in
+ lines
+ with
+ (Not_found | Failure "hd" | ExtList.List.Empty_list) as exn ->
+ failwith ("Wikilib: expand_macro: you should never see this: " ^
+ Printexc.to_string exn)
+ in
+ let rec loop = function
+ | [] -> []
+ | line :: xs when is_macro line -> expand_macro line @ loop xs
+ | x :: xs -> x :: loop xs
+ in
+ let lines = loop lines in
+
(* HTML blocks span multiple lines, so isolate these out first. *)
let rec loop = function
| [] -> []
(* Iterate over the lines to isolate headers and paragraphs. *)
let lines =
- List.map
- (function
- | STpLine line ->
- if Pcre.pmatch ~rex:preformatted_re line then (
- let subs = Pcre.exec ~rex:preformatted_re line in
- let line = Pcre.get_substring subs 1 in
- STPreformatted [line]
- )
- else if Pcre.pmatch ~rex:blank_re line then
- STBlank
- else if Pcre.pmatch ~rex:heading_re line then (
- let subs = Pcre.exec ~rex:heading_re line in
- let count = String.length (Pcre.get_substring subs 1) + 2 in
- let line = Pcre.get_substring subs 2 in
- STHeading (count, line)
- )
- else if Pcre.pmatch ~rex:unnumbered_re line then (
- let subs = Pcre.exec ~rex:unnumbered_re line in
- let line = Pcre.get_substring subs 2 in
- STUnnumbered [line]
- )
- else if Pcre.pmatch ~rex:numbered_re line then (
- let subs = Pcre.exec ~rex:numbered_re line in
- let line = Pcre.get_substring subs 2 in
- STNumbered [line]
- ) else
- STParagraph line
- | STpHTML html ->
- STHTML html
- ) lines in
+ List.map (
+ function
+ | STpLine line ->
+ if Pcre.pmatch ~rex:preformatted_re line then (
+ let subs = Pcre.exec ~rex:preformatted_re line in
+ let line = Pcre.get_substring subs 1 in
+ STPreformatted [line]
+ )
+ else if Pcre.pmatch ~rex:blank_re line then
+ STBlank
+ else if Pcre.pmatch ~rex:heading_re line then (
+ let subs = Pcre.exec ~rex:heading_re line in
+ let count = String.length (Pcre.get_substring subs 1) + 2 in
+ let line = Pcre.get_substring subs 2 in
+ STHeading (count, line)
+ )
+ else if Pcre.pmatch ~rex:unnumbered_re line then (
+ let subs = Pcre.exec ~rex:unnumbered_re line in
+ let line = Pcre.get_substring subs 2 in
+ STUnnumbered [line]
+ )
+ else if Pcre.pmatch ~rex:numbered_re line then (
+ let subs = Pcre.exec ~rex:numbered_re line in
+ let line = Pcre.get_substring subs 2 in
+ STNumbered [line]
+ )
+ else
+ STParagraph line
+ | STpHTML html ->
+ STHTML html
+ ) lines in
(* Aggregate paragraphs and lists. *)
let rec loop = function
match st with
| STBlank -> assert false (* Should never happen. *)
| STParagraph para ->
- markup_paragraph ~first_para:!first_para dbh hostid para
+ let first_para = !first_para in
+ markup_paragraph ~first_para r dbh hostid para
| STHeading (level, text) ->
- markup_heading dbh hostid level text
+ markup_heading r dbh hostid level text
| STUnnumbered lines ->
- markup_ul dbh hostid lines
+ markup_ul r dbh hostid lines
| STNumbered lines ->
- markup_ol dbh hostid lines
+ markup_ol r dbh hostid lines
| STPreformatted lines ->
markup_pre lines
| STHTML html ->