(* COCANWIKI - a wiki written in Objective CAML.
* Written by Richard W.M. Jones <rich@merjis.com>.
* Copyright (C) 2004 Merjis Ltd.
- * $Id: search.ml,v 1.4 2004/09/09 12:21:22 rich Exp $
+ * $Id: search.ml,v 1.13 2006/12/06 09:46:57 rich Exp $
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
open Printf
open Cocanwiki
+open Cocanwiki_template
+open Cocanwiki_strings
+open Cocanwiki_date
-let search : ('a -> 'b -> 'c, unit, string, string) format4 =
- "http://www.google.com/search?q=site:%s+%s"
+let split_words = Pcre.regexp "\\W+"
-let run r (q : cgi) (dbh : Dbi.connection) hostid { hostname = hostname } _ =
- let query = q#param "q" in
+let run r (q : cgi) dbh hostid host user =
+ let template = get_template r dbh hostid "search.html" in
- let query = sprintf search (escape_url hostname) (escape_url query) in
+ template#set "canonical_hostname" host.canonical_hostname;
- q#redirect query
+ (* Get the query, if it exists. *)
+ let query = try q#param "q" with Not_found -> "" in
+ let have_query = not (string_is_whitespace query) in
+ template#set "query" query;
+ template#conditional "have_query" have_query;
+
+ (* Permissions. *)
+ let can_edit = can_edit host user in
+ template#conditional "can_edit" can_edit;
+
+ (* Search old versions? Only permit this if can_edit is true. *)
+ let old_versions =
+ if can_edit then (
+ try q#param_true "old_versions"
+ with Not_found -> false
+ )
+ else false in
+
+ (* If we have a query, make some results. *)
+ let have_results =
+ if have_query then (
+ (* Get the keywords from the query string. *)
+ let keywords = Pcre.split ~rex:split_words query in
+ let keywords =
+ List.filter (fun s -> not (string_is_whitespace s)) keywords in
+ let keywords = List.map lowercase keywords in
+
+ (* Turn the keywords into a tsearch2 ts_query string. *)
+ let tsquery = String.concat "&" keywords in
+
+ (* Search the titles first. *)
+ let rows =
+ if not old_versions then
+ PGSQL(dbh)
+ "select id, url, url_deleted, title, last_modified_date,
+ (lower (title) = lower ($query)) as exact
+ from pages
+ where hostid = $hostid
+ and url is not null
+ and redirect is null
+ and title_description_fti @@ to_tsquery ('default', $tsquery)
+ order by exact desc, last_modified_date desc, title"
+ else
+ PGSQL(dbh)
+ "select id, url, url_deleted, title, last_modified_date,
+ (lower (title) = lower ($query)) as exact
+ from pages
+ where hostid = $hostid
+ and redirect is null
+ and title_description_fti @@ to_tsquery ('default', $tsquery)
+ order by exact desc, last_modified_date desc, title" in
+
+ let titles =
+ List.map (function
+ | (_, Some url, None, title, last_modified, _) ->
+ url, title, None, last_modified
+ | (version, None, Some url, title, last_modified, _) ->
+ url, title, Some version, last_modified
+ | _ -> assert false) rows in
+
+ let have_titles = titles <> [] in
+ template#conditional "have_titles" have_titles;
+
+ (* Search the contents. *)
+ let rows =
+ if not old_versions then
+ PGSQL(dbh)
+ "select c.id, p.id, p.url, p.url_deleted, p.title,
+ p.last_modified_date
+ from contents c, pages p
+ where c.pageid = p.id
+ and p.hostid = $hostid
+ and url is not null
+ and p.redirect is null
+ and c.content_fti @@ to_tsquery ('default', $tsquery)
+ order by p.last_modified_date desc, p.title
+ limit 50"
+ else
+ PGSQL(dbh)
+ "select c.id, p.id, p.url, p.url_deleted, p.title,
+ p.last_modified_date
+ from contents c, pages p
+ where c.pageid = p.id
+ and p.hostid = $hostid
+ and p.redirect is null
+ and c.content_fti @@ to_tsquery ('default', $tsquery)
+ order by p.last_modified_date desc, p.title
+ limit 50" in
+
+ let contents =
+ List.map (function
+ | (contentid, _, Some url, None, title, last_modified) ->
+ contentid, url, title, None, last_modified
+ | (contentid, version, None, Some url, title,
+ last_modified) ->
+ contentid, url, title, Some version, last_modified
+ | _ -> assert false) rows in
+
+ let have_contents = contents <> [] in
+ template#conditional "have_contents" have_contents;
+
+ (* Pull out the actual text which matched so we can generate a summary.
+ * XXX tsearch2 can actually do better than this by emboldening
+ * the text which maps.
+ *)
+ let content_map =
+ if contents = [] then []
+ else (
+ let rows =
+ let contentids =
+ List.map (fun (contentid, _,_,_,_) -> contentid) contents in
+ PGSQL(dbh)
+ "select id, sectionname, content from contents
+ where id in $@contentids" in
+ List.map (fun (id, sectionname, content) ->
+ id, (sectionname, content)) rows
+ ) in
+
+ (* Generate the final tables. *)
+ let table =
+ List.map (fun (url, title, version, last_modified) ->
+ let have_version, version =
+ match version with
+ None -> false, 0l
+ | Some version -> true, version in
+ let last_modified = printable_date last_modified in
+ [ "url", Template.VarString url;
+ "title", Template.VarString title;
+ "have_version", Template.VarConditional have_version;
+ "version", Template.VarString (Int32.to_string version);
+ "last_modified", Template.VarString last_modified ]
+ ) titles in
+ template#table "titles" table;
+
+ let table =
+ List.map
+ (fun (contentid, url, title, version, last_modified) ->
+ let have_version, version =
+ match version with
+ None -> false, 0l
+ | Some version -> true, version in
+ let sectionname, content = List.assoc contentid content_map in
+ let have_sectionname, sectionname =
+ match sectionname with
+ None -> false, ""
+ | Some sectionname -> true, sectionname in
+ let content =
+ truncate 160
+ (Wikilib.text_of_xhtml
+ (Wikilib.xhtml_of_content r dbh hostid content)) in
+ let linkname = linkname_of_sectionname sectionname in
+ let last_modified = printable_date last_modified in
+ [ "url", Template.VarString url;
+ "title", Template.VarString title;
+ "have_version", Template.VarConditional have_version;
+ "version", Template.VarString (Int32.to_string version);
+ "have_sectionname", Template.VarConditional have_sectionname;
+ "sectionname", Template.VarString sectionname;
+ "linkname", Template.VarString linkname;
+ "content", Template.VarString content;
+ "last_modified", Template.VarString last_modified ]
+ ) contents in
+ template#table "contents" table;
+
+ (* Do we have any results? *)
+ let have_results = have_titles || have_contents in
+ have_results
+ )
+ else false in
+ template#conditional "have_results" have_results;
+
+ q#template template
let () =
- register_script run
+ register_script ~restrict:[CanView] run