From ee2be7d8d29f4ab6c9ed78e3ed12beedbead7587 Mon Sep 17 00:00:00 2001 From: rich Date: Tue, 2 Nov 2004 18:47:54 +0000 Subject: [PATCH] Full text search. --- MANIFEST | 2 + conf/cocanwiki.conf | 3 +- html/_css/search.css | 23 +++++++ scripts/links.ml | 4 +- scripts/search.ml | 169 +++++++++++++++++++++++++++++++++++++++++++++++--- scripts/source.ml | 4 +- templates/page.html | 2 +- templates/search.html | 84 +++++++++++++++++++++++++ 8 files changed, 278 insertions(+), 13 deletions(-) create mode 100644 html/_css/search.css create mode 100644 templates/search.html diff --git a/MANIFEST b/MANIFEST index 999c0ac..96c645b 100644 --- a/MANIFEST +++ b/MANIFEST @@ -23,6 +23,7 @@ html/_css/login.css html/_css/markup.css html/_css/new_page.css html/_css/print.css +html/_css/search.css html/_css/standard.css html/_css/stats_top.css html/_css/tables.css @@ -276,6 +277,7 @@ templates/recent.html templates/recent_rss.xml templates/recently_visited.html templates/restore_form.html +templates/search.html templates/send_feedback.txt templates/send_feedback_form.html templates/set_password_form.html diff --git a/conf/cocanwiki.conf b/conf/cocanwiki.conf index a0aab97..4c0ae5a 100644 --- a/conf/cocanwiki.conf +++ b/conf/cocanwiki.conf @@ -1,5 +1,5 @@ # Apache configuration for COCANWIKI. -# $Id: cocanwiki.conf,v 1.18 2004/11/01 17:46:21 rich Exp $ +# $Id: cocanwiki.conf,v 1.19 2004/11/02 18:47:54 rich Exp $ # Uncomment the following lines if necessary. You will probably need # to adjust the paths to reflect where cocanwiki is really installed. 
@@ -69,6 +69,7 @@ RewriteRule ^/_pe_confirm$ /_bin/page_email_confirm.cmo [PT,L,QSA] RewriteRule ^/_pe_unsub$ /_bin/page_email_unsubscribe.cmo [PT,L,QSA] RewriteRule ^/_recent$ /_bin/recent.cmo [PT,L,QSA] RewriteRule ^/_recent.rss$ /_bin/recent_rss.cmo [PT,L,QSA] +RewriteRule ^/_search$ /_bin/search.cmo [PT,L,QSA] RewriteRule ^/_sitemap$ /_bin/sitemap.cmo [PT,L,QSA] RewriteRule ^/_userprefs$ /_bin/user_prefs_form.cmo [PT,L,QSA] RewriteRule ^/_users$ /_bin/users.cmo [PT,L,QSA] diff --git a/html/_css/search.css b/html/_css/search.css new file mode 100644 index 0000000..624b08b --- /dev/null +++ b/html/_css/search.css @@ -0,0 +1,23 @@ +/* $Id: search.css,v 1.1 2004/11/02 18:47:54 rich Exp $ */ + +div#titles { + border: 1px solid #ccc; + width: 80%; + margin-left: 10%; +} + +ul#titles { + list-style: none; +} + +ul#contents { + list-style: none; +} + +span.content { + font-size: 0.7em; +} + +span.last_modified_date { + font-size: 0.7em; +} \ No newline at end of file diff --git a/scripts/links.ml b/scripts/links.ml index e84f49b..b9e2aec 100644 --- a/scripts/links.ml +++ b/scripts/links.ml @@ -1,7 +1,7 @@ (* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. - * $Id: links.ml,v 1.1 2004/10/27 21:14:05 rich Exp $ + * $Id: links.ml,v 1.2 2004/11/02 18:47:54 rich Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -138,4 +138,4 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid _ _ = ) let () = - register_script run + register_script ~restrict:[CanView] run diff --git a/scripts/search.ml b/scripts/search.ml index bf27600..0ee9f2e 100644 --- a/scripts/search.ml +++ b/scripts/search.ml @@ -1,7 +1,7 @@ (* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. 
- * $Id: search.ml,v 1.5 2004/10/04 15:19:56 rich Exp $
+ * $Id: search.ml,v 1.6 2004/11/02 18:47:54 rich Exp $
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,16 +26,182 @@ open Cgi_escape
 open Printf
 
 open Cocanwiki
+open Cocanwiki_template
+open Cocanwiki_strings
+open Cocanwiki_date
 
-let search : ('a -> 'b -> 'c, unit, string, string) format4 =
-  "http://www.google.com/search?q=site:%s+%s"
+(* Regular expression matching runs of non-word characters; used to
+ * split the query string into keywords. *)
+let split_words = Pcre.regexp "\\W+"
 
-let run r (q : cgi) (dbh : Dbi.connection) hostid { hostname = hostname } _ =
-  let query = q#param "q" in
+(* Handler for the search page.  Renders the "search.html" template,
+ * filling in the page titles and page contents which match the "q" CGI
+ * parameter using tsearch2 full text queries.  Old versions of pages
+ * are searched only if "old_versions" is set and the user can edit. *)
+let run r (q : cgi) (dbh : Dbi.connection) hostid host user =
+  let template = get_template dbh hostid "search.html" in
 
-  let query = sprintf search (escape_url hostname) (escape_url query) in
+  (* Get the query, if it exists. *)
+  let query = try q#param "q" with Not_found -> "" in
+  let have_query = not (string_is_whitespace query) in
+  template#set "query" query;
+  template#conditional "have_query" have_query;
 
-  q#redirect query
+  (* Permissions. *)
+  let can_edit = can_edit host user in
+  template#conditional "can_edit" can_edit;
+
+  (* Search old versions?  Only permit this if can_edit is true. *)
+  let old_versions =
+    if can_edit then (
+      try q#param_true "old_versions"
+      with Not_found -> false
+    )
+    else false in
+
+  (* Get the keywords from the query string.  Discard any empty strings
+   * the split may yield (for instance from leading punctuation) so they
+   * can never produce a malformed tsquery such as "&foo". *)
+  let keywords = Pcre.split ~rex:split_words query in
+  let keywords = List.filter (fun word -> word <> "") keywords in
+  let keywords = List.map String.lowercase keywords in
+
+  (* If the query contains at least one keyword, make some results.
+   * A query made only of punctuation has no keywords: skip the database
+   * searches and let the template report that nothing was found. *)
+  let have_results =
+    if have_query && keywords <> [] then (
+      (* Turn the keywords into a tsearch2 ts_query string. *)
+      let tsquery = String.concat "&" keywords in
+
+      (* Search the titles first. *)
+      let sth =
+        dbh#prepare_cached
+          ("select id, url, url_deleted, title, last_modified_date,
+                   (lower (title) = lower (?)) as exact
+              from pages
+             where hostid = ? " ^
+           (if not old_versions then "and url is not null " else "") ^ "
+               and redirect is null
+               and title_description_fti @@ to_tsquery (?, ?)
+             order by exact desc, last_modified_date desc, title") in
+      sth#execute [`String query;
+                   `Int hostid; `String "default"; `String tsquery];
+
+      let titles =
+        sth#map (function
+                 | [_; `String url; `Null; `String title;
+                    `Timestamp last_modified; _] ->
+                     url, title, None, last_modified
+                 | [`Int version; `Null; `String url; `String title;
+                    `Timestamp last_modified; _] ->
+                     url, title, Some version, last_modified
+                 | _ -> assert false) in
+
+      let have_titles = titles <> [] in
+      template#conditional "have_titles" have_titles;
+
+      (* Search the contents. *)
+      let sth =
+        dbh#prepare_cached
+          ("select c.id, p.id, p.url, p.url_deleted, p.title,
+                   p.last_modified_date
+              from contents c, pages p
+             where c.pageid = p.id
+               and p.hostid = ? " ^
+           (if not old_versions then "and url is not null " else "") ^ "
+               and p.redirect is null
+               and c.content_fti @@ to_tsquery (?, ?)
+             order by p.last_modified_date desc, p.title
+             limit 50") in
+      sth#execute [`Int hostid; `String "default"; `String tsquery];
+
+      let contents =
+        sth#map (function
+                 | [`Int contentid; _; `String url; `Null;
+                    `String title; `Timestamp last_modified] ->
+                     contentid, url, title, None, last_modified
+                 | [`Int contentid; `Int version; `Null; `String url;
+                    `String title; `Timestamp last_modified] ->
+                     contentid, url, title, Some version, last_modified
+                 | _ -> assert false) in
+
+      let have_contents = contents <> [] in
+      template#conditional "have_contents" have_contents;
+
+      (* Pull out the actual text which matched so we can generate a summary.*)
+      let content_map =
+        if contents = [] then []
+        else (
+          let qs = Dbi.placeholders (List.length contents) in
+          let sth =
+            dbh#prepare_cached
+              ("select id, sectionname, content from contents
+                 where id in " ^ qs) in
+          sth#execute
+            (List.map (fun (contentid, _,_,_,_) -> `Int contentid) contents);
+          sth#map (function
+                   | [ `Int id; `Null; `String content ] ->
+                       id, (None, content)
+                   | [ `Int id; `String sectionname; `String content ] ->
+                       id, (Some sectionname, content)
+                   | _ -> assert false)
+        ) in
+
+      (* Generate the final tables. *)
+      let table =
+        List.map (fun (url, title, version, last_modified) ->
+                    let have_version, version =
+                      match version with
+                          None -> false, 0
+                        | Some version -> true, version in
+                    let last_modified = printable_date last_modified in
+                    [ "url", Template.VarString url;
+                      "title", Template.VarString title;
+                      "have_version", Template.VarConditional have_version;
+                      "version", Template.VarString (string_of_int version);
+                      "last_modified", Template.VarString last_modified ]
+                 ) titles in
+      template#table "titles" table;
+
+      let table =
+        List.map
+          (fun (contentid, url, title, version, last_modified) ->
+             let have_version, version =
+               match version with
+                   None -> false, 0
+                 | Some version -> true, version in
+             let sectionname, content = List.assoc contentid content_map in
+             let have_sectionname, sectionname =
+               match sectionname with
+                   None -> false, ""
+                 | Some sectionname -> true, sectionname in
+             let content =
+               truncate 160
+                 (Wikilib.text_of_xhtml
+                    (Wikilib.xhtml_of_content dbh hostid content)) in
+             let linkname = linkname_of_sectionname sectionname in
+             let last_modified = printable_date last_modified in
+             [ "url", Template.VarString url;
+               "title", Template.VarString title;
+               "have_version", Template.VarConditional have_version;
+               "version", Template.VarString (string_of_int version);
+               "have_sectionname", Template.VarConditional have_sectionname;
+               "sectionname", Template.VarString sectionname;
+               "linkname", Template.VarString linkname;
+               "content", Template.VarString content;
+               "last_modified", Template.VarString last_modified ]
+          ) contents in
+      template#table "contents" table;
+
+      (* Do we have any results? *)
+      let have_results = have_titles || have_contents in
+      have_results
+    )
+    else false in
+  template#conditional "have_results" have_results;
+
+  q#template template
 
 let () =
   register_script ~restrict:[CanView] run
diff --git a/scripts/source.ml b/scripts/source.ml
index d724e01..269a3ad 100644
--- a/scripts/source.ml
+++ b/scripts/source.ml
@@ -1,7 +1,7 @@
 (* COCANWIKI - a wiki written in Objective CAML.
  * Written by Richard W.M. Jones .
  * Copyright (C) 2004 Merjis Ltd.
- * $Id: source.ml,v 1.1 2004/10/27 21:14:05 rich Exp $
+ * $Id: source.ml,v 1.2 2004/11/02 18:47:54 rich Exp $
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -87,4 +87,4 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid _ _ =
        ignore (print_newline r)) model.contents
 
 let () =
-  register_script run
+  register_script ~restrict:[CanView] run
diff --git a/templates/page.html b/templates/page.html
index 7f0e9f9..cbf2acd 100644
--- a/templates/page.html
+++ b/templates/page.html
@@ -98,7 +98,7 @@
 ::if(search_box)::
-
+
diff --git a/templates/search.html b/templates/search.html new file mode 100644 index 0000000..1d1167c --- /dev/null +++ b/templates/search.html @@ -0,0 +1,84 @@ + + + +Search this site + + + + + + +

Search this site

+ +
+ + + + + + + + + +
Search: + +
+ ::if(can_edit):: + +
+ ::end:: +
+
+ +::if(have_results):: + +::if(have_titles):: +
+
    +::table(titles):: +
  • + ::title_html:: + - Last change: ::last_modified_html:: +
  • +::end:: +
+
+::end:: + +::if(have_contents):: + +::end:: + +::else:: +::if(have_query):: + +

+There are no results for your search. +

+ +

+Some tips for finding what you want: +

+ + + +::end:: +::end:: + +::include(footer.html):: + + \ No newline at end of file -- 1.8.3.1