Make the 404 page more informative - it now actually performs the
search for you.

author rich <rich>
Thu, 3 Aug 2006 13:33:15 +0000 (13:33 +0000)
committer rich <rich>
Thu, 3 Aug 2006 13:33:15 +0000 (13:33 +0000)

html/_graphics/searching.gif [new file with mode: 0644]
html/_graphics/searching.xcf [new file with mode: 0644]
scripts/page.ml
templates/page_404.html
templates/page_404_header.html [new file with mode: 0644]

diff --git a/html/_graphics/searching.gif b/html/_graphics/searching.gif
new file mode 100644 (file)
index 0000000..40a85ff
Binary files /dev/null and b/html/_graphics/searching.gif differ
diff --git a/html/_graphics/searching.xcf b/html/_graphics/searching.xcf
new file mode 100644 (file)
index 0000000..19a2822
Binary files /dev/null and b/html/_graphics/searching.xcf differ
diff --git a/scripts/page.ml b/scripts/page.ml
index 4a3b020..47e6ae9 100644 (file)
@@ -1,7 +1,7 @@
 (* COCANWIKI - a wiki written in Objective CAML.
  * Written by Richard W.M. Jones <rich@merjis.com>.
  * Copyright (C) 2004 Merjis Ltd.
- * $Id: page.ml,v 1.49 2006/08/01 14:50:47 rich Exp $
+ * $Id: page.ml,v 1.50 2006/08/03 13:33:15 rich Exp $
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,6 +34,7 @@ open Cocanwiki_date
 open Cocanwiki_server_settings
 open Cocanwiki_links
 open Cocanwiki_extensions
+open Cocanwiki_strings
 
 type fp_status = FPOK of int32 * string * string * Calendar.t * bool
               | FPInternalRedirect of string
@@ -74,6 +75,7 @@ let run r (q : cgi) dbh hostid
   let template_page = get_template ~page dbh hostid "page.html" in
 
   (* This is the simpler template for 404 pages. *)
+  let template_404_header  = get_template dbh hostid "page_404_header.html" in
   let template_404  = get_template dbh hostid "page_404.html" in
 
   (* Host-specific fields. *)
@@ -397,8 +399,8 @@ let run r (q : cgi) dbh hostid
   let make_404 () =
     Request.set_status r 404;          (* Return a 404 error code. *)
 
-    let t = template_404 in
-    t#set "page" page;
+    let th = template_404_header in
+    th#set "page" page;
 
     let search_terms =
       String.map
@@ -406,13 +408,129 @@ let run r (q : cgi) dbh hostid
              ('a'..'z' | 'A'..'Z' | '0'..'9') as c -> c
            | _ -> ' ') page in
 
-    t#set "search_terms" search_terms;
+    th#set "search_terms" search_terms;
 
-    t#conditional "can_edit" can_edit;
-    t#conditional "can_manage_users" can_manage_users;
-    t#conditional "has_stats" has_stats;
+    (* Flush out the header while we start the search. *)
+    q#header ();
+    ignore (print_string r th#to_string);
+    ignore (Request.rflush r);
+
+    let t = template_404 in
+    t#set "query" search_terms;
+    t#set "canonical_hostname" host.canonical_hostname;
+
+    (* This is a simplified version of the code in search.ml. *)
+    let have_results =
+      (* Get the keywords from the query string. *)
+      let keywords = Pcre.split ~rex:split_words search_terms in
+      let keywords =
+       List.filter (fun s -> not (string_is_whitespace s)) keywords in
+      let keywords = List.map String.lowercase keywords in
+
+      (* Turn the keywords into a tsearch2 ts_query string. *)
+      let tsquery = String.concat "&" keywords in
+
+      (* Search the titles first. *)
+      let rows =
+       PGSQL(dbh)
+           "select url, title, last_modified_date,
+                    (lower (title) = lower ($search_terms)) as exact
+               from pages
+              where hostid = $hostid
+               and url is not null
+               and redirect is null
+                and title_description_fti @@ to_tsquery ('default', $tsquery)
+              order by exact desc, last_modified_date desc, title" in
+
+      let titles =
+       List.map (function
+                 | (Some url, title, last_modified, _) ->
+                     url, title, last_modified
+                 | _ -> assert false) rows in
+
+      let have_titles = titles <> [] in
+      t#conditional "have_titles" have_titles;
+
+      (* Search the contents. *)
+      let rows =
+       PGSQL(dbh)
+         "select c.id, p.url, p.title, p.last_modified_date
+             from contents c, pages p
+            where c.pageid = p.id
+              and p.hostid = $hostid
+              and url is not null
+              and p.redirect is null
+              and c.content_fti @@ to_tsquery ('default', $tsquery)
+            order by p.last_modified_date desc, p.title
+            limit 50" in
+
+      let contents =
+       List.map (function
+                 | (contentid, Some url, title, last_modified) ->
+                     contentid, url, title, last_modified
+                 | _ -> assert false) rows in
+
+      let have_contents = contents <> [] in
+      t#conditional "have_contents" have_contents;
+
+      (* Pull out the actual text which matched so we can generate a summary.
+       * XXX tsearch2 can actually do better than this by emboldening
+       * the text which maps.
+       *)
+      let content_map =
+       if contents = [] then []
+       else (
+         let rows =
+           let contentids =
+             List.map (fun (contentid, _,_,_) -> contentid) contents in
+           PGSQL(dbh)
+             "select id, sectionname, content from contents
+                where id in $@contentids" in
+         List.map (fun (id, sectionname, content) ->
+                     id, (sectionname, content)) rows
+       ) in
 
-    q#template t
+      (* Generate the final tables. *)
+      let table =
+       List.map (fun (url, title, last_modified) ->
+                   let last_modified = printable_date last_modified in
+                   [ "url", Template.VarString url;
+                     "title", Template.VarString title;
+                     "last_modified", Template.VarString last_modified ]
+                ) titles in
+      t#table "titles" table;
+
+      let table =
+       List.map
+         (fun (contentid, url, title, last_modified) ->
+            let sectionname, content = List.assoc contentid content_map in
+            let have_sectionname, sectionname =
+              match sectionname with
+                None -> false, ""
+              | Some sectionname -> true, sectionname in
+            let content =
+              truncate 160
+                (Wikilib.text_of_xhtml
+                   (Wikilib.xhtml_of_content r dbh hostid content)) in
+            let linkname = linkname_of_sectionname sectionname in
+            let last_modified = printable_date last_modified in
+            [ "url", Template.VarString url;
+              "title", Template.VarString title;
+              "have_sectionname", Template.VarConditional have_sectionname;
+              "sectionname", Template.VarString sectionname;
+              "linkname", Template.VarString linkname;
+              "content", Template.VarString content;
+              "last_modified", Template.VarString last_modified ]
+         ) contents in
+      t#table "contents" table;
+
+      (* Do we have any results? *)
+      let have_results = have_titles || have_contents in
+      have_results in
+    t#conditional "have_results" have_results;
+
+    (* Deliver the rest of the page. *)
+    ignore (print_string r t#to_string)
   in
 
   (* Fetch a page by name.  This function can give three answers:
diff --git a/templates/page_404.html b/templates/page_404.html
index d6dda8d..f799265 100644 (file)
@@ -1,24 +1,55 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<title>Page not found</title>
-<meta name="robots" content="noindex,nofollow"/>
-<meta name="author" content="http://www.merjis.com/" />
-<link rel="stylesheet" href="::theme_css_html_tag::" type="text/css" title="Standard"/>
-<link rel="alternate stylesheet" href="/_css/easytoread.css" type="text/css" title="High contrast, big fonts"/>
-</head><body>
-
-<h1><span>Page not found</span></h1>
+::if(have_results)::
+
+::if(have_titles)::
+<div id="titles">
+<ul id="titles">
+::table(titles)::
+<li>
+  <a href="/::url_html_tag::">::title_html::</a>
+  <span class="last_modified_date">- Last change: ::last_modified_html::</span>
+</li>
+::end::
+</ul>
+</div>
+::end::
+
+::if(have_contents)::
+<ul id="contents">
+::table(contents)::
+<li>
+  <a href="/::url_html_tag::">::title_html::</a>
+  ::if(have_sectionname)::
+    (<a href="/::url_html_tag::#::linkname_html_tag::">::sectionname_html::</a>)
+  ::end:: <br/>
+  <span class="content">::content::</span> <br/>
+  <span class="last_modified_date">Last change: ::last_modified_html::</span>
+</li>
+::end::
+</ul>
+::end::
+
+::else::
 
 <p>
-Search our site for this page:
+<strong>There are no similar pages found.</strong>
 </p>
 
-<form method="post" action="/_search">
 <p>
-<input name="q" value="::search_terms_html_tag::" size="60"/><input type="submit" value="Search"/>
+Some tips for finding what you want:
 </p>
-</form>
+
+<ul>
+<li> Use fewer words. </li>
+<li> <a href="/_sitemap">Try browsing the sitemap.</a> </li>
+<li> <a href="http://www.google.com/search?q=site&#58;::canonical_hostname_url::+::query_url::">Try using Google to find similar pages.</a> </li>
+</ul>
+
+::end::
+
+<script type="text/javascript"><!--
+var img = document.getElementById ("searching_img");
+img.style.display = "none";
+//--></script>
 
 ::include(footer.html)::
 </body>
diff --git a/templates/page_404_header.html b/templates/page_404_header.html
new file mode 100644 (file)
index 0000000..f666742
--- /dev/null
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<title>Page not found</title>
+<meta name="robots" content="noindex,nofollow"/>
+<meta name="author" content="http://www.merjis.com/" />
+<link rel="stylesheet" href="::theme_css_html_tag::" type="text/css" title="Standard"/>
+<link rel="stylesheet" href="/_css/search.css" type="text/css" title="Standard"/>
+<link rel="alternate stylesheet" href="/_css/easytoread.css" type="text/css" title="High contrast, big fonts"/>
+</head><body onload="document.f.q.focus ()">
+
+<h1><span>Page not found</span></h1>
+
+<p>
+Search our site for this page:
+</p>
+
+<form method="post" action="/_search" name="f">
+<p>
+<input name="q" value="::search_terms_html_tag::" size="60"/><input type="submit" value="Search"/>
+</p>
+</form>
+
+<p>
+<img width="87" height="16" src="/_graphics/searching.gif" alt="Searching ..." id="searching_img"/>
+</p>
\ No newline at end of file