X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=scripts%2Fpage.ml;h=82fa0e714b14d9f1712d177be7e0b7acddf9180f;hb=63ef27854a7eb10dd3cc06531114ca171f430cbe;hp=3657919e65846c7d7a7d112503c18fa013ec9bc6;hpb=7130fa13b57c8f83e30781ad9030f8759a0dedbe;p=cocanwiki.git diff --git a/scripts/page.ml b/scripts/page.ml index 3657919..82fa0e7 100644 --- a/scripts/page.ml +++ b/scripts/page.ml @@ -1,7 +1,7 @@ (* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. - * $Id: page.ml,v 1.33 2004/10/17 19:43:19 rich Exp $ + * $Id: page.ml,v 1.55 2006/08/14 17:56:59 rich Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -33,10 +33,15 @@ open Cocanwiki_ok open Cocanwiki_date open Cocanwiki_server_settings open Cocanwiki_links +open Cocanwiki_extensions +open Cocanwiki_strings -type fp_status = FPOK of int * string * string * Dbi.datetime * bool - | FPRedirect of string - | FPNotFound +type fp_status = + | FPOK of int32 * string * string * string option * Calendar.t * bool + * bool option + | FPInternalRedirect of string + | FPExternalRedirect of string + | FPNotFound (* Referer strings which help us decide if the user came from * a search engine and highlight terms in the page appropriately. @@ -52,36 +57,43 @@ let split_qs_re = Pcre.regexp "\\?" let xhtml_re = Pcre.regexp "<.*?>|[^<>]+" -let run r (q : cgi) (dbh : Dbi.connection) hostid - ({ edit_anon = edit_anon; - view_anon = view_anon } as host) +let run r (q : cgi) dbh hostid + ({ edit_anon = edit_anon; view_anon = view_anon } as host) user = let page = q#param "page" in let page = if page = "" then "index" else page in + (* The main "page" template is split in two to improve the speed of + * delivery of the page. The very first part ("page_header.html") + * contains the page , crucially including all the links to the + * stylesheets. We send this first and flush it out to the client so + * that the client can begin requesting stylesheets, background images + * and so on. After this we compose the main page ("page.html") and + * send it out second. + *) + + let template_page_header = + get_template ~page dbh hostid "page_header.html" in let template_page = get_template ~page dbh hostid "page.html" in + + (* This is the simpler template for 404 pages. *) + let template_404_header = get_template dbh hostid "page_404_header.html" in let template_404 = get_template dbh hostid "page_404.html" in (* Host-specific fields. *) - let sth = dbh#prepare_cached "select css is not null, - feedback_email is not null, - mailing_list, search_box, navigation - from hosts where id = ?" in - sth#execute [`Int hostid]; - let has_host_css, has_feedback_email, mailing_list, search_box, navigation = - match sth#fetch1 () with - | [ `Bool has_host_css; `Bool has_feedback_email; `Bool mailing_list; - `Bool search_box; `Bool navigation ] -> - has_host_css, has_feedback_email, mailing_list, search_box, - navigation + let rows = + PGSQL(dbh) + "select feedback_email is not null, mailing_list, navigation + from hosts where id = $hostid" in + let has_feedback_email, mailing_list, navigation = + match rows with + | [Some has_feedback_email, mailing_list, navigation] -> + has_feedback_email, mailing_list, navigation | _ -> assert false in - (* Can the user edit? Manage users? etc. *) + (* User permissions. *) let can_edit = can_edit host user in let can_manage_users = can_manage_users host user in - let can_manage_contacts = can_manage_contacts host user in - let can_manage_site = can_manage_site host user in - let can_edit_global_css = can_edit_global_css host user in (* Do we have a stats page set up? *) let has_stats = server_settings_stats_page dbh <> None in @@ -158,55 +170,149 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid (* Check the templates table for extensions. *) let get_extension url = - let sth = dbh#prepare_cached "select extension from templates - where ? ~ url_regexp - order by ordering - limit 1" in - sth#execute [`String url]; - try - let name = sth#fetch1string () in + let name = + List.hd ( + PGSQL(dbh) "select extension from templates + where $url ~ url_regexp + order by ordering + limit 1" + ) in Some (List.assoc name !extensions) with - Not_found -> None + Not_found | ExtList.List.Empty_list | Failure "hd" -> None in (* This code generates ordinary pages. *) - let make_page title description pageid last_modified_date has_page_css + let make_page title description keywords + pageid last_modified_date has_page_css noodp version page page' extension = let t = template_page in - t#set "title" title; + let th = template_page_header in + (*t#set "title" title; - nothing uses ::title:: on page.html - removed *) + + (* Page title, h1 and superdirs (if any). *) + th#set "title" title; + + let superdirs, h1 = + match String.nsplit title "/" with + | [] -> [], "" + | [h1] -> [], h1 + | xs -> + let xs = List.rev xs in + let h1 = List.hd xs in + let superdirs = List.rev (List.tl xs) in + + (* Check the superdirs are reasonable, then convert them + * into paths or redlinks. + * If any of this fails, then there are no superdirs. + *) + try + let pathsofar = ref "" in + let superdirs = + List.mapi ( + fun i name -> + (* Path will be something like "Dir1/Dir2". We want + * a URL like "dir1/dir2". + *) + let path = + if i = 0 then name else !pathsofar ^ "/" ^ name in + (* Path so far reasonable? *) + let url, redlink = + match Wikilib.generate_url_of_title r dbh hostid path with + | Wikilib.GenURL_Duplicate url -> url, None + | Wikilib.GenURL_OK url -> url, Some path + | Wikilib.GenURL_BadURL | Wikilib.GenURL_TooShort -> + raise Exit in + pathsofar := path; + name, url, redlink + ) superdirs in + superdirs, h1 + with + Exit -> [], title in + + let superdirs = List.map ( + fun (name, url, redlink) -> + let is_redlink, redlink_title = + match redlink with + | None -> false, "" + | Some title -> true, title in + [ "url", Template.VarString url; + "name", Template.VarString name; + "is_redlink", Template.VarConditional is_redlink; + "redlink_title", Template.VarString redlink_title ] + ) superdirs in + + th#conditional "has_superdirs" (superdirs <> []); + th#table "superdirs" superdirs; + th#set "h1" h1; + t#set "last_modified_date" last_modified_date; (match description with - None -> t#conditional "has_description" false + None -> th#conditional "has_description" false | Some description -> - t#conditional "has_description" true; - t#set "description" description); + th#conditional "has_description" true; + th#set "description" description); + + (match keywords with + None -> th#conditional "has_keywords" false + | Some keywords -> + th#conditional "has_keywords" true; + th#set "keywords" keywords); if page <> page' then (* redirection *) ( t#set "page" page'; + th#set "page" page'; t#set "original_page" page; (* XXX title - get it from database *) t#conditional "redirected" true ) else ( t#set "page" page; + th#set "page" page; t#conditional "redirected" false ); - t#conditional "has_host_css" has_host_css; - t#conditional "has_page_css" has_page_css; + th#conditional "has_page_css" has_page_css; + + (* If the per-page noodp is not null, set the noodp flag here. Otherwise + * we will use the default (from hosts.global_noodp) which was set + * in Cocanwiki_template. + *) + (match noodp with + | None -> () + | Some b -> th#conditional "noodp" b); + + (* Are we showing an old version of the page? If so, warn. *) + (match version with + None -> + t#conditional "is_old_version" false; + th#conditional "is_old_version" false + | Some pageid -> + t#conditional "is_old_version" true; + th#conditional "is_old_version" true; + t#set "old_version" (Int32.to_string pageid); + th#set "old_version" (Int32.to_string pageid)); + + (* Just before we show the header, call any registered pre-page + * handlers. They might want to send cookies. + *) + List.iter (fun handler -> + handler r q dbh hostid page') !pre_page_handlers; + + (* At this point, we can print out the header and flush it back to + * the user, allowing the browser to start fetching stylesheets + * and background images while we compose the page. + *) + q#header (); + ignore (print_string r th#to_string); + ignore (Request.rflush r); t#conditional "has_feedback_email" has_feedback_email; t#conditional "mailing_list" mailing_list; - t#conditional "search_box" search_box; t#conditional "navigation" navigation; t#conditional "can_edit" can_edit; t#conditional "can_manage_users" can_manage_users; - t#conditional "can_manage_contacts" can_manage_contacts; - t#conditional "can_manage_site" can_manage_site; - t#conditional "can_edit_global_css" can_edit_global_css; - t#conditional "has_stats" has_stats; (* Pull out the sections in this page. *) @@ -214,43 +320,44 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid match pageid with None -> [] | Some pageid -> - let sth = dbh#prepare_cached - "select ordering, sectionname, content, divname - from contents where pageid = ? order by ordering" in - sth#execute [`Int pageid]; - - sth#map - (function [`Int ordering; - (`Null | `String _) as sectionname; - `String content; - (`Null | `String _) as divname] -> + let rows = PGSQL(dbh) + "select ordering, sectionname, content, divname, jsgo + from contents where pageid = $pageid order by ordering" in + + List.map + (fun (ordering, sectionname, content, divname, jsgo) -> let divname, has_divname = match divname with - `Null -> "", false - | `String divname -> divname, true in + | None -> "", false + | Some divname -> divname, true in + let jsgo, has_jsgo = + match jsgo with + | None -> "", false + | Some jsgo -> jsgo, true in let sectionname, has_sectionname = match sectionname with - `Null -> "", false - | `String sectionname -> sectionname, true in + | None -> "", false + | Some sectionname -> sectionname, true in let linkname = linkname_of_sectionname sectionname in - [ "ordering", Template.VarString (string_of_int ordering); + [ "ordering", Template.VarString (Int32.to_string ordering); "has_sectionname", Template.VarConditional has_sectionname; "sectionname", Template.VarString sectionname; "linkname", Template.VarString linkname; "content", Template.VarString - (Wikilib.xhtml_of_content dbh hostid content); + (Wikilib.xhtml_of_content r dbh hostid content); "has_divname", Template.VarConditional has_divname; - "divname", Template.VarString divname ] - | _ -> assert false) in + "divname", Template.VarString divname; + "has_jsgo", Template.VarConditional has_jsgo; + "jsgo", Template.VarString jsgo ]) rows in (* Call an extension to generate the first section in this page? *) let sections = match extension with None -> sections | Some extension -> - let content = extension dbh hostid page' in + let content = extension r dbh hostid page' in let section = [ "ordering", Template.VarString "0"; "has_sectionname", Template.VarConditional false; @@ -258,40 +365,47 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid "content", Template.VarString content; "has_divname", Template.VarConditional true; "divname", Template.VarString "form_div"; + "has_jsgo", Template.VarConditional false; + "jsgo", Template.VarString ""; ] in section :: sections in t#table "sections" sections; - (* Are we showing an old version of the page? If so, warn. *) - (match version with - None -> - t#conditional "is_old_version" false - | Some pageid -> - t#conditional "is_old_version" true; - t#set "old_version" (string_of_int pageid)); - (* Login status. *) (match user with Anonymous -> t#conditional "user_logged_in" false - | User (_, username, _) -> + | User (_, username, _, _) -> t#conditional "user_logged_in" true; t#set "username" username); + (* Can anonymous users create accounts? If not them we don't + * want to offer to create accounts for them. + *) + t#conditional "create_account_anon" host.create_account_anon; + (* If logged in, we want to update the recently_visited table. *) if pageid <> None then ( match user with - | User (userid, _, _) -> - let sth = dbh#prepare_cached "delete from recently_visited - where hostid = ? and userid = ? - and url = ?" in - sth#execute [`Int hostid; `Int userid; `String page']; - let sth = dbh#prepare_cached - "insert into recently_visited (hostid, userid, url) - values (?, ?, ?)" in - sth#execute [`Int hostid; `Int userid; `String page']; - dbh#commit () + | User (userid, _, _, _) -> + (try + PGSQL(dbh) + "delete from recently_visited + where hostid = $hostid and userid = $userid + and url = $page'"; + PGSQL(dbh) + "insert into recently_visited (hostid, userid, url) + values ($hostid, $userid, $page')"; + PGOCaml.commit dbh; + with + exn -> + (* Exceptions here are non-fatal. Just print them. *) + prerr_endline "exception updating recently_visited:"; + prerr_endline (Printexc.to_string exn); + PGOCaml.rollback dbh; + ); + PGOCaml.begin_work dbh; | _ -> () ); @@ -306,29 +420,24 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid let rv = match user with - | User (userid, _, _) -> + | User (userid, _, _, _) -> (* Recently visited URLs, but don't repeat any from the 'what * links here' section, and don't link to self. *) let not_urls = page' :: wlh_urls in - let limit = max_links - List.length wlh_urls in - let qs = Dbi.placeholders (List.length not_urls) in - let sth = - dbh#prepare_cached - ("select rv.url, p.title, rv.visit_time - from recently_visited rv, pages p - where rv.hostid = ? and rv.userid = ? - and rv.url not in " ^ qs ^ " - and rv.hostid = p.hostid and rv.url = p.url - order by 3 desc - limit ?") in - let args = List.map (fun s -> `String s) not_urls in - sth#execute - ([`Int hostid; `Int userid] @ args @ [`Int limit]); - sth#map - (function [`String url; `String title; _] -> - url, title - | _ -> assert false) + let limit = Int32.of_int (max_links - List.length wlh_urls) in + let rows = + PGSQL(dbh) + "select rv.url, p.title, rv.visit_time + from recently_visited rv, pages p + where rv.hostid = $hostid and rv.userid = $userid + and rv.url not in $@not_urls + and rv.hostid = p.hostid and rv.url = p.url + order by 3 desc + limit $limit" in + List.map ( + fun (url, title, _) -> url, title + ) rows | _ -> [] in (* Links to page. *) @@ -360,20 +469,19 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid let xhtml = highlight_search_terms xhtml search_terms "search_term" in (* Deliver the page. *) - q#header (); - print_string r xhtml + ignore (print_string r xhtml) with Not_found -> (* No referer / no search terms / not a search engine referer. *) - q#template t + ignore (print_string r t#to_string) in (* This code generates 404 pages. *) let make_404 () = Request.set_status r 404; (* Return a 404 error code. *) - let t = template_404 in - t#set "page" page; + let th = template_404_header in + th#set "page" page; let search_terms = String.map @@ -381,19 +489,129 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid ('a'..'z' | 'A'..'Z' | '0'..'9') as c -> c | _ -> ' ') page in - t#set "search_terms" search_terms; - - t#conditional "has_host_css" has_host_css; - - t#conditional "can_edit" can_edit; - t#conditional "can_manage_users" can_manage_users; - t#conditional "can_manage_contacts" can_manage_contacts; - t#conditional "can_manage_site" can_manage_site; - t#conditional "can_edit_global_css" can_edit_global_css; + th#set "search_terms" search_terms; - t#conditional "has_stats" has_stats; + (* Flush out the header while we start the search. *) + q#header (); + ignore (print_string r th#to_string); + ignore (Request.rflush r); - q#template t + let t = template_404 in + t#set "query" search_terms; + t#set "canonical_hostname" host.canonical_hostname; + + (* This is a simplified version of the code in search.ml. *) + let have_results = + (* Get the keywords from the query string. *) + let keywords = Pcre.split ~rex:split_words search_terms in + let keywords = + List.filter (fun s -> not (string_is_whitespace s)) keywords in + let keywords = List.map String.lowercase keywords in + + (* Turn the keywords into a tsearch2 ts_query string. *) + let tsquery = String.concat "&" keywords in + + (* Search the titles first. *) + let rows = + PGSQL(dbh) + "select url, title, last_modified_date, + (lower (title) = lower ($search_terms)) as exact + from pages + where hostid = $hostid + and url is not null + and redirect is null + and title_description_fti @@ to_tsquery ('default', $tsquery) + order by exact desc, last_modified_date desc, title" in + + let titles = + List.map (function + | (Some url, title, last_modified, _) -> + url, title, last_modified + | _ -> assert false) rows in + + let have_titles = titles <> [] in + t#conditional "have_titles" have_titles; + + (* Search the contents. *) + let rows = + PGSQL(dbh) + "select c.id, p.url, p.title, p.last_modified_date + from contents c, pages p + where c.pageid = p.id + and p.hostid = $hostid + and url is not null + and p.redirect is null + and c.content_fti @@ to_tsquery ('default', $tsquery) + order by p.last_modified_date desc, p.title + limit 50" in + + let contents = + List.map (function + | (contentid, Some url, title, last_modified) -> + contentid, url, title, last_modified + | _ -> assert false) rows in + + let have_contents = contents <> [] in + t#conditional "have_contents" have_contents; + + (* Pull out the actual text which matched so we can generate a summary. + * XXX tsearch2 can actually do better than this by emboldening + * the text which maps. + *) + let content_map = + if contents = [] then [] + else ( + let rows = + let contentids = + List.map (fun (contentid, _,_,_) -> contentid) contents in + PGSQL(dbh) + "select id, sectionname, content from contents + where id in $@contentids" in + List.map (fun (id, sectionname, content) -> + id, (sectionname, content)) rows + ) in + + (* Generate the final tables. *) + let table = + List.map (fun (url, title, last_modified) -> + let last_modified = printable_date last_modified in + [ "url", Template.VarString url; + "title", Template.VarString title; + "last_modified", Template.VarString last_modified ] + ) titles in + t#table "titles" table; + + let table = + List.map + (fun (contentid, url, title, last_modified) -> + let sectionname, content = List.assoc contentid content_map in + let have_sectionname, sectionname = + match sectionname with + None -> false, "" + | Some sectionname -> true, sectionname in + let content = + truncate 160 + (Wikilib.text_of_xhtml + (Wikilib.xhtml_of_content r dbh hostid content)) in + let linkname = linkname_of_sectionname sectionname in + let last_modified = printable_date last_modified in + [ "url", Template.VarString url; + "title", Template.VarString title; + "have_sectionname", Template.VarConditional have_sectionname; + "sectionname", Template.VarString sectionname; + "linkname", Template.VarString linkname; + "content", Template.VarString content; + "last_modified", Template.VarString last_modified ] + ) contents in + t#table "contents" table; + + (* Do we have any results? *) + let have_results = have_titles || have_contents in + have_results in + t#conditional "have_results" have_results; + + (* Deliver the rest of the page. *) + ignore (print_string r t#to_string) in (* Fetch a page by name. This function can give three answers: @@ -401,66 +619,57 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid * (2) Page is a redirect (fetches the name of the redirect page). * (3) Page not found in database, could be template or 404 error. *) - (* XXX Should do a case-insensitive matching of URLs, and if the URL differs - * in case only should redirect to the lowercase version. - *) let fetch_page page version allow_redirect = match version with | None -> if allow_redirect then ( - let sth = - dbh#prepare_cached - "select redirect, id, title, description, last_modified_date, - css is not null - from pages where hostid = ? and url = ?" in - sth#execute [`Int hostid; `String page]; - (try - (match sth#fetch1 () with - | [ `Null; `Int id; `String title; `String description; - `Timestamp last_modified_date; `Bool has_page_css ] -> - FPOK (id, title, description, last_modified_date, - has_page_css) - | `String redirect :: _ -> - FPRedirect redirect - | _ -> assert false) - with - Not_found -> FPNotFound) + let rows = PGSQL(dbh) + "select url, redirect, id, title, description, keywords, + last_modified_date, css is not null, noodp + from pages + where hostid = $hostid and lower (url) = lower ($page)" in + match rows with + | [Some page', _, _, _, _, _, _, _, _] + when page <> page' -> (* different case *) + FPExternalRedirect page' + | [ _, None, id, title, description, keywords, + last_modified_date, has_page_css, noodp ] -> + let has_page_css = Option.get has_page_css in + FPOK (id, title, description, keywords, last_modified_date, + has_page_css, noodp) + | [_, Some redirect, _, _, _, _, _, _, _] -> + FPInternalRedirect redirect + | [] -> FPNotFound + | _ -> assert false ) else (* redirects not allowed ... *) ( - let sth = - dbh#prepare_cached - "select id, title, description, last_modified_date, - css is not null - from pages where hostid = ? and url = ?" in - sth#execute [`Int hostid; `String page]; - (try - (match sth#fetch1 () with - | [ `Int id; `String title; `String description; - `Timestamp last_modified_date; `Bool has_page_css ] -> - FPOK (id, title, description, last_modified_date, - has_page_css) - | _ -> assert false) - with - Not_found -> FPNotFound) + let rows = PGSQL(dbh) + "select id, title, description, keywords, last_modified_date, + css is not null, noodp + from pages where hostid = $hostid and url = $page" in + match rows with + | [ id, title, description, keywords, + last_modified_date, has_page_css, noodp ] -> + let has_page_css = Option.get has_page_css in + FPOK (id, title, description, keywords, last_modified_date, + has_page_css, noodp) + | [] -> FPNotFound + | _ -> assert false ) | Some version -> - let sth = - dbh#prepare_cached - "select id, title, description, last_modified_date, - css is not null - from pages - where hostid = ? and id = ? and - (url = ? or url_deleted = ?)" in - sth#execute [`Int hostid; `Int version; - `String page; `String page]; - (try - (match sth#fetch1 () with - | [ `Int id; `String title; `String description; - `Timestamp last_modified_date; `Bool has_page_css ] -> - FPOK (id, title, description, last_modified_date, - has_page_css) - | _ -> assert false) - with - Not_found -> FPNotFound) + let rows = PGSQL(dbh) + "select id, title, description, keywords, last_modified_date, + css is not null, noodp + from pages + where hostid = $hostid and id = $version and + (url = $page or url_deleted = $page)" in + match rows with + | [ id, title, description, keywords, + last_modified_date, has_page_css, noodp ] -> + let has_page_css = Option.get has_page_css in + FPOK (id, title, description, keywords, last_modified_date, + has_page_css, noodp) + | [] -> FPNotFound + | _ -> assert false in (* Here we deal with the complex business of redirects and versions. *) @@ -468,34 +677,41 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid let allow_redirect, version = if can_edit then ( not (q#param_true "no_redirect"), - try Some (int_of_string (q#param "version")) with Not_found -> None + try Some (Int32.of_string (q#param "version")) with Not_found -> None ) else (true, None) in let rec loop page' i = if i > max_redirect then ( error ~title:"Too many redirections" ~back_button:true - q ("Too many redirects between pages. This may happen because " ^ - "of a cycle of redirections."); + dbh hostid q + ("Too many redirects between pages. This may happen because " ^ + "of a cycle of redirections."); return () ) else match fetch_page page' version allow_redirect with - | FPOK (pageid, title, description, last_modified_date, has_page_css)-> + | FPOK (pageid, title, description, keywords, + last_modified_date, has_page_css, noodp)-> (* Check if the page is also a template. *) let extension = get_extension page' in - make_page title (Some description) (Some pageid) - (printable_date last_modified_date) has_page_css + make_page title (Some description) keywords (Some pageid) + (printable_date last_modified_date) has_page_css noodp version page page' extension - | FPRedirect page' -> + | FPInternalRedirect page' -> loop page' (i+1) + | FPExternalRedirect page' -> + (* This normally happens when a user has requested an uppercase + * page name. We redirect to the true (lowercase) version. + *) + q#redirect ("http://" ^ host.hostname ^ "/" ^ page') | FPNotFound -> (* Might be a templated page with no content in it. *) let extension = get_extension page' in (match extension with | (Some _) as extension -> let title = page' in - make_page title None None - "Now" false None page page' + make_page title None None None + "Now" false None None page page' extension | None -> make_404 ())