(* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. * $Id: cocanwiki_mail.ml,v 1.1 2004/10/21 11:42:05 rich Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. *) open Printf open ExtString open Cocanwiki_pages open Cocanwiki_template open Cocanwiki_date (* Given a subject line, return the "base" subject. * eg. "Re: my message" -> "my message" *) let re_re = Pcre.regexp "^Re\\[\\d+\\]:\\s*" let base_subject subject = let rec loop subject = let n = String.length subject in if String.starts_with subject "Re: " then loop (String.sub subject 4 (n-4)) else if String.starts_with subject "Re:" then loop (String.sub subject 3 (n-3)) else if String.starts_with subject "RE: " then loop (String.sub subject 4 (n-4)) else if String.starts_with subject "RE:" then loop (String.sub subject 3 (n-3)) else if Pcre.pmatch ~rex:re_re subject then ( let subs = Pcre.exec ~rex:re_re subject in let i = String.length (Pcre.get_substring subs 0) in loop (String.sub subject i (n-i)) ) else subject in let base_subject = loop subject in let is_reply = base_subject <> subject in base_subject, is_reply (* This abstract data type represents a 'forest' and is used for * the implementation of threading below. *) module type FOREST = sig type 'a t exception Already_linked exception Cycle_found val create : int -> 'a t val add : 'a t -> 'a -> unit val link : 'a t -> 'a -> 'a -> unit val unlink : 'a t -> 'a -> unit val get_roots : 'a t -> 'a list val get_children : 'a t -> 'a -> 'a list end module Forest : FOREST = struct type 'a node_data = { level : int; parent : 'a option; children : 'a list; root : 'a; } type 'a t = ('a, 'a node_data) Hashtbl.t exception Already_linked exception Cycle_found let create n = Hashtbl.create n (* Add node [n] to forest [f]. *) let add f n = Hashtbl.replace f n { level = 0; parent = None; children = []; root = n } (* Set the level field of [n]'s children to increasing * values, starting with [lvl]. Point all the root * fields of the children to [rt]. *) let rec update f lvl rt n = let n_data = Hashtbl.find f n in Hashtbl.replace f n { n_data with level = lvl; root = rt }; List.iter (update f (lvl+1) rt) n_data.children (* Link child [na] to parent [nb]. Raises [Already_linked] * if either [na] has a parent already. Raises [Cycle_found] * if the link would introduce a cycle. *) let link f na nb = let na_data = Hashtbl.find f na in let nb_data = Hashtbl.find f nb in match na_data.parent with | Some _ -> raise Already_linked | None when nb_data.root = na -> raise Cycle_found | _ -> Hashtbl.replace f na { na_data with parent = Some nb }; let nb_data = { nb_data with children = na :: nb_data.children } in Hashtbl.replace f nb nb_data; update f (nb_data.level+1) nb_data.root na (* Remove the parent link of [n]. If there is no such * link, does nothing. *) let unlink f n = let n_data = Hashtbl.find f n in match n_data.parent with | None -> () | Some p -> Hashtbl.replace f n { n_data with parent = None }; let p_data = Hashtbl.find f p in Hashtbl.replace f p { p_data with children = List.filter ((!=) n) p_data.children}; update f 0 n n (* Return the roots in forest [f]. *) let get_roots f = let save_if_root n n_data roots = match n_data.parent with | Some _ -> roots | None -> n :: roots in Hashtbl.fold save_if_root f [] (* Return [n]'s children. *) let get_children f n = let n_data = Hashtbl.find f n in n_data.children end type message = { id : int; inet_message_id : string; references : string list; subject : string; base_subject : string; is_reply : bool; message_date : Dbi.datetime } type tree = Tree of message option * tree list (* Rebuild mail threads for (year, month). * The algorithm was originally by JWZ, http://www.jwz.org/doc/threading.html, * simplified and implemented by Radu Grigore . *) let thread_mail (dbh : Dbi.connection) hostid ?user ?r year month = (* Pull out all the emails relevant to this month. *) let sth = dbh#prepare_cached "select id, subject, inet_message_id, message_date from messages where hostid = ? and extract (year from message_date) = ? and extract (month from message_date) = ?" in sth#execute [`Int hostid; `Int year; `Int month]; let msgs = sth#map (function [`Int id; `String subject; `String inet_message_id; `Timestamp message_date] -> id, (inet_message_id, subject, message_date) | _ -> assert false) in let references = if msgs <> [] then ( let sth = let qs = Dbi.placeholders (List.length msgs) in dbh#prepare_cached ("select message_id, inet_message_id, ordering from msg_references where message_id in " ^ qs ^ " order by message_id, ordering") in sth#execute (List.map (fun (id, _) -> `Int id) msgs); sth#map (function [`Int id; `String inet_message_id; _] -> id, inet_message_id | _ -> assert false) ) else [] in (* Aggregate the msgs and references structures together. * Note that references will be in the correct order (because of the * 'order by' clause in the select statement above), with the parent * message appearing first in the list. *) let msgs = List.map (fun (id, (inet_message_id, subject, message_date)) -> let references = List.filter (fun (i, _) -> i = id) references in let references = List.map snd references in id, inet_message_id, references, subject, message_date) msgs in (* Get the base subject lines (removing Re:, etc.), and convert to * list of message structs. *) let msgs = List.map (fun (id, inet_message_id, references, subject, message_date) -> let base_subject, is_reply = base_subject subject in { id = id; inet_message_id = inet_message_id; references = references; subject = subject; base_subject = base_subject; is_reply = is_reply; message_date = message_date }) msgs in (* Create a hash of inet_message_id -> message structure, which we'll * need later. *) let msgmap = let h = Hashtbl.create (List.length msgs) in let add ({ inet_message_id = inet_message_id } as message) = Hashtbl.replace h inet_message_id message in List.iter add msgs; h in (* Step 1: Build the forest. *) let forest = Forest.create 1024 in List.iter (fun { inet_message_id = inet_message_id; references = references } -> Forest.add forest inet_message_id; List.iter (Forest.add forest) references) msgs; let add_msg_data f { inet_message_id = inet_message_id; references = references } = let rec add_one f n lst = match lst with | [] -> () | h :: t -> (try Forest.link f n h with Forest.Already_linked | Forest.Cycle_found -> ()); add_one f h t in match references with | [] -> () | h :: t -> Forest.unlink f inet_message_id; Forest.link f inet_message_id h; add_one f h t in List.iter (add_msg_data forest) msgs; (* Step 2: Find the root set. Convert the forest into an ordinary tree * structure now (actually, a list of tree structures) since the FOREST * type is no longer needed. *) let threads = Forest.get_roots forest in let threads = let rec make_tree root = (* Is there a message associated with this inet_message_id? *) let message = try Some (Hashtbl.find msgmap root) with Not_found -> None in (* Get the children. *) let children = Forest.get_children forest root in let children = List.map make_tree children in Tree (message, children) in List.map make_tree threads in (* Step 4A: Prune empty containers. *) let threads = let prune = List.filter (function Tree (None, []) -> false | _ -> true) in let rec loop (Tree (message, children)) = let children = prune children in let children = List.map loop children in Tree (message, children) in List.map loop threads in (* Step 4B: Promote children of (some) empty containers. *) let threads = (* Below the top level there should be no empty containers after * this. Any empty container with children has those children * promoted up. *) let rec promote = function [] -> [] | Tree (None, children) :: xs -> let children = promote children in children @ promote xs | Tree (message, children) :: xs -> let children = promote children in Tree (message, children) :: promote xs in let threads = List.map (fun (Tree (message, children)) -> let children = promote children in Tree (message, children)) threads in (* At the top level we're allowed to have empty containers. However * if we have an empty container with just a single child, then * promote that child. *) let threads = List.map (function Tree (None, [child]) -> child | message -> message) threads in threads in (* Step 5: Group root set by subject. *) (* Couldn't be arsed to implement this. If someone really cares about * mailers which don't set References headers (probably some made by * our friends at Microsoft, I wouldn't mind betting), then send me * a patch ... XXX *) (* Step 7: Sort the siblings into date order. *) let threads = let compare (Tree (m1, _)) (Tree (m2, _)) = let md1 = match m1 with Some { message_date = message_date } -> Some message_date | None -> None in let md2 = match m2 with Some { message_date = message_date } -> Some message_date | None -> None in compare md1 md2 in let rec sort ms = let ms = List.sort compare ms in List.map (fun (Tree (message, children)) -> Tree (message, sort children)) ms in sort threads in (*----- End of threading algorithm. -----*) let title = sprintf "Mail/%04d/%02d/Thread Index" year month in let url = match Wikilib.generate_url_of_title dbh hostid title with Wikilib.GenURL_OK url -> url | Wikilib.GenURL_Duplicate url -> url | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL -> failwith ("error generating URL for title: " ^ title) in let template = _get_template "mail_thread.txt" in (* Rebuild the thread index page. *) let model = try load_page dbh hostid ~url () with Not_found -> new_page (Title title) in let first_section = let sectionname = sprintf "Thread index for %s %04d" (long_month month) year in let content = template#set "year" (string_of_int year); template#set "month" (sprintf "%02d" month); template#set "long_month" (long_month month); let prev_year, prev_month = if month = 1 then year - 1, 12 else year, month - 1 in template#set "prev_year" (string_of_int prev_year); template#set "prev_month" (sprintf "%02d" prev_month); let next_year, next_month = if month = 12 then year + 1, 1 else year, month + 1 in template#set "next_year" (string_of_int next_year); template#set "next_month" (sprintf "%02d" next_month); let rec markup threads = let f = function | Tree (None, children) -> let html = markup children in "
  • -\n" :: html @ ["
  • \n"] | Tree (Some message, children) -> let {id = id; subject = subject} = message in let url = let title = sprintf "Mail/%s (%d)" subject id in match Wikilib.generate_url_of_title dbh hostid title with Wikilib.GenURL_OK url | Wikilib.GenURL_Duplicate url -> url | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL -> failwith ("error finding URL for message: " ^ title) in let html = markup children in sprintf "
  • %s\n" url (Cgi_escape.escape_html subject) :: html @ [ "
  • \n" ] in "\n"] in let html = markup threads in let html = String.concat "" html in template#set "threads" html; template#to_string in (sectionname, "", content) in let contents = match model.contents with | [] | [_] -> [ first_section ] | x :: xs -> first_section :: xs in let model = { model with contents = contents } in (* Save the page. *) try ignore (save_page dbh hostid ?user ?r model) with | SaveURLError -> failwith "cocanwiki_mail: thread_mail: unexpected SaveURLError" (* The following error should be noted, but is not too bad. We * expect to rebuild the thread indexes frequently, so hopefully * the next time it is rebuilt it will succeed. *) | SaveConflict _ -> prerr_endline "cocanwiki_mail: thread_mail: SaveConflict (ignored)"