(* COCANWIKI - a wiki written in Objective CAML.
* Written by Richard W.M. Jones <rich@merjis.com>.
* Copyright (C) 2004 Merjis Ltd.
- * $Id: cocanwiki_mail.ml,v 1.1 2004/10/14 15:57:15 rich Exp $
+ * $Id: cocanwiki_mail.ml,v 1.2 2004/10/20 15:17:18 rich Exp $
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* Boston, MA 02111-1307, USA.
*)
+open Printf
+
open ExtString
-(*
-(* Rebuild mail threads for (year, month).
- * The algorithm used is by JWZ - see:
- * http://www.jwz.org/doc/threading.html
+open Cocanwiki_pages
+open Cocanwiki_template
+open Cocanwiki_date
+
+(* Given a subject line, return the "base" subject.
+ * eg. "Re: my message" -> "my message"
+ *)
+let re_re = Pcre.regexp "^Re\\[\\d+\\]:\\s*"
+
+let base_subject subject =
+ let rec loop subject =
+ let n = String.length subject in
+ if String.starts_with subject "Re: " then
+ loop (String.sub subject 4 (n-4))
+ else if String.starts_with subject "Re:" then
+ loop (String.sub subject 3 (n-3))
+ else if String.starts_with subject "RE: " then
+ loop (String.sub subject 4 (n-4))
+ else if String.starts_with subject "RE:" then
+ loop (String.sub subject 3 (n-3))
+ else if Pcre.pmatch ~rex:re_re subject then (
+ let subs = Pcre.exec ~rex:re_re subject in
+ let i = String.length (Pcre.get_substring subs 0) in
+ loop (String.sub subject i (n-i))
+ ) else
+ subject
+ in
+ let base_subject = loop subject in
+ let is_reply = base_subject <> subject in
+ base_subject, is_reply
+
+(* This abstract data type represents a 'forest' and is used for
+ * the implementation of threading below.
*)
-class ['a] container m =
-object (self)
- val mutable message = (m : 'a option)
- val mutable parent = (None : 'a container option)
- val mutable children = ([] : 'a container list)
+module type FOREST =
+sig
+ type 'a t
+ exception Already_linked
+ exception Cycle_found
+ val create : int -> 'a t
+ val add : 'a t -> 'a -> unit
+ val link : 'a t -> 'a -> 'a -> unit
+ val unlink : 'a t -> 'a -> unit
+ val get_roots : 'a t -> 'a list
+ val get_children : 'a t -> 'a -> 'a list
+end
+
+module Forest : FOREST =
+struct
+ type 'a node_data = {
+ level : int;
+ parent : 'a option;
+ children : 'a list;
+ root : 'a;
+ }
+
+ type 'a t = ('a, 'a node_data) Hashtbl.t
+
+ exception Already_linked
+ exception Cycle_found
+
+ let create n = Hashtbl.create n
- method message = message
- method set_message m = message <- Some m
+ (* Add node [n] to forest [f]. *)
+ let add f n =
+ Hashtbl.replace f n { level = 0;
+ parent = None;
+ children = [];
+ root = n }
- (* Don't call 'set_parent' explicitly. I wish I could understand the
- * section in the manual on friend methods ...
+ (* Set the level field of [n]'s children to increasing
+ * values, starting with [lvl]. Point all the root
+ * fields of the children to [rt].
*)
- method parent = parent
- method set_parent c = parent <- Some c
- method set_no_parent () = parent <- None
-
- method children = children
- method add_child c =
- assert (not (List.exists (fun child -> Oo.id child = Oo.id c) children));
- children <- c :: children;
- assert (c#parent = None);
- c#set_parent self
- method remove_child c =
- match c#parent with
- None
- | Some parent when Oo.id parent <> Oo.id self
- let n = List.length children in
- children <- List.filter (fun child -> Oo.id child <> Oo.id c) children;
- assert (List.length children = n-1)
+ let rec update f lvl rt n =
+ let n_data = Hashtbl.find f n in
+ Hashtbl.replace f n { n_data with level = lvl; root = rt };
+ List.iter (update f (lvl+1) rt) n_data.children
+
+ (* Link child [na] to parent [nb]. Raises [Already_linked]
+ * if either [na] has a parent already. Raises [Cycle_found]
+ * if the link would introduce a cycle.
+ *)
+ let link f na nb =
+ let na_data = Hashtbl.find f na in
+ let nb_data = Hashtbl.find f nb in
+ match na_data.parent with
+ | Some _ -> raise Already_linked
+ | None when nb_data.root = na -> raise Cycle_found
+ | _ ->
+ Hashtbl.replace f na { na_data with parent = Some nb };
+ let nb_data = { nb_data with children = na :: nb_data.children } in
+ Hashtbl.replace f nb nb_data;
+ update f (nb_data.level+1) nb_data.root na
+
+ (* Remove the parent link of [n]. If there is no such
+ * link, does nothing.
+ *)
+ let unlink f n =
+ let n_data = Hashtbl.find f n in
+ match n_data.parent with
+ | None -> ()
+ | Some p ->
+ let p_data = Hashtbl.find f p in
+ Hashtbl.replace f p
+ { p_data with
+ children = List.filter ((!=) n) p_data.children};
+ update f 0 n n
+
+ (* Return the roots in forest [f]. *)
+ let get_roots f =
+ let save_if_root n n_data roots =
+ match n_data.parent with
+ | Some _ -> roots
+ | None -> n :: roots in
+ Hashtbl.fold save_if_root f []
+
+ (* Return [n]'s children. *)
+ let get_children f n =
+ let n_data = Hashtbl.find f n in
+ n_data.children
end
-let re_re = Pcre.regexp "^Re\\[\\d+\\]:\\s*"
+type message =
+ { id : int;
+ inet_message_id : string;
+ references : string list;
+ subject : string;
+ base_subject : string;
+ is_reply : bool;
+ message_date : Dbi.datetime }
-let thread_mail (dbh : Dbi.connection) hostid year month =
- let title = "Mail/%04d/%02d/Thread Index" in
+type tree = Tree of message option * tree list
+(* Rebuild mail threads for (year, month).
+ * The algorithm was originally by JWZ, http://www.jwz.org/doc/threading.html,
+ * simplified and implemented by Radu Grigore <radugrigore@yahoo.com>.
+ *)
+let thread_mail (dbh : Dbi.connection) hostid ?user ?r year month =
(* Pull out all the emails relevant to this month. *)
let sth =
dbh#prepare_cached "select id, subject, inet_message_id, message_date
let sth =
let qs = Dbi.placeholders (List.length msgs) in
dbh#prepare_cached ("select message_id, inet_message_id, ordering
- from messages
+ from msg_references
where message_id in " ^ qs ^ "
order by message_id, ordering") in
sth#execute (List.map (fun (id, _) -> `Int id) msgs);
let references =
List.filter (fun (i, _) -> i = id) references in
let references = List.map snd references in
- id, (inet_message_id, references, subject, message_date))
+ id, inet_message_id, references, subject, message_date)
msgs in
- (* Get the base subject lines (removing Re:, etc.). *)
+ (* Get the base subject lines (removing Re:, etc.), and convert to
+ * list of message structs.
+ *)
let msgs =
- List.map (fun (id, (inet_message_id, references, subject, message_date)) ->
- let rec loop subject =
- let n = String.length subject in
- if String.starts_with subject "Re: " then
- loop (String.sub subject 4 (n-4))
- else if String.starts_with subject "Re:" then
- loop (String.sub subject 3 (n-3))
- else if String.starts_with subject "RE: " then
- loop (String.sub subject 4 (n-4))
- else if String.starts_with subject "RE:" then
- loop (String.sub subject 3 (n-3))
- else if Pcre.pmatch ~rex:re_re subject then (
- let subs = Pcre.exec ~rex:re_re subject in
- let i = String.length (Pcre.get_substring subs 0) in
- loop (String.sub subject i (n-i))
- ) else
- subject
- in
- let base_subject = loop subject in
- let is_reply = base_subject <> subject in
- id, (inet_message_id, references,
- subject, base_subject, is_reply, message_date)) msgs in
-
- (*--- Step 1. ---*)
- (* Hash of inet_message_id -> container. *)
- let id_table = Hashtbl 1024 in
+ List.map (fun (id, inet_message_id, references, subject, message_date) ->
+ let base_subject, is_reply = base_subject subject in
+ { id = id; inet_message_id = inet_message_id;
+ references = references; subject = subject;
+ base_subject = base_subject; is_reply = is_reply;
+ message_date = message_date }) msgs in
+
+ (* Create a hash of inet_message_id -> message structure, which we'll
+ * need later.
+ *)
+ let msgmap =
+ let h = Hashtbl.create (List.length msgs) in
+ let add ({ inet_message_id = inet_message_id } as message) =
+ Hashtbl.replace h inet_message_id message in
+ List.iter add msgs;
+ h in
+
+ (* Step 1: Build the forest. *)
+ let forest = Forest.create 1024 in
List.iter
- (fun ((id, (inet_message_id, references, _, _, _, _)) as message) ->
- let container =
- try
- let container = Hashtbl.find id_table inet_message_id in
- if container#message = None then container#set_message message;
- container
- with
- Not_found ->
- let container = new container (Some message) in
- Hashtbl.add id_table inet_message_id container;
- container in
-
- (* References. *)
- let ref_containers =
- List.map
- (fun inet_message_id ->
- try
- Hashtbl.find id_table inet_message_id
- with
- Not_found ->
- let container = new container None in
- Hashtbl.add id_table inet_message_id container;
- container) references in
- (* Link the reference containers together. *)
- iter_in_pairs
- (fun child parent ->
- if not (reachable parent child) && not (reachable child parent)
- then (
- if parent#child = None then parent#set_child = child;
- if child#parent = None then child#set_parent = parent;
- )) ref_containers;
-
- (* Parent of this message is first element in references. *)
- match ref_containers with
- [] ->
- let old_parent = container#parent in
- container#set_no_parent ();
- if old_parent <>
- | parent :: _ when Oo.id parent <> my_parent_id ->
- container#set_parent
-
-
-
- ) msgs;
-*)
+ (fun { inet_message_id = inet_message_id; references = references } ->
+ Forest.add forest inet_message_id;
+ List.iter (Forest.add forest) references) msgs;
+
+ let add_msg_data f { inet_message_id = inet_message_id;
+ references = references } =
+ let rec add_one f n lst =
+ match lst with
+ | [] -> ()
+ | h :: t ->
+ (try Forest.link f n h
+ with Forest.Already_linked | Forest.Cycle_found -> ());
+ add_one f h t
+ in
+ match references with
+ | [] -> ()
+ | h :: t ->
+ Forest.unlink f inet_message_id;
+ Forest.link f inet_message_id h;
+ add_one f h t
+ in
+ List.iter (add_msg_data forest) msgs;
+
+ (* Step 2: Find the root set. Convert the forest into an ordinary tree
+ * structure now (actually, a list of tree structures) since the FOREST
+ * type is no longer needed.
+ *)
+ let threads = Forest.get_roots forest in
+
+ let threads =
+ let rec make_tree root =
+ (* Is there a message associated with this inet_message_id? *)
+ let message =
+ try Some (Hashtbl.find msgmap root) with Not_found -> None in
+ (* Get the children. *)
+ let children = Forest.get_children forest root in
+ let children = List.map make_tree children in
+ Tree (message, children)
+ in
+ List.map make_tree threads in
+
+ (* Step 4A: Prune empty containers. *)
+ let threads =
+ let prune = List.filter (function Tree (None, []) -> false | _ -> true) in
+ let rec loop (Tree (message, children)) =
+ let children = prune children in
+ let children = List.map loop children in
+ Tree (message, children)
+ in
+ List.map loop threads in
+
+ (* Step 4B: Promote children of (some) empty containers. *)
+ let threads =
+ let promote (Tree (message, children)) =
+ (* Find the grandchildren to promote. *)
+ let children, grandchildren =
+ List.partition (function
+ | Tree (Some _, _) -> true
+ | Tree (None, _) -> false) children in
+ let grandchildren = List.map (fun (Tree (_, c)) -> c) grandchildren in
+ let grandchildren = List.concat grandchildren in
+ let children = children @ grandchildren in
+ Tree (message, children) in
+ let threads = List.map promote threads in
+
+ (* At the top level, find any empty containers with exactly one child
+ * and promote those children to top-level threads.
+ *)
+ let threads, new_threads =
+ List.partition (function
+ | Tree (None, [child]) -> false
+ | _ -> true) threads in
+ let new_threads = List.map (function
+ | Tree (_, [child]) -> child
+ | _ -> assert false) new_threads in
+ threads @ new_threads in
+
+ (* Step 5: Group root set by subject. *)
+ (* Couldn't be arsed to implement this. If someone really cares about
+ * mailers which don't set References headers (probably some made by
+ * our friends at Microsoft, I wouldn't mind betting), then send me
+ * a patch ... XXX
+ *)
+
+ (* Step 7: Sort the siblings into date order. *)
+ let threads =
+ let compare (Tree (m1, _)) (Tree (m2, _)) =
+ let md1 = match m1 with
+ Some { message_date = message_date } -> Some message_date
+ | None -> None in
+ let md2 = match m2 with
+ Some { message_date = message_date } -> Some message_date
+ | None -> None in
+ compare md1 md2
+ in
+ let rec sort ms =
+ let ms = List.sort compare ms in
+ List.map (fun (Tree (message, children)) ->
+ Tree (message, sort children)) ms
+ in
+ sort threads in
+
+ (*----- End of threading algorithm. -----*)
+
+ let title = sprintf "Mail/%04d/%02d/Thread Index" year month in
+ let url =
+ match Wikilib.generate_url_of_title dbh hostid title with
+ Wikilib.GenURL_OK url -> url
+ | Wikilib.GenURL_Duplicate url -> url
+ | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
+ failwith ("error generating URL for title: " ^ title) in
+
+ let template = _get_template "mail_thread.txt" in
+
+ (* Rebuild the thread index page. *)
+ let model =
+ try load_page dbh hostid ~url ()
+ with Not_found -> new_page (Title title) in
+
+ let first_section =
+ let sectionname =
+ sprintf "Thread index for %s %04d" (long_month month) year in
+ let content =
+ template#set "year" (string_of_int year);
+ template#set "month" (sprintf "%02d" month);
+ template#set "long_month" (long_month month);
+ let prev_year, prev_month =
+ if month = 1 then year - 1, 12
+ else year, month - 1 in
+ template#set "prev_year" (string_of_int prev_year);
+ template#set "prev_month" (sprintf "%02d" prev_month);
+ let next_year, next_month =
+ if month = 12 then year + 1, 1
+ else year, month + 1 in
+ template#set "next_year" (string_of_int next_year);
+ template#set "next_month" (sprintf "%02d" next_month);
+
+ let rec markup threads =
+ let f = function
+ | Tree (None, children) ->
+ let html = markup children in
+ "<li> -\n" :: html @ ["</li>\n"]
+ | Tree (Some message, children) ->
+ let {id = id; subject = subject} = message in
+
+ let url =
+ let title = sprintf "Mail/%s (%d)" subject id in
+ match Wikilib.generate_url_of_title dbh hostid title with
+ Wikilib.GenURL_OK url | Wikilib.GenURL_Duplicate url -> url
+ | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
+ failwith ("error finding URL for message: " ^ title) in
+
+ let html = markup children in
+ sprintf "<li> <a href=\"/%s\">%s</a>\n" url subject
+ :: html @ [ "</li>\n" ]
+ in
+ "<ul>\n" :: List.concat (List.map f threads) @ ["</ul>\n"]
+ in
+ let html = markup threads in
+ let html = String.concat "" html in
+ template#set "threads" html;
+
+ template#to_string
+ in
+ (sectionname, "", content)
+ in
+
+ let contents =
+ match model.contents with
+ | [] | [_] -> [ first_section ]
+ | x :: xs -> first_section :: xs in
+
+ let model = { model with contents = contents } in
+
+ (* Save the page. *)
+ try
+ ignore (save_page dbh hostid ?user ?r model)
+ with
+ | SaveURLError ->
+ failwith "cocanwiki_mail: thread_mail: unexpected SaveURLError"
+
+ (* The following error should be noted, but is not too bad. We
+ * expect to rebuild the thread indexes frequently, so hopefully
+ * the next time it is rebuilt it will succeed.
+ *)
+ | SaveConflict _ ->
+ prerr_endline "cocanwiki_mail: thread_mail: SaveConflict (ignored)"