1 (* COCANWIKI - a wiki written in Objective CAML.
2 * Written by Richard W.M. Jones <rich@merjis.com>.
3 * Copyright (C) 2004 Merjis Ltd.
4 * $Id: cocanwiki_mail.ml,v 1.2 2004/10/20 15:17:18 rich Exp $
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
27 open Cocanwiki_template
30 (* Given a subject line, return the "base" subject.
31 * eg. "Re: my message" -> "my message"
33 let re_re = Pcre.regexp "^Re\\[\\d+\\]:\\s*"
35 let base_subject subject =
36 let rec loop subject =
37 let n = String.length subject in
38 if String.starts_with subject "Re: " then
39 loop (String.sub subject 4 (n-4))
40 else if String.starts_with subject "Re:" then
41 loop (String.sub subject 3 (n-3))
42 else if String.starts_with subject "RE: " then
43 loop (String.sub subject 4 (n-4))
44 else if String.starts_with subject "RE:" then
45 loop (String.sub subject 3 (n-3))
46 else if Pcre.pmatch ~rex:re_re subject then (
47 let subs = Pcre.exec ~rex:re_re subject in
48 let i = String.length (Pcre.get_substring subs 0) in
49 loop (String.sub subject i (n-i))
53 let base_subject = loop subject in
54 let is_reply = base_subject <> subject in
55 base_subject, is_reply
57 (* This abstract data type represents a 'forest' and is used for
58 * the implementation of threading below.
63 exception Already_linked
65 val create : int -> 'a t
66 val add : 'a t -> 'a -> unit
67 val link : 'a t -> 'a -> 'a -> unit
68 val unlink : 'a t -> 'a -> unit
69 val get_roots : 'a t -> 'a list
70 val get_children : 'a t -> 'a -> 'a list
73 module Forest : FOREST =
82 type 'a t = ('a, 'a node_data) Hashtbl.t
84 exception Already_linked
87 let create n = Hashtbl.create n
89 (* Add node [n] to forest [f]. *)
91 Hashtbl.replace f n { level = 0;
96 (* Set the level field of [n]'s children to increasing
97 * values, starting with [lvl]. Point all the root
98 * fields of the children to [rt].
100 let rec update f lvl rt n =
101 let n_data = Hashtbl.find f n in
102 Hashtbl.replace f n { n_data with level = lvl; root = rt };
103 List.iter (update f (lvl+1) rt) n_data.children
105 (* Link child [na] to parent [nb]. Raises [Already_linked]
106 * if either [na] has a parent already. Raises [Cycle_found]
107 * if the link would introduce a cycle.
110 let na_data = Hashtbl.find f na in
111 let nb_data = Hashtbl.find f nb in
112 match na_data.parent with
113 | Some _ -> raise Already_linked
114 | None when nb_data.root = na -> raise Cycle_found
116 Hashtbl.replace f na { na_data with parent = Some nb };
117 let nb_data = { nb_data with children = na :: nb_data.children } in
118 Hashtbl.replace f nb nb_data;
119 update f (nb_data.level+1) nb_data.root na
121 (* Remove the parent link of [n]. If there is no such
122 * link, does nothing.
125 let n_data = Hashtbl.find f n in
126 match n_data.parent with
129 let p_data = Hashtbl.find f p in
132 children = List.filter ((!=) n) p_data.children};
135 (* Return the roots in forest [f]. *)
137 let save_if_root n n_data roots =
138 match n_data.parent with
140 | None -> n :: roots in
141 Hashtbl.fold save_if_root f []
143 (* Return [n]'s children. *)
144 let get_children f n =
145 let n_data = Hashtbl.find f n in
151 inet_message_id : string;
152 references : string list;
154 base_subject : string;
156 message_date : Dbi.datetime }
158 type tree = Tree of message option * tree list
160 (* Rebuild mail threads for (year, month).
161 * The algorithm was originally by JWZ, http://www.jwz.org/doc/threading.html,
162 * simplified and implemented by Radu Grigore <radugrigore@yahoo.com>.
164 let thread_mail (dbh : Dbi.connection) hostid ?user ?r year month =
165 (* Pull out all the emails relevant to this month. *)
167 dbh#prepare_cached "select id, subject, inet_message_id, message_date
170 and extract (year from message_date) = ?
171 and extract (month from message_date) = ?" in
172 sth#execute [`Int hostid; `Int year; `Int month];
176 (function [`Int id; `String subject; `String inet_message_id;
177 `Timestamp message_date] ->
178 id, (inet_message_id, subject, message_date)
179 | _ -> assert false) in
184 let qs = Dbi.placeholders (List.length msgs) in
185 dbh#prepare_cached ("select message_id, inet_message_id, ordering
187 where message_id in " ^ qs ^ "
188 order by message_id, ordering") in
189 sth#execute (List.map (fun (id, _) -> `Int id) msgs);
190 sth#map (function [`Int id; `String inet_message_id; _] ->
195 (* Aggregate the msgs and references structures together.
196 * Note that references will be in the correct order (because of the
197 * 'order by' clause in the select statement above), with the parent
198 * message appearing first in the list.
201 List.map (fun (id, (inet_message_id, subject, message_date)) ->
203 List.filter (fun (i, _) -> i = id) references in
204 let references = List.map snd references in
205 id, inet_message_id, references, subject, message_date)
208 (* Get the base subject lines (removing Re:, etc.), and convert to
209 * list of message structs.
212 List.map (fun (id, inet_message_id, references, subject, message_date) ->
213 let base_subject, is_reply = base_subject subject in
214 { id = id; inet_message_id = inet_message_id;
215 references = references; subject = subject;
216 base_subject = base_subject; is_reply = is_reply;
217 message_date = message_date }) msgs in
219 (* Create a hash of inet_message_id -> message structure, which we'll
223 let h = Hashtbl.create (List.length msgs) in
224 let add ({ inet_message_id = inet_message_id } as message) =
225 Hashtbl.replace h inet_message_id message in
229 (* Step 1: Build the forest. *)
230 let forest = Forest.create 1024 in
232 (fun { inet_message_id = inet_message_id; references = references } ->
233 Forest.add forest inet_message_id;
234 List.iter (Forest.add forest) references) msgs;
236 let add_msg_data f { inet_message_id = inet_message_id;
237 references = references } =
238 let rec add_one f n lst =
242 (try Forest.link f n h
243 with Forest.Already_linked | Forest.Cycle_found -> ());
246 match references with
249 Forest.unlink f inet_message_id;
250 Forest.link f inet_message_id h;
253 List.iter (add_msg_data forest) msgs;
255 (* Step 2: Find the root set. Convert the forest into an ordinary tree
256 * structure now (actually, a list of tree structures) since the FOREST
257 * type is no longer needed.
259 let threads = Forest.get_roots forest in
262 let rec make_tree root =
263 (* Is there a message associated with this inet_message_id? *)
265 try Some (Hashtbl.find msgmap root) with Not_found -> None in
266 (* Get the children. *)
267 let children = Forest.get_children forest root in
268 let children = List.map make_tree children in
269 Tree (message, children)
271 List.map make_tree threads in
273 (* Step 4A: Prune empty containers. *)
275 let prune = List.filter (function Tree (None, []) -> false | _ -> true) in
276 let rec loop (Tree (message, children)) =
277 let children = prune children in
278 let children = List.map loop children in
279 Tree (message, children)
281 List.map loop threads in
283 (* Step 4B: Promote children of (some) empty containers. *)
285 let promote (Tree (message, children)) =
286 (* Find the grandchildren to promote. *)
287 let children, grandchildren =
288 List.partition (function
289 | Tree (Some _, _) -> true
290 | Tree (None, _) -> false) children in
291 let grandchildren = List.map (fun (Tree (_, c)) -> c) grandchildren in
292 let grandchildren = List.concat grandchildren in
293 let children = children @ grandchildren in
294 Tree (message, children) in
295 let threads = List.map promote threads in
297 (* At the top level, find any empty containers with exactly one child
298 * and promote those children to top-level threads.
300 let threads, new_threads =
301 List.partition (function
302 | Tree (None, [child]) -> false
303 | _ -> true) threads in
304 let new_threads = List.map (function
305 | Tree (_, [child]) -> child
306 | _ -> assert false) new_threads in
307 threads @ new_threads in
309 (* Step 5: Group root set by subject. *)
310 (* Couldn't be arsed to implement this. If someone really cares about
311 * mailers which don't set References headers (probably some made by
312 * our friends at Microsoft, I wouldn't mind betting), then send me
316 (* Step 7: Sort the siblings into date order. *)
318 let compare (Tree (m1, _)) (Tree (m2, _)) =
319 let md1 = match m1 with
320 Some { message_date = message_date } -> Some message_date
322 let md2 = match m2 with
323 Some { message_date = message_date } -> Some message_date
328 let ms = List.sort compare ms in
329 List.map (fun (Tree (message, children)) ->
330 Tree (message, sort children)) ms
334 (*----- End of threading algorithm. -----*)
336 let title = sprintf "Mail/%04d/%02d/Thread Index" year month in
338 match Wikilib.generate_url_of_title dbh hostid title with
339 Wikilib.GenURL_OK url -> url
340 | Wikilib.GenURL_Duplicate url -> url
341 | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
342 failwith ("error generating URL for title: " ^ title) in
344 let template = _get_template "mail_thread.txt" in
346 (* Rebuild the thread index page. *)
348 try load_page dbh hostid ~url ()
349 with Not_found -> new_page (Title title) in
353 sprintf "Thread index for %s %04d" (long_month month) year in
355 template#set "year" (string_of_int year);
356 template#set "month" (sprintf "%02d" month);
357 template#set "long_month" (long_month month);
358 let prev_year, prev_month =
359 if month = 1 then year - 1, 12
360 else year, month - 1 in
361 template#set "prev_year" (string_of_int prev_year);
362 template#set "prev_month" (sprintf "%02d" prev_month);
363 let next_year, next_month =
364 if month = 12 then year + 1, 1
365 else year, month + 1 in
366 template#set "next_year" (string_of_int next_year);
367 template#set "next_month" (sprintf "%02d" next_month);
369 let rec markup threads =
371 | Tree (None, children) ->
372 let html = markup children in
373 "<li> -\n" :: html @ ["</li>\n"]
374 | Tree (Some message, children) ->
375 let {id = id; subject = subject} = message in
378 let title = sprintf "Mail/%s (%d)" subject id in
379 match Wikilib.generate_url_of_title dbh hostid title with
380 Wikilib.GenURL_OK url | Wikilib.GenURL_Duplicate url -> url
381 | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
382 failwith ("error finding URL for message: " ^ title) in
384 let html = markup children in
385 sprintf "<li> <a href=\"/%s\">%s</a>\n" url subject
386 :: html @ [ "</li>\n" ]
388 "<ul>\n" :: List.concat (List.map f threads) @ ["</ul>\n"]
390 let html = markup threads in
391 let html = String.concat "" html in
392 template#set "threads" html;
396 (sectionname, "", content)
400 match model.contents with
401 | [] | [_] -> [ first_section ]
402 | x :: xs -> first_section :: xs in
404 let model = { model with contents = contents } in
408 ignore (save_page dbh hostid ?user ?r model)
411 failwith "cocanwiki_mail: thread_mail: unexpected SaveURLError"
413 (* The following error should be noted, but is not too bad. We
414 * expect to rebuild the thread indexes frequently, so hopefully
415 * the next time it is rebuilt it will succeed.
418 prerr_endline "cocanwiki_mail: thread_mail: SaveConflict (ignored)"