1 (* COCANWIKI - a wiki written in Objective CAML.
2 * Written by Richard W.M. Jones <rich@merjis.com>.
3 * Copyright (C) 2004 Merjis Ltd.
4 * $Id: cocanwiki_mail.ml,v 1.5 2006/08/17 09:11:31 rich Exp $
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
27 open Cocanwiki_template
30 (* Given a subject line, return the "base" subject.
31 * eg. "Re: my message" -> "my message"
33 let re_re = Pcre.regexp "^Re\\[\\d+\\]:\\s*"
35 let base_subject subject =
36 let rec loop subject =
37 let n = String.length subject in
38 if String.starts_with subject "Re: " then
39 loop (String.sub subject 4 (n-4))
40 else if String.starts_with subject "Re:" then
41 loop (String.sub subject 3 (n-3))
42 else if String.starts_with subject "RE: " then
43 loop (String.sub subject 4 (n-4))
44 else if String.starts_with subject "RE:" then
45 loop (String.sub subject 3 (n-3))
46 else if Pcre.pmatch ~rex:re_re subject then (
47 let subs = Pcre.exec ~rex:re_re subject in
48 let i = String.length (Pcre.get_substring subs 0) in
49 loop (String.sub subject i (n-i))
53 let base_subject = loop subject in
54 let is_reply = base_subject <> subject in
55 base_subject, is_reply
57 (* This abstract data type represents a 'forest' and is used for
58 * the implementation of threading below.
63 exception Already_linked
65 val create : int -> 'a t
66 val add : 'a t -> 'a -> unit
67 val link : 'a t -> 'a -> 'a -> unit
68 val unlink : 'a t -> 'a -> unit
69 val get_roots : 'a t -> 'a list
70 val get_children : 'a t -> 'a -> 'a list
73 module Forest : FOREST =
82 type 'a t = ('a, 'a node_data) Hashtbl.t
84 exception Already_linked
87 let create n = Hashtbl.create n
89 (* Add node [n] to forest [f]. *)
91 Hashtbl.replace f n { level = 0;
96 (* Set the level field of [n]'s children to increasing
97 * values, starting with [lvl]. Point all the root
98 * fields of the children to [rt].
100 let rec update f lvl rt n =
101 let n_data = Hashtbl.find f n in
102 Hashtbl.replace f n { n_data with level = lvl; root = rt };
103 List.iter (update f (lvl+1) rt) n_data.children
105 (* Link child [na] to parent [nb]. Raises [Already_linked]
106 * if either [na] has a parent already. Raises [Cycle_found]
107 * if the link would introduce a cycle.
110 let na_data = Hashtbl.find f na in
111 let nb_data = Hashtbl.find f nb in
112 match na_data.parent with
113 | Some _ -> raise Already_linked
114 | None when nb_data.root = na -> raise Cycle_found
116 Hashtbl.replace f na { na_data with parent = Some nb };
117 let nb_data = { nb_data with children = na :: nb_data.children } in
118 Hashtbl.replace f nb nb_data;
119 update f (nb_data.level+1) nb_data.root na
121 (* Remove the parent link of [n]. If there is no such
122 * link, does nothing.
125 let n_data = Hashtbl.find f n in
126 match n_data.parent with
129 Hashtbl.replace f n { n_data with parent = None };
130 let p_data = Hashtbl.find f p in
133 children = List.filter ((!=) n) p_data.children};
136 (* Return the roots in forest [f]. *)
138 let save_if_root n n_data roots =
139 match n_data.parent with
141 | None -> n :: roots in
142 Hashtbl.fold save_if_root f []
144 (* Return [n]'s children. *)
145 let get_children f n =
146 let n_data = Hashtbl.find f n in
152 inet_message_id : string;
153 references : string list;
155 base_subject : string;
157 message_date : PGOCaml.timestamptz }
159 type tree = Tree of message option * tree list
161 (* Rebuild mail threads for (year, month).
162 * The algorithm was originally by JWZ, http://www.jwz.org/doc/threading.html,
163 * simplified and implemented by Radu Grigore <radugrigore@yahoo.com>.
165 let thread_mail r dbh hostid ?user year month =
166 (* Pull out all the emails relevant to this month. *)
168 let year = Int32.of_int year in
169 let month = Int32.of_int month in
171 "select id, subject, inet_message_id, message_date
173 where hostid = $hostid
174 and extract (year from message_date) :: int = $year
175 and extract (month from message_date) :: int = $month" in
179 fun (id, subject, inet_message_id, message_date) ->
180 id, (inet_message_id, subject, message_date)
185 let ids = List.map fst msgs in
188 "select message_id, inet_message_id, ordering
190 where message_id in $@ids
191 order by message_id, ordering" in
193 fun (id, inet_message_id, _) ->
198 (* Aggregate the msgs and references structures together.
199 * Note that references will be in the correct order (because of the
200 * 'order by' clause in the select statement above), with the parent
201 * message appearing first in the list.
204 List.map (fun (id, (inet_message_id, subject, message_date)) ->
206 List.filter (fun (i, _) -> i = id) references in
207 let references = List.map snd references in
208 id, inet_message_id, references, subject, message_date)
211 (* Get the base subject lines (removing Re:, etc.), and convert to
212 * list of message structs.
215 List.map (fun (id, inet_message_id, references, subject, message_date) ->
216 let base_subject, is_reply = base_subject subject in
217 { id = id; inet_message_id = inet_message_id;
218 references = references; subject = subject;
219 base_subject = base_subject; is_reply = is_reply;
220 message_date = message_date }) msgs in
222 (* Create a hash of inet_message_id -> message structure, which we'll
226 let h = Hashtbl.create (List.length msgs) in
227 let add ({ inet_message_id = inet_message_id } as message) =
228 Hashtbl.replace h inet_message_id message in
232 (* Step 1: Build the forest. *)
233 let forest = Forest.create 1024 in
235 (fun { inet_message_id = inet_message_id; references = references } ->
236 Forest.add forest inet_message_id;
237 List.iter (Forest.add forest) references) msgs;
239 let add_msg_data f { inet_message_id = inet_message_id;
240 references = references } =
241 let rec add_one f n lst =
245 (try Forest.link f n h
246 with Forest.Already_linked | Forest.Cycle_found -> ());
249 match references with
252 Forest.unlink f inet_message_id;
253 Forest.link f inet_message_id h;
256 List.iter (add_msg_data forest) msgs;
258 (* Step 2: Find the root set. Convert the forest into an ordinary tree
259 * structure now (actually, a list of tree structures) since the FOREST
260 * type is no longer needed.
262 let threads = Forest.get_roots forest in
265 let rec make_tree root =
266 (* Is there a message associated with this inet_message_id? *)
268 try Some (Hashtbl.find msgmap root) with Not_found -> None in
269 (* Get the children. *)
270 let children = Forest.get_children forest root in
271 let children = List.map make_tree children in
272 Tree (message, children)
274 List.map make_tree threads in
276 (* Step 4A: Prune empty containers. *)
278 let prune = List.filter (function Tree (None, []) -> false | _ -> true) in
279 let rec loop (Tree (message, children)) =
280 let children = prune children in
281 let children = List.map loop children in
282 Tree (message, children)
284 List.map loop threads in
286 (* Step 4B: Promote children of (some) empty containers. *)
288 (* Below the top level there should be no empty containers after
289 * this. Any empty container with children has those children
292 let rec promote = function
294 | Tree (None, children) :: xs ->
295 let children = promote children in
296 children @ promote xs
297 | Tree (message, children) :: xs ->
298 let children = promote children in
299 Tree (message, children) :: promote xs
301 let threads = List.map (fun (Tree (message, children)) ->
302 let children = promote children in
303 Tree (message, children)) threads in
305 (* At the top level we're allowed to have empty containers. However
306 * if we have an empty container with just a single child, then
307 * promote that child.
309 let threads = List.map (function
310 Tree (None, [child]) -> child
311 | message -> message) threads in
315 (* Step 5: Group root set by subject. *)
316 (* Couldn't be arsed to implement this. If someone really cares about
317 * mailers which don't set References headers (probably some made by
318 * our friends at Microsoft, I wouldn't mind betting), then send me
322 (* Step 7: Sort the siblings into date order. *)
324 let compare (Tree (m1, _)) (Tree (m2, _)) =
325 let md1 = match m1 with
326 Some { message_date = message_date } -> Some message_date
328 let md2 = match m2 with
329 Some { message_date = message_date } -> Some message_date
334 let ms = List.sort compare ms in
335 List.map (fun (Tree (message, children)) ->
336 Tree (message, sort children)) ms
340 (*----- End of threading algorithm. -----*)
342 let title = sprintf "Mail/%04d/%02d/Thread Index" year month in
344 match Wikilib.generate_url_of_title r dbh hostid title with
345 Wikilib.GenURL_OK url -> url
346 | Wikilib.GenURL_Duplicate url -> url
347 | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
348 failwith ("error generating URL for title: " ^ title) in
350 let template = _get_template "mail_thread.txt" in
352 (* Rebuild the thread index page. *)
354 try load_page dbh hostid ~url ()
355 with Not_found -> new_page (Title title) in
359 sprintf "Thread index for %s %04d"
360 (!Printer.month_name (Date.month_of_int month)) year in
362 template#set "year" (string_of_int year);
363 template#set "month" (sprintf "%02d" month);
364 template#set "long_month"
365 (!Printer.month_name (Date.month_of_int month));
366 let prev_year, prev_month =
367 if month = 1 then year - 1, 12
368 else year, month - 1 in
369 template#set "prev_year" (string_of_int prev_year);
370 template#set "prev_month" (sprintf "%02d" prev_month);
371 let next_year, next_month =
372 if month = 12 then year + 1, 1
373 else year, month + 1 in
374 template#set "next_year" (string_of_int next_year);
375 template#set "next_month" (sprintf "%02d" next_month);
377 let rec markup threads =
379 | Tree (None, children) ->
380 let html = markup children in
381 "<li> -\n" :: html @ ["</li>\n"]
382 | Tree (Some message, children) ->
383 let {id = id; subject = subject} = message in
386 let title = sprintf "Mail/%s (%ld)" subject id in
387 match Wikilib.generate_url_of_title r dbh hostid title with
388 Wikilib.GenURL_OK url | Wikilib.GenURL_Duplicate url -> url
389 | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
390 failwith ("error finding URL for message: " ^ title) in
392 let html = markup children in
393 sprintf "<li> <a href=\"/%s\" class=\"internal\">%s</a>\n"
394 url (Cgi_escape.escape_html subject)
395 :: html @ [ "</li>\n" ]
397 "<ul>\n" :: List.concat (List.map f threads) @ ["</ul>\n"]
399 let html = markup threads in
400 let html = String.concat "" html in
401 template#set "threads" html;
405 (Some sectionname, None, None, None, content)
409 match model.contents_ with
410 | [] | [_] -> [ first_section ]
411 | x :: xs -> first_section :: xs in
413 let model = { model with contents_ = contents } in
417 ignore (save_page r dbh hostid ?user model)
420 failwith "cocanwiki_mail: thread_mail: unexpected SaveURLError"
422 (* The following error should be noted, but is not too bad. We
423 * expect to rebuild the thread indexes frequently, so hopefully
424 * the next time it is rebuilt it will succeed.
427 prerr_endline "cocanwiki_mail: thread_mail: SaveConflict (ignored)"