1 (* COCANWIKI - a wiki written in Objective CAML.
2 * Written by Richard W.M. Jones <rich@merjis.com>.
3 * Copyright (C) 2004 Merjis Ltd.
4 * $Id: cocanwiki_mail.ml,v 1.1 2004/10/21 11:42:05 rich Exp $
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
27 open Cocanwiki_template
30 (* Given a subject line, return the "base" subject.
31 * eg. "Re: my message" -> "my message"
33 let re_re = Pcre.regexp "^Re\\[\\d+\\]:\\s*"
35 let base_subject subject =
36 let rec loop subject =
37 let n = String.length subject in
38 if String.starts_with subject "Re: " then
39 loop (String.sub subject 4 (n-4))
40 else if String.starts_with subject "Re:" then
41 loop (String.sub subject 3 (n-3))
42 else if String.starts_with subject "RE: " then
43 loop (String.sub subject 4 (n-4))
44 else if String.starts_with subject "RE:" then
45 loop (String.sub subject 3 (n-3))
46 else if Pcre.pmatch ~rex:re_re subject then (
47 let subs = Pcre.exec ~rex:re_re subject in
48 let i = String.length (Pcre.get_substring subs 0) in
49 loop (String.sub subject i (n-i))
53 let base_subject = loop subject in
54 let is_reply = base_subject <> subject in
55 base_subject, is_reply
57 (* This abstract data type represents a 'forest' and is used for
58 * the implementation of threading below.
63 exception Already_linked
65 val create : int -> 'a t
66 val add : 'a t -> 'a -> unit
67 val link : 'a t -> 'a -> 'a -> unit
68 val unlink : 'a t -> 'a -> unit
69 val get_roots : 'a t -> 'a list
70 val get_children : 'a t -> 'a -> 'a list
73 module Forest : FOREST =
82 type 'a t = ('a, 'a node_data) Hashtbl.t
84 exception Already_linked
87 let create n = Hashtbl.create n
89 (* Add node [n] to forest [f]. *)
91 Hashtbl.replace f n { level = 0;
96 (* Set the level field of [n]'s children to increasing
97 * values, starting with [lvl]. Point all the root
98 * fields of the children to [rt].
100 let rec update f lvl rt n =
101 let n_data = Hashtbl.find f n in
102 Hashtbl.replace f n { n_data with level = lvl; root = rt };
103 List.iter (update f (lvl+1) rt) n_data.children
105 (* Link child [na] to parent [nb]. Raises [Already_linked]
106 * if either [na] has a parent already. Raises [Cycle_found]
107 * if the link would introduce a cycle.
110 let na_data = Hashtbl.find f na in
111 let nb_data = Hashtbl.find f nb in
112 match na_data.parent with
113 | Some _ -> raise Already_linked
114 | None when nb_data.root = na -> raise Cycle_found
116 Hashtbl.replace f na { na_data with parent = Some nb };
117 let nb_data = { nb_data with children = na :: nb_data.children } in
118 Hashtbl.replace f nb nb_data;
119 update f (nb_data.level+1) nb_data.root na
121 (* Remove the parent link of [n]. If there is no such
122 * link, does nothing.
125 let n_data = Hashtbl.find f n in
126 match n_data.parent with
129 Hashtbl.replace f n { n_data with parent = None };
130 let p_data = Hashtbl.find f p in
133 children = List.filter ((!=) n) p_data.children};
136 (* Return the roots in forest [f]. *)
138 let save_if_root n n_data roots =
139 match n_data.parent with
141 | None -> n :: roots in
142 Hashtbl.fold save_if_root f []
144 (* Return [n]'s children. *)
145 let get_children f n =
146 let n_data = Hashtbl.find f n in
152 inet_message_id : string;
153 references : string list;
155 base_subject : string;
157 message_date : Dbi.datetime }
159 type tree = Tree of message option * tree list
161 (* Rebuild mail threads for (year, month).
162 * The algorithm was originally by JWZ, http://www.jwz.org/doc/threading.html,
163 * simplified and implemented by Radu Grigore <radugrigore@yahoo.com>.
165 let thread_mail (dbh : Dbi.connection) hostid ?user ?r year month =
166 (* Pull out all the emails relevant to this month. *)
168 dbh#prepare_cached "select id, subject, inet_message_id, message_date
171 and extract (year from message_date) = ?
172 and extract (month from message_date) = ?" in
173 sth#execute [`Int hostid; `Int year; `Int month];
177 (function [`Int id; `String subject; `String inet_message_id;
178 `Timestamp message_date] ->
179 id, (inet_message_id, subject, message_date)
180 | _ -> assert false) in
185 let qs = Dbi.placeholders (List.length msgs) in
186 dbh#prepare_cached ("select message_id, inet_message_id, ordering
188 where message_id in " ^ qs ^ "
189 order by message_id, ordering") in
190 sth#execute (List.map (fun (id, _) -> `Int id) msgs);
191 sth#map (function [`Int id; `String inet_message_id; _] ->
196 (* Aggregate the msgs and references structures together.
197 * Note that references will be in the correct order (because of the
198 * 'order by' clause in the select statement above), with the parent
199 * message appearing first in the list.
202 List.map (fun (id, (inet_message_id, subject, message_date)) ->
204 List.filter (fun (i, _) -> i = id) references in
205 let references = List.map snd references in
206 id, inet_message_id, references, subject, message_date)
209 (* Get the base subject lines (removing Re:, etc.), and convert to
210 * list of message structs.
213 List.map (fun (id, inet_message_id, references, subject, message_date) ->
214 let base_subject, is_reply = base_subject subject in
215 { id = id; inet_message_id = inet_message_id;
216 references = references; subject = subject;
217 base_subject = base_subject; is_reply = is_reply;
218 message_date = message_date }) msgs in
220 (* Create a hash of inet_message_id -> message structure, which we'll
224 let h = Hashtbl.create (List.length msgs) in
225 let add ({ inet_message_id = inet_message_id } as message) =
226 Hashtbl.replace h inet_message_id message in
230 (* Step 1: Build the forest. *)
231 let forest = Forest.create 1024 in
233 (fun { inet_message_id = inet_message_id; references = references } ->
234 Forest.add forest inet_message_id;
235 List.iter (Forest.add forest) references) msgs;
237 let add_msg_data f { inet_message_id = inet_message_id;
238 references = references } =
239 let rec add_one f n lst =
243 (try Forest.link f n h
244 with Forest.Already_linked | Forest.Cycle_found -> ());
247 match references with
250 Forest.unlink f inet_message_id;
251 Forest.link f inet_message_id h;
254 List.iter (add_msg_data forest) msgs;
256 (* Step 2: Find the root set. Convert the forest into an ordinary tree
257 * structure now (actually, a list of tree structures) since the FOREST
258 * type is no longer needed.
260 let threads = Forest.get_roots forest in
263 let rec make_tree root =
264 (* Is there a message associated with this inet_message_id? *)
266 try Some (Hashtbl.find msgmap root) with Not_found -> None in
267 (* Get the children. *)
268 let children = Forest.get_children forest root in
269 let children = List.map make_tree children in
270 Tree (message, children)
272 List.map make_tree threads in
274 (* Step 4A: Prune empty containers. *)
276 let prune = List.filter (function Tree (None, []) -> false | _ -> true) in
277 let rec loop (Tree (message, children)) =
278 let children = prune children in
279 let children = List.map loop children in
280 Tree (message, children)
282 List.map loop threads in
284 (* Step 4B: Promote children of (some) empty containers. *)
286 (* Below the top level there should be no empty containers after
287 * this. Any empty container with children has those children
290 let rec promote = function
292 | Tree (None, children) :: xs ->
293 let children = promote children in
294 children @ promote xs
295 | Tree (message, children) :: xs ->
296 let children = promote children in
297 Tree (message, children) :: promote xs
299 let threads = List.map (fun (Tree (message, children)) ->
300 let children = promote children in
301 Tree (message, children)) threads in
303 (* At the top level we're allowed to have empty containers. However
304 * if we have an empty container with just a single child, then
305 * promote that child.
307 let threads = List.map (function
308 Tree (None, [child]) -> child
309 | message -> message) threads in
313 (* Step 5: Group root set by subject. *)
314 (* Couldn't be arsed to implement this. If someone really cares about
315 * mailers which don't set References headers (probably some made by
316 * our friends at Microsoft, I wouldn't mind betting), then send me
320 (* Step 7: Sort the siblings into date order. *)
322 let compare (Tree (m1, _)) (Tree (m2, _)) =
323 let md1 = match m1 with
324 Some { message_date = message_date } -> Some message_date
326 let md2 = match m2 with
327 Some { message_date = message_date } -> Some message_date
332 let ms = List.sort compare ms in
333 List.map (fun (Tree (message, children)) ->
334 Tree (message, sort children)) ms
338 (*----- End of threading algorithm. -----*)
340 let title = sprintf "Mail/%04d/%02d/Thread Index" year month in
342 match Wikilib.generate_url_of_title dbh hostid title with
343 Wikilib.GenURL_OK url -> url
344 | Wikilib.GenURL_Duplicate url -> url
345 | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
346 failwith ("error generating URL for title: " ^ title) in
348 let template = _get_template "mail_thread.txt" in
350 (* Rebuild the thread index page. *)
352 try load_page dbh hostid ~url ()
353 with Not_found -> new_page (Title title) in
357 sprintf "Thread index for %s %04d" (long_month month) year in
359 template#set "year" (string_of_int year);
360 template#set "month" (sprintf "%02d" month);
361 template#set "long_month" (long_month month);
362 let prev_year, prev_month =
363 if month = 1 then year - 1, 12
364 else year, month - 1 in
365 template#set "prev_year" (string_of_int prev_year);
366 template#set "prev_month" (sprintf "%02d" prev_month);
367 let next_year, next_month =
368 if month = 12 then year + 1, 1
369 else year, month + 1 in
370 template#set "next_year" (string_of_int next_year);
371 template#set "next_month" (sprintf "%02d" next_month);
373 let rec markup threads =
375 | Tree (None, children) ->
376 let html = markup children in
377 "<li> -\n" :: html @ ["</li>\n"]
378 | Tree (Some message, children) ->
379 let {id = id; subject = subject} = message in
382 let title = sprintf "Mail/%s (%d)" subject id in
383 match Wikilib.generate_url_of_title dbh hostid title with
384 Wikilib.GenURL_OK url | Wikilib.GenURL_Duplicate url -> url
385 | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
386 failwith ("error finding URL for message: " ^ title) in
388 let html = markup children in
389 sprintf "<li> <a href=\"/%s\" class=\"internal\">%s</a>\n"
390 url (Cgi_escape.escape_html subject)
391 :: html @ [ "</li>\n" ]
393 "<ul>\n" :: List.concat (List.map f threads) @ ["</ul>\n"]
395 let html = markup threads in
396 let html = String.concat "" html in
397 template#set "threads" html;
401 (sectionname, "", content)
405 match model.contents with
406 | [] | [_] -> [ first_section ]
407 | x :: xs -> first_section :: xs in
409 let model = { model with contents = contents } in
413 ignore (save_page dbh hostid ?user ?r model)
416 failwith "cocanwiki_mail: thread_mail: unexpected SaveURLError"
418 (* The following error should be noted, but is not too bad. We
419 * expect to rebuild the thread indexes frequently, so hopefully
420 * the next time it is rebuilt it will succeed.
423 prerr_endline "cocanwiki_mail: thread_mail: SaveConflict (ignored)"