1 (* COCANWIKI - a wiki written in Objective CAML.
2 * Written by Richard W.M. Jones <rich@merjis.com>.
3 * Copyright (C) 2004 Merjis Ltd.
4 * $Id: mail_import.ml,v 1.9 2005/11/24 14:54:12 rich Exp $
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
35 open Cocanwiki_template
37 open Cocanwiki_strings
41 let irt_re = Pcre.regexp "<.*?>"
42 let ws_re = Pcre.regexp "\\s+"
43 let comma_re = Pcre.regexp "\\s*,\\s*"
44 let lines_re = Pcre.regexp "\\r?\\n"
46 let run r (q : cgi) (dbh : Dbi.connection) hostid _ user =
47 let hdr_template = get_template dbh hostid "mail_import_header.txt" in
49 (* Overwrite old messages? *)
50 let overwrite = q#param_true "overwrite" in
51 (* Rebuild index after importing this one message? *)
52 let rebuild = q#param_true "rebuild" in
54 (* Get the uploaded file. *)
57 let upload = q#upload "file" in
61 error ~back_button:true ~title:"No message"
62 dbh hostid q "No message was uploaded.";
65 (* Parse the message. *)
68 (new input_string file)
70 let stm = new input_stream ch in
71 read_mime_message stm) in
73 (* Get the mail header for easy access. *)
76 (* Get the interesting headers which will go into the database. *)
77 let get_hdr name = try hdr#field name with Not_found -> "" in
78 let subject = get_hdr "subject" in
79 let inet_message_id = get_hdr "message-id" in
80 let date = get_hdr "date" in
81 let references = get_hdr "references" in
82 let in_reply_to = get_hdr "in-reply-to" in
84 (* If the message doesn't have a Date or Message-ID header, then we
85 * cannot thread it, so give up.
87 if date = "" || inet_message_id = "" then (
88 error ~back_button:true ~title:"Headers missing"
90 "Date or Message-ID header missing. Cannot handle this message. ";
94 (* Can't handle funny characters in subject lines - remove them. *)
95 let subject = String.map (fun c ->
96 if Char.code c < 32 then ' ' else c) subject in
98 (* Parse the date field. *)
101 let date = Netdate.parse date in
103 { Dbi.year = date.Netdate.year;
104 Dbi.month = date.Netdate.month;
105 Dbi.day = date.Netdate.day; },
106 { Dbi.hour = date.Netdate.hour;
107 Dbi.min = date.Netdate.minute;
108 Dbi.sec = date.Netdate.second;
110 Dbi.timezone = Some (date.Netdate.zone / 60); } in
113 Invalid_argument _ ->
114 failwith ("cannot parse date: " ^ date) in
116 (* Find the first thing in the In-Reply-To field which looks like a
121 let subs = Pcre.exec ~rex:irt_re in_reply_to in
122 Some (Pcre.get_substring subs 0)
126 (* References is a space-separated list of message IDs. Parse that up. *)
127 let references = Pcre.split ~rex:ws_re references in
129 (* Reverse the references list, because we most often want to see the
130 * head element (ie. the most immediate parent message).
132 let references = List.rev references in
134 (* If the head element of references != the in-reply-to message ID, then
138 match in_reply_to with
141 match references with
143 | m :: ms when m <> msgid -> msgid :: m :: ms
146 (* Does this message exist in the database already? If so, and overwrite
147 * is not specified, then silently skip this message. 'overwrite' becomes
148 * 'Some id' if we need to overwrite an existing message id in the database,
149 * else 'None' if this is a never-seen-before message.
152 let sth = dbh#prepare_cached "select id from messages
153 where hostid = ? and inet_message_id = ?" in
154 sth#execute [`Int hostid; `String inet_message_id];
156 let id = sth#fetch1int () in
157 if not overwrite then (
158 ok ~title:"Message exists"
159 dbh hostid q "Message already imported";
166 (* Save all of this in the database. *)
169 None -> (* Never-seen-before message. *)
172 "insert into messages (hostid, subject, inet_message_id,
173 message_date) values (?, ?, ?, ?)" in
174 sth#execute [`Int hostid; `String subject; `String inet_message_id;
175 `Timestamp (date, time)];
176 let msgid = Int64.to_int (sth#serial "messages_id_seq") in
180 "insert into msg_references (message_id, inet_message_id,
181 ordering) values (?, ?, ?)" in
182 let ordering = ref 0 in
183 List.iter (fun inet_message_id ->
184 incr ordering; let ordering = !ordering in
185 sth#execute [`Int msgid; `String inet_message_id;
186 `Int ordering]) references;
190 | Some msgid -> (* Overwrite an existing message. *)
191 (* All the fields in the messages table should be identical to
192 * last time we imported this message. Just return the msgid.
196 (* The message is referred to by a unique title.
197 * NB. Do not change this unique title - it is also used during thread
200 let title = sprintf "Mail/%s (%d)" subject msgid in
202 (* Choose a suitable URL. *)
204 match Wikilib.generate_url_of_title dbh hostid title with
205 (* Duplicate URL is OK - eg. in the case where we are overwriting
206 * an already imported message.
208 Wikilib.GenURL_OK url | Wikilib.GenURL_Duplicate url -> url
209 | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL ->
210 failwith "generate_url_of_title returned 'TooShort' or 'BadURL'" in
212 (* To create the page, we need a few more headers ... *)
213 let from = get_hdr "from" in
214 let to_hdr = get_hdr "to" in
215 let cc = get_hdr "cc" in
217 (* Create the page. Or edit it (if we're overwriting ...). *)
220 | None -> new_page (Title title)
221 | Some _ -> load_page dbh hostid ~url () in
222 let model = { model with redirect = "" } in
224 (* Create the first section (mail header). *)
228 hdr_template#set "subject" subject;
229 hdr_template#set "from" from;
230 hdr_template#set "inet_message_id" inet_message_id;
232 let yyyy, mm, dd = date.Dbi.year, date.Dbi.month, date.Dbi.day in
233 hdr_template#set "yyyy" (sprintf "%04d" yyyy);
234 hdr_template#set "mm" (sprintf "%02d" mm);
235 hdr_template#set "dd" (sprintf "%02d" dd);
236 hdr_template#set "short_month" (short_month mm);
239 List.map (fun addr -> [ "addr", Template.VarString addr ])
240 (Pcre.split ~rex:comma_re hdr)
242 let table = get_table to_hdr in
243 hdr_template#table "to" table;
244 hdr_template#conditional "has_to" (table <> []);
245 let table = get_table cc in
246 hdr_template#table "cc" table;
247 hdr_template#conditional "has_cc" (table <> []);
249 hdr_template#to_string
251 "", "mail_header", content in
253 (* Create the second section (mail body).
254 * XXX Very simple. Should be extended to understand attachments and
255 * convert them into file or image uploads.
258 let is_text_plain hdr =
260 let ct = hdr#field "content-type" in
261 String.starts_with ct "text/plain"
265 (* Find the first text/plain body. *)
266 let rec find_body = function
267 (header, `Body mime_body) when is_text_plain header ->
269 | (_, `Body _) -> raise Not_found
270 | (_, `Parts []) -> raise Not_found (* should never happen *)
271 | (_, `Parts (m :: _)) ->
274 (* Markup a plain text body. *)
275 let markup_body text =
276 (* Get all the titles from the database! We're going to exclude
277 * mail messages from this.
281 "select lower (title) from pages where hostid = ?
282 and url is not null and title not like 'Mail/%'" in
283 sth#execute [`Int hostid];
284 let links = sth#map (function [`String s] -> s | _ -> assert false) in
286 (* This code cannot find titles which are split across multiple lines.
289 let lines = Pcre.split ~rex:lines_re text in
291 (* We don't want to mark up the same link twice, so keep track of the
292 * titles we've already used.
298 let find str sub_lc =
300 String.find (String.lowercase str) sub_lc
304 let rec loop line = function
307 let i = find line link in
308 if i >= 0 && not (List.mem link !used) then (
309 used := link :: !used;
310 let n = String.length link in
311 let n' = String.length line in
313 String.sub line 0 i ^
314 "[[" ^ String.sub line i n ^
315 "]]" ^ loop (String.sub line (i+n) (n' - (i+n))) links
321 loop line links) lines in
323 let lines = List.map trim lines in
324 let lines = List.map (fun line -> line ^ "<br>") lines in
326 String.concat "\n" lines
331 let text = find_body msg in
332 let content = markup_body text in
336 "No plain text message body found" in
337 "Message", "mail_body", content in
339 (* Overwrite the first two sections of the current page, regardless of
341 * XXX We might consider more advanced strategies here: for example,
342 * use the divname to identify the old mail_header and mail_body and
343 * overwrite those, or insert them if they don't exist.
345 let contents = model.contents in
348 [] | [_] -> [ section0; section1 ]
349 | _ :: _ :: xs -> section0 :: section1 :: xs in
350 let model = { model with contents = contents } in
352 (* Write the page back. This can throw several exceptions, but we ignore
353 * them because we want to script to fail abruptly if any of these
354 * unexpected conditions arises.
356 save_page dbh hostid ~user ~r model;
358 (* Rebuild threads? *)
360 thread_mail dbh hostid ~user ~r date.Dbi.year date.Dbi.month;
362 (* Commit to the database. *)
367 dbh hostid q ("Message " ^ inet_message_id ^ " was imported.")
370 register_script ~restrict:[CanImportMail] run