(* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. * $Id: mail_import.ml,v 1.8 2005/11/17 10:14:42 rich Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. *) open Apache open Registry open Cgi open Printf open Netmime open Netchannels open Netstream open ExtString open Cocanwiki open Cocanwiki_ok open Cocanwiki_template open Cocanwiki_date open Cocanwiki_strings open Cocanwiki_pages open Cocanwiki_mail let irt_re = Pcre.regexp "<.*?>" let ws_re = Pcre.regexp "\\s+" let comma_re = Pcre.regexp "\\s*,\\s*" let lines_re = Pcre.regexp "\\r?\\n" let run r (q : cgi) (dbh : Dbi.connection) hostid _ user = let hdr_template = get_template dbh hostid "mail_import_header.txt" in (* Overwrite old messages? *) let overwrite = q#param_true "overwrite" in (* Rebuild index after importing this one message? *) let rebuild = q#param_true "rebuild" in (* Get the uploaded file. *) let file = try let upload = q#upload "file" in upload.upload_value with Not_found -> error ~back_button:true ~title:"No message" q "No message was uploaded."; return () in (* Parse the message. *) let msg = with_in_obj_channel (new input_string file) (fun ch -> let stm = new input_stream ch in read_mime_message stm) in (* Get the mail header for easy access. *) let hdr = fst msg in (* Get the interesting headers which will go into the database. *) let get_hdr name = try hdr#field name with Not_found -> "" in let subject = get_hdr "subject" in let inet_message_id = get_hdr "message-id" in let date = get_hdr "date" in let references = get_hdr "references" in let in_reply_to = get_hdr "in-reply-to" in (* If the message doesn't have a Date or Message-ID header, then we * cannot thread it, so give up. *) if date = "" || inet_message_id = "" then ( error ~back_button:true ~title:"Headers missing" q "Date or Message-ID header missing. Cannot handle this message. "; return () ); (* Can't handle funny characters in subject lines - remove them. *) let subject = String.map (fun c -> if Char.code c < 32 then ' ' else c) subject in (* Parse the date field. *) let date, time = try let date = Netdate.parse date in let date, time = { Dbi.year = date.Netdate.year; Dbi.month = date.Netdate.month; Dbi.day = date.Netdate.day; }, { Dbi.hour = date.Netdate.hour; Dbi.min = date.Netdate.minute; Dbi.sec = date.Netdate.second; Dbi.microsec = 0; Dbi.timezone = Some (date.Netdate.zone / 60); } in date, time with Invalid_argument _ -> failwith ("cannot parse date: " ^ date) in (* Find the first thing in the In-Reply-To field which looks like a * message ID. *) let in_reply_to = try let subs = Pcre.exec ~rex:irt_re in_reply_to in Some (Pcre.get_substring subs 0) with Not_found -> None in (* References is a space-separated list of message IDs. Parse that up. *) let references = Pcre.split ~rex:ws_re references in (* Reverse the references list, because we most often want to see the * head element (ie. the most immediate parent message). *) let references = List.rev references in (* If the head element of references != the in-reply-to message ID, then * prepend it. *) let references = match in_reply_to with | None -> references | Some msgid -> match references with | [] -> [msgid] | m :: ms when m <> msgid -> msgid :: m :: ms | ms -> ms in (* Does this message exist in the database already? If so, and overwrite * is not specified, then silently skip this message. 'overwrite' becomes * 'Some id' if we need to overwrite an existing message id in the database, * else 'None' if this is a never-seen-before message. *) let overwrite = let sth = dbh#prepare_cached "select id from messages where hostid = ? and inet_message_id = ?" in sth#execute [`Int hostid; `String inet_message_id]; try let id = sth#fetch1int () in if not overwrite then ( ok ~title:"Message exists" q "Message already imported"; return () ); Some id with Not_found -> None in (* Save all of this in the database. *) let msgid = match overwrite with None -> (* Never-seen-before message. *) let sth = dbh#prepare_cached "insert into messages (hostid, subject, inet_message_id, message_date) values (?, ?, ?, ?)" in sth#execute [`Int hostid; `String subject; `String inet_message_id; `Timestamp (date, time)]; let msgid = Int64.to_int (sth#serial "messages_id_seq") in let sth = dbh#prepare_cached "insert into msg_references (message_id, inet_message_id, ordering) values (?, ?, ?)" in let ordering = ref 0 in List.iter (fun inet_message_id -> incr ordering; let ordering = !ordering in sth#execute [`Int msgid; `String inet_message_id; `Int ordering]) references; msgid | Some msgid -> (* Overwrite an existing message. *) (* All the fields in the messages table should be identical to * last time we imported this message. Just return the msgid. *) msgid in (* The message is referred to by a unique title. * NB. Do not change this unique title - it is also used during thread * indexing. *) let title = sprintf "Mail/%s (%d)" subject msgid in (* Choose a suitable URL. *) let url = match Wikilib.generate_url_of_title dbh hostid title with (* Duplicate URL is OK - eg. in the case where we are overwriting * an already imported message. *) Wikilib.GenURL_OK url | Wikilib.GenURL_Duplicate url -> url | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL -> failwith "generate_url_of_title returned 'TooShort' or 'BadURL'" in (* To create the page, we need a few more headers ... *) let from = get_hdr "from" in let to_hdr = get_hdr "to" in let cc = get_hdr "cc" in (* Create the page. Or edit it (if we're overwriting ...). *) let model = match overwrite with | None -> new_page (Title title) | Some _ -> load_page dbh hostid ~url () in let model = { model with redirect = "" } in (* Create the first section (mail header). *) let section0 = let content = (* XXX Escaping! *) hdr_template#set "subject" subject; hdr_template#set "from" from; hdr_template#set "inet_message_id" inet_message_id; let yyyy, mm, dd = date.Dbi.year, date.Dbi.month, date.Dbi.day in hdr_template#set "yyyy" (sprintf "%04d" yyyy); hdr_template#set "mm" (sprintf "%02d" mm); hdr_template#set "dd" (sprintf "%02d" dd); hdr_template#set "short_month" (short_month mm); let get_table hdr = List.map (fun addr -> [ "addr", Template.VarString addr ]) (Pcre.split ~rex:comma_re hdr) in let table = get_table to_hdr in hdr_template#table "to" table; hdr_template#conditional "has_to" (table <> []); let table = get_table cc in hdr_template#table "cc" table; hdr_template#conditional "has_cc" (table <> []); hdr_template#to_string in "", "mail_header", content in (* Create the second section (mail body). * XXX Very simple. Should be extended to understand attachments and * convert them into file or image uploads. *) let section1 = let is_text_plain hdr = try let ct = hdr#field "content-type" in String.starts_with ct "text/plain" with Not_found -> true in (* Find the first text/plain body. *) let rec find_body = function (header, `Body mime_body) when is_text_plain header -> mime_body#value | (_, `Body _) -> raise Not_found | (_, `Parts []) -> raise Not_found (* should never happen *) | (_, `Parts (m :: _)) -> find_body m in (* Markup a plain text body. *) let markup_body text = (* Get all the titles from the database! We're going to exclude * mail messages from this. *) let sth = dbh#prepare_cached "select lower (title) from pages where hostid = ? and url is not null and title not like 'Mail/%'" in sth#execute [`Int hostid]; let links = sth#map (function [`String s] -> s | _ -> assert false) in (* This code cannot find titles which are split across multiple lines. * XXX *) let lines = Pcre.split ~rex:lines_re text in (* We don't want to mark up the same link twice, so keep track of the * titles we've already used. *) let used = ref [] in let lines = List.map (fun line -> let find str sub_lc = try String.find (String.lowercase str) sub_lc with Invalid_string -> -1 in let rec loop line = function [] -> line | link :: links -> let i = find line link in if i >= 0 && not (List.mem link !used) then ( used := link :: !used; let n = String.length link in let n' = String.length line in let line = String.sub line 0 i ^ "[[" ^ String.sub line i n ^ "]]" ^ loop (String.sub line (i+n) (n' - (i+n))) links in line ) else loop line links in loop line links) lines in let lines = List.map trim lines in let lines = List.map (fun line -> line ^ "
") lines in String.concat "\n" lines in let content = try let text = find_body msg in let content = markup_body text in content with Not_found -> "No plain text message body found" in "Message", "mail_body", content in (* Overwrite the first two sections of the current page, regardless of * what they contain. * XXX We might consider more advanced strategies here: for example, * use the divname to identify the old mail_header and mail_body and * overwrite those, or insert them if they don't exist. *) let contents = model.contents in let contents = match contents with [] | [_] -> [ section0; section1 ] | _ :: _ :: xs -> section0 :: section1 :: xs in let model = { model with contents = contents } in (* Write the page back. This can throw several exceptions, but we ignore * them because we want to script to fail abruptly if any of these * unexpected conditions arises. *) save_page dbh hostid ~user ~r model; (* Rebuild threads? *) if rebuild then thread_mail dbh hostid ~user ~r date.Dbi.year date.Dbi.month; (* Commit to the database. *) dbh#commit (); (* Finish off. *) ok ~title:"Imported" q ("Message " ^ inet_message_id ^ " was imported.") let () = register_script ~restrict:[CanImportMail] run