(* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. * $Id: mail_import.ml,v 1.11 2006/03/28 13:20:00 rich Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. *) open Apache open Registry open Cgi open Printf open Netmime open Netchannels open Netstream open ExtString open Cocanwiki open Cocanwiki_ok open Cocanwiki_template open Cocanwiki_date open Cocanwiki_strings open Cocanwiki_pages open Cocanwiki_mail let irt_re = Pcre.regexp "<.*?>" let ws_re = Pcre.regexp "\\s+" let comma_re = Pcre.regexp "\\s*,\\s*" let lines_re = Pcre.regexp "\\r?\\n" let run r (q : cgi) dbh hostid _ user = let hdr_template = get_template dbh hostid "mail_import_header.txt" in (* Overwrite old messages? *) let overwrite = q#param_true "overwrite" in (* Rebuild index after importing this one message? *) let rebuild = q#param_true "rebuild" in (* Get the uploaded file. *) let file = try let upload = q#upload "file" in upload.upload_value with Not_found -> error ~back_button:true ~title:"No message" dbh hostid q "No message was uploaded."; return () in (* Parse the message. *) let msg = with_in_obj_channel (new input_string file) (fun ch -> let stm = new input_stream ch in read_mime_message stm) in (* Get the mail header for easy access. *) let hdr = fst msg in (* Get the interesting headers which will go into the database. *) let get_hdr name = try hdr#field name with Not_found -> "" in let subject = get_hdr "subject" in let inet_message_id = get_hdr "message-id" in let date = get_hdr "date" in let references = get_hdr "references" in let in_reply_to = get_hdr "in-reply-to" in (* If the message doesn't have a Date or Message-ID header, then we * cannot thread it, so give up. *) if date = "" || inet_message_id = "" then ( error ~back_button:true ~title:"Headers missing" dbh hostid q "Date or Message-ID header missing. Cannot handle this message. "; return () ); (* Can't handle funny characters in subject lines - remove them. *) let subject = String.map (fun c -> if Char.code c < 32 then ' ' else c) subject in (* Parse the date field. *) let message_date = try let date = Netdate.parse date in let cal = Calendar.make date.Netdate.year date.Netdate.month date.Netdate.day date.Netdate.hour date.Netdate.minute date.Netdate.second in let tz = Time_Zone.UTC_Plus (date.Netdate.zone / 60) in cal, tz with Invalid_argument _ -> failwith ("cannot parse date: " ^ date) in (* Find the first thing in the In-Reply-To field which looks like a * message ID. *) let in_reply_to = try let subs = Pcre.exec ~rex:irt_re in_reply_to in Some (Pcre.get_substring subs 0) with Not_found -> None in (* References is a space-separated list of message IDs. Parse that up. *) let references = Pcre.split ~rex:ws_re references in (* Reverse the references list, because we most often want to see the * head element (ie. the most immediate parent message). *) let references = List.rev references in (* If the head element of references != the in-reply-to message ID, then * prepend it. *) let references = match in_reply_to with | None -> references | Some msgid -> match references with | [] -> [msgid] | m :: ms when m <> msgid -> msgid :: m :: ms | ms -> ms in (* Does this message exist in the database already? If so, and overwrite * is not specified, then silently skip this message. 'overwrite' becomes * 'Some id' if we need to overwrite an existing message id in the database, * else 'None' if this is a never-seen-before message. *) let overwrite = let rows = PGSQL(dbh) "select id from messages where hostid = $hostid and inet_message_id = $inet_message_id" in match rows with | [id] -> if not overwrite then ( ok ~title:"Message exists" dbh hostid q "Message already imported"; return () ); Some id | [] -> None | _ -> assert false in (* Save all of this in the database. *) let msgid = match overwrite with None -> (* Never-seen-before message. *) PGSQL(dbh) "insert into messages (hostid, subject, inet_message_id, message_date) values ($hostid, $subject, $inet_message_id, $message_date)"; let msgid = PGOCaml.serial4 dbh "messages_id_seq" in let ordering = ref 0 in List.iter (fun inet_message_id -> incr ordering; let ordering = Int32.of_int !ordering in PGSQL(dbh) "insert into msg_references (message_id, inet_message_id, ordering) values ($msgid, $inet_message_id, $ordering)" ) references; msgid | Some msgid -> (* Overwrite an existing message. *) (* All the fields in the messages table should be identical to * last time we imported this message. Just return the msgid. *) msgid in (* The message is referred to by a unique title. * NB. Do not change this unique title - it is also used during thread * indexing. *) let title = sprintf "Mail/%s (%d)" subject msgid in (* Choose a suitable URL. *) let url = match Wikilib.generate_url_of_title dbh hostid title with (* Duplicate URL is OK - eg. in the case where we are overwriting * an already imported message. *) Wikilib.GenURL_OK url | Wikilib.GenURL_Duplicate url -> url | Wikilib.GenURL_TooShort | Wikilib.GenURL_BadURL -> failwith "generate_url_of_title returned 'TooShort' or 'BadURL'" in (* To create the page, we need a few more headers ... *) let from = get_hdr "from" in let to_hdr = get_hdr "to" in let cc = get_hdr "cc" in (* Create the page. Or edit it (if we're overwriting ...). *) let model = match overwrite with | None -> new_page (Title title) | Some _ -> load_page dbh hostid ~url () in let model = { model with redirect = "" } in (* Create the first section (mail header). *) let section0 = let content = (* XXX Escaping! *) hdr_template#set "subject" subject; hdr_template#set "from" from; hdr_template#set "inet_message_id" inet_message_id; let yyyy, mm, dd = date.Dbi.year, date.Dbi.month, date.Dbi.day in hdr_template#set "yyyy" (sprintf "%04d" yyyy); hdr_template#set "mm" (sprintf "%02d" mm); hdr_template#set "dd" (sprintf "%02d" dd); hdr_template#set "short_month" (short_month mm); let get_table hdr = List.map (fun addr -> [ "addr", Template.VarString addr ]) (Pcre.split ~rex:comma_re hdr) in let table = get_table to_hdr in hdr_template#table "to" table; hdr_template#conditional "has_to" (table <> []); let table = get_table cc in hdr_template#table "cc" table; hdr_template#conditional "has_cc" (table <> []); hdr_template#to_string in "", "mail_header", content in (* Create the second section (mail body). * XXX Very simple. Should be extended to understand attachments and * convert them into file or image uploads. *) let section1 = let is_text_plain hdr = try let ct = hdr#field "content-type" in String.starts_with ct "text/plain" with Not_found -> true in (* Find the first text/plain body. *) let rec find_body = function (header, `Body mime_body) when is_text_plain header -> mime_body#value | (_, `Body _) -> raise Not_found | (_, `Parts []) -> raise Not_found (* should never happen *) | (_, `Parts (m :: _)) -> find_body m in (* Markup a plain text body. *) let markup_body text = (* Get all the titles from the database! We're going to exclude * mail messages from this. *) let links = PGSQL(dbh) "select lower (title) from pages where hostid = $hostid and url is not null and title not like 'Mail/%'" in let links = List.map Option.get links in (* This code cannot find titles which are split across multiple lines. * XXX *) let lines = Pcre.split ~rex:lines_re text in (* We don't want to mark up the same link twice, so keep track of the * titles we've already used. *) let used = ref [] in let lines = List.map (fun line -> let find str sub_lc = try String.find (String.lowercase str) sub_lc with Invalid_string -> -1 in let rec loop line = function [] -> line | link :: links -> let i = find line link in if i >= 0 && not (List.mem link !used) then ( used := link :: !used; let n = String.length link in let n' = String.length line in let line = String.sub line 0 i ^ "[[" ^ String.sub line i n ^ "]]" ^ loop (String.sub line (i+n) (n' - (i+n))) links in line ) else loop line links in loop line links) lines in let lines = List.map trim lines in let lines = List.map (fun line -> line ^ "
") lines in String.concat "\n" lines in let content = try let text = find_body msg in let content = markup_body text in content with Not_found -> "No plain text message body found" in "Message", "mail_body", content in (* Overwrite the first two sections of the current page, regardless of * what they contain. * XXX We might consider more advanced strategies here: for example, * use the divname to identify the old mail_header and mail_body and * overwrite those, or insert them if they don't exist. *) let contents = model.contents in let contents = match contents with [] | [_] -> [ section0; section1 ] | _ :: _ :: xs -> section0 :: section1 :: xs in let model = { model with contents = contents } in (* Write the page back. This can throw several exceptions, but we ignore * them because we want to script to fail abruptly if any of these * unexpected conditions arises. *) save_page dbh hostid ~user ~r model; (* Rebuild threads? *) if rebuild then thread_mail dbh hostid ~user ~r date.Dbi.year date.Dbi.month; (* Commit to the database. *) PGOCaml.commit dbh; (* Finish off. *) ok ~title:"Imported" dbh hostid q ("Message " ^ inet_message_id ^ " was imported.") let () = register_script ~restrict:[CanImportMail] run