X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=scripts%2Fmail_import.ml;h=ae04ef763f5183dc0fd2e1ec824066f0c2190d0c;hb=cd059731a60fd3d4dcf426430ad26ff227b91910;hp=dce9f5ab54bc97559ff2a0e5c8180dcf5e4cb2e5;hpb=6eacefcb7258db7b56fe796af84961cafac90eac;p=cocanwiki.git diff --git a/scripts/mail_import.ml b/scripts/mail_import.ml index dce9f5a..ae04ef7 100644 --- a/scripts/mail_import.ml +++ b/scripts/mail_import.ml @@ -1,7 +1,7 @@ (* COCANWIKI - a wiki written in Objective CAML. * Written by Richard W.M. Jones . * Copyright (C) 2004 Merjis Ltd. - * $Id: mail_import.ml,v 1.1 2004/10/11 14:13:04 rich Exp $ + * $Id: mail_import.ml,v 1.12 2006/03/28 16:24:07 rich Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,16 +28,22 @@ open Netmime open Netchannels open Netstream +open ExtString + open Cocanwiki open Cocanwiki_ok open Cocanwiki_template open Cocanwiki_date +open Cocanwiki_strings open Cocanwiki_pages +open Cocanwiki_mail let irt_re = Pcre.regexp "<.*?>" -let ws_re = Pcre.regexp "\\S+" +let ws_re = Pcre.regexp "\\s+" +let comma_re = Pcre.regexp "\\s*,\\s*" +let lines_re = Pcre.regexp "\\r?\\n" -let run r (q : cgi) (dbh : Dbi.connection) hostid _ user = +let run r (q : cgi) dbh hostid _ user = let hdr_template = get_template dbh hostid "mail_import_header.txt" in (* Overwrite old messages? *) @@ -52,10 +58,8 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid _ user = upload.upload_value with Not_found -> - (* Force an error status which a script can detect. *) - Request.set_status r cHTTP_BAD_REQUEST; error ~back_button:true ~title:"No message" - q "No message was uploaded."; + dbh hostid q "No message was uploaded."; return () in (* Parse the message. *) @@ -81,29 +85,32 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid _ user = * cannot thread it, so give up. *) if date = "" || inet_message_id = "" then ( - Request.set_status r cHTTP_BAD_REQUEST; error ~back_button:true ~title:"Headers missing" - q "Date or Message-ID header missing. Cannot handle this message. "; + dbh hostid q + "Date or Message-ID header missing. Cannot handle this message. "; return () ); + (* Can't handle funny characters in subject lines - remove them. *) + let subject = String.map (fun c -> + if Char.code c < 32 then ' ' else c) subject in + (* Parse the date field. *) - let date, time = + let message_date = try let date = Netdate.parse date in - let date, time = - { Dbi.year = date.Netdate.year; - Dbi.month = date.Netdate.month; - Dbi.day = date.Netdate.day; }, - { Dbi.hour = date.Netdate.hour; - Dbi.min = date.Netdate.minute; - Dbi.sec = date.Netdate.second; - Dbi.microsec = 0; - Dbi.timezone = Some (date.Netdate.zone / 60); } in - date, time + let cal = Calendar.make + date.Netdate.year + date.Netdate.month + date.Netdate.day + date.Netdate.hour + date.Netdate.minute + date.Netdate.second in + let tz = Time_Zone.UTC_Plus (date.Netdate.zone / 60) in + cal, tz with - Invalid_argument _ -> - failwith ("cannot parse date: " ^ date) in + Invalid_argument _ -> + failwith ("cannot parse date: " ^ date) in (* Find the first thing in the In-Reply-To field which looks like a * message ID. @@ -141,41 +148,38 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid _ user = * else 'None' if this is a never-seen-before message. *) let overwrite = - let sth = dbh#prepare_cached "select id from messages - where hostid = ? and inet_message_id = ?" in - sth#execute [`Int hostid; `String inet_message_id]; - try - let id = sth#fetch1int () in - if not overwrite then ( - ok ~title:"Message exists" - q "Message already imported"; - return () - ); - Some id - with - Not_found -> None in + let rows = PGSQL(dbh) + "select id from messages + where hostid = $hostid and inet_message_id = $inet_message_id" in + match rows with + | [id] -> + if not overwrite then ( + ok ~title:"Message exists" + dbh hostid q "Message already imported"; + return () + ); + Some id + | [] -> None + | _ -> assert false in (* Save all of this in the database. *) let msgid = match overwrite with None -> (* Never-seen-before message. *) - let sth = - dbh#prepare_cached - "insert into messages (hostid, subject, inet_message_id, - message_date) values (?, ?, ?, ?)" in - sth#execute [`Int hostid; `String subject; `String inet_message_id; - `Timestamp (date, time)]; - let msgid = sth#serial "messages_id_seq" in - - let sth = - dbh#prepare_cached - "insert into msg_references (message_id, inet_message_id, - ordering) values (?, ?, ?)" in + PGSQL(dbh) + "insert into messages (hostid, subject, inet_message_id, + message_date) + values ($hostid, $subject, $inet_message_id, $message_date)"; + let msgid = PGOCaml.serial4 dbh "messages_id_seq" in + let ordering = ref 0 in List.iter (fun inet_message_id -> - incr ordering; let ordering = !ordering in - sth#execute [`Int msgid; `String inet_message_id; - `Int ordering]) references; + incr ordering; let ordering = Int32.of_int !ordering in + PGSQL(dbh) + "insert into msg_references (message_id, + inet_message_id, ordering) + values ($msgid, $inet_message_id, $ordering)" + ) references; msgid @@ -185,8 +189,11 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid _ user = *) msgid in - (* The message is referred to by a unique title: *) - let title = sprintf "Mail/%s (%d)" subject msgid in + (* The message is referred to by a unique title. + * NB. Do not change this unique title - it is also used during thread + * indexing. + *) + let title = sprintf "Mail/%s (%ld)" subject msgid in (* Choose a suitable URL. *) let url = @@ -208,28 +215,122 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid _ user = match overwrite with | None -> new_page (Title title) | Some _ -> load_page dbh hostid ~url () in - let model = { model with redirect = "" } in + let model = { model with redirect = None } in (* Create the first section (mail header). *) let section0 = let content = (* XXX Escaping! *) hdr_template#set "subject" subject; - let yyyy, mm, dd = date.Dbi.year, date.Dbi.month, date.Dbi.day in - hdr_template#set "yyyy" (sprintf "%04d" yyyy); - hdr_template#set "mm" (sprintf "%02d" mm); - hdr_template#set "dd" (sprintf "%02d" dd); - hdr_template#set "short_month" (short_month mm); hdr_template#set "from" from; hdr_template#set "inet_message_id" inet_message_id; + + let date = fst message_date in + hdr_template#set "yyyy" (Printer.CalendarPrinter.sprint "%Y" date); + hdr_template#set "mm" (Printer.CalendarPrinter.sprint "%m" date); + hdr_template#set "dd" (Printer.CalendarPrinter.sprint "%d" date); + hdr_template#set "short_month" + (Printer.short_name_of_month (Calendar.month date)); + + let get_table hdr = + List.map (fun addr -> [ "addr", Template.VarString addr ]) + (Pcre.split ~rex:comma_re hdr) + in + let table = get_table to_hdr in + hdr_template#table "to" table; + hdr_template#conditional "has_to" (table <> []); + let table = get_table cc in + hdr_template#table "cc" table; + hdr_template#conditional "has_cc" (table <> []); + hdr_template#to_string in - "", "mail_header", content in + None, Some "mail_header", content in - (* Create the second section (mail body). *) + (* Create the second section (mail body). + * XXX Very simple. Should be extended to understand attachments and + * convert them into file or image uploads. + *) let section1 = - let content = "(mail body should go here XXX)" in - "Message", "mail_body", content in + let is_text_plain hdr = + try + let ct = hdr#field "content-type" in + String.starts_with ct "text/plain" + with + Not_found -> true in + + (* Find the first text/plain body. *) + let rec find_body = function + (header, `Body mime_body) when is_text_plain header -> + mime_body#value + | (_, `Body _) -> raise Not_found + | (_, `Parts []) -> raise Not_found (* should never happen *) + | (_, `Parts (m :: _)) -> + find_body m in + + (* Markup a plain text body. *) + let markup_body text = + (* Get all the titles from the database! We're going to exclude + * mail messages from this. + *) + let links = + PGSQL(dbh) + "select lower (title) from pages where hostid = $hostid + and url is not null and title not like 'Mail/%'" in + let links = List.map Option.get links in + + (* This code cannot find titles which are split across multiple lines. + * XXX + *) + let lines = Pcre.split ~rex:lines_re text in + + (* We don't want to mark up the same link twice, so keep track of the + * titles we've already used. + *) + let used = ref [] in + let lines = + List.map + (fun line -> + let find str sub_lc = + try + String.find (String.lowercase str) sub_lc + with + Invalid_string -> -1 + in + let rec loop line = function + [] -> line + | link :: links -> + let i = find line link in + if i >= 0 && not (List.mem link !used) then ( + used := link :: !used; + let n = String.length link in + let n' = String.length line in + let line = + String.sub line 0 i ^ + "[[" ^ String.sub line i n ^ + "]]" ^ loop (String.sub line (i+n) (n' - (i+n))) links + in + line + ) else + loop line links + in + loop line links) lines in + + let lines = List.map trim lines in + let lines = List.map (fun line -> line ^ "
") lines in + + String.concat "\n" lines + in + + let content = + try + let text = find_body msg in + let content = markup_body text in + content + with + Not_found -> + "No plain text message body found" in + Some "Message", Some "mail_body", content in (* Overwrite the first two sections of the current page, regardless of * what they contain. @@ -237,25 +338,31 @@ let run r (q : cgi) (dbh : Dbi.connection) hostid _ user = * use the divname to identify the old mail_header and mail_body and * overwrite those, or insert them if they don't exist. *) - let contents = model.contents in + let contents = model.contents_ in let contents = match contents with - [] | [_] -> [ section0; section1 ] - | _ :: _ :: xs -> section0 :: section1 :: xs in - let model = { model with contents = contents } in + | [] | [_] -> [ section0; section1 ] + | _ :: _ :: xs -> section0 :: section1 :: xs in + let model = { model with contents_ = contents } in (* Write the page back. This can throw several exceptions, but we ignore * them because we want to script to fail abruptly if any of these * unexpected conditions arises. *) - save_page dbh hostid ~user ~r model; + ignore (save_page dbh hostid ~user ~r model); + + (* Rebuild threads? *) + if rebuild then + thread_mail dbh hostid ~user ~r + (Calendar.year (fst message_date)) + (Date.int_of_month (Calendar.month (fst message_date))); (* Commit to the database. *) - dbh#commit (); + PGOCaml.commit dbh; (* Finish off. *) ok ~title:"Imported" - q ("Message " ^ inet_message_id ^ " was imported.") + dbh hostid q ("Message " ^ inet_message_id ^ " was imported.") let () = register_script ~restrict:[CanImportMail] run