1 (* An RSS feed reader function.
2 * $Id: cocanwiki_func_rss.ml,v 1.1 2006/12/07 17:05:47 rich Exp $
11 open Cocanwiki_extensions
12 open Cocanwiki_template
14 (* Argument is a whitespace-separated list of key=value pairs.
17 * {{rss:url=http://blog.merjis.com/feed/ max_items=3 refresh=daily}}
20 * url The URL of the RSS feed (required).
21 * show What to show. A comma-separated list of:
24 * description Blog description
25 * items Items in the feed
26 * item_title Title of items
27 * item_link Link to items
28 * item_description Description of items
29 * Default is everything.
30 * max_items Maximum number of items to show.
31 * refresh How often to recheck feed: hourly, daily, weekly,
33 * Default is daily. Warning: we only check the
34 * feed when the page is loaded, so making this
35 * too frequent will slow page loading.
37 let ws_re = Pcre.regexp "\\s+"
38 let key_re = Pcre.regexp "^(\\w+)=(.*)$"
39 let refresh_re = Pcre.regexp "^(\\d+)(h|d)$"
41 (* This exception is thrown if there is an error during processing. The
42 * string is an error message.
44 exception Error of string
46 type show = Title | Link | Description
47 | Items | Item_title | Item_link | Item_description
48 let default_show = [ Title; Link; Description;
49 Items; Item_title; Item_link; Item_description ]
51 let event_system = Unixqueue.create_unix_event_system ()
53 let connection = new Http_client.pipeline in
54 connection#set_event_system event_system;
58 prerr_endline ("fetching URL: " ^ url);
60 let msg = new Http_client.get url in
65 | Http_client.Http_protocol exn
67 (* Log the real exception in the logfile, for security. *)
68 prerr_endline (Printexc.to_string exn);
69 raise (Error ("error fetching url; " ^
70 "note that for security reasons " ^
71 "the real error is only written to error_log"))
73 (* OCamlRSS uses its own date format. *)
74 let printable_date { year = year; month = month; day = day;
75 hour = hour; minute = minute; second = second } =
76 let date = Calendar.lmake ~year ~month ~day ~hour ~minute ~second () in
77 Cocanwiki_date.printable_date_time date
79 let rss r dbh hostid arg =
80 let template = _get_template "cocanwiki_func_rss.html" in
83 (* Is RSS enabled for this host? This is a safety feature to ensure
84 * that people only turn this feature on if they really mean to.
87 PGSQL(dbh) "select enable_rss_func from hosts where id = $hostid" in
88 if rows <> [Some true] then
89 raise (Error "RSS is disabled on this host. You have to enable it manually by setting enable_rss_func for the relevant row in the hosts table. RSS introduces a potential security hole (cross-site scripting) and should not be used where you have untrusted editors or with untrusted RSS feeds.");
91 let arg = match arg with
92 | None -> raise (Error ("missing url: try {{rss:url=http://....}}"))
95 (* Parse the argument. *)
96 let args = Pcre.split ~rex:ws_re arg in
100 let subs = Pcre.exec ~rex:key_re arg in
101 let key = Pcre.get_substring subs 1 in
102 let value = Pcre.get_substring subs 2 in
106 raise (Error ("invalid argument: " ^ arg))
109 (* Get known arguments or their defaults. *)
111 try List.assoc "url" args
113 raise (Error "missing url: try {{rss:url=http://....}}") in
117 let n = List.assoc "max_items" args in
118 let n = int_of_string n in
122 | Failure "int_of_string" ->
123 raise (Error "max_items is not a number") in
127 let p = List.assoc "refresh" args in
129 | "hourly" -> Calendar.Period.hour 1
130 | "daily" -> Calendar.Period.day 1
131 | "weekly" -> Calendar.Period.week 1
134 let subs = Pcre.exec ~rex:refresh_re str in
135 let n = int_of_string (Pcre.get_substring subs 1) in
136 match Pcre.get_substring subs 2 with
137 | "h" -> Calendar.Period.hour n
138 | "d" -> Calendar.Period.day n
142 raise (Error "refresh is hourly|daily|weekly|<nn>h|<nn>d")
145 Calendar.Period.day 1 in
149 let xs = List.assoc "show" args in
150 let xs = String.nsplit xs "," in
155 | "description" -> Description
157 | "item_title" -> Item_title
158 | "item_link" -> Item_link
159 | "item_description" -> Item_description
161 raise (Error ("show: invalid item: " ^ str))
165 Not_found -> default_show in
167 (* Process the show parameter into template conditionals. *)
168 template#conditional "show_title" false;
169 template#conditional "show_link" false;
170 template#conditional "show_description" false;
171 template#conditional "show_items" false;
172 template#conditional "show_item_title" false;
173 template#conditional "show_item_link" false;
174 template#conditional "show_item_description" false;
177 | Title -> template#conditional "show_title" true
178 | Link -> template#conditional "show_link" true
179 | Description -> template#conditional "show_description" true
180 | Items -> template#conditional "show_items" true
181 | Item_title -> template#conditional "show_item_title" true
182 | Item_link -> template#conditional "show_item_link" true
183 | Item_description -> template#conditional "show_item_description" true
186 (* Grab the RSS from our database cache. If it's too old
187 * or we don't have it cached, grab it from the server.
192 "select t, rss from rss_cache where url = $url" in
193 let need_to_fetch, rss =
195 | [] -> true, "" (* not in cache, so have to fetch URL *)
198 (Calendar.add t refresh) (Calendar.now ()) < 0 then
202 if need_to_fetch then (
203 let rss = fetch_url url in
204 (* This lock is OK - refetches are supposed to be infrequent. *)
205 PGSQL(dbh) "lock table rss_cache in share mode";
206 PGSQL(dbh) "delete from rss_cache where url = $url";
207 PGSQL(dbh) "insert into rss_cache (url, rss) values ($url, $rss)";
208 PGOCaml.commit dbh; (* XXX How to avoid? *)
209 PGOCaml.begin_work dbh;
214 (* Try to parse the RSS. *)
217 channel_of_string rss
220 raise (Error ("error parsing feed: XML: " ^ Xml.error err))
222 raise (Error ("error parsing feed: RSS: " ^ err)) in
224 (* Generate some XML. *)
225 template#set "title" channel.ch_title;
226 template#set "link" channel.ch_link;
227 template#set "description" channel.ch_desc;
229 let items = channel.ch_items in
233 | Some n -> List.take n items in
234 let items = List.map (
236 [ "title", Template.VarString (Option.default "" item.item_title);
237 "link", Template.VarString (Option.default "" item.item_link);
239 Template.VarString (Option.default "" item.item_desc);
241 Template.VarString (Option.map_default
242 printable_date "" item.item_pubdate); ]
244 template#table "items" items;
249 "rss: " ^ Cgi_escape.escape_html msg
251 (* Register function. *)
253 external_functions := ("rss", rss) :: !external_functions