1 (* An RSS feed reader function.
2 * $Id: cocanwiki_func_rss.ml,v 1.3 2006/12/08 14:10:37 rich Exp $
11 open Cocanwiki_extensions
12 open Cocanwiki_template
14 (* Argument is a whitespace-separated list of key=value pairs.
17 * {{rss:url=http://blog.merjis.com/feed/ max_items=3 refresh=daily}}
20 * url The URL of the RSS feed (required).
21 * show What to show. A comma-separated list of:
24 * description Blog description
25 * items Items in the feed
26 * item_title Title of items
27 * item_date Date and time of items
28 * item_link Link to items
29 * item_description Description of items
30 * Default is everything.
31 * max_items Maximum number of items to show.
32 * refresh How often to recheck feed: hourly, daily, weekly,
34 * Default is daily. Warning: we only check the
35 * feed when the page is loaded, so making this
36 * too frequent will slow page loading.
38 let ws_re = Pcre.regexp "\\s+"
39 let key_re = Pcre.regexp "^(\\w+)=(.*)$"
40 let refresh_re = Pcre.regexp "^(\\d+)(h|d)$"
42 (* This exception is thrown if there is an error during processing. The
43 * string is an error message.
45 exception Error of string
47 type show = Title | Link | Description
48 | Items | Item_title | Item_date | Item_link | Item_description
49 let default_show = [ Title; Link; Description;
50 Items; Item_title; Item_date;
51 Item_link; Item_description ]
53 let event_system = Unixqueue.create_unix_event_system ()
55 let connection = new Http_client.pipeline in
56 connection#set_event_system event_system;
60 prerr_endline ("fetching URL: " ^ url);
62 let msg = new Http_client.get url in
67 | Http_client.Http_protocol exn
69 (* Log the real exception in the logfile, for security. *)
70 prerr_endline (Printexc.to_string exn);
71 raise (Error ("error fetching url; " ^
72 "note that for security reasons " ^
73 "the real error is only written to error_log"))
75 (* OCamlRSS uses its own date format. *)
76 let printable_date { year = year; month = month; day = day;
77 hour = hour; minute = minute; second = second } =
78 let date = Calendar.lmake ~year ~month ~day ~hour ~minute ~second () in
79 Cocanwiki_date.printable_date_time date
81 let rss r dbh hostid arg =
82 let template = _get_template "cocanwiki_func_rss.html" in
85 (* Is RSS enabled for this host? This is a safety feature to ensure
86 * that people only turn this feature on if they really mean to.
89 PGSQL(dbh) "select enable_rss_func from hosts where id = $hostid" in
90 if rows <> [true] then
91 raise (Error "RSS is disabled on this host. You have to enable it manually by setting enable_rss_func for the relevant row in the hosts table. RSS introduces a potential security hole (cross-site scripting) and should not be used where you have untrusted editors or with untrusted RSS feeds.");
93 let arg = match arg with
94 | None -> raise (Error ("missing url: try {{rss:url=http://....}}"))
97 (* Parse the argument. *)
98 let args = Pcre.split ~rex:ws_re arg in
102 let subs = Pcre.exec ~rex:key_re arg in
103 let key = Pcre.get_substring subs 1 in
104 let value = Pcre.get_substring subs 2 in
108 raise (Error ("invalid argument: " ^ arg))
111 (* Get known arguments or their defaults. *)
113 try List.assoc "url" args
115 raise (Error "missing url: try {{rss:url=http://....}}") in
119 let n = List.assoc "max_items" args in
120 let n = int_of_string n in
124 | Failure "int_of_string" ->
125 raise (Error "max_items is not a number") in
129 let p = List.assoc "refresh" args in
131 | "hourly" -> Calendar.Period.hour 1
132 | "daily" -> Calendar.Period.day 1
133 | "weekly" -> Calendar.Period.week 1
136 let subs = Pcre.exec ~rex:refresh_re str in
137 let n = int_of_string (Pcre.get_substring subs 1) in
138 match Pcre.get_substring subs 2 with
139 | "h" -> Calendar.Period.hour n
140 | "d" -> Calendar.Period.day n
144 raise (Error "refresh is hourly|daily|weekly|<nn>h|<nn>d")
147 Calendar.Period.day 1 in
151 let xs = List.assoc "show" args in
152 let xs = String.nsplit xs "," in
157 | "description" -> Description
159 | "item_title" -> Item_title
160 | "item_date" -> Item_date
161 | "item_link" -> Item_link
162 | "item_description" -> Item_description
164 raise (Error ("show: invalid item: " ^ str))
168 Not_found -> default_show in
170 (* Process the show parameter into template conditionals. *)
171 template#conditional "show_title" false;
172 template#conditional "show_link" false;
173 template#conditional "show_description" false;
174 template#conditional "show_items" false;
175 template#conditional "show_item_title" false;
176 template#conditional "show_item_date" false;
177 template#conditional "show_item_link" false;
178 template#conditional "show_item_description" false;
181 | Title -> template#conditional "show_title" true
182 | Link -> template#conditional "show_link" true
183 | Description -> template#conditional "show_description" true
184 | Items -> template#conditional "show_items" true
185 | Item_title -> template#conditional "show_item_title" true
186 | Item_date -> template#conditional "show_item_date" true
187 | Item_link -> template#conditional "show_item_link" true
188 | Item_description -> template#conditional "show_item_description" true
191 (* Grab the RSS from our database cache. If it's too old
192 * or we don't have it cached, grab it from the server.
197 "select t, rss from rss_cache where url = $url" in
198 let need_to_fetch, rss =
200 | [] -> true, None (* not in cache, so have to fetch URL *)
203 (Calendar.add t refresh) (Calendar.now ()) < 0 then
207 if need_to_fetch then (
208 (* If the fetch fails, keep using the old cached RSS.
209 * fetch_url prints the error in the error log in this case.
213 with Error _ as exn ->
216 | None -> raise exn in
218 (* This lock is OK - refetches are supposed to be infrequent. *)
219 PGSQL(dbh) "lock table rss_cache in share mode";
220 PGSQL(dbh) "delete from rss_cache where url = $url";
221 PGSQL(dbh) "insert into rss_cache (url, rss) values ($url, $rss)";
222 PGOCaml.commit dbh; (* XXX How to avoid? *)
223 PGOCaml.begin_work dbh;
228 | None -> assert false
231 (* Try to parse the RSS. *)
234 channel_of_string rss
237 raise (Error ("error parsing feed: XML: " ^ Xml.error err))
239 raise (Error ("error parsing feed: RSS: " ^ err)) in
241 (* Generate some XML. *)
242 template#set "title" channel.ch_title;
243 template#set "link" channel.ch_link;
244 template#set "description" channel.ch_desc;
246 let items = channel.ch_items in
250 | Some n -> List.take n items in
251 let items = List.map (
253 [ "title", Template.VarString (Option.default "" item.item_title);
254 "link", Template.VarString (Option.default "" item.item_link);
256 Template.VarString (Option.default "" item.item_desc);
258 Template.VarString (Option.map_default
259 printable_date "" item.item_pubdate); ]
261 template#table "items" items;
266 "rss: " ^ Cgi_escape.escape_html msg
268 (* Register function. *)
270 external_functions := ("rss", rss) :: !external_functions