(* COCANWIKI - a wiki written in Objective CAML.
* Written by Richard W.M. Jones <rich@merjis.com>.
* Copyright (C) 2004 Merjis Ltd.
- * $Id: cocanwiki_links.ml,v 1.2 2004/09/28 11:28:39 rich Exp $
+ * $Id: cocanwiki_links.ml,v 1.3 2004/10/07 12:22:11 rich Exp $
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*)
open ExtString
+open ExtList
(* Matches either one complete tag (shortest run from < to >, which may
 * span newlines because of DOTALL) or a run of characters containing
 * no <. *)
let split_tags_re = Pcre.regexp ~flags:[`DOTALL] "<.*?>|[^<]+"
(* Matches the attribute that marks an anchor with the internal class. *)
let internal_re = Pcre.regexp "class=\"internal\""
+let newpage_re = Pcre.regexp "class=\"newpage\"" (* attribute marking an anchor with the newpage class *)
(* Captures, as group 1, the href value after its leading slash; only
 * href values that begin with a slash match. *)
let href_re = Pcre.regexp "href=\"/(.*?)\""
+let title_re = Pcre.regexp "title=\"(.*?)\"" (* captures the title attribute value as group 1 *)
(* [get_links_from_section dbh hostid content] converts [content] to
 * XHTML with [Wikilib.xhtml_of_content], keeps only the pieces that
 * start with an opening a tag, and returns the link targets found in
 * those tags as a list of strings.
 *
 * [dbh] and [hostid] are passed through unchanged to the Wikilib calls.
 *
 * NOTE(review): the result of [xhtml_of_content] is fed straight into
 * [List.filter], so it is used here as a list of strings - presumably
 * one element per tag or text run; confirm against Wikilib.
 *)
let get_links_from_section dbh hostid content =
let html = Wikilib.xhtml_of_content dbh hostid content in
(* Only interested in the <a> tags. *)
let html = List.filter (fun str -> String.starts_with str "<a ") html in
- (* Only interested in the tags with class="internal". *)
- let html =
- List.filter (fun str ->
- Pcre.pmatch ~rex:internal_re str
- && Pcre.pmatch ~rex:href_re str)
- html in
-
- (* Extract the URL names. *)
- let links = List.map (fun str ->
- let subs =
- try Pcre.exec ~rex:href_re str
- with Not_found -> assert false in
- Pcre.get_substring subs 1) html in
-
- (* Return the list of links. *)
- links
+ (* Only interested in the tags with class="internal" or class="newpage". *)
+ let internal_links = (* href targets of anchors matching internal_re *)
+ let html = (* shadows the outer html only inside this binding *)
+ List.filter (fun str ->
+ Pcre.pmatch ~rex:internal_re str
+ && Pcre.pmatch ~rex:href_re str)
+ html in
+
+ (* Extract the URL names. *)
+ List.map (fun str ->
+ let subs =
+ try Pcre.exec ~rex:href_re str
+ with Not_found -> assert false in (* unreachable: the filter above already matched href_re *)
+ Pcre.get_substring subs 1) html in (* group 1 is the href value after the leading slash *)
+
+ let newpage_links = (* URLs derived from the titles of anchors matching newpage_re *)
+ let html = (* filters the full anchor list again, not the internal-only one *)
+ List.filter (fun str ->
+ Pcre.pmatch ~rex:newpage_re str
+ && Pcre.pmatch ~rex:title_re str)
+ html in
+
+ (* Extract the titles. *)
+ let titles =
+ List.map (fun str ->
+ let subs =
+ try Pcre.exec ~rex:title_re str
+ with Not_found -> assert false in (* unreachable: the filter above already matched title_re *)
+ Pcre.get_substring subs 1) html in
+
+ (* Map the titles to URLs. *)
+ List.filter_map (* NOTE(review): presumably supplied by the ExtList open added in this change - confirm *)
+ (fun title ->
+ match Wikilib.generate_url_of_title dbh hostid title with
+ | Wikilib.GenURL_OK url -> Some url
+ | _ -> None) titles in (* every result other than GenURL_OK drops the link *)
+
+ (* Return the complete list of links. *)
+ internal_links @ newpage_links (* internal links first, then newpage links *)
let insert_link dbh hostid from_url to_url =
if from_url <> to_url then (