1 (* Example program which uses LWP::UserAgent and HTML::TreeBuilder to
2 * download an HTTP page and parse it.
3 * Copyright (C) 2003 Merjis Ltd.
4 * $Id: loadpage.ml,v 1.2 2003-10-14 16:05:21 rich Exp $
11 open Pl_HTML_TreeBuilder
15 (* Load the Perl modules used below by evaluating the corresponding
   [use] statements as Perl code.  This is a hack which shouldn't be
   needed in future. *)
16 Perl.eval "use LWP::UserAgent";
17 Perl.eval "use HTML::TreeBuilder";
(* Choose the URL to fetch.  The test below is true when at least one
   command-line argument was supplied (Sys.argv.(0) is the program name).
   NOTE(review): the binding header, the [then] branch and the [else]
   keyword are not visible in this excerpt; the literal is presumably the
   default URL and the result is presumably the [site] value used further
   down - confirm against the full file. *)
20 if Array.length Sys.argv >= 2 then
23 "http://www.merjis.com/" in
25 (* Create the UserAgent object.  [~env_proxy:true] presumably asks LWP to
   take its proxy settings from the environment - confirm against the
   LWP::UserAgent documentation. *)
26 let ua = Pl_LWP_UserAgent.new_ ~env_proxy:true () in
(* Build a GET request for [site] and send it through the user agent. *)
29 let req = Pl_HTTP_Request.new_ "GET" ~uri:site () in
30 let res = ua#request req in
(* Abort with [Failure] when the response is not a success; the message
   carries the requested URL and the status line of the response. *)
32 if not res#is_success then
33 failwith ("Error while fetching " ^ site ^ ": " ^ res#status_line);
35 (* Extract the content of the page from the response object. *)
36 let content = res#content in
38 (* Parse it using HTML::TreeBuilder. *)
39 let tree = Pl_HTML_TreeBuilder.new_from_content content in
41 (* Turn the tree into an HTML::Element.  The new binding shadows the
   builder object bound to [tree] just above. *)
42 let tree = tree#elementify in
44 (* Print out the resulting tree.
   NOTE(review): the header of the printing function is not visible in
   this excerpt.  Judging from the lines below it works on an element
   [root], is called again as [print] on child elements, and uses a [tag]
   value whose binding is also not shown - confirm against the full
   file. *)
47 let attrs = root#all_external_attr in
48 let subnodes = root#content_list in
(* Emit the start tag, then one line per attribute; each attribute is
   handled as a (name, value) pair of strings. *)
50 printf "Start tag: %s\n" tag;
51 List.iter (fun (name, value) ->
52 printf "\tAttr: %s=\"%s\"\n" name value) attrs;
(* Walk the children in order: an [Element] child is printed by calling
   [print] on it, and a string value [str] is printed on a String line.
   NOTE(review): the [match] header and the pattern that binds [str] are
   not visible in this excerpt. *)
54 List.iter (fun node ->
56 Element node -> print node
58 printf "String: %s\n" str) subnodes;
(* Close the element once all of its children have been printed. *)
59 printf "End tag: %s\n" tag
63 (* Destroy the Perl interpreter. *)