1 (* Example program which uses LWP::UserAgent and HTML::TreeBuilder to
2 * download an HTTP page and parse it.
3 * Copyright (C) 2003 Merjis Ltd.
4 * $Id: loadpage.ml,v 1.4 2003-10-16 11:03:52 rich Exp $
11 open Pl_HTML_TreeBuilder
(* Choose the URL to fetch.  The test below holds when Sys.argv has two or
 * more entries, i.e. at least one command-line argument follows the program
 * name; the literal merjis.com URL appears to be the default otherwise.
 * NOTE(review): presumably the first argument is used as the URL when one
 * is supplied -- confirm against the body of the then-branch. *)
16 if Array.length Sys.argv >= 2 then
19 "http://www.merjis.com/" in
21 (* Create the UserAgent object.  The labelled argument ~env_proxy:true is
 * passed to the constructor; presumably this maps onto the env_proxy option
 * of Perl LWP::UserAgent (proxy settings taken from the environment) --
 * TODO confirm against the Pl_LWP_UserAgent binding. *)
22 let ua = Pl_LWP_UserAgent.new_ ~env_proxy:true () in
(* Build a GET request for [site] and send it through the user agent. *)
25 let req = Pl_HTTP_Request.new_ "GET" ~uri:site () in
26 let res = ua#request req in
(* Stop with Failure when the response does not report success; the message
 * carries both the requested site and the status line of the response. *)
28 if not res#is_success then
29 failwith ("Error while fetching " ^ site ^ ": " ^ res#status_line);
31 (* Extract the content of the page. *)
32 let content = res#content in
34 (* Parse it using HTML::TreeBuilder. *)
35 let tree = Pl_HTML_TreeBuilder.new_from_content content in
37 (* Turn the tree into an HTML::Element.  The new binding deliberately
 * shadows the TreeBuilder value bound just above, so later code only sees
 * the result of elementify. *)
38 let tree = tree#elementify in
40 (* Print out the resulting tree.
 *
 * For the element [root] the output is, in order: a Start tag line, one
 * Attr line per (name, value) pair returned by root#all_external_attr, the
 * rendering of every entry of root#content_list, and an End tag line.
 * Entries matching the Element case are handed to [print] (presumably a
 * recursive call into this same function -- confirm against the function
 * header); the remaining case prints the bound string with a String prefix.
 * NOTE(review): [tag] is bound outside the lines shown here, presumably
 * from [root] -- confirm. *)
43 let attrs = root#all_external_attr in
44 let subnodes = root#content_list in
46 printf "Start tag: %s\n" tag;
(* One tab-indented line per attribute, with the value in double quotes. *)
47 List.iter (fun (name, value) ->
48 printf "\tAttr: %s=\"%s\"\n" name value) attrs;
(* Walk the children in the order given by content_list. *)
50 List.iter (fun node ->
52 Element node -> print node
54 printf "String: %s\n" str) subnodes;
55 printf "End tag: %s\n" tag
59 (* Destroy the Perl interpreter.  The handle is obtained from
 * Perl.current_interpreter and passed straight to Perl.destroy.
 * NOTE(review): presumably no Perl-backed value (ua, res, tree, ...) may be
 * used after this call -- confirm against the Perl module documentation. *)
60 Perl.destroy (Perl.current_interpreter ());
62 (* Perform a full collection - good way to find GC/allocation bugs. *)