(* Example program which uses LWP::UserAgent and HTML::TreeBuilder to
 * download an HTTP page and parse it.
 * Copyright (C) 2003 Merjis Ltd.
 * $Id: loadpage.ml,v 1.5 2003-12-11 17:41:52 rich Exp $
 *)

open Printf

open Pl_LWP_UserAgent
open Pl_HTTP_Request
open Pl_HTML_TreeBuilder
open Pl_HTML_Element

(* Entry point: fetch one page over HTTP, parse it into an element tree and
   print that tree to standard output.  Raises [Failure] when the response
   does not report success. *)
let () =
  (* The page to fetch is the first command-line argument when one is
     supplied; otherwise a fixed default URL is used. *)
  let url =
    if Array.length Sys.argv < 2 then "http://www.merjis.com/"
    else Sys.argv.(1)
  in

  (* Build the user agent (with [env_proxy] switched on) and issue a GET
     request for the chosen URL. *)
  let agent = Pl_LWP_UserAgent.new_ ~env_proxy:true () in
  let request = Pl_HTTP_Request.new_ "GET" ~uri:url () in
  let response = agent#request request in

  (* Abort with the response's status line if the fetch failed. *)
  if not response#is_success then begin
    let status = response#status_line in
    failwith ("Error while fetching " ^ url ^ ": " ^ status)
  end;

  (* Hand the response body to HTML::TreeBuilder, then convert the builder
     into an HTML::Element tree. *)
  let body = response#content in
  let builder = Pl_HTML_TreeBuilder.new_from_content body in
  let document = builder#elementify in

  (* [dump element] prints the element's start tag, each of its external
     attributes, every child in order (recursing into child elements and
     printing string children directly), and finally its end tag. *)
  let rec dump element =
    let tag = element#tag in
    let attributes = element#all_external_attr in
    let children = element#content_list in
    let show_attribute (name, value) =
      printf "\tAttr: %s=\"%s\"\n" name value
    in
    let show_child = function
      | Element child -> dump child
      | String text -> printf "String: %s\n" text
    in
    printf "Start tag: %s\n" tag;
    List.iter show_attribute attributes;
    List.iter show_child children;
    printf "End tag: %s\n" tag
  in
  dump document;

  (* Force a full major collection before exiting: a cheap way to flush out
     GC/allocation bugs. *)
  Gc.full_major ()