1 (* Load in large weblogs and see if they can still be used.
2 * $Id: test_ancient_weblogs.ml,v 1.4 2006-10-06 12:25:20 rich Exp $
(* When true, GC stats are printed before each day is processed. *)
let gc_stats = true
(* Infix shorthand for Filename.concat: [dir // name] joins a directory
 * and a file name into a single path. *)
let ( // ) dir name = Filename.concat dir name
15 else a :: range (succ a) b
17 (* Cartesian join of two lists. *)
(* [file_readable filename] is true when Unix.access grants read
 * permission (R_OK) on [filename], and false when that check raises
 * Unix.Unix_error.  Any other exception propagates to the caller. *)
let file_readable filename =
  match Unix.access filename [Unix.R_OK] with
  | () -> true
  | exception Unix.Unix_error _ -> false
(* Turn on Weblogs' quiet flag so that warning messages are suppressed. *)
let () =
  Weblogs.quiet := true
36 eprintf "compacting ... %!";
39 let stat = Gc.stat () in
40 let live_words = stat.Gc.live_words in
41 eprintf "live words = %d (%d MB)\n%!"
42 live_words (live_words * 8 / 1024 / 1024)
(* Find the list of files. Some which should exist don't, so print
 * warnings about those so we can chase them up.
49 let dir = "/home/rich/oversized-logfiles/perrys" in
51 [ "burns"; "gronholm"; "rohrl"; "sainz"; "solberg"; "vatanen" ] in
52 let dates = range 1 31 in
53 let dates = List.map (fun day -> sprintf "200608%02d" day) dates in
54 let files = cartesian drivers dates in
56 List.map (fun (driver, date) ->
57 sprintf "%s-perrys-access.log.%s.gz" driver date) files in
61 let path = dir // filename in
62 if not (file_readable path) then (
63 prerr_endline ("warning: " ^ filename ^ " not found - ignored");
70 eprintf "number of files = %d\n%!" (List.length files);
(* XXX Linux/AMD64-specific hack: a hard-coded address, used to avoid a
 * bad mmap(2) allocation.  Parsed from a string at start-up, so the
 * value must fit in a nativeint on the running platform. *)
let baseaddr : nativeint = Nativeint.of_string "0x440000000000"
80 Unix.openfile "test_ancient_weblogs.data"
81 [Unix.O_RDWR; Unix.O_CREAT; Unix.O_TRUNC] 0o644 in
82 Ancient.attach fd baseaddr
84 (* Load each file into memory and make it ancient. *)
89 let basename = Filename.basename filename in
90 eprintf "Importing logfile %s\n%!" basename;
91 let rows = Weblogs.import_file filename in
92 ignore (Ancient.share md key rows) in
100 let fd = Unix.openfile "test_ancient_weblogs.data" [Unix.O_RDWR] 0o644 in
101 let md = Ancient.attach fd 0n in
103 eprintf "Flattening ...\n%!";
105 (* Concatenate all the logs together. *)
110 let rows : Weblogs.t Ancient.ancient = Ancient.get md key in
111 let rows = Ancient.follow rows in
116 eprintf "After flattening: %!";
119 (* Detect visitors. Save to key 1023 in the file. The detect_visitors
120 * function sorts each visitor.
122 let visitors = Weblogs.detect_visitors rows in
123 ignore (Ancient.share md 1023 visitors);
125 eprintf "After detecting visitors: %!";