1 (* Load in large weblogs and see if they can still be used.
2 * $Id: test_ancient_weblogs.ml,v 1.1 2006-09-27 14:05:07 rich Exp $
(* When [true], GC statistics are printed before each day is processed. *)
let gc_stats = true

(* [dir // name] joins two path components; shorthand for
 * [Filename.concat dir name].
 *)
let ( // ) dir name = Filename.concat dir name
15 else a :: range (succ a) b
17 (* Cartesian join of two lists. *)
(* [file_readable filename] is [true] when [Unix.access] grants read
 * permission on [filename], and [false] when that check raises
 * [Unix.Unix_error].  Any other exception is propagated.
 *)
let file_readable filename =
  match Unix.access filename [Unix.R_OK] with
  | () -> true
  | exception Unix.Unix_error _ -> false
(* Turn on the Weblogs [quiet] flag at start-up so that warning
 * messages are suppressed for the rest of the run.
 *)
let () = Weblogs.quiet := true
36 eprintf "compacting ... %!";
(* Report the number of live heap words and the approximate size in MB.
 * The word size is taken from [Sys.word_size] (bits per word) instead
 * of assuming 8 bytes per word, so the MB figure is also right on
 * 32-bit platforms.  [Gc.stat] is used rather than a cheaper call
 * because [live_words] is only computed by the full statistics pass.
 *)
let stat = Gc.stat () in
let live_words = stat.Gc.live_words in
let bytes_per_word = Sys.word_size / 8 in
eprintf "live words = %d (%d MB)\n%!"
  live_words (live_words * bytes_per_word / 1024 / 1024)
45 (* Find the list of files. Some which should exist don't, so
46 * print warnings about those so we can chase them up.
49 let dir = "/home/rich/oversized-logfiles/perrys" in
51 [ "burns"; "gronholm"; "rohrl"; "sainz"; "solberg"; "vatanen" ] in
52 let dates = range 1 31 in
53 let dates = List.map (fun day -> sprintf "200608%02d" day) dates in
54 let files = cartesian drivers dates in
56 List.map (fun (driver, date) ->
57 sprintf "%s-perrys-access.log.%s.gz" driver date) files in
61 let path = dir // filename in
62 if not (file_readable path) then (
63 prerr_endline ("warning: " ^ filename ^ " not found - ignored");
70 eprintf "number of files = %d\n%!" (List.length files);
74 (* Load each file into memory and make it ancient. *)
79 eprintf "Importing file %s\n%!" filename;
81 let rows = Weblogs.import_file filename in