(* csv.ml - comma separated values parser
*
- * $Id: csv.ml,v 1.7 2005-11-25 14:06:58 rich Exp $
+ * $Id: csv.ml,v 1.15 2008-10-27 21:57:48 rich Exp $
*)
(* The format of CSV files:
* subset of it into a matrix.
*)
+open Printf
+
+(* Uncomment the next line to enable Extlib's List function. These
+ * avoid stack overflows on really huge CSV files.
+ *)
+(*open ExtList*)
+
type t = string list list
exception Bad_CSV_file of string
List.rev !csv
let load ?separator filename =
- let chan = open_in filename in
+ let chan, close =
+ match filename with
+ | "-" -> stdin, false
+ | filename -> open_in filename, true in
let csv = load_in ?separator chan in
- close_in chan;
- csv
+ if close then close_in chan;
+ csv
let trim ?(top=true) ?(left=true) ?(right=true) ?(bottom=true) csv =
let rec empty_row = function
let remove_left_col =
List.map (function [] -> [] | x :: xs -> xs) in
let rec loop csv =
- if empty_left_col csv then (
- let csv = remove_left_col csv in
- loop csv
- ) else csv
+ if empty_left_col csv then
+ remove_left_col csv
+ else
+ csv
in
let csv = if left then loop csv else csv in
let csv = set_columns cols csv in
csv
+(* Compare two rows for semantic equality - ignoring any blank cells
+ * at the end of each row.
+ *)
+let rec compare_row (row1 : string list) row2 =
+ match row1, row2 with
+ | [], [] -> 0
+ | x :: xs, y :: ys ->
+ let c = compare x y in
+ if c <> 0 then c else compare_row xs ys
+ | "" :: xs , [] ->
+ compare_row xs []
+ | x :: xs, [] ->
+ 1
+ | [], "" :: ys ->
+ compare_row [] ys
+ | [], y :: ys ->
+ -1
+
+(* Semantic equality for CSV files. *)
+let rec compare (csv1 : t) csv2 =
+ match csv1, csv2 with
+ | [], [] -> 0
+ | x :: xs, y :: ys ->
+ let c = compare_row x y in
+ if c <> 0 then c else compare xs ys
+ | x :: xs, [] ->
+ let c = compare_row x [] in
+ if c <> 0 then c else compare xs []
+ | [], y :: ys ->
+ let c = compare_row [] y in
+ if c <> 0 then c else compare [] ys
+
+(* Concatenate - arrange left to right. *)
+let rec concat = function
+ | [] -> []
+ | [csv] -> csv
+ | left_csv :: csvs ->
+ (* Concatenate the remaining CSV files. *)
+ let right_csv = concat csvs in
+
+ (* Set the height of the left and right CSVs to the same. *)
+ let nr_rows = max (lines left_csv) (lines right_csv) in
+ let left_csv = set_rows nr_rows left_csv in
+ let right_csv = set_rows nr_rows right_csv in
+
+ (* Square off the left CSV. *)
+ let left_csv = square left_csv in
+
+ (* Prepend the right CSV rows with the left CSV rows. *)
+ List.map (
+ fun (left_row, right_row) -> List.append left_row right_row
+ ) (List.combine left_csv right_csv)
+
let to_array csv =
Array.of_list (List.map Array.of_list csv)
let save_out_readable chan csv =
(* Escape all the strings in the CSV file first. *)
+ (* XXX Why are we doing this? I commented it out anyway.
let csv = List.map (List.map String.escaped) csv in
-
- let csv = square csv in
+ *)
(* Find the width of each column. *)
let widths =
+ (* Don't consider rows with only a single element - typically
+ * long titles.
+ *)
+ let csv = List.filter (function [_] -> false | _ -> true) csv in
+
+ (* Square the CSV file - makes the next step simpler to implement. *)
+ let csv = square csv in
+
match csv with
| [] -> []
- | r :: _ ->
- let n = List.length r in
- let lengths = List.map (List.map String.length) csv in
+ | row1 :: rest ->
+ let lengths_row1 = List.map String.length row1 in
+ let lengths_rest = List.map (List.map String.length) rest in
let max2rows r1 r2 =
- let rp = List.combine r1 r2 in
+ let rp =
+ try List.combine r1 r2
+ with
+ Invalid_argument "List.combine" ->
+ failwith (sprintf "Csv.save_out_readable: internal error: length r1 = %d, length r2 = %d" (List.length r1) (List.length r2)) in
List.map (fun ((a : int), (b : int)) -> max a b) rp
in
- let rec repeat x = function
- | 0 -> []
- | i -> x :: repeat x (i-1)
- in
- List.fold_left max2rows (repeat 0 n) lengths in
+ List.fold_left max2rows lengths_row1 lengths_rest in
(* Print out each cell at the correct width. *)
let rec repeat f = function
| i -> f (); repeat f (i-1)
in
List.iter (
- fun row ->
- let row = List.combine widths row in
- List.iter (
- fun (width, cell) ->
- output_string chan cell;
- let n = String.length cell in
- repeat (fun () -> output_char chan ' ') (width - n + 1)
- ) row;
- output_char chan '\n'
+ function
+ | [cell] -> (* Single column. *)
+ output_string chan cell;
+ output_char chan '\n'
+ | row -> (* Other. *)
+ (* Pair up each cell with its max width. *)
+ let row =
+ let rec loop = function
+ | ([], _) -> []
+ | (_, []) -> failwith "Csv.save_out_readable: internal error"
+ | (cell :: cells, width :: widths) ->
+ (cell, width) :: loop (cells, widths)
+ in
+ loop (row, widths) in
+ List.iter (
+ fun (cell, width) ->
+ output_string chan cell;
+ let n = String.length cell in
+ repeat (fun () -> output_char chan ' ') (width - n + 1)
+ ) row;
+ output_char chan '\n'
) csv
let print_readable = save_out_readable stdout