X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=csv.ml;h=0b6e649f7446c22768b44a8b6338c0f60245b35c;hb=3a2b495714080c9f72b89cada8bc45b22768a059;hp=2a22bf983eef6e3e372b825de554e0e08eee65fe;hpb=ae6f56ba8e80a1c05aa076404dd14145b306780a;p=ocaml-csv.git diff --git a/csv.ml b/csv.ml index 2a22bf9..0b6e649 100644 --- a/csv.ml +++ b/csv.ml @@ -1,6 +1,6 @@ (* csv.ml - comma separated values parser * - * $Id: csv.ml,v 1.8 2006-02-15 13:25:58 rich Exp $ + * $Id: csv.ml,v 1.14 2006-11-24 15:49:24 rich Exp $ *) (* The format of CSV files: @@ -40,6 +40,13 @@ * subset of it into a matrix. *) +open Printf + +(* Uncomment the next line to enable Extlib's List function. These + * avoid stack overflows on really huge CSV files. + *) +(*open ExtList*) + type t = string list list exception Bad_CSV_file of string @@ -171,10 +178,13 @@ let load_in ?separator chan = List.rev !csv let load ?separator filename = - let chan = open_in filename in + let chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in let csv = load_in ?separator chan in - close_in chan; - csv + if close then close_in chan; + csv let trim ?(top=true) ?(left=true) ?(right=true) ?(bottom=true) csv = let rec empty_row = function @@ -265,6 +275,59 @@ let sub r c rows cols csv = let csv = set_columns cols csv in csv +(* Compare two rows for semantic equality - ignoring any blank cells + * at the end of each row. + *) +let rec compare_row (row1 : string list) row2 = + match row1, row2 with + | [], [] -> 0 + | x :: xs, y :: ys -> + let c = compare x y in + if c <> 0 then c else compare_row xs ys + | "" :: xs , [] -> + compare_row xs [] + | x :: xs, [] -> + 1 + | [], "" :: ys -> + compare_row [] ys + | [], y :: ys -> + -1 + +(* Semantic equality for CSV files. *) +let rec compare (csv1 : t) csv2 = + match csv1, csv2 with + | [], [] -> 0 + | x :: xs, y :: ys -> + let c = compare_row x y in + if c <> 0 then c else compare xs ys + | x :: xs, [] -> + let c = compare_row x [] in + if c <> 0 then c else compare xs [] + | [], y :: ys -> + let c = compare_row [] y in + if c <> 0 then c else compare [] ys + +(* Concatenate - arrange left to right. *) +let rec concat = function + | [] -> [] + | [csv] -> csv + | left_csv :: csvs -> + (* Concatenate the remaining CSV files. *) + let right_csv = concat csvs in + + (* Set the height of the left and right CSVs to the same. *) + let nr_rows = max (lines left_csv) (lines right_csv) in + let left_csv = set_rows nr_rows left_csv in + let right_csv = set_rows nr_rows right_csv in + + (* Square off the left CSV. *) + let left_csv = square left_csv in + + (* Prepend the right CSV rows with the left CSV rows. *) + List.map ( + fun (left_row, right_row) -> List.append left_row right_row + ) (List.combine left_csv right_csv) + let to_array csv = Array.of_list (List.map Array.of_list csv) @@ -337,18 +400,18 @@ let save_out_readable chan csv = match csv with | [] -> [] - | r :: _ -> - let n = List.length r in - let lengths = List.map (List.map String.length) csv in + | row1 :: rest -> + let lengths_row1 = List.map String.length row1 in + let lengths_rest = List.map (List.map String.length) rest in let max2rows r1 r2 = - let rp = List.combine r1 r2 in + let rp = + try List.combine r1 r2 + with + Invalid_argument "List.combine" -> + failwith (sprintf "Csv.save_out_readable: internal error: length r1 = %d, length r2 = %d" (List.length r1) (List.length r2)) in List.map (fun ((a : int), (b : int)) -> max a b) rp in - let rec repeat x = function - | 0 -> [] - | i -> x :: repeat x (i-1) - in - List.fold_left max2rows (repeat 0 n) lengths in + List.fold_left max2rows lengths_row1 lengths_rest in (* Print out each cell at the correct width. *) let rec repeat f = function @@ -361,9 +424,17 @@ let save_out_readable chan csv = output_string chan cell; output_char chan '\n' | row -> (* Other. *) - let row = List.combine widths row in + (* Pair up each cell with its max width. *) + let row = + let rec loop = function + | ([], _) -> [] + | (_, []) -> failwith "Csv.save_out_readable: internal error" + | (cell :: cells, width :: widths) -> + (cell, width) :: loop (cells, widths) + in + loop (row, widths) in List.iter ( - fun (width, cell) -> + fun (cell, width) -> output_string chan cell; let n = String.length cell in repeat (fun () -> output_char chan ' ') (width - n + 1)