X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=csv.ml;h=b76f4b30ae8b23466f4035d9868e0e00a724982f;hb=af6726a634255b49167e1aa90d92103c3769dc90;hp=c4381ee1f6329c36e8e2b233c6a58ca582e40f2c;hpb=1619b9577d8020825aa39f66c778046448de54b5;p=ocaml-csv.git diff --git a/csv.ml b/csv.ml index c4381ee..b76f4b3 100644 --- a/csv.ml +++ b/csv.ml @@ -1,6 +1,6 @@ (* csv.ml - comma separated values parser * - * $Id: csv.ml,v 1.4 2005-01-19 17:10:19 rich Exp $ + * $Id: csv.ml,v 1.12 2006-10-18 14:56:12 rich Exp $ *) (* The format of CSV files: @@ -40,6 +40,8 @@ * subset of it into a matrix. *) +open Printf + type t = string list list exception Bad_CSV_file of string @@ -49,6 +51,15 @@ let rec dropwhile f = function | x :: xs when f x -> dropwhile f xs | xs -> xs +(* from extlib: *) +let rec drop n = function + | _ :: l when n > 0 -> drop (n-1) l + | l -> l + +let rec take n = function + | x :: xs when n > 0 -> x :: take (pred n) xs + | _ -> [] + let lines = List.length let columns csv = @@ -94,7 +105,7 @@ let load_rows ?(separator = ',') f chan = if c != '\r' then ( (* Always ignore \r characters. *) match !state with StartField -> (* Expecting quote or other char. *) - if c = '\"' then ( + if c = '"' then ( state := InQuotedField; field := [] ) else if c = separator then (* Empty field. *) @@ -115,12 +126,12 @@ let load_rows ?(separator = ',') f chan = ) else field := c :: !field | InQuotedField -> (* Reading chars to end of field. *) - if c = '\"' then + if c = '"' then state := InQuotedFieldAfterQuote else field := c :: !field | InQuotedFieldAfterQuote -> - if c = '\"' then ( (* Doubled quote. *) + if c = '"' then ( (* Doubled quote. *) field := c :: !field; state := InQuotedField ) else if c = '0' then ( (* Quote-0 is ASCII NUL. *) @@ -131,6 +142,9 @@ let load_rows ?(separator = ',') f chan = else if c = '\n' then ( (* End of field and end of row. *) end_of_field (); end_of_row () + ) else ( (* Bad single quote in field. *) + field := c :: '"' :: !field; + state := InQuotedField ) ); (* end of match *) loop () @@ -204,6 +218,114 @@ let trim ?(top=true) ?(left=true) ?(right=true) ?(bottom=true) csv = csv +let square csv = + let columns = columns csv in + List.map ( + fun row -> + let n = List.length row in + let row = List.rev row in + let rec loop acc = function + | 0 -> acc + | i -> "" :: loop acc (i-1) + in + let row = loop row (columns - n) in + List.rev row + ) csv + +let is_square csv = + let columns = columns csv in + List.for_all (fun row -> List.length row = columns) csv + +let rec set_columns cols = function + | [] -> [] + | r :: rs -> + let rec loop i cells = + if i < cols then ( + match cells with + | [] -> "" :: loop (succ i) [] + | c :: cs -> c :: loop (succ i) cs + ) + else [] + in + loop 0 r :: set_columns cols rs + +let rec set_rows rows csv = + if rows > 0 then ( + match csv with + | [] -> [] :: set_rows (pred rows) [] + | r :: rs -> r :: set_rows (pred rows) rs + ) + else [] + +let set_size rows cols csv = + set_columns cols (set_rows rows csv) + +let sub r c rows cols csv = + let csv = drop r csv in + let csv = List.map (drop c) csv in + let csv = set_rows rows csv in + let csv = set_columns cols csv in + csv + +(* Compare two rows for semantic equality - ignoring any blank cells + * at the end of each row. + *) +let rec compare_row (row1 : string list) row2 = + match row1, row2 with + | [], [] -> 0 + | x :: xs, y :: ys -> + let c = compare x y in + if c <> 0 then c else compare_row xs ys + | "" :: xs , [] -> + compare_row xs [] + | x :: xs, [] -> + 1 + | [], "" :: ys -> + compare_row [] ys + | [], y :: ys -> + -1 + +(* Semantic equality for CSV files. *) +let rec compare (csv1 : t) csv2 = + match csv1, csv2 with + | [], [] -> 0 + | x :: xs, y :: ys -> + let c = compare_row x y in + if c <> 0 then c else compare xs ys + | x :: xs, [] -> + let c = compare_row x [] in + if c <> 0 then c else compare xs [] + | [], y :: ys -> + let c = compare_row [] y in + if c <> 0 then c else compare [] ys + +(* Concatenate - arrange left to right. *) +let rec concat = function + | [] -> [] + | [csv] -> csv + | left_csv :: csvs -> + (* Concatenate the remaining CSV files. *) + let right_csv = concat csvs in + + (* Set the height of the left and right CSVs to the same. *) + let nr_rows = max (lines left_csv) (lines right_csv) in + let left_csv = set_rows nr_rows left_csv in + let right_csv = set_rows nr_rows right_csv in + + (* Square off the left CSV. *) + let left_csv = square left_csv in + + (* Prepend the right CSV rows with the left CSV rows. *) + List.map ( + fun (left_row, right_row) -> List.append left_row right_row + ) (List.combine left_csv right_csv) + +let to_array csv = + Array.of_list (List.map Array.of_list csv) + +let of_array csv = + List.map Array.to_list (Array.to_list csv) + let associate header data = let nr_cols = List.length header in let rec trunc = function @@ -251,3 +373,65 @@ let save ?separator file csv = let chan = open_out file in save_out ?separator chan csv; close_out chan + +let save_out_readable chan csv = + (* Escape all the strings in the CSV file first. *) + (* XXX Why are we doing this? I commented it out anyway. + let csv = List.map (List.map String.escaped) csv in + *) + + (* Find the width of each column. *) + let widths = + (* Don't consider rows with only a single element - typically + * long titles. + *) + let csv = List.filter (function [_] -> false | _ -> true) csv in + + (* Square the CSV file - makes the next step simpler to implement. *) + let csv = square csv in + + match csv with + | [] -> [] + | row1 :: rest -> + let lengths_row1 = List.map String.length row1 in + let lengths_rest = List.map (List.map String.length) rest in + let max2rows r1 r2 = + let rp = + try List.combine r1 r2 + with + Invalid_argument "List.combine" -> + failwith (sprintf "Csv.save_out_readable: internal error: length r1 = %d, length r2 = %d" (List.length r1) (List.length r2)) in + List.map (fun ((a : int), (b : int)) -> max a b) rp + in + List.fold_left max2rows lengths_row1 lengths_rest in + + (* Print out each cell at the correct width. *) + let rec repeat f = function + | 0 -> () + | i -> f (); repeat f (i-1) + in + List.iter ( + function + | [cell] -> (* Single column. *) + output_string chan cell; + output_char chan '\n' + | row -> (* Other. *) + (* Pair up each cell with its max width. *) + let row = + let rec loop = function + | ([], _) -> [] + | (_, []) -> failwith "Csv.save_out_readable: internal error" + | (cell :: cells, width :: widths) -> + (cell, width) :: loop (cells, widths) + in + loop (row, widths) in + List.iter ( + fun (cell, width) -> + output_string chan cell; + let n = String.length cell in + repeat (fun () -> output_char chan ' ') (width - n + 1) + ) row; + output_char chan '\n' + ) csv + +let print_readable = save_out_readable stdout