X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=csv.ml;h=82d8d02f218ca5df9f07b49e4f01a4b4d6542298;hb=3f5ce21cf955364c5b665ac98dab8545b3f1c6b3;hp=93da2c46e0d02f73d265d4c259bda9223c25529b;hpb=f04d2c293ee8ee855fd28e7fa51e5789d8b43e5c;p=ocaml-csv.git diff --git a/csv.ml b/csv.ml index 93da2c4..82d8d02 100644 --- a/csv.ml +++ b/csv.ml @@ -1,6 +1,6 @@ (* csv.ml - comma separated values parser * - * $Id: csv.ml,v 1.2 2004-12-06 17:40:50 rich Exp $ + * $Id: csv.ml,v 1.10 2006-04-21 12:44:42 rich Exp $ *) (* The format of CSV files: @@ -40,6 +40,8 @@ * subset of it into a matrix. *) +open Printf + type t = string list list exception Bad_CSV_file of string @@ -49,6 +51,15 @@ let rec dropwhile f = function | x :: xs when f x -> dropwhile f xs | xs -> xs +(* from extlib: *) +let rec drop n = function + | _ :: l when n > 0 -> drop (n-1) l + | l -> l + +let rec take n = function + | x :: xs when n > 0 -> x :: take (pred n) xs + | _ -> [] + let lines = List.length let columns csv = @@ -59,7 +70,7 @@ type state_t = StartField | InQuotedField | InQuotedFieldAfterQuote -let load_rows f chan = +let load_rows ?(separator = ',') f chan = let row = ref [] in (* Current row. *) let field = ref [] in (* Current field. *) let state = ref StartField in (* Current state. *) @@ -94,10 +105,10 @@ let load_rows f chan = if c != '\r' then ( (* Always ignore \r characters. *) match !state with StartField -> (* Expecting quote or other char. *) - if c = '\"' then ( + if c = '"' then ( state := InQuotedField; field := [] - ) else if c = ',' then (* Empty field. *) + ) else if c = separator then (* Empty field. *) empty_field () else if c = '\n' then ( (* Empty field, end of row. *) empty_field (); @@ -107,7 +118,7 @@ let load_rows f chan = field := [c] ) | InUnquotedField -> (* Reading chars to end of field. *) - if c = ',' then (* End of field. *) + if c = separator then (* End of field. *) end_of_field () else if c = '\n' then ( (* End of field and end of row. *) end_of_field (); @@ -115,22 +126,25 @@ let load_rows f chan = ) else field := c :: !field | InQuotedField -> (* Reading chars to end of field. *) - if c = '\"' then + if c = '"' then state := InQuotedFieldAfterQuote else field := c :: !field | InQuotedFieldAfterQuote -> - if c = '\"' then ( (* Doubled quote. *) + if c = '"' then ( (* Doubled quote. *) field := c :: !field; state := InQuotedField ) else if c = '0' then ( (* Quote-0 is ASCII NUL. *) field := '\000' :: !field; state := InQuotedField - ) else if c = ',' then (* End of field. *) + ) else if c = separator then (* End of field. *) end_of_field () else if c = '\n' then ( (* End of field and end of row. *) end_of_field (); end_of_row () + ) else ( (* Bad single quote in field. *) + field := c :: '"' :: !field; + state := InQuotedField ) ); (* end of match *) loop () @@ -150,17 +164,17 @@ let load_rows f chan = raise (Bad_CSV_file "Missing end quote after quoted field.") ) -let load_in chan = +let load_in ?separator chan = let csv = ref [] in let f row = csv := row :: !csv in - load_rows f chan; + load_rows ?separator f chan; List.rev !csv -let load filename = +let load ?separator filename = let chan = open_in filename in - let csv = load_in chan in + let csv = load_in ?separator chan in close_in chan; csv @@ -204,35 +218,199 @@ let trim ?(top=true) ?(left=true) ?(right=true) ?(bottom=true) csv = csv -(* Quote a single CSV field. *) -let quote_field field = - if String.contains field ',' || - String.contains field '\"' || - String.contains field '\n' - then ( - let buffer = Buffer.create 100 in - Buffer.add_char buffer '\"'; - for i = 0 to (String.length field) - 1 do - match field.[i] with - '\"' -> Buffer.add_string buffer "\"\"" - | c -> Buffer.add_char buffer c - done; - Buffer.add_char buffer '\"'; - Buffer.contents buffer - ) - else - field - -let save_out chan csv = +let square csv = + let columns = columns csv in + List.map ( + fun row -> + let n = List.length row in + let row = List.rev row in + let rec loop acc = function + | 0 -> acc + | i -> "" :: loop acc (i-1) + in + let row = loop row (columns - n) in + List.rev row + ) csv + +let is_square csv = + let columns = columns csv in + List.for_all (fun row -> List.length row = columns) csv + +let rec set_columns cols = function + | [] -> [] + | r :: rs -> + let rec loop i cells = + if i < cols then ( + match cells with + | [] -> "" :: loop (succ i) [] + | c :: cs -> c :: loop (succ i) cs + ) + else [] + in + loop 0 r :: set_columns cols rs + +let rec set_rows rows csv = + if rows > 0 then ( + match csv with + | [] -> [] :: set_rows (pred rows) [] + | r :: rs -> r :: set_rows (pred rows) rs + ) + else [] + +let set_size rows cols csv = + set_columns cols (set_rows rows csv) + +let sub r c rows cols csv = + let csv = drop r csv in + let csv = List.map (drop c) csv in + let csv = set_rows rows csv in + let csv = set_columns cols csv in + csv + +(* Compare two rows for semantic equality - ignoring any blank cells + * at the end of each row. + *) +let rec compare_row (row1 : string list) row2 = + match row1, row2 with + | [], [] -> 0 + | x :: xs, y :: ys -> + let c = compare x y in + if c <> 0 then c else compare_row xs ys + | "" :: xs , [] -> + compare_row xs [] + | x :: xs, [] -> + 1 + | [], "" :: ys -> + compare_row [] ys + | [], y :: ys -> + -1 + +(* Semantic equality for CSV files. *) +let rec compare (csv1 : t) csv2 = + match csv1, csv2 with + | [], [] -> 0 + | x :: xs, y :: ys -> + let c = compare_row x y in + if c <> 0 then c else compare xs ys + | x :: xs, [] -> + let c = compare_row x [] in + if c <> 0 then c else compare xs [] + | [], y :: ys -> + let c = compare_row [] y in + if c <> 0 then c else compare [] ys + +let to_array csv = + Array.of_list (List.map Array.of_list csv) + +let of_array csv = + List.map Array.to_list (Array.to_list csv) + +let associate header data = + let nr_cols = List.length header in + let rec trunc = function + | 0, _ -> [] + | n, [] -> "" :: trunc (n-1, []) + | n, (x :: xs) -> x :: trunc (n-1, xs) + in + List.map ( + fun row -> + let row = trunc (nr_cols, row) in + List.combine header row + ) data + +let save_out ?(separator = ',') chan csv = + (* Quote a single CSV field. *) + let quote_field field = + if String.contains field separator || + String.contains field '\"' || + String.contains field '\n' + then ( + let buffer = Buffer.create 100 in + Buffer.add_char buffer '\"'; + for i = 0 to (String.length field) - 1 do + match field.[i] with + '\"' -> Buffer.add_string buffer "\"\"" + | c -> Buffer.add_char buffer c + done; + Buffer.add_char buffer '\"'; + Buffer.contents buffer + ) + else + field + in + + let separator = String.make 1 separator in List.iter (fun line -> - output_string chan (String.concat "," + output_string chan (String.concat separator (List.map quote_field line)); output_char chan '\n') csv -let print csv = - save_out stdout csv +let print ?separator csv = + save_out ?separator stdout csv; flush stdout -let save file csv = +let save ?separator file csv = let chan = open_out file in - save_out chan csv; + save_out ?separator chan csv; close_out chan + +let save_out_readable chan csv = + (* Escape all the strings in the CSV file first. *) + (* XXX Why are we doing this? I commented it out anyway. + let csv = List.map (List.map String.escaped) csv in + *) + + (* Find the width of each column. *) + let widths = + (* Don't consider rows with only a single element - typically + * long titles. + *) + let csv = List.filter (function [_] -> false | _ -> true) csv in + + (* Square the CSV file - makes the next step simpler to implement. *) + let csv = square csv in + + match csv with + | [] -> [] + | row1 :: rest -> + let lengths_row1 = List.map String.length row1 in + let lengths_rest = List.map (List.map String.length) rest in + let max2rows r1 r2 = + let rp = + try List.combine r1 r2 + with + Invalid_argument "List.combine" -> + failwith "Csv.save_out_readable: internal error: length r1 = %d, length r2 = %d" (List.length r1) (List.length r2) in + List.map (fun ((a : int), (b : int)) -> max a b) rp + in + List.fold_left max2rows lengths_row1 lengths_rest in + + (* Print out each cell at the correct width. *) + let rec repeat f = function + | 0 -> () + | i -> f (); repeat f (i-1) + in + List.iter ( + function + | [cell] -> (* Single column. *) + output_string chan cell; + output_char chan '\n' + | row -> (* Other. *) + (* Pair up each cell with its max width. *) + let row = + let rec loop = function + | ([], _) -> [] + | (_, []) -> failwith "Csv.save_out_readable: internal error" + | (cell :: cells, width :: widths) -> + (cell, width) :: loop (cells, widths) + in + loop (row, widths) in + List.iter ( + fun (cell, width) -> + output_string chan cell; + let n = String.length cell in + repeat (fun () -> output_char chan ' ') (width - n + 1) + ) row; + output_char chan '\n' + ) csv + +let print_readable = save_out_readable stdout