X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=csv.ml;h=0ff03710b43a2440be9845d23bcbad63279868b4;hb=f6dbacda332fa9a11a36b3424516409d387058a1;hp=837d2150d565b65c9c0e9d4bd61f11de665e66c7;hpb=383c2300507f7b33c4b4606fdd3b2f03a642e0fa;p=ocaml-csv.git diff --git a/csv.ml b/csv.ml index 837d215..0ff0371 100644 --- a/csv.ml +++ b/csv.ml @@ -1,6 +1,6 @@ (* csv.ml - comma separated values parser * - * $Id: csv.ml,v 1.3 2004-12-22 13:47:51 rich Exp $ + * $Id: csv.ml,v 1.7 2005-11-25 14:06:58 rich Exp $ *) (* The format of CSV files: @@ -49,6 +49,15 @@ let rec dropwhile f = function | x :: xs when f x -> dropwhile f xs | xs -> xs +(* from extlib: *) +let rec drop n = function + | _ :: l when n > 0 -> drop (n-1) l + | l -> l + +let rec take n = function + | x :: xs when n > 0 -> x :: take (pred n) xs + | _ -> [] + let lines = List.length let columns csv = @@ -94,7 +103,7 @@ let load_rows ?(separator = ',') f chan = if c != '\r' then ( (* Always ignore \r characters. *) match !state with StartField -> (* Expecting quote or other char. *) - if c = '\"' then ( + if c = '"' then ( state := InQuotedField; field := [] ) else if c = separator then (* Empty field. *) @@ -115,12 +124,12 @@ let load_rows ?(separator = ',') f chan = ) else field := c :: !field | InQuotedField -> (* Reading chars to end of field. *) - if c = '\"' then + if c = '"' then state := InQuotedFieldAfterQuote else field := c :: !field | InQuotedFieldAfterQuote -> - if c = '\"' then ( (* Doubled quote. *) + if c = '"' then ( (* Doubled quote. *) field := c :: !field; state := InQuotedField ) else if c = '0' then ( (* Quote-0 is ASCII NUL. *) @@ -131,6 +140,9 @@ let load_rows ?(separator = ',') f chan = else if c = '\n' then ( (* End of field and end of row. *) end_of_field (); end_of_row () + ) else ( (* Bad single quote in field. *) + field := c :: '"' :: !field; + state := InQuotedField ) ); (* end of match *) loop () @@ -204,6 +216,61 @@ let trim ?(top=true) ?(left=true) ?(right=true) ?(bottom=true) csv = csv +let square csv = + let columns = columns csv in + List.map ( + fun row -> + let n = List.length row in + let row = List.rev row in + let rec loop acc = function + | 0 -> acc + | i -> "" :: loop acc (i-1) + in + let row = loop row (columns - n) in + List.rev row + ) csv + +let is_square csv = + let columns = columns csv in + List.for_all (fun row -> List.length row = columns) csv + +let rec set_columns cols = function + | [] -> [] + | r :: rs -> + let rec loop i cells = + if i < cols then ( + match cells with + | [] -> "" :: loop (succ i) [] + | c :: cs -> c :: loop (succ i) cs + ) + else [] + in + loop 0 r :: set_columns cols rs + +let rec set_rows rows csv = + if rows > 0 then ( + match csv with + | [] -> [] :: set_rows (pred rows) [] + | r :: rs -> r :: set_rows (pred rows) rs + ) + else [] + +let set_size rows cols csv = + set_columns cols (set_rows rows csv) + +let sub r c rows cols csv = + let csv = drop r csv in + let csv = List.map (drop c) csv in + let csv = set_rows rows csv in + let csv = set_columns cols csv in + csv + +let to_array csv = + Array.of_list (List.map Array.of_list csv) + +let of_array csv = + List.map Array.to_list (Array.to_list csv) + let associate header data = let nr_cols = List.length header in let rec trunc = function @@ -245,9 +312,51 @@ let save_out ?(separator = ',') chan csv = output_char chan '\n') csv let print ?separator csv = - save_out ?separator stdout csv + save_out ?separator stdout csv; flush stdout let save ?separator file csv = let chan = open_out file in save_out ?separator chan csv; close_out chan + +let save_out_readable chan csv = + (* Escape all the strings in the CSV file first. *) + let csv = List.map (List.map String.escaped) csv in + + let csv = square csv in + + (* Find the width of each column. *) + let widths = + match csv with + | [] -> [] + | r :: _ -> + let n = List.length r in + let lengths = List.map (List.map String.length) csv in + let max2rows r1 r2 = + let rp = List.combine r1 r2 in + List.map (fun ((a : int), (b : int)) -> max a b) rp + in + let rec repeat x = function + | 0 -> [] + | i -> x :: repeat x (i-1) + in + List.fold_left max2rows (repeat 0 n) lengths in + + (* Print out each cell at the correct width. *) + let rec repeat f = function + | 0 -> () + | i -> f (); repeat f (i-1) + in + List.iter ( + fun row -> + let row = List.combine widths row in + List.iter ( + fun (width, cell) -> + output_string chan cell; + let n = String.length cell in + repeat (fun () -> output_char chan ' ') (width - n + 1) + ) row; + output_char chan '\n' + ) csv + +let print_readable = save_out_readable stdout