From: rich Date: Thu, 23 Feb 2006 15:24:25 +0000 (+0000) Subject: Csv.compare function. X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=bde8fd1d0283f16620624155a9bfb0b4e67feca9;p=ocaml-csv.git Csv.compare function. --- diff --git a/csv.ml b/csv.ml index 2a22bf9..efb1bc0 100644 --- a/csv.ml +++ b/csv.ml @@ -1,6 +1,6 @@ (* csv.ml - comma separated values parser * - * $Id: csv.ml,v 1.8 2006-02-15 13:25:58 rich Exp $ + * $Id: csv.ml,v 1.9 2006-02-23 15:24:25 rich Exp $ *) (* The format of CSV files: @@ -265,6 +265,38 @@ let sub r c rows cols csv = let csv = set_columns cols csv in csv +(* Compare two rows for semantic equality - ignoring any blank cells + * at the end of each row. + *) +let rec compare_row (row1 : string list) row2 = + match row1, row2 with + | [], [] -> 0 + | x :: xs, y :: ys -> + let c = compare x y in + if c <> 0 then c else compare_row xs ys + | "" :: xs , [] -> + compare_row xs [] + | x :: xs, [] -> + 1 + | [], "" :: ys -> + compare_row [] ys + | [], y :: ys -> + -1 + +(* Semantic equality for CSV files. *) +let rec compare (csv1 : t) csv2 = + match csv1, csv2 with + | [], [] -> 0 + | x :: xs, y :: ys -> + let c = compare_row x y in + if c <> 0 then c else compare xs ys + | x :: xs, [] -> + let c = compare_row x [] in + if c <> 0 then c else compare xs [] + | [], y :: ys -> + let c = compare_row [] y in + if c <> 0 then c else compare [] ys + let to_array csv = Array.of_list (List.map Array.of_list csv) diff --git a/csv.mli b/csv.mli index baf9e5c..ef061e6 100644 --- a/csv.mli +++ b/csv.mli @@ -1,6 +1,6 @@ (** csv.mli - comma separated values parser * - * $Id: csv.mli,v 1.7 2005-11-25 14:08:46 rich Exp $ + * $Id: csv.mli,v 1.8 2006-02-23 15:24:25 rich Exp $ *) type t = string list list @@ -105,6 +105,13 @@ val sub : int -> int -> int -> int -> t -> t * The returned CSV will be square. *) +val compare : t -> t -> int +(** Compare two CSV files for equality, ignoring blank cells at the end + * of a row, and empty rows appended to one or the other. This is + * "semantic" equality - roughly speaking, the two CSV files would + * look the same if opened in a spreadsheet program. + *) + val to_array : t -> string array array val of_array : string array array -> t (** Convenience functions to convert to and from a matrix representation. diff --git a/test.ml b/test.ml index 2ab02ad..a8537a5 100644 --- a/test.ml +++ b/test.ml @@ -1,4 +1,4 @@ -(* $Id: test.ml,v 1.1 2003-12-17 16:05:08 rich Exp $ *) +(* $Id: test.ml,v 1.2 2006-02-23 15:24:25 rich Exp $ *) open Printf open Csv @@ -13,16 +13,17 @@ let do_testcsv filename expected = print expected; failwith "failed" ) + else () -let testcsv1 = +let () = do_testcsv "testcsv1.csv" [ [ "This is a test\nwith commas,,,,,\n\nand carriage returns." ] ] -let testcsv2 = +let () = do_testcsv "testcsv2.csv" [ [ "Normal field"; "Quoted field"; "Quoted field with \"\" quotes" ] ] -let testcsv3 = +let () = do_testcsv "testcsv3.csv" [ [ "" ]; @@ -30,23 +31,42 @@ let testcsv3 = [ ""; ""; "" ]; [ ""; ""; ""; "" ]; [ ""; ""; ""; ""; "" ] ] -let testcsv4 = +let () = do_testcsv "testcsv4.csv" [] -let testcsv5 = +let () = do_testcsv "testcsv5.csv" [ [ "This is a test\nwith commas,,,,,\n\nand carriage returns."; "a second field"; "a third field" ]; [ "a fourth field on a new line" ] ] -let testcsv6 = +let () = do_testcsv "testcsv6.csv" [ [ "This is a test\nwith commas,,,,,\n\nand carriage returns\nand \000"; "a second field"; "a third field" ]; [ "a fourth field on a new line" ] ] +let () = + let csv1 = [ [ "a"; "b"; "c"; ""; "" ]; + [ "f"; "g"; "h"; "i"; "" ]; + [ "" ]; + [ ] ] in + let csv2 = trim ~top:false ~left:false ~right:true ~bottom:true csv1 in + assert (compare csv1 csv2 = 0) +let () = + let csv1 = [ [ "a"; "b"; "c"; ""; "" ]; + [ "f"; "g"; "h"; "i"; "" ]; + [ "" ]; + [ ] ] in + let csv2 = [ [ "a"; "b"; "c"; "d"; "" ]; + [ "f"; "g"; "h"; "i"; "" ]; + [ "" ]; + [ ] ] in + assert (compare csv1 csv2 < 0) + + ;; print_endline "All tests succeeded."