From: rich Date: Thu, 17 Feb 2005 15:51:47 +0000 (+0000) Subject: Csv.square, Csv.print_readable functions. X-Git-Url: http://git.annexia.org/?a=commitdiff_plain;h=7a5c5674921367188fedab926582a39c3e17380c;p=ocaml-csv.git Csv.square, Csv.print_readable functions. Added some example code. --- diff --git a/.cvsignore b/.cvsignore index 08380bc..4a21178 100644 --- a/.cvsignore +++ b/.cvsignore @@ -4,4 +4,5 @@ *.cmx *.cmxa test +example ocaml-csv-*.tar.gz \ No newline at end of file diff --git a/.depend b/.depend index d77da96..1c6b70e 100644 --- a/.depend +++ b/.depend @@ -1,4 +1,6 @@ csv.cmo: csv.cmi csv.cmx: csv.cmi +example.cmo: csv.cmi +example.cmx: csv.cmx test.cmo: csv.cmi test.cmx: csv.cmx diff --git a/Makefile b/Makefile index 24445b2..618a3a7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -# $Id: Makefile,v 1.2 2003-12-22 10:07:19 rich Exp $ +# $Id: Makefile,v 1.3 2005-02-17 15:51:47 rich Exp $ PACKAGE := ocaml-csv -VERSION := 1.0.1 +VERSION := 1.0.2 OCAMLC := ocamlc OCAMLCINCS := @@ -16,7 +16,7 @@ OCAMLOPTLIBS := OBJS := csv.cmo XOBJS := $(OBJS:.cmo=.cmx) -all: csv.cma csv.cmxa +all: csv.cma csv.cmxa example csv.cma: $(OBJS) $(OCAMLC) $(OCAMLCFLAGS) -a -o $@ $^ @@ -24,8 +24,11 @@ csv.cma: $(OBJS) csv.cmxa: $(XOBJS) $(OCAMLOPT) $(OCAMLOPTFLAGS) -a -o $@ $^ +example: csv.cma example.ml + $(OCAMLC) $^ -o $@ + test: csv.cma test.ml - $(OCAMLC) csv.cma test.ml -o test + $(OCAMLC) $^ -o $@ ./test # Common rules for building OCaml objects. diff --git a/csv.ml b/csv.ml index c4381ee..a5207e7 100644 --- a/csv.ml +++ b/csv.ml @@ -1,6 +1,6 @@ (* csv.ml - comma separated values parser * - * $Id: csv.ml,v 1.4 2005-01-19 17:10:19 rich Exp $ + * $Id: csv.ml,v 1.5 2005-02-17 15:51:47 rich Exp $ *) (* The format of CSV files: @@ -204,6 +204,20 @@ let trim ?(top=true) ?(left=true) ?(right=true) ?(bottom=true) csv = csv +let square csv = + let columns = columns csv in + List.map ( + fun row -> + let n = List.length row in + let row = List.rev row in + let rec loop acc = function + | 0 -> acc + | i -> "" :: loop acc (i-1) + in + let row = loop row (columns - n) in + List.rev row + ) csv + let associate header data = let nr_cols = List.length header in let rec trunc = function @@ -251,3 +265,45 @@ let save ?separator file csv = let chan = open_out file in save_out ?separator chan csv; close_out chan + +let save_out_readable chan csv = + (* Escape all the strings in the CSV file first. *) + let csv = List.map (List.map String.escaped) csv in + + let csv = square csv in + + (* Find the width of each column. *) + let widths = + match csv with + | [] -> [] + | r :: _ -> + let n = List.length r in + let lengths = List.map (List.map String.length) csv in + let max2rows r1 r2 = + let rp = List.combine r1 r2 in + List.map (fun ((a : int), (b : int)) -> max a b) rp + in + let rec repeat x = function + | 0 -> [] + | i -> x :: repeat x (i-1) + in + List.fold_left max2rows (repeat 0 n) lengths in + + (* Print out each cell at the correct width. *) + let rec repeat f = function + | 0 -> () + | i -> f (); repeat f (i-1) + in + List.iter ( + fun row -> + let row = List.combine widths row in + List.iter ( + fun (width, cell) -> + output_string chan cell; + let n = String.length cell in + repeat (fun () -> output_char chan ' ') (width - n + 1) + ) row; + output_char chan '\n' + ) csv + +let print_readable = save_out_readable stdout diff --git a/csv.mli b/csv.mli index 589e195..d4d8cd9 100644 --- a/csv.mli +++ b/csv.mli @@ -1,6 +1,6 @@ (** csv.mli - comma separated values parser * - * $Id: csv.mli,v 1.3 2004-12-22 13:47:51 rich Exp $ + * $Id: csv.mli,v 1.4 2005-02-17 15:51:47 rich Exp $ *) type t = string list list @@ -65,6 +65,13 @@ val trim : ?top:bool -> ?left:bool -> ?right:bool -> ?bottom:bool -> t -> t * each row in isolation. *) +val square : t -> t +(** Make the CSV data "square" (actually rectangular). This pads out + * each row with empty cells so that all rows are the same length as + * the longest row. After this operation, every row will have length + * {!columns}. + *) + val associate : string list -> t -> (string * string) list list (** [associate header data] takes a block of data and converts each * row in turn into an assoc list which maps column header to data cell. @@ -110,3 +117,13 @@ val save_out : ?separator:char -> out_channel -> t -> unit val save : ?separator:char -> string -> t -> unit (** Save string list list to a file. *) + +val print_readable : t -> unit +(** Print the CSV data to [stdout] in a human-readable format. Not much + * is guaranteed about how the CSV is printed, except that it will be + * easier to follow than a "raw" output done with {!print}. This is + * a one-way operation. There is no easy way to parse the output of + * this command back into CSV data. + *) +val save_out_readable : out_channel -> t -> unit +(** As for {!print_readable}, allowing the output to be sent to a channel. *) diff --git a/example.ml b/example.ml new file mode 100644 index 0000000..6b30924 --- /dev/null +++ b/example.ml @@ -0,0 +1,15 @@ +(* See also 'test.ml' for examples, and 'csv.mli' for documentation. + * $Id: example.ml,v 1.1 2005-02-17 15:51:47 rich Exp $ *) + +open Printf +open Csv + +let csvs = + List.map (fun name -> name, load name) + [ "example1.csv"; "example2.csv" ] ;; + +List.iter ( + fun (name, csv) -> + print_endline name; + print_readable csv +) csvs diff --git a/example1.csv b/example1.csv new file mode 100644 index 0000000..820e581 --- /dev/null +++ b/example1.csv @@ -0,0 +1,18 @@ +"Banner clickins" +"Clickin","Number","Percentage", +"brand.adwords","4,878","14.4" +"vacation.advert2.adwords","4,454","13.1" +"affiliates.generic.tc1","1,608","4.7" +"brand.overture","1,576","4.6" +"vacation.cheap.adwords","1,515","4.5" +"affiliates.generic.vacation.biggestchoice","1,072","3.2" +"breaks.no-destination.adwords","1,015","3.0" +"fly.no-destination.flightshome.adwords","833","2.5" +"exchange.adwords","728","2.1" +"holidays.cyprus.cheap","574","1.7" +"travel.adwords","416","1.2" +"affiliates.vacation.generic.onlinediscount.200","406","1.2" +"promo.home.topX.ACE.189","373","1.1" +"homepage.hp_tx1b_20050126","369","1.1" +"travel.agents.adwords","358","1.1" +"promo.home.topX.SSH.366","310","0.9" \ No newline at end of file diff --git a/example2.csv b/example2.csv new file mode 100644 index 0000000..6ad52d3 --- /dev/null +++ b/example2.csv @@ -0,0 +1,13 @@ +"Visitors per search engine" +"Search engine","Number", +"Google","15,437" +"MSN","2,372" +"AOLSearch","1,885" +"Yahoo","1,555" +"Ask-Jeeves","998" +"Ntlworld","303" +"Myway","268" +"Myway.com","169" +"Freeserve","73" +"BBC","41" +"Altavista","26" \ No newline at end of file