From bc6c67f15be43d270d859cbe6ba5dc31e5b9a44b Mon Sep 17 00:00:00 2001 From: rich Date: Tue, 24 May 2005 13:52:50 +0000 Subject: [PATCH] Added csvtool for using CSV files on the command line. Added check-manifest, dist rules to Makefile Updated MANIFEST. Comment fix in csv.mli. --- .cvsignore | 1 + .depend | 2 + MANIFEST | 4 ++ Makefile | 31 +++++++++++-- csv.mli | 4 +- csvtool.ml | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 186 insertions(+), 6 deletions(-) create mode 100644 csvtool.ml diff --git a/.cvsignore b/.cvsignore index 4a21178..098b8f2 100644 --- a/.cvsignore +++ b/.cvsignore @@ -5,4 +5,5 @@ *.cmxa test example +csvtool ocaml-csv-*.tar.gz \ No newline at end of file diff --git a/.depend b/.depend index 1c6b70e..14f7302 100644 --- a/.depend +++ b/.depend @@ -1,5 +1,7 @@ csv.cmo: csv.cmi csv.cmx: csv.cmi +csvtool.cmo: csv.cmi +csvtool.cmx: csv.cmx example.cmo: csv.cmi example.cmx: csv.cmx test.cmo: csv.cmi diff --git a/MANIFEST b/MANIFEST index fc74999..cbfbf46 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4,6 +4,10 @@ Makefile MANIFEST csv.ml csv.mli +csvtool.ml +example.ml +example1.csv +example2.csv test.ml testcsv1.csv testcsv2.csv diff --git a/Makefile b/Makefile index 618a3a7..e1d5992 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -# $Id: Makefile,v 1.3 2005-02-17 15:51:47 rich Exp $ +# $Id: Makefile,v 1.4 2005-05-24 13:52:50 rich Exp $ PACKAGE := ocaml-csv -VERSION := 1.0.2 +VERSION := 1.0.3 OCAMLC := ocamlc OCAMLCINCS := @@ -16,7 +16,7 @@ OCAMLOPTLIBS := OBJS := csv.cmo XOBJS := $(OBJS:.cmo=.cmx) -all: csv.cma csv.cmxa example +all: csv.cma csv.cmxa example csvtool csv.cma: $(OBJS) $(OCAMLC) $(OCAMLCFLAGS) -a -o $@ $^ @@ -31,6 +31,9 @@ test: csv.cma test.ml $(OCAMLC) $^ -o $@ ./test +csvtool: csv.cmxa csvtool.ml + $(OCAMLOPT) $^ -o $@ + # Common rules for building OCaml objects. .mli.cmi: @@ -62,4 +65,24 @@ endif # Build a distribution. 
dist:
-	tar zcf $(PACKAGE)-$(VERSION).tar.gz `cat MANIFEST`
\ No newline at end of file
+	$(MAKE) check-manifest
+	rm -rf $(PACKAGE)-$(VERSION)
+	mkdir $(PACKAGE)-$(VERSION)
+	tar -cf - -T MANIFEST | tar -C $(PACKAGE)-$(VERSION) -xf -
+	tar zcf $(PACKAGE)-$(VERSION).tar.gz $(PACKAGE)-$(VERSION)
+	rm -rf $(PACKAGE)-$(VERSION)
+	ls -l $(PACKAGE)-$(VERSION).tar.gz
+# Compare MANIFEST with the CVS/Entries files ("find ." keeps non-GNU find happy).
+check-manifest:
+	@for d in `find . -type d -name CVS | grep -v '^\./debian/'`; \
+	do \
+	  b=`dirname $$d`/; \
+	  awk -F/ '$$1 != "D" {print $$2}' $$d/Entries | \
+	  sed -e "s|^|$$b|" -e "s|^\./||"; \
+	done | sort > .check-manifest; \
+	sort MANIFEST > .orig-manifest; \
+	diff -u .orig-manifest .check-manifest; rv=$$?; \
+	rm -f .orig-manifest .check-manifest; \
+	exit $$rv
+
+.PHONY: depend dist check-manifest
diff --git a/csv.mli b/csv.mli
index d4d8cd9..b35e480 100644
--- a/csv.mli
+++ b/csv.mli
@@ -1,13 +1,13 @@
 (** csv.mli - comma separated values parser
  *
- * $Id: csv.mli,v 1.4 2005-02-17 15:51:47 rich Exp $
+ * $Id: csv.mli,v 1.5 2005-05-24 13:52:50 rich Exp $
  *)
 
 type t = string list list
 (** Representation of CSV files. *)
 
 exception Bad_CSV_file of string
-(** Badly formed CSV files throw this exception: *)
+(** Badly formed CSV files throw this exception. *)
 
 val lines : t -> int
 (** Work out the number of lines in a CSV file. *)
diff --git a/csvtool.ml b/csvtool.ml
new file mode 100644
index 0000000..0b82a4b
--- /dev/null
+++ b/csvtool.ml
@@ -0,0 +1,150 @@
+(* Handy tool for managing CSV files.
+ * $Id: csvtool.ml,v 1.1 2005-05-24 13:52:50 rich Exp $
+ *)
+
+open Printf
+open Csv
+
+(* [cmd_cols] writes to [chan] the columns of [csv] whose zero-based numbers
+ * are given (as strings) in [cols], in that order.  A row too short to have
+ * a requested column gets "" in its place. *)
+let cmd_cols ~separator ~csv ~chan cols =
+  let index_of s =
+    try int_of_string s
+    with Failure _ -> failwith ("col: not a column number: " ^ s) in
+  let cols = List.map index_of cols in
+  let pick row =
+    let cells = Array.of_list row in (* indexed once, not List.nth per cell *)
+    let cell i = if 0 <= i && i < Array.length cells then cells.(i) else "" in
+    List.map cell cols in
+  save_out ~separator chan (List.map pick csv)
+
+(* [cmd_namedcols] takes the first row of [csv] as column headings and writes
+ * the columns called [names] from the remaining rows to [chan].  It fails
+ * if [csv] has no rows or a requested name is not found in a row. *)
+let cmd_namedcols ~separator ~csv ~chan names =
+  let header, data =
+    match csv with
+    | [] -> failwith "no rows in this CSV file"
+    | h :: t -> h, t in
+  let lookup row name =
+    try List.assoc name row
+    with Not_found -> failwith ("namedcol: no such column: " ^ name) in
+  let select row = List.map (lookup row) names in
+  save_out ~separator chan (List.map select (associate header data))
+
+(* Print the value of [columns csv] (the widest row, per the usage text). *)
+let cmd_width ~csv ~chan () = fprintf chan "%d\n" (columns csv)
+
+(* Print the number of rows, [lines csv]. *)
+let cmd_height ~csv ~chan () = fprintf chan "%d\n" (lines csv)
+
+(* Hand [csv] to [save_out_readable] for printing on [chan]. *)
+let cmd_readable ~csv ~chan () = save_out_readable chan csv
+
+(* Usage message for Arg.parse; Arg prints the option list after it. *)
+let usage =
+  "csvtool - Copyright (C) 2005 Richard W.M. Jones, Merjis Ltd.
+
+csvtool is a tool for performing manipulations on CSV files from shell scripts.
+
+Summary:
+  csvtool [-options] command [command-args] < input.csv
+
+Commands:
+  col [col1] [col2] ...
+    Return one or more columns from the CSV file. Columns are numbered
+    starting from zero.
+
+  namedcol [name1] [name2] ...
+    Assuming the first row of the CSV file is a list of column headings,
+    this returns the column(s) with the named headings.
+
+  width
+    Return the maximum width of the CSV file (number of columns in the
+    widest row).
+
+  height
+    Return the number of rows in the CSV file.
+
+  readable
+    Print the input CSV in a readable format.
+
+Input and output files:
+  csvtool normally processes its input from stdin and writes its output
+  to stdout. Use the -i and -o options to override this behaviour.
+
+Options:"
+
+let () =
+  (* A separator is TAB, COMMA, or the first character of any other string.
+   * An empty string is rejected via Arg.Bad rather than crashing on s.[0]. *)
+  let set_sep sep = function
+    | "TAB" -> sep := '\t'
+    | "COMMA" -> sep := ','
+    | "" -> raise (Arg.Bad "separator must not be empty")
+    | s -> sep := s.[0]
+  in
+
+  let input_sep = ref ',' in
+  let output_sep = ref ',' in
+  let input_file = ref "" in
+  let output_file = ref "" in
+
+  let argspec = [
+    "-t", Arg.String (set_sep input_sep),
+    "Input separator char. Use -t TAB for tab separated input.";
+    "-u", Arg.String (set_sep output_sep),
+    "Output separator char. Use -u TAB for tab separated output.";
+    "-i", Arg.Set_string input_file,
+    "Read CSV input from file (instead of stdin)";
+    "-o", Arg.Set_string output_file,
+    "Write output to file (instead of stdout)"
+  ] in
+
+  (* Anonymous arguments (the command and its arguments) are consed on in
+   * reverse order here and put back in order after parsing. *)
+  let rest = ref [] in
+  let set_rest str =
+    rest := str :: !rest
+  in
+
+  Arg.parse argspec set_rest usage;
+
+  let input_sep = !input_sep in
+  let output_sep = !output_sep in
+  let input_file = !input_file in
+  let output_file = !output_file in
+  let rest = List.rev !rest in
+
+  (* Every usage mistake prints the same hint and exits with status 1. *)
+  let bad_usage () =
+    prerr_endline (Sys.executable_name ^ " --help for usage");
+    exit 1
+  in
+
+  let cmd, args =
+    match rest with
+    | [] -> bad_usage ()
+    | h :: t -> h, t in
+
+  (* Choose the command before touching any file, so that a mistyped command
+   * neither reads the input nor truncates the -o file. *)
+  let run =
+    match cmd with
+    | "col" | "cols" ->
+        (fun csv chan -> cmd_cols ~separator:output_sep ~csv ~chan args)
+    | "namedcol" | "namedcols" ->
+        (fun csv chan -> cmd_namedcols ~separator:output_sep ~csv ~chan args)
+    | "width" -> (fun csv chan -> cmd_width ~csv ~chan ())
+    | "height" -> (fun csv chan -> cmd_height ~csv ~chan ())
+    | "readable" -> (fun csv chan -> cmd_readable ~csv ~chan ())
+    | _ -> bad_usage ()
+  in
+
+  let input =
+    if input_file <> "" then load ~separator:input_sep input_file
+    else load_in ~separator:input_sep stdin in
+  let chan = if output_file <> "" then open_out output_file else stdout in
+  run input chan;
+  if output_file <> "" then close_out chan
-- 
1.8.3.1