(* Handy tool for managing CSV files.
- * $Id: csvtool.ml,v 1.7 2006-11-24 13:58:56 rich Exp $
+ * $Id: csvtool.ml,v 1.8 2006-11-24 15:49:24 rich Exp $
*)
open Printf
in
List.iter (
fun filename ->
- let in_chan = open_in filename in
+ let in_chan, close =
+ match filename with
+ | "-" -> stdin, false
+ | filename -> open_in filename, true in
load_rows ~separator:input_sep f in_chan;
- close_in in_chan
+ if close then close_in in_chan
+ ) files
+
+(* "setcolumns" command: hand each input row, as a one-row CSV, to
+ * set_columns and write the result to chan straight away, so rows are
+ * processed as load_rows delivers them instead of collecting the
+ * whole file first.
+ *
+ * A filename of "-" means read from stdin; stdin is not closed
+ * afterwards, whereas channels opened here are.
+ *)
+let cmd_set_columns ~input_sep ~output_sep ~chan cols files =
+  let emit_row row =
+    save_out ~separator:output_sep chan (set_columns cols [row])
+  in
+  let process_file filename =
+    let from_stdin = (filename = "-") in
+    let in_chan = if from_stdin then stdin else open_in filename in
+    load_rows ~separator:input_sep emit_row in_chan;
+    if not from_stdin then close_in in_chan
+  in
+  List.iter process_file files
+
+(* "setrows" command: read every input file completely with load,
+ * concatenate the results in the order of files, apply set_rows to
+ * the combined CSV and write it to chan.
+ *)
+let cmd_set_rows ~input_sep ~output_sep ~chan rows files =
+  let load_file filename = load ~separator:input_sep filename in
+  let all_rows = List.concat (List.map load_file files) in
+  save_out ~separator:output_sep chan (set_rows rows all_rows)
+
+(* "head"/"take" command: write the first 'rows' rows of the
+ * concatenated input files to chan.
+ *
+ * Rows are written one at a time as load_rows delivers them, so a
+ * file is never held in memory.  As soon as the requested number of
+ * rows has been written the row callback raises Exit, so the rest of
+ * the current input is not read (this matters for huge files and for
+ * pipes on stdin) and later files are never opened.  If rows <= 0 no
+ * file is opened at all.
+ *
+ * A filename of "-" means stdin, which is never closed here.
+ *
+ * NOTE(review): the early exit relies on load_rows letting an
+ * exception raised by the callback propagate -- confirm.  If it does
+ * not, the output should be unchanged; the remaining rows are merely
+ * read and discarded as before.
+ *)
+let cmd_head ~input_sep ~output_sep ~chan rows files =
+  let nr_rows = ref rows in
+  let f row =
+    if !nr_rows > 0 then (
+      decr nr_rows;
+      save_out ~separator:output_sep chan [row]
+    );
+    (* Quota reached: abandon the rest of this input. *)
+    if !nr_rows <= 0 then raise Exit
+  in
+  List.iter (
+    fun filename ->
+      if !nr_rows > 0 then (
+        let in_chan, close =
+          match filename with
+          | "-" -> stdin, false
+          | filename -> open_in filename, true in
+        (* Exit only signals "enough rows"; fall through to the close. *)
+        (try load_rows ~separator:input_sep f in_chan
+         with Exit -> ());
+        if close then close_in in_chan
+      )
+  ) files
+
+(* "drop" command: discard the first 'rows' rows of the concatenated
+ * input files and write every following row to chan.
+ *
+ * Rows are handled one at a time as load_rows delivers them, to avoid
+ * loading the whole file into memory.  The counter is shared across
+ * all files, so the dropped rows may span several inputs.
+ *
+ * A filename of "-" means stdin, which is never closed here.
+ *)
+let cmd_drop ~input_sep ~output_sep ~chan rows files =
+  (* Rows still to discard.  A negative request is clamped to 0;
+   * otherwise the counter would never reach zero and every row
+   * would be dropped.
+   *)
+  let nr_rows = ref (max rows 0) in
+  let f row =
+    if !nr_rows > 0 then
+      decr nr_rows
+    else
+      save_out ~separator:output_sep chan [row]
+  in
+  List.iter (
+    fun filename ->
+      let in_chan, close =
+        match filename with
+        | "-" -> stdin, false
+        | filename -> open_in filename, true in
+      load_rows ~separator:input_sep f in_chan;
+      if close then close_in in_chan
   ) files
let cmd_square ~input_sep ~output_sep ~chan files =
height
Print the number of rows in the CSV file.
- readable
- Print the input CSV in a readable format.
+ For most CSV files this is equivalent to 'wc -l', but note that
+ some CSV files can contain a row which breaks over two (or more)
+ lines.
+
+ setcolumns cols
+ Set the number of columns to cols (this also makes the CSV file
+ square). Any short rows are padded with blank cells. Any
+ long rows are truncated.
+
+ setrows rows
+ 'setrows n' sets the number of rows to 'n'. If there are fewer
+ than 'n' rows in the CSV files, then blank rows are added.
+
+ head rows
+ take rows
+ 'head n' and 'take n' (which are synonyms) take the first 'n'
+ rows. If there are fewer than 'n' rows, padding is not added.
+
+ drop rows
+ Drop the first 'rows' rows and return the rest (if any).
+
+ Example:
+ To remove the headings from a CSV file with headings:
+ csvtool drop 1 input.csv > output.csv
+
+ To extract rows 11 through 20 from a file:
+ csvtool drop 10 input.csv | csvtool take 10 - > output.csv
cat
This concatenates the input files together and writes them to
the output. You can use this to change the separator character.
- Example: csvtool -t TAB -u , cat input.tsv > output.csv
+ Example: csvtool -t TAB -u COMMA cat input.tsv > output.csv
join <column-spec1> <column-spec2>
Join (collate) multiple CSV files together.
<column-spec2> controls which columns are copied into the new file.
Example:
- csvtool join 1 2 coll1.csv coll2.csv
+ csvtool join 1 2 coll1.csv coll2.csv > output.csv
If coll1.csv contains:
Computers,$40
Software,$100
square
Make the CSV square, so all rows have the same length.
+ Example: csvtool square input.csv > input-square.csv
+
sub r c rows cols
- Take a square subset of the CSV, top left at row r, column c (counting
- from 0), which is rows deep and cols wide.
+ Take a square subset of the CSV, top left at row r, column c, which
+ is rows deep and cols wide. 'r' and 'c' count from 1, or
+ from 0 if -z option is given.
replace <column-spec> update.csv original.csv
Replace rows in original.csv with rows from update.csv. The columns
csvtool replace 3 updates.csv original.csv > new.csv
mv new.csv original.csv
+ readable
+ Print the input CSV in a readable format.
+
Column specs:
A <column-spec> is a comma-separated list of column numbers
or column ranges.
Input files:
csvtool takes a list of input file(s) from the command line.
+ If an input filename is '-' then take input from stdin.
+
Output file:
Normally the output is written to stdout. Use the -o option
to override this.
let output_file = ref "" in
+ let rest = ref [] in
+ let set_rest str =
+ rest := str :: !rest
+ in
+
let argspec = [
"-t", Arg.String set_input_sep,
"Input separator char. Use -t TAB for tab separated input.";
"Write output to file (instead of stdout)";
"-z", Arg.Set count_zero,
"Number columns from 0 instead of 1";
+ "-", Arg.Unit (fun () -> set_rest "-"),
+ "" (* Hack to allow '-' for input from stdin. *)
] in
- let rest = ref [] in
- let set_rest str =
- rest := str :: !rest
- in
-
Arg.parse argspec set_rest usage;
let input_sep = !input_sep in
| ("namedcol"|"namedcols") :: names :: files ->
let names = nsplit names "," in
cmd_namedcols ~input_sep ~output_sep ~chan names files
- | "width" :: files ->
+ | ("width"|"columns") :: files ->
cmd_width ~input_sep ~chan files
- | "height" :: files ->
+ | ("height"|"rows") :: files ->
cmd_height ~input_sep ~chan files
| "readable" :: files ->
cmd_readable ~input_sep ~chan files
cmd_square ~input_sep ~output_sep ~chan files
| "sub" :: r :: c :: rows :: cols :: files ->
let r = int_of_string r in
+ let r = if not count_zero then r-1 else r in
let c = int_of_string c in
+ let c = if not count_zero then c-1 else c in
let rows = int_of_string rows in
let cols = int_of_string cols in
cmd_sub ~input_sep ~output_sep ~chan r c rows cols files
| "replace" :: colspec :: update :: files ->
let colspec = parse_colspec ~count_zero colspec in
cmd_replace ~input_sep ~output_sep ~chan colspec update files
+ | ("setcolumns"|"set_columns"|"set-columns"|
+ "setcols"|"set_cols"|"set-cols") :: cols :: files ->
+ let cols = int_of_string cols in
+ cmd_set_columns ~input_sep ~output_sep ~chan cols files
+ | ("setrows"|"set_rows"|"set-rows") :: rows :: files ->
+ let rows = int_of_string rows in
+ cmd_set_rows ~input_sep ~output_sep ~chan rows files
+ | ("head"|"take") :: rows :: files ->
+ let rows = int_of_string rows in
+ cmd_head ~input_sep ~output_sep ~chan rows files
+ | "drop" :: rows :: files ->
+ let rows = int_of_string rows in
+ cmd_drop ~input_sep ~output_sep ~chan rows files
| _ ->
prerr_endline (Sys.executable_name ^ " --help for usage");
exit 2