From 3a2b495714080c9f72b89cada8bc45b22768a059 Mon Sep 17 00:00:00 2001 From: rich Date: Fri, 24 Nov 2006 15:49:24 +0000 Subject: [PATCH] Allow '-' to mean read from stdin. csvtool: added 'setcolumns', 'setrows', 'head', 'drop'. --- csv.ml | 11 +++-- csv.mli | 3 +- csvtool.ml | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 142 insertions(+), 21 deletions(-) diff --git a/csv.ml b/csv.ml index ee0623e..0b6e649 100644 --- a/csv.ml +++ b/csv.ml @@ -1,6 +1,6 @@ (* csv.ml - comma separated values parser * - * $Id: csv.ml,v 1.13 2006-11-24 09:43:15 rich Exp $ + * $Id: csv.ml,v 1.14 2006-11-24 15:49:24 rich Exp $ *) (* The format of CSV files: @@ -178,10 +178,13 @@ let load_in ?separator chan = List.rev !csv let load ?separator filename = - let chan = open_in filename in + let chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in let csv = load_in ?separator chan in - close_in chan; - csv + if close then close_in chan; + csv let trim ?(top=true) ?(left=true) ?(right=true) ?(bottom=true) csv = let rec empty_row = function diff --git a/csv.mli b/csv.mli index 983fce1..14901e4 100644 --- a/csv.mli +++ b/csv.mli @@ -1,6 +1,6 @@ (** csv.mli - comma separated values parser * - * $Id: csv.mli,v 1.9 2006-10-18 14:56:12 rich Exp $ + * $Id: csv.mli,v 1.10 2006-11-24 15:49:24 rich Exp $ *) type t = string list list @@ -26,6 +26,7 @@ val load_in : ?separator:char -> in_channel -> t val load : ?separator:char -> string -> t (** Load a CSV file. * @param filename CSV filename. + * If [filename] is ["-"] then load from [stdin]. *) val load_rows : ?separator:char -> (string list -> unit) -> in_channel -> unit diff --git a/csvtool.ml b/csvtool.ml index a046956..d944160 100644 --- a/csvtool.ml +++ b/csvtool.ml @@ -1,5 +1,5 @@ (* Handy tool for managing CSV files. 
- * $Id: csvtool.ml,v 1.7 2006-11-24 13:58:56 rich Exp $ + * $Id: csvtool.ml,v 1.8 2006-11-24 15:49:24 rich Exp $ *) open Printf @@ -197,9 +197,76 @@ let cmd_cat ~input_sep ~output_sep ~chan files = in List.iter ( fun filename -> - let in_chan = open_in filename in + let in_chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in load_rows ~separator:input_sep f in_chan; - close_in in_chan + if close then close_in in_chan + ) files + +let cmd_set_columns ~input_sep ~output_sep ~chan cols files = + (* Avoid loading the whole file into memory. *) + let f row = + let csv = [row] in + let csv = set_columns cols csv in + save_out ~separator:output_sep chan csv + in + List.iter ( + fun filename -> + let in_chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in + load_rows ~separator:input_sep f in_chan; + if close then close_in in_chan + ) files + +let cmd_set_rows ~input_sep ~output_sep ~chan rows files = + let csv = List.concat (List.map (load ~separator:input_sep) files) in + let csv = set_rows rows csv in + save_out ~separator:output_sep chan csv + +let cmd_head ~input_sep ~output_sep ~chan rows files = + (* Avoid loading the whole file into memory, or even loading + * later files. + *) + let nr_rows = ref rows in + let f row = + if !nr_rows > 0 then ( + decr nr_rows; + save_out ~separator:output_sep chan [row] + ) + in + List.iter ( + fun filename -> + if !nr_rows > 0 then ( + let in_chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in + load_rows ~separator:input_sep f in_chan; + if close then close_in in_chan + ) + ) files + +let cmd_drop ~input_sep ~output_sep ~chan rows files = + (* Avoid loading the whole file into memory. 
*) + let nr_rows = ref rows in + let f row = + if !nr_rows = 0 then + save_out ~separator:output_sep chan [row] + else + decr nr_rows + in + List.iter ( + fun filename -> + let in_chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in + load_rows ~separator:input_sep f in_chan; + if close then close_in in_chan ) files let cmd_square ~input_sep ~output_sep ~chan files = @@ -329,14 +396,39 @@ Commands: height Print the number of rows in the CSV file. - readable - Print the input CSV in a readable format. + For most CSV files this is equivalent to 'wc -l', but note that + some CSV files can contain a row which breaks over two (or more) + lines. + + setcolumns cols + Set the number of columns to cols (this also makes the CSV file + square). Any short rows are padded with blank cells. Any + long rows are truncated. + + setrows rows + 'setrows n' sets the number of rows to 'n'. If there are fewer + than 'n' rows in the CSV files, then empty blank lines are added. + + head rows + take rows + 'head n' and 'take n' (which are synonyms) take the first 'n' + rows. If there are fewer than 'n' rows, padding is not added. + + drop rows + Drop the first 'rows' rows and return the rest (if any). + + Example: + To remove the headings from a CSV file with headings: + csvtool drop 1 input.csv > output.csv + + To extract rows 11 through 20 from a file: + csvtool drop 10 input.csv | csvtool take 10 - > output.csv cat This concatenates the input files together and writes them to the output. You can use this to change the separator character. - Example: csvtool -t TAB -u , cat input.tsv > output.csv + Example: csvtool -t TAB -u COMMA cat input.tsv > output.csv join Join (collate) multiple CSV files together. @@ -346,7 +438,7 @@ Commands: controls which columns are copied into the new file. 
Example: - csvtool join 1 2 coll1.csv coll2.csv + csvtool join 1 2 coll1.csv coll2.csv > output.csv If coll1.csv contains: Computers,$40 Software,$100 @@ -359,9 +451,12 @@ Commands: square Make the CSV square, so all rows have the same length. + Example: csvtool square input.csv > input-square.csv + sub r c rows cols - Take a square subset of the CSV, top left at row r, column c (counting - from 0), which is rows deep and cols wide. + Take a square subset of the CSV, top left at row r, column c, which + is rows deep and cols wide. 'r' and 'c' count from 1, or + from 0 if -z option is given. replace update.csv original.csv Replace rows in original.csv with rows from update.csv. The columns @@ -372,6 +467,9 @@ Commands: csvtool replace 3 updates.csv original.csv > new.csv mv new.csv original.csv + readable + Print the input CSV in a readable format. + Column specs: A is a comma-separated list of column numbers or column ranges. @@ -387,6 +485,8 @@ Column specs: Input files: csvtool takes a list of input file(s) from the command line. + If an input filename is '-' then take input from stdin. + Output file: Normally the output is written to stdout. Use the -o option to override this. @@ -421,6 +521,11 @@ let () = let output_file = ref "" in + let rest = ref [] in + let set_rest str = + rest := str :: !rest + in + let argspec = [ "-t", Arg.String set_input_sep, "Input separator char. Use -t TAB for tab separated input."; @@ -430,13 +535,10 @@ let () = "Write output to file (instead of stdout)"; "-z", Arg.Set count_zero, "Number columns from 0 instead of 1"; + "-", Arg.Unit (fun () -> set_rest "-"), + "" (* Hack to allow '-' for input from stdin. 
*) ] in - let rest = ref [] in - let set_rest str = - rest := str :: !rest - in - Arg.parse argspec set_rest usage; let input_sep = !input_sep in @@ -457,9 +559,9 @@ let () = | ("namedcol"|"namedcols") :: names :: files -> let names = nsplit names "," in cmd_namedcols ~input_sep ~output_sep ~chan names files - | "width" :: files -> + | ("width"|"columns") :: files -> cmd_width ~input_sep ~chan files - | "height" :: files -> + | ("height"|"rows") :: files -> cmd_height ~input_sep ~chan files | "readable" :: files -> cmd_readable ~input_sep ~chan files @@ -473,13 +575,28 @@ let () = cmd_square ~input_sep ~output_sep ~chan files | "sub" :: r :: c :: rows :: cols :: files -> let r = int_of_string r in + let r = if not count_zero then r-1 else r in let c = int_of_string c in + let c = if not count_zero then c-1 else c in let rows = int_of_string rows in let cols = int_of_string cols in cmd_sub ~input_sep ~output_sep ~chan r c rows cols files | "replace" :: colspec :: update :: files -> let colspec = parse_colspec ~count_zero colspec in cmd_replace ~input_sep ~output_sep ~chan colspec update files + | ("setcolumns"|"set_columns"|"set-columns"| + "setcols"|"set_cols"|"set-cols") :: cols :: files -> + let cols = int_of_string cols in + cmd_set_columns ~input_sep ~output_sep ~chan cols files + | ("setrows"|"set_rows"|"set-rows") :: rows :: files -> + let rows = int_of_string rows in + cmd_set_rows ~input_sep ~output_sep ~chan rows files + | ("head"|"take") :: rows :: files -> + let rows = int_of_string rows in + cmd_head ~input_sep ~output_sep ~chan rows files + | "drop" :: rows :: files -> + let rows = int_of_string rows in + cmd_drop ~input_sep ~output_sep ~chan rows files | _ -> prerr_endline (Sys.executable_name ^ " --help for usage"); exit 2 -- 1.8.3.1