X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=csvtool.ml;h=d9441605c7d2e22cd7b04d9c1d91b6450082b5ac;hb=3a2b495714080c9f72b89cada8bc45b22768a059;hp=f6c1184150ba8d748ee29075c8c112ed50124874;hpb=e808ed9e7dae3273ba3614ba8cef960778f21b80;p=ocaml-csv.git diff --git a/csvtool.ml b/csvtool.ml index f6c1184..d944160 100644 --- a/csvtool.ml +++ b/csvtool.ml @@ -1,5 +1,5 @@ (* Handy tool for managing CSV files. - * $Id: csvtool.ml,v 1.5 2006-10-24 10:09:33 rich Exp $ + * $Id: csvtool.ml,v 1.8 2006-11-24 15:49:24 rich Exp $ *) open Printf @@ -153,6 +153,14 @@ let cmd_namedcols ~input_sep ~output_sep ~chan names files = match csv with | [] -> failwith "no rows in this CSV file" | h :: t -> h, t in + (* Do the headers requested exist in the CSV file? If not, + * throw an error. + *) + List.iter ( + fun name -> + if not (List.mem name header) then + failwith ("namedcol: requested header not in CSV file: " ^ name) + ) names; let data = associate header data in let data = List.map ( fun row -> List.map (fun name -> List.assoc name row) names @@ -189,9 +197,76 @@ let cmd_cat ~input_sep ~output_sep ~chan files = in List.iter ( fun filename -> - let in_chan = open_in filename in + let in_chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in + load_rows ~separator:input_sep f in_chan; + if close then close_in in_chan + ) files + +let cmd_set_columns ~input_sep ~output_sep ~chan cols files = + (* Avoid loading the whole file into memory. *) + let f row = + let csv = [row] in + let csv = set_columns cols csv in + save_out ~separator:output_sep chan csv + in + List.iter ( + fun filename -> + let in_chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in + load_rows ~separator:input_sep f in_chan; + if close then close_in in_chan + ) files + +let cmd_set_rows ~input_sep ~output_sep ~chan rows files = + let csv = List.concat (List.map (load ~separator:input_sep) files) in + let csv = set_rows rows csv in + save_out ~separator:output_sep chan csv + +let cmd_head ~input_sep ~output_sep ~chan rows files = + (* Avoid loading the whole file into memory, or even loading + * later files. + *) + let nr_rows = ref rows in + let f row = + if !nr_rows > 0 then ( + decr nr_rows; + save_out ~separator:output_sep chan [row] + ) + in + List.iter ( + fun filename -> + if !nr_rows > 0 then ( + let in_chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in + load_rows ~separator:input_sep f in_chan; + if close then close_in in_chan + ) + ) files + +let cmd_drop ~input_sep ~output_sep ~chan rows files = + (* Avoid loading the whole file into memory. *) + let nr_rows = ref rows in + let f row = + if !nr_rows = 0 then + save_out ~separator:output_sep chan [row] + else + decr nr_rows + in + List.iter ( + fun filename -> + let in_chan, close = + match filename with + | "-" -> stdin, false + | filename -> open_in filename, true in load_rows ~separator:input_sep f in_chan; - close_in in_chan + if close then close_in in_chan ) files let cmd_square ~input_sep ~output_sep ~chan files = @@ -300,8 +375,7 @@ Summary: Commands: col - Return one or more columns from the CSV file. Columns are numbered - starting from zero. + Return one or more columns from the CSV file. For , see below. @@ -322,14 +396,39 @@ Commands: height Print the number of rows in the CSV file. - readable - Print the input CSV in a readable format. + For most CSV files this is equivalent to 'wc -l', but note that + some CSV files can contain a row which breaks over two (or more) + lines. + + setcolumns cols + Set the number of columns to cols (this also makes the CSV file + square). Any short rows are padding with blank cells. Any + long rows are truncated. + + setrows rows + 'setrows n' sets the number of rows to 'n'. If there are fewer + than 'n' rows in the CSV files, then empty blank lines are added. + + head rows + take rows + 'head n' and 'take n' (which are synonyms) take the first 'n' + rows. If there are fewer than 'n' rows, padding is not added. + + drop rows + Drop the first 'rows' rows and return the rest (if any). + + Example: + To remove the headings from a CSV file with headings: + csvtool drop 1 input.csv > output.csv + + To extract rows 11 through 20 from a file: + csvtool drop 10 input.csv | csvtool take 10 - > output.csv cat This concatenates the input files together and writes them to the output. You can use this to change the separator character. - Example: csvtool -t TAB -u , cat input.tsv > output.csv + Example: csvtool -t TAB -u COMMA cat input.tsv > output.csv join Join (collate) multiple CSV files together. @@ -339,7 +438,7 @@ Commands: controls which columns are copied into the new file. Example: - csvtool join 1 2 coll1.csv coll2.csv + csvtool join 1 2 coll1.csv coll2.csv > output.csv If coll1.csv contains: Computers,$40 Software,$100 @@ -352,9 +451,12 @@ Commands: square Make the CSV square, so all rows have the same length. + Example: csvtool square input.csv > input-square.csv + sub r c rows cols - Take a square subset of the CSV, top left at row r, column c (counting - from 0), which is rows deep and cols wide. + Take a square subset of the CSV, top left at row r, column c, which + is rows deep and cols wide. 'r' and 'c' count from 1, or + from 0 if -z option is given. replace update.csv original.csv Replace rows in original.csv with rows from update.csv. The columns @@ -365,6 +467,9 @@ Commands: csvtool replace 3 updates.csv original.csv > new.csv mv new.csv original.csv + readable + Print the input CSV in a readable format. + Column specs: A is a comma-separated list of column numbers or column ranges. @@ -380,6 +485,8 @@ Column specs: Input files: csvtool takes a list of input file(s) from the command line. + If an input filename is '-' then take input from stdin. + Output file: Normally the output is written to stdout. Use the -o option to override this. @@ -414,6 +521,11 @@ let () = let output_file = ref "" in + let rest = ref [] in + let set_rest str = + rest := str :: !rest + in + let argspec = [ "-t", Arg.String set_input_sep, "Input separator char. Use -t TAB for tab separated input."; @@ -423,13 +535,10 @@ let () = "Write output to file (instead of stdout)"; "-z", Arg.Set count_zero, "Number columns from 0 instead of 1"; + "-", Arg.Unit (fun () -> set_rest "-"), + "" (* Hack to allow '-' for input from stdin. *) ] in - let rest = ref [] in - let set_rest str = - rest := str :: !rest - in - Arg.parse argspec set_rest usage; let input_sep = !input_sep in @@ -450,9 +559,9 @@ let () = | ("namedcol"|"namedcols") :: names :: files -> let names = nsplit names "," in cmd_namedcols ~input_sep ~output_sep ~chan names files - | "width" :: files -> + | ("width"|"columns") :: files -> cmd_width ~input_sep ~chan files - | "height" :: files -> + | ("height"|"rows") :: files -> cmd_height ~input_sep ~chan files | "readable" :: files -> cmd_readable ~input_sep ~chan files @@ -466,13 +575,28 @@ let () = cmd_square ~input_sep ~output_sep ~chan files | "sub" :: r :: c :: rows :: cols :: files -> let r = int_of_string r in + let r = if not count_zero then r-1 else r in let c = int_of_string c in + let c = if not count_zero then c-1 else c in let rows = int_of_string rows in let cols = int_of_string cols in cmd_sub ~input_sep ~output_sep ~chan r c rows cols files | "replace" :: colspec :: update :: files -> let colspec = parse_colspec ~count_zero colspec in cmd_replace ~input_sep ~output_sep ~chan colspec update files + | ("setcolumns"|"set_columns"|"set-columns"| + "setcols"|"set_cols"|"set-cols") :: cols :: files -> + let cols = int_of_string cols in + cmd_set_columns ~input_sep ~output_sep ~chan cols files + | ("setrows"|"set_rows"|"set-rows") :: rows :: files -> + let rows = int_of_string rows in + cmd_set_rows ~input_sep ~output_sep ~chan rows files + | ("head"|"take") :: rows :: files -> + let rows = int_of_string rows in + cmd_head ~input_sep ~output_sep ~chan rows files + | "drop" :: rows :: files -> + let rows = int_of_string rows in + cmd_drop ~input_sep ~output_sep ~chan rows files | _ -> prerr_endline (Sys.executable_name ^ " --help for usage"); exit 2