(* Handy tool for managing CSV files.
 * $Id: csvtool.ml,v 1.3 2006-06-06 16:01:52 rich Exp $
 *)
(* cmd_cols: implements the column-selection command.
 *
 * [cols] holds the wanted column indices as decimal strings; each one is
 * converted with int_of_string, which raises Failure on non-numeric text.
 * For every row of [csv], a wanted index that lies within 0 .. length-1
 * selects that cell via List.nth.  The result is written to [chan] with
 * save_out using [separator].
 *
 * NOTE(review): this listing skips some original lines (the embedded
 * numbers jump 9 -> 11 -> 13 -> 16 and 17 -> 23), so the branch taken for
 * an out-of-range index is not visible here -- confirm before relying on it.
 *)
8 let cmd_cols ~separator ~csv ~chan cols =
9 let cols = List.map int_of_string cols in
11 let output = List.map (
13 let n = List.length row in
16 if 0 <= col_wanted && col_wanted < n then
17 List.nth row col_wanted
23 save_out ~separator chan output
(* cmd_namedcols: implements the select-columns-by-heading command.
 *
 * The empty-list case fails with Failure (message: no rows in this CSV
 * file).  Otherwise header and data are passed to associate; the rows it
 * returns are then looked up with List.assoc, so they are presumably
 * association lists keyed by heading.  Each row is projected onto [names]
 * in the order given; List.assoc raises Not_found if a requested name is
 * absent from a row.  The result is written to [chan] with save_out using
 * [separator].
 *
 * NOTE(review): the bindings of header and data, and any step between the
 * projection and save_out, fall on lines missing from this listing.
 *)
25 let cmd_namedcols ~separator ~csv ~chan names =
28 | [] -> failwith "no rows in this CSV file"
30 let data = associate header data in
32 fun row -> List.map (fun name -> List.assoc name row) names
34 save_out ~separator chan data
(* cmd_width: print the value of [columns csv] to [chan] as a decimal
   number followed by a newline. *)
let cmd_width ~csv ~chan () =
  let width = columns csv in
  fprintf chan "%d\n" width
(* cmd_height: print the value of [lines csv] to [chan] as a decimal
   number followed by a newline. *)
let cmd_height ~csv ~chan () =
  let height = lines csv in
  fprintf chan "%d\n" height
(* cmd_readable: hand [csv] to save_out_readable, writing to [chan]. *)
let cmd_readable ~csv:table ~chan:out () =
  save_out_readable out table
(* cmd_square: pass [csv] through square and write the result to [chan]
   with save_out using [separator]. *)
let cmd_square ~separator ~csv ~chan () =
  save_out ~separator chan (square csv)
(* cmd_sub: implements the sub command (extract a rectangular region).
 *
 * Expects [args] to hold exactly four strings r, c, rows and cols, each
 * converted with int_of_string (Failure on non-numeric text).  The four
 * integers are passed, in that order, to sub together with [csv], and the
 * result is written to [chan] with save_out using [separator].
 *
 * NOTE(review): the match header and the pattern guarding the failwith
 * are on lines missing from this listing; presumably any other argument
 * count reaches the failwith -- confirm.
 *)
49 let cmd_sub ~separator ~csv ~chan args =
50 let r, c, rows, cols =
52 | [ r; c; rows; cols ] ->
53 int_of_string r, int_of_string c,
54 int_of_string rows, int_of_string cols
56 failwith "unknown arguments to 'sub' command" in
57 let csv = sub r c rows cols csv in
58 save_out ~separator chan csv
(* cmd_replace: implements the replace command.
 *
 * [args] supplies ncols (converted with int_of_string) and the path of a
 * replacement CSV file, which is loaded with Csv.load using [separator].
 * Rows of [csv] that match some replacement row on their first ncols
 * fields are filtered out; the replacement rows are then appended after
 * the surviving rows and the result is written to [chan] with save_out.
 *
 * NOTE(review): the only call visible in this file passes the output
 * separator as [separator], so the replacement file is parsed with the
 * output separator rather than the input one -- confirm this is intended.
 * NOTE(review): several original lines are missing from this listing
 * (the match header, most of the row comparison, the end of the filter).
 *)
60 let cmd_replace ~separator ~csv ~chan args =
61 let ncols, replacement =
63 | [ ncols; replacement ] -> int_of_string ncols, replacement
65 failwith "unknown arguments to 'replace' command" in
66 (* Load the replacement CSV file in. *)
67 let replacement = Csv.load ~separator replacement in
69 (* Compare two rows for equality by considering only the first ncols.
   Returns true once ncols fields have been consumed. *)
70 let rec compare ncols row1 row2 =
71 if ncols <= 0 then true
(* NOTE(review): Pervasives was removed in OCaml 5.0; Stdlib.compare is
   the current spelling.  The qualification matters here because the
   enclosing function shadows the name compare. *)
78 let c = Pervasives.compare x y in
81 compare (ncols-1) xs ys
84 (* Look for rows in the original to be replaced by rows from the
85 * replacement file. This is an ugly O(n^2) hack (XXX).
87 let csv = List.filter (
88 fun row -> not (List.exists (compare ncols row) replacement)
90 let csv = csv @ replacement in
91 save_out ~separator chan csv
93 (* Process the arguments.
 * The string literal that starts below is the help text: a banner, a
 * one-line synopsis, one entry per command, and a note on input/output
 * redirection.  It is presumably the usage value later given to
 * Arg.parse; the binding itself is on a line missing from this listing.
 * The text is part of the program, so edits to it change program
 * output. *)
95 "csvtool - Copyright (C) 2005-2006 Richard W.M. Jones, Merjis Ltd.
97 csvtool is a tool for performing manipulations on CSV files from shell scripts.
100 csvtool [-options] command [command-args] < input.csv
103 col [col1] [col2] ...
104 Return one or more columns from the CSV file. Columns are numbered
107 namedcol [name1] [name2] ...
108 Assuming the first row of the CSV file is a list of column headings,
109 this returned the column(s) with the named headings.
112 Return the maximum width of the CSV file (number of columns in the
116 Return the number of rows in the CSV file.
119 Print the input CSV in a readable format.
122 Make the CSV square, so all rows have the same length.
125 Take a square subset of the CSV, top left at row r, column c (counting
126 from 0), which is rows deep and cols wide.
128 replace ncols file.csv
129 Replace rows in input.csv with rows from file.csv. The first ncols
130 columns only are used to compare rows in input.csv and file.csv to
131 see if they are candidates for replacement.
133 Input and output files:
134 csvtool normally processes its input from stdin and writes its output
135 to stdout. Use the -i and -o options to override this behaviour.
(* Separator and file options collected while parsing the command line.
 *
 * Both -t and -u accept the keywords TAB and COMMA; any other value
 * contributes its first character.  An empty value is rejected with
 * Arg.Bad, so Arg.parse reports an ordinary usage error instead of the
 * program dying with Invalid_argument from indexing an empty string.
 * [flag] is only used to name the offending option in that message. *)
let separator_of_string ~flag = function
  | "TAB" -> '\t'
  | "COMMA" -> ','
  | "" -> raise (Arg.Bad (flag ^ ": separator must not be empty"))
  | s -> s.[0]
in
let input_sep = ref ',' in
let set_input_sep s = input_sep := separator_of_string ~flag:"-t" s in

let output_sep = ref ',' in
let set_output_sep s = output_sep := separator_of_string ~flag:"-u" s in

(* An empty name means the corresponding -i / -o option was not given;
   the code further down tests these against the empty string. *)
let input_file = ref "" in
let output_file = ref "" in
(* Option table entries: flag, action, help text.  The example in each
   separator help text names the flag it belongs to (-t for input,
   -u for output). *)
"-t", Arg.String set_input_sep,
  "Input separator char. Use -t TAB for tab separated input.";
"-u", Arg.String set_output_sep,
  "Output separator char. Use -u TAB for tab separated output.";
"-i", Arg.Set_string input_file,
  "Read CSV input from file (instead of stdin)";
"-o", Arg.Set_string output_file,
  "Write output to file (instead of stdout)"
(* Parse the command line.  Options are handled through argspec; every
   other argument is handed to set_rest.  usage is the text Arg prints
   for --help and on option errors. *)
173 Arg.parse argspec set_rest usage;
(* Snapshot the option refs into plain values, shadowing the refs.
   rest is reversed here -- presumably because set_rest accumulates by
   prepending; its definition is on a line missing from this listing. *)
175 let input_sep = !input_sep in
176 let output_sep = !output_sep in
177 let input_file = !input_file in
178 let output_file = !output_file in
179 let rest = List.rev !rest in
(* No command word at all: print a hint on stderr and exit with status 1. *)
183 | [] -> prerr_endline (Sys.executable_name ^ " --help for usage"); exit 1
186 (* Read the input file: from input_file when it is non-empty, otherwise
   from stdin.  Both paths use the input separator. *)
188 if input_file <> "" then load ~separator:input_sep input_file
189 else load_in ~separator:input_sep stdin in
191 (* Set up the output file: opened with open_out when output_file is
   non-empty.  The alternative branch is on a line missing from this
   listing. *)
193 if output_file <> "" then open_out output_file
(* Dispatch on the command word.  Every handler receives the parsed input
   and the output channel; handlers that emit CSV also receive the output
   separator.  Most of the command patterns are on lines missing from this
   listing. *)
198 cmd_cols ~separator:output_sep ~csv:input ~chan args
199 | "namedcol" | "namedcols" ->
200 cmd_namedcols ~separator:output_sep ~csv:input ~chan args
202 cmd_width ~csv:input ~chan ()
204 cmd_height ~csv:input ~chan ()
206 cmd_readable ~csv:input ~chan ()
208 cmd_square ~separator:output_sep ~csv:input ~chan ()
210 cmd_sub ~separator:output_sep ~csv:input ~chan args
212 cmd_replace ~separator:output_sep ~csv:input ~chan args
(* Unknown command: prints the same hint as the empty case.
   NOTE(review): unlike the empty case, no exit 1 is visible on this line,
   so an unknown command may still end with status 0, and by this point the
   output file has already been opened -- confirm against the full source. *)
213 | _ -> prerr_endline (Sys.executable_name ^ " --help for usage")
(* Close the channel only when it was opened from output_file.
   NOTE(review): no exception handling is visible, so this line is not
   reached if a handler raises. *)
216 if output_file <> "" then close_out chan