(* csv.ml - comma separated values parser
- * $Id: csv.ml,v 1.6 2005-08-13 10:10:31 rich Exp $
+ * $Id: csv.ml,v 1.7 2005-11-25 14:06:58 rich Exp $
(* The format of CSV files:
| x :: xs when f x -> dropwhile f xs
| xs -> xs
+(* from extlib: *)
+(* [drop n l] is [l] without its first [n] elements.  The list is
+ * returned unchanged when [n <= 0], and [[]] is returned when [l] has
+ * no more than [n] elements. *)
+let rec drop n l =
+  if n <= 0 then l
+  else
+    match l with
+    | [] -> []
+    | _ :: rest -> drop (n - 1) rest
+(* [take n l] is the list of the first [n] elements of [l], or all of
+ * [l] when it has fewer than [n] elements.  [n <= 0] gives [[]].
+ * Elements are collected in an accumulator so that the stack depth does
+ * not grow with [n]. *)
+let take n l =
+  let rec loop acc n = function
+    | x :: xs when n > 0 -> loop (x :: acc) (n - 1) xs
+    | _ -> List.rev acc
+  in
+  loop [] n l
let lines = List.length
let columns csv =
List.rev row
) csv
+(* [is_square csv] is [true] iff every row of [csv] has exactly
+ * [columns csv] cells.  It is vacuously [true] when [csv] has no rows. *)
+let is_square csv =
+  let width = columns csv in
+  let has_full_width row = List.length row = width in
+  List.for_all has_full_width csv
+(* [set_columns cols csv] forces every row of [csv] to exactly [cols]
+ * cells: short rows are padded on the right with "" and long rows are
+ * truncated.  The number of rows is unchanged; [cols <= 0] turns every
+ * row into [[]].
+ * Both the per-row loop and the traversal of the rows are tail-recursive,
+ * so the stack depth does not depend on the size of the CSV. *)
+let set_columns cols csv =
+  (* Build one row of [cols] cells in reverse, then flip it. *)
+  let rec fit acc i cells =
+    if i >= cols then List.rev acc
+    else
+      match cells with
+      | [] -> fit ("" :: acc) (i + 1) []
+      | c :: cs -> fit (c :: acc) (i + 1) cs
+  in
+  List.rev (List.rev_map (fit [] 0) csv)
+(* [set_rows rows csv] returns exactly [rows] rows: the leading rows of
+ * [csv], followed by empty rows ([[]]) if [csv] is too short.  Surplus
+ * rows are dropped and [rows <= 0] gives [[]].
+ * Rows are collected in an accumulator so that the stack depth does not
+ * grow with [rows]. *)
+let set_rows rows csv =
+  let rec loop acc remaining csv =
+    if remaining <= 0 then List.rev acc
+    else
+      match csv with
+      | [] -> loop ([] :: acc) (remaining - 1) []
+      | r :: rs -> loop (r :: acc) (remaining - 1) rs
+  in
+  loop [] rows csv
+(* [set_size rows cols csv] fixes the row count with [set_rows] and then
+ * the width of every row with [set_columns]. *)
+let set_size rows cols csv =
+  let with_rows = set_rows rows csv in
+  set_columns cols with_rows
+(* [sub r c rows cols csv] extracts the [rows] x [cols] region of [csv]
+ * whose top left corner is at row [r], column [c] (both counted from 0).
+ * Parts of the region that lie outside [csv] come back as "" cells,
+ * because [set_rows] and [set_columns] pad.  A negative [r] or [c] acts
+ * like 0, because [drop] ignores non-positive counts. *)
+let sub r c rows cols csv =
+  (* Cut down to the wanted rows first, so the per-row column work below
+   * only touches rows that are returned.  Padding rows are [[]], which
+   * [drop c] leaves alone, so the result is the same as dropping the
+   * columns first. *)
+  let window = set_rows rows (drop r csv) in
+  (* [List.rev_map] followed by [List.rev] keeps the traversal
+   * tail-recursive. *)
+  let window = List.rev (List.rev_map (drop c) window) in
+  set_columns cols window
+(* [to_array csv] converts each row to an array and then the list of
+ * rows to an array.  Row lengths are kept as they are. *)
+let to_array csv =
+  let rows = List.map Array.of_list csv in
+  Array.of_list rows
+(* [of_array csv] converts an array of row arrays back into a list of
+ * row lists, preserving the order of rows and cells. *)
+let of_array csv =
+  Array.to_list (Array.map Array.to_list csv)
let associate header data =
let nr_cols = List.length header in
let rec trunc = function
(** csv.mli - comma separated values parser
- * $Id: csv.mli,v 1.5 2005-05-24 13:52:50 rich Exp $
+ * $Id: csv.mli,v 1.6 2005-11-25 14:06:58 rich Exp $
type t = string list list
* {!columns}.
+val is_square : t -> bool
+(** [is_square csv] returns [true] iff the CSV is "square" (actually
+ * rectangular). This means that each row has the same number of cells,
+ * i.e. every row is exactly [columns csv] cells long. A CSV with no
+ * rows is square.
+ *)
+val set_columns : int -> t -> t
+(** [set_columns cols csv] makes the CSV data square by forcing the width
+ * to the given number of [cols]. Any short rows are padded with blank
+ * (empty string) cells. Any long rows are truncated. The number of rows
+ * is unchanged; if [cols <= 0] every row becomes empty.
+ *)
+val set_rows : int -> t -> t
+(** [set_rows rows csv] makes the CSV data have exactly [rows] rows
+ * by appending empty rows (rows with no cells) or dropping trailing
+ * rows as necessary. If [rows <= 0] the result has no rows at all.
+ *
+ * Note that [set_rows] does not make the CSV square. If you want it
+ * to be square, call either {!Csv.square} or {!Csv.set_columns}
+ * afterwards.
+ *)
+val set_size : int -> int -> t -> t
+(** [set_size rows cols csv] makes the CSV data square by forcing the
+ * size to [rows * cols], adding blank cells or truncating as necessary.
+ * It is the same as calling [set_columns cols (set_rows rows csv)].
+ *)
+val sub : int -> int -> int -> int -> t -> t
+(** [sub r c rows cols csv] returns a subset of [csv]. The subset is
+ * defined as having top left corner at row [r], column [c] (counting
+ * from [0]) and being [rows] deep and [cols] wide.
+ *
+ * The returned CSV will be square: any part of the requested region
+ * that lies outside [csv] is filled with blank cells. A negative [r]
+ * or [c] is treated as [0].
+ *)
+val to_array : t -> string array array
+val of_array : string array array -> t
+(** Convenience functions to convert to and from a matrix representation:
+ * [to_array] turns the list of rows into an array of row arrays, and
+ * [of_array] converts such a matrix back into a list of rows.
+ * [to_array] will produce a ragged matrix (not all rows will have the
+ * same length) unless you call {!Csv.square} first.
+ *)
val associate : string list -> t -> (string * string) list list
(** [associate header data] takes a block of data and converts each
* row in turn into an assoc list which maps column header to data cell.
(* Handy tool for managing CSV files.
- * $Id: csvtool.ml,v 1.1 2005-05-24 13:52:50 rich Exp $
+ * $Id: csvtool.ml,v 1.2 2005-11-25 14:06:58 rich Exp $
open Printf
let cmd_readable ~csv ~chan () =
save_out_readable chan csv
+(* "square" command: apply [square] to the input CSV and hand the result
+ * to [save_out] together with [separator] and [chan]. *)
+let cmd_square ~separator ~csv ~chan () =
+  save_out ~separator chan (square csv)
+(* "sub" command: [args] must be exactly [r; c; rows; cols], each a
+ * string accepted by [int_of_string].  The region they describe is
+ * extracted with [sub] and handed to [save_out] together with
+ * [separator] and [chan].
+ * Raises [Failure] if the number of arguments is wrong or if one of
+ * them is not an integer. *)
+let cmd_sub ~separator ~csv ~chan args =
+  (* Parse one argument, naming it in the error message rather than
+   * letting the bare "int_of_string" failure escape. *)
+  let to_int name s =
+    try int_of_string s
+    with Failure _ ->
+      failwith ("invalid " ^ name ^ " argument to 'sub' command: " ^ s)
+  in
+  match args with
+  | [ r; c; rows; cols ] ->
+      (* Sequential lets check the arguments left to right; the order in
+       * which the components of a tuple are evaluated is unspecified. *)
+      let r = to_int "r" r in
+      let c = to_int "c" c in
+      let rows = to_int "rows" rows in
+      let cols = to_int "cols" cols in
+      save_out ~separator chan (sub r c rows cols csv)
+  | _ ->
+      failwith "unknown arguments to 'sub' command"
(* Process the arguments. *)
let usage =
"csvtool - Copyright (C) 2005 Richard W.M. Jones, Merjis Ltd.
Print the input CSV in a readable format.
+ square
+ Make the CSV square, so all rows have the same length.
+ sub r c rows cols
+ Take a square subset of the CSV, top left at row r, column c (counting
+ from 0), which is rows deep and cols wide.
Input and output files:
csvtool normally processes its input from stdin and writes its output
to stdout. Use the -i and -o options to override this behaviour.
cmd_height ~csv:input ~chan ()
| "readable" ->
cmd_readable ~csv:input ~chan ()
+ | "square" ->
+ cmd_square ~separator:output_sep ~csv:input ~chan ()
+ | "sub" ->
+ cmd_sub ~separator:output_sep ~csv:input ~chan args
| _ -> prerr_endline (Sys.executable_name ^ " --help for usage")