From f6dbacda332fa9a11a36b3424516409d387058a1 Mon Sep 17 00:00:00 2001 From: rich Date: Fri, 25 Nov 2005 14:06:58 +0000 Subject: [PATCH] Implemented: Csv.square Csv.is_square Csv.set_columns Csv.set_rows Csv.set_size Csv.sub Csv.to_array Csv.of_array --- csv.ml | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- csv.mli | 42 +++++++++++++++++++++++++++++++++++++++++- csvtool.ml | 28 +++++++++++++++++++++++++++- 3 files changed, 119 insertions(+), 3 deletions(-) diff --git a/csv.ml b/csv.ml index 1c0ae62..0ff0371 100644 --- a/csv.ml +++ b/csv.ml @@ -1,6 +1,6 @@ (* csv.ml - comma separated values parser * - * $Id: csv.ml,v 1.6 2005-08-13 10:10:31 rich Exp $ + * $Id: csv.ml,v 1.7 2005-11-25 14:06:58 rich Exp $ *) (* The format of CSV files: @@ -49,6 +49,15 @@ let rec dropwhile f = function | x :: xs when f x -> dropwhile f xs | xs -> xs +(* from extlib: *) +let rec drop n = function + | _ :: l when n > 0 -> drop (n-1) l + | l -> l + +let rec take n = function + | x :: xs when n > 0 -> x :: take (pred n) xs + | _ -> [] + let lines = List.length let columns csv = @@ -221,6 +230,47 @@ let square csv = List.rev row ) csv +let is_square csv = + let columns = columns csv in + List.for_all (fun row -> List.length row = columns) csv + +let rec set_columns cols = function + | [] -> [] + | r :: rs -> + let rec loop i cells = + if i < cols then ( + match cells with + | [] -> "" :: loop (succ i) [] + | c :: cs -> c :: loop (succ i) cs + ) + else [] + in + loop 0 r :: set_columns cols rs + +let rec set_rows rows csv = + if rows > 0 then ( + match csv with + | [] -> [] :: set_rows (pred rows) [] + | r :: rs -> r :: set_rows (pred rows) rs + ) + else [] + +let set_size rows cols csv = + set_columns cols (set_rows rows csv) + +let sub r c rows cols csv = + let csv = drop r csv in + let csv = List.map (drop c) csv in + let csv = set_rows rows csv in + let csv = set_columns cols csv in + csv + +let to_array csv = + Array.of_list (List.map Array.of_list csv) + +let of_array csv = + List.map Array.to_list (Array.to_list csv) + let associate header data = let nr_cols = List.length header in let rec trunc = function diff --git a/csv.mli b/csv.mli index b35e480..a8e570d 100644 --- a/csv.mli +++ b/csv.mli @@ -1,6 +1,6 @@ (** csv.mli - comma separated values parser * - * $Id: csv.mli,v 1.5 2005-05-24 13:52:50 rich Exp $ + * $Id: csv.mli,v 1.6 2005-11-25 14:06:58 rich Exp $ *) type t = string list list @@ -72,6 +72,46 @@ val square : t -> t * {!columns}. *) +val is_square : t -> bool +(** Return true iff the CSV is "square" (actually rectangular). This + * means that each row has the same number of cells. + *) + +val set_columns : int -> t -> t +(** [set_columns cols csv] makes the CSV data square by forcing the width + * to the given number of [cols]. Any short rows are padded with blank + * cells. Any long rows are truncated. + *) + +val set_rows : int -> t -> t +(** [set_rows rows csv] makes the CSV data have exactly [rows] rows + * by adding empty rows or truncating rows as necessary. + * + * Note that [set_rows] does not make the CSV square. If you want it + * to be square, call either {!Csv.square} or {!Csv.set_columns} after. + *) + +val set_size : int -> int -> t -> t +(** [set_size rows cols csv] makes the CSV data square by forcing the + * size to [rows * cols], adding blank cells or truncating as necessary. + * It is the same as calling [set_columns cols (set_rows rows csv)] + *) + +val sub : int -> int -> int -> int -> t -> t +(** [sub r c rows cols csv] returns a subset of [csv]. The subset is + * defined as having top left corner at row [r], column [c] (counting + * from [0]) and being [rows] deep and [cols] wide. + * + * The returned CSV will be square. + *) + +val to_array : t -> string array array +val of_array : string array array -> t +(** Convenience functions to convert to and from a matrix representation. + * [to_array] will produce a ragged matrix (not all rows will have the + * same length) unless you call {!Csv.square} first. + *) + val associate : string list -> t -> (string * string) list list (** [associate header data] takes a block of data and converts each * row in turn into an assoc list which maps column header to data cell. diff --git a/csvtool.ml b/csvtool.ml index 0b82a4b..1cee449 100644 --- a/csvtool.ml +++ b/csvtool.ml @@ -1,5 +1,5 @@ (* Handy tool for managing CSV files. - * $Id: csvtool.ml,v 1.1 2005-05-24 13:52:50 rich Exp $ + * $Id: csvtool.ml,v 1.2 2005-11-25 14:06:58 rich Exp $ *) open Printf @@ -42,6 +42,21 @@ let cmd_height ~csv ~chan () = let cmd_readable ~csv ~chan () = save_out_readable chan csv +let cmd_square ~separator ~csv ~chan () = + let csv = square csv in + save_out ~separator chan csv + +let cmd_sub ~separator ~csv ~chan args = + let r, c, rows, cols = + match args with + | [ r; c; rows; cols ] -> + int_of_string r, int_of_string c, + int_of_string rows, int_of_string cols + | _ -> + failwith "unknown arguments to 'sub' command" in + let csv = sub r c rows cols csv in + save_out ~separator chan csv + (* Process the arguments. *) let usage = "csvtool - Copyright (C) 2005 Richard W.M. Jones, Merjis Ltd. @@ -70,6 +85,13 @@ Commands: readable Print the input CSV in a readable format. + square + Make the CSV square, so all rows have the same length. + + sub r c rows cols + Take a square subset of the CSV, top left at row r, column c (counting + from 0), which is rows deep and cols wide. + Input and output files: csvtool normally processes its input from stdin and writes its output to stdout. Use the -i and -o options to override this behaviour. @@ -144,6 +166,10 @@ let () = cmd_height ~csv:input ~chan () | "readable" -> cmd_readable ~csv:input ~chan () + | "square" -> + cmd_square ~separator:output_sep ~csv:input ~chan () + | "sub" -> + cmd_sub ~separator:output_sep ~csv:input ~chan args | _ -> prerr_endline (Sys.executable_name ^ " --help for usage") ); -- 1.8.3.1