X-Git-Url: http://git.annexia.org/?a=blobdiff_plain;f=csv.mli;h=14901e48120b68715cb95f83b9bb65a21fa85a75;hb=3a2b495714080c9f72b89cada8bc45b22768a059;hp=dacb3dca235ce7db859ca31f56b5da2bfeef7c03;hpb=f04d2c293ee8ee855fd28e7fa51e5789d8b43e5c;p=ocaml-csv.git diff --git a/csv.mli b/csv.mli index dacb3dc..14901e4 100644 --- a/csv.mli +++ b/csv.mli @@ -1,13 +1,13 @@ (** csv.mli - comma separated values parser * - * $Id: csv.mli,v 1.2 2004-12-06 17:40:50 rich Exp $ + * $Id: csv.mli,v 1.10 2006-11-24 15:49:24 rich Exp $ *) type t = string list list (** Representation of CSV files. *) exception Bad_CSV_file of string -(** Badly formed CSV files throw this exception: *) +(** Badly formed CSV files throw this exception. *) val lines : t -> int (** Work out the number of lines in a CSV file. *) @@ -18,17 +18,18 @@ val columns : t -> int * columns. *) -val load_in : in_channel -> t +val load_in : ?separator:char -> in_channel -> t (** Load a CSV file. * @param chan Input file stream *) -val load : string -> t +val load : ?separator:char -> string -> t (** Load a CSV file. * @param filename CSV filename. + * If [filename] is ["-"] then load from [stdin]. *) -val load_rows : (string list -> unit) -> in_channel -> unit +val load_rows : ?separator:char -> (string list -> unit) -> in_channel -> unit (** For very large CSV files which cannot be processed in memory at once, * this function is appropriate. It parses the input one row at a time and * calls your function once for each row. @@ -65,11 +66,122 @@ val trim : ?top:bool -> ?left:bool -> ?right:bool -> ?bottom:bool -> t -> t * each row in isolation. *) -val print : t -> unit +val square : t -> t +(** Make the CSV data "square" (actually rectangular). This pads out + * each row with empty cells so that all rows are the same length as + * the longest row. After this operation, every row will have length + * {!Csv.columns}. + *) + +val is_square : t -> bool +(** Return true iff the CSV is "square" (actually rectangular). 
This + * means that each row has the same number of cells. + *) + +val set_columns : int -> t -> t +(** [set_columns cols csv] makes the CSV data square by forcing the width + * to the given number of [cols]. Any short rows are padded with blank + * cells. Any long rows are truncated. + *) + +val set_rows : int -> t -> t +(** [set_rows rows csv] makes the CSV data have exactly [rows] rows + * by adding empty rows or truncating rows as necessary. + * + * Note that [set_rows] does not make the CSV square. If you want it + * to be square, call either {!Csv.square} or {!Csv.set_columns} after. + *) + +val set_size : int -> int -> t -> t +(** [set_size rows cols csv] makes the CSV data square by forcing the + * size to [rows * cols], adding blank cells or truncating as necessary. + * It is the same as calling [set_columns cols (set_rows rows csv)] + *) + +val sub : int -> int -> int -> int -> t -> t +(** [sub r c rows cols csv] returns a subset of [csv]. The subset is + * defined as having top left corner at row [r], column [c] (counting + * from [0]) and being [rows] deep and [cols] wide. + * + * The returned CSV will be square. + *) + +val compare : t -> t -> int +(** Compare two CSV files for equality, ignoring blank cells at the end + * of a row, and empty rows appended to one or the other. This is + * "semantic" equality - roughly speaking, the two CSV files would + * look the same if opened in a spreadsheet program. + *) + +val concat : t list -> t +(** Concatenate CSV files so that they appear side by side, arranged + * left to right across the page. Each CSV file (except the final + * one) is first squared. + * + * (To concatenate CSV files so that they appear from top to bottom, + * just use {!List.concat}). + *) + +val to_array : t -> string array array +val of_array : string array array -> t +(** Convenience functions to convert to and from a matrix representation. 
+ * [to_array] will produce a ragged matrix (not all rows will have the + * same length) unless you call {!Csv.square} first. + *) + +val associate : string list -> t -> (string * string) list list +(** [associate header data] takes a block of data and converts each + * row in turn into an assoc list which maps column header to data cell. + * + * Typically a spreadsheet will have the format: + * {v + * header1 header2 header3 + * data11 data12 data13 + * data21 data22 data23 + * ... + * v} + * + * This function arranges the data into a more usable form which is + * robust against changes in column ordering. The output of the + * function is: + * {v + * [ ["header1", "data11"; "header2", "data12"; "header3", "data13"]; + * ["header1", "data21"; "header2", "data22"; "header3", "data23"]; + * etc. ] + * v} + * + * Each row is turned into an assoc list (see [List.assoc]). + * + * If a row is too short, it is padded with empty cells ([""]). If + * a row is too long, it is truncated. + * + * You would typically call this function as: + * + * {v + * let header, data = match csv with h :: d -> h, d | [] -> assert false;; + * let data = Csv.associate header data;; + * v} + * + * The header strings are shared, so the actual space in memory consumed + * by the spreadsheet is not much larger. + *) + +val print : ?separator:char -> t -> unit (** Print string list list - same as [save_out stdout] *) -val save_out : out_channel -> t -> unit +val save_out : ?separator:char -> out_channel -> t -> unit (** Save string list list to a channel. *) -val save : string -> t -> unit +val save : ?separator:char -> string -> t -> unit (** Save string list list to a file. *) + +val print_readable : t -> unit +(** Print the CSV data to [stdout] in a human-readable format. Not much + * is guaranteed about how the CSV is printed, except that it will be + * easier to follow than a "raw" output done with {!Csv.print}. This is + * a one-way operation. 
There is no easy way to parse the output of + * this function back into CSV data. + *) +val save_out_readable : out_channel -> t -> unit +(** As for {!Csv.print_readable}, but sending the output to the given channel. + *)