1 (** csv.mli - comma separated values parser
3 * $Id: csv.mli,v 1.11 2007-04-23 16:42:33 rich Exp $
6 type t = string list list
7 (** Representation of CSV files: a list of rows, each row being a list of [string] cells. *)
9 exception Bad_CSV_file of string
10 (** Raised when a CSV file is badly formed. The [string] argument is
 * presumably a description of the problem - confirm against the parser. *)
13 (** Work out the number of lines in a CSV file. *)
15 val columns : t -> int
16 (** [columns csv] works out the (maximum) number of columns in [csv]. Note that each
17 * line may be a different length, so this finds the one with the most
21 val load_in : ?separator:char -> in_channel -> t
23 * @param chan Input file stream ([in_channel]) to load the CSV data from.
 * @param separator Optional cell separator; presumably defaults to a comma - confirm.
26 val load : ?separator:char -> string -> t
28 * @param filename Name of the CSV file to load.
29 * If [filename] is ["-"] then load from [stdin].
 * @param separator Optional cell separator; presumably defaults to a comma - confirm.
32 val load_rows : ?separator:char -> (string list -> unit) -> in_channel -> unit
33 (** For very large CSV files which cannot be processed in memory at once,
34 * this function is appropriate. [load_rows f chan] parses the input one row
35 * at a time and calls [f] once for each row, passing that row's cells.
37 * @param f Callout function, applied to each row (a [string list]).
38 * @param chan Input file stream.
 * @param separator Optional cell separator; presumably defaults to a comma - confirm.
41 val trim : ?top:bool -> ?left:bool -> ?right:bool -> ?bottom:bool -> t -> t
42 (** [trim csv] takes a CSV file and returns it with empty cells trimmed.
44 * All four of the option arguments ([~top], [~left], [~right], [~bottom])
47 * The exact behaviour is:
49 * [~right]: If true, remove any empty cells at the right hand end of
50 * any row. The number of columns in the resulting CSV structure will
51 * not necessarily be the same for each row (the result may be ragged).
53 * [~top]: If true, remove any empty rows (no cells, or containing just empty
54 * cells) from the top of the CSV structure.
56 * [~bottom]: If true, remove any empty rows from the bottom of the
59 * [~left]: If true, remove any empty columns from the left of the
60 * CSV structure. Note that [~left] and [~right] are quite different:
61 * [~left] considers the whole CSV structure, whereas [~right] considers
62 * each row in isolation.
66 (** Make the CSV data "square" (actually rectangular). This pads out
67 * each row with empty cells so that all rows are the same length as
68 * the longest row. After this operation, every row will have length
72 val is_square : t -> bool
73 (** [is_square csv] returns [true] iff [csv] is "square" (actually
74 * rectangular), meaning that each row has the same number of cells.
77 val set_columns : int -> t -> t
78 (** [set_columns cols csv] returns [csv] made square by forcing the width
79 * to the given number of [cols]. Any short rows are padded with blank
80 * cells. Any long rows are truncated.
83 val set_rows : int -> t -> t
84 (** [set_rows rows csv] returns [csv] adjusted to have exactly [rows] rows,
85 * by adding empty rows or dropping surplus rows as necessary.
87 * Note that [set_rows] does not make the CSV square. If you want it
88 * to be square, call either {!Csv.square} or {!Csv.set_columns} afterwards.
91 val set_size : int -> int -> t -> t
92 (** [set_size rows cols csv] makes the CSV data square by forcing the
93 * size to [rows] rows by [cols] columns, adding blank cells or truncating
94 * as necessary. It is the same as calling [set_columns cols (set_rows rows csv)]
97 val sub : int -> int -> int -> int -> t -> t
98 (** [sub r c rows cols csv] returns a subset of [csv]. The subset is
99 * defined as having top left corner at row [r], column [c] (counting
100 * from [0]) and being [rows] deep and [cols] wide.
102 * The returned CSV will be square (that is, rectangular).
 * NOTE(review): behaviour when the requested window extends past the
 * edge of [csv] is not stated here - confirm against the implementation.
105 val compare : t -> t -> int
106 (** Compare two CSV files for equality, ignoring blank cells at the end
107 * of a row, and empty rows appended to one or the other. This is
108 * "semantic" equality - roughly speaking, the two CSV files would
109 * look the same if opened in a spreadsheet program.
 * NOTE(review): the [int] result presumably follows the usual [compare]
 * convention ([0] meaning equal) - confirm against the implementation.
112 val concat : t list -> t
113 (** [concat csvs] concatenates CSV files so that they appear side by side,
114 * arranged left to right across the page. Each CSV file (except the final
115 * one) is first squared.
117 * (To concatenate CSV files so that they appear from top to bottom,
118 * just use {!List.concat}).
121 val to_array : t -> string array array
122 val of_array : string array array -> t
123 (** Convenience functions to convert to and from a matrix representation
124 * ([string array array]). [to_array] will produce a ragged matrix (not all
125 * rows will have the same length) unless you call {!Csv.square} first.
128 val associate : string list -> t -> (string * string) list list
129 (** [associate header data] takes a block of data and converts each
130 * row in turn into an assoc list of [(header, cell)] pairs, mapping column header to data cell.
132 * Typically a spreadsheet will have the format:
134 * header1 header2 header3
135 * data11 data12 data13
136 * data21 data22 data23
140 * This function arranges the data into a more usable form which is
141 * robust against changes in column ordering. The output of the
144 * [ ["header1", "data11"; "header2", "data12"; "header3", "data13"];
145 * ["header1", "data21"; "header2", "data22"; "header3", "data23"];
149 * Each row is turned into an assoc list (see {!List.assoc}).
151 * If a row is too short, it is padded with empty cells ([""]). If
152 * a row is too long, it is truncated.
154 * You would typically call this function as:
157 * let header, data = match csv with h :: d -> h, d | [] -> assert false;;
158 * let data = Csv.associate header data;;
161 * The header strings are shared, so the actual space in memory consumed
162 * by the spreadsheet is not much larger.
165 val print : ?separator:char -> t -> unit
166 (** [print csv] prints the CSV data (a [string list list]) - same as [save_out stdout]. *)
168 val save_out : ?separator:char -> out_channel -> t -> unit
169 (** [save_out chan csv] saves the CSV data (a [string list list]) to the channel [chan]. *)
171 val save : ?separator:char -> string -> t -> unit
172 (** [save file csv] saves the CSV data (a [string list list]) to a file;
 * the [string] argument [file] is presumably the file name - confirm. *)
174 val print_readable : t -> unit
175 (** Print the CSV data to [stdout] in a human-readable format. Not much
176 * is guaranteed about how the CSV is printed, except that it will be
177 * easier to follow than a "raw" output done with {!Csv.print}. This is
178 * a one-way operation: there is no easy way to parse the output of
179 * this function back into CSV data.
181 val save_out_readable : out_channel -> t -> unit
182 (** As for {!Csv.print_readable}, but the output is sent to the given [out_channel].