(* csv.ml - comma separated values parser
*
- * $Id: csv.ml,v 1.2 2004-12-06 17:40:50 rich Exp $
+ * $Id: csv.ml,v 1.3 2004-12-22 13:47:51 rich Exp $
*)
(* The format of CSV files:
| InQuotedField
| InQuotedFieldAfterQuote
-let load_rows f chan =
+let load_rows ?(separator = ',') f chan =
let row = ref [] in (* Current row. *)
let field = ref [] in (* Current field. *)
let state = ref StartField in (* Current state. *)
if c = '\"' then (
state := InQuotedField;
field := []
- ) else if c = ',' then (* Empty field. *)
+ ) else if c = separator then (* Empty field. *)
empty_field ()
else if c = '\n' then ( (* Empty field, end of row. *)
empty_field ();
field := [c]
)
| InUnquotedField -> (* Reading chars to end of field. *)
- if c = ',' then (* End of field. *)
+ if c = separator then (* End of field. *)
end_of_field ()
else if c = '\n' then ( (* End of field and end of row. *)
end_of_field ();
) else if c = '0' then ( (* Quote-0 is ASCII NUL. *)
field := '\000' :: !field;
state := InQuotedField
- ) else if c = ',' then (* End of field. *)
+ ) else if c = separator then (* End of field. *)
end_of_field ()
else if c = '\n' then ( (* End of field and end of row. *)
end_of_field ();
raise (Bad_CSV_file "Missing end quote after quoted field.")
)
-let load_in chan =
+let load_in ?separator chan = (* Read every row available on [chan] into an in-memory list; [separator] is forwarded to [load_rows]. *)
let csv = ref [] in
let f row =
csv := row :: !csv
in
- load_rows f chan;
+ load_rows ?separator f chan; (* [f] conses each row onto [csv], so rows pile up newest-first and are reversed below *)
List.rev !csv
-let load filename =
+(* [load ?separator filename] opens [filename], parses it with [load_in]
+ * and returns the rows.  [separator] is forwarded unchanged to [load_in].
+ *
+ * Any exception raised while parsing (for instance [Bad_CSV_file] from
+ * the row parser) is propagated to the caller, but only after the input
+ * channel has been closed, so a malformed file no longer leaks a file
+ * descriptor.
+ *)
+let load ?separator filename =
let chan = open_in filename in
- let csv = load_in chan in
+  let csv =
+    try load_in ?separator chan
+    with e ->
+      (* close_in_noerr cannot raise, so the original exception is the
+       * one the caller sees. *)
+      (close_in_noerr chan; raise e)
+  in
close_in chan;
csv
csv
-(* Quote a single CSV field. *)
-let quote_field field =
- if String.contains field ',' ||
- String.contains field '\"' ||
- String.contains field '\n'
- then (
- let buffer = Buffer.create 100 in
- Buffer.add_char buffer '\"';
- for i = 0 to (String.length field) - 1 do
- match field.[i] with
- '\"' -> Buffer.add_string buffer "\"\""
- | c -> Buffer.add_char buffer c
- done;
- Buffer.add_char buffer '\"';
- Buffer.contents buffer
- )
- else
- field
-
-let save_out chan csv =
+let associate header data = (* Turn each row of [data] into an assoc list keyed by the names in [header]. *)
+ let nr_cols = List.length header in
+ let rec trunc = function (* [trunc (n, cells)] yields exactly [n] cells *)
+ | 0, _ -> [] (* width reached: any remaining cells are dropped *)
+ | n, [] -> "" :: trunc (n-1, []) (* row ran out early: pad with empty cells *)
+ | n, (x :: xs) -> x :: trunc (n-1, xs)
+ in
+ List.map (
+ fun row ->
+ let row = trunc (nr_cols, row) in (* now the same length as [header], which List.combine requires *)
+ List.combine header row
+ ) data
+
+let save_out ?(separator = ',') chan csv = (* Write [csv] to [chan], one row per line, fields joined by [separator] (default [',']). *)
+ (* Quote a single CSV field. *)
+ let quote_field field =
+ if String.contains field separator || (* quoting is applied only when the field holds the separator, a double quote or a newline *)
+ String.contains field '\"' ||
+ String.contains field '\n'
+ then (
+ let buffer = Buffer.create 100 in (* 100 is only the initial capacity; the buffer grows as needed *)
+ Buffer.add_char buffer '\"';
+ for i = 0 to (String.length field) - 1 do
+ match field.[i] with
+ '\"' -> Buffer.add_string buffer "\"\"" (* an embedded double quote is written twice *)
+ | c -> Buffer.add_char buffer c
+ done;
+ Buffer.add_char buffer '\"';
+ Buffer.contents buffer
+ )
+ else
+ field (* nothing special in the field: emit it unchanged *)
+ in
+
+ let separator = String.make 1 separator in (* shadow the char with a one-character string, the type String.concat expects *)
List.iter (fun line ->
- output_string chan (String.concat ","
+ output_string chan (String.concat separator (* join the quoted fields of this row *)
(List.map quote_field line));
output_char chan '\n') csv
-let print csv =
- save_out stdout csv
+let print ?separator csv = (* Write [csv] to standard output; [separator] is forwarded to [save_out]. *)
+ save_out ?separator stdout csv
-let save file csv =
+(* [save ?separator file csv] opens [file] for writing with [open_out]
+ * and writes [csv] to it with [save_out].  [separator] is forwarded
+ * unchanged to [save_out].
+ *
+ * If writing raises, the output channel is closed before the exception
+ * is propagated, so a failed save no longer leaks the channel.
+ *)
+let save ?separator file csv =
let chan = open_out file in
- save_out chan csv;
+  (try save_out ?separator chan csv
+   with e ->
+     (* close_out_noerr cannot raise, so the original exception is the
+      * one the caller sees. *)
+     (close_out_noerr chan; raise e));
close_out chan
(** csv.mli - comma separated values parser
*
- * $Id: csv.mli,v 1.2 2004-12-06 17:40:50 rich Exp $
+ * $Id: csv.mli,v 1.3 2004-12-22 13:47:51 rich Exp $
*)
type t = string list list
* columns.
*)
-val load_in : in_channel -> t
+val load_in : ?separator:char -> in_channel -> t
(** Load a CSV file.
* @param chan Input file stream
+ * @param separator Character that separates fields; [','] when omitted.
*)
-val load : string -> t
+val load : ?separator:char -> string -> t
(** Load a CSV file.
* @param filename CSV filename.
+ * @param separator Character that separates fields; [','] when omitted.
*)
-val load_rows : (string list -> unit) -> in_channel -> unit
+val load_rows : ?separator:char -> (string list -> unit) -> in_channel -> unit
(** For very large CSV files which cannot be processed in memory at once,
* this function is appropriate. It parses the input one row at a time and
* calls your function once for each row.
* each row in isolation.
*)
-val print : t -> unit
+val associate : string list -> t -> (string * string) list list
+(** [associate header data] takes a block of data and converts each
+ * row in turn into an assoc list which maps column header to data cell.
+ *
+ * Typically a spreadsheet will have the format:
+ * {v
+ * header1 header2 header3
+ * data11 data12 data13
+ * data21 data22 data23
+ * ...
+ * v}
+ *
+ * This function arranges the data into a more usable form which is
+ * robust against changes in column ordering. The output of the
+ * function is:
+ * {v
+ * [ ["header1", "data11"; "header2", "data12"; "header3", "data13"];
+ * ["header1", "data21"; "header2", "data22"; "header3", "data23"];
+ * etc. ]
+ * v}
+ *
+ * Each row is turned into an assoc list (see {!List.assoc}).
+ *
+ * If a row is too short, it is padded with empty cells ([""]). If
+ * a row is too long, it is truncated.
+ *
+ * You would typically call this function as:
+ *
+ * {v
+ * let header, data = match csv with h :: d -> h, d | [] -> assert false;;
+ * let data = Csv.associate header data;;
+ * v}
+ *
+ * The header strings are shared, so the actual space in memory consumed
+ * by the spreadsheet is not much larger.
+ *)
+
+val print : ?separator:char -> t -> unit
(** Print string list list - same as [save_out stdout] *)
-val save_out : out_channel -> t -> unit
+val save_out : ?separator:char -> out_channel -> t -> unit
(** Save string list list to a channel. *)
-val save : string -> t -> unit
+val save : ?separator:char -> string -> t -> unit
(** Save string list list to a file. *)