-let cmd_cols ~separator ~csv ~chan cols =
- let cols = List.map int_of_string cols in
-
- let output = List.map (
- fun row ->
- let n = List.length row in
- let row = List.map (
- fun col_wanted ->
- if 0 <= col_wanted && col_wanted < n then
- List.nth row col_wanted
- else
- ""
- ) cols in
- row
- ) csv in
- save_out ~separator chan output
-
-let cmd_namedcols ~separator ~csv ~chan names =
- let header, data =
- match csv with
- | [] -> failwith "no rows in this CSV file"
- | h :: t -> h, t in
- let data = associate header data in
- let data = List.map (
- fun row -> List.map (fun name -> List.assoc name row) names
- ) data in
- save_out ~separator chan data
-
-let cmd_width ~csv ~chan () =
- fprintf chan "%d\n" (columns csv)
-
-let cmd_height ~csv ~chan () =
- fprintf chan "%d\n" (lines csv)
-
-let cmd_readable ~csv ~chan () =
+(*------------------------------ start of code from extlib *)
+exception Invalid_string
+
+let find str sub =
+ let sublen = String.length sub in
+ if sublen = 0 then
+ 0
+ else
+ let found = ref 0 in
+ let len = String.length str in
+ try
+ for i = 0 to len - sublen do
+ let j = ref 0 in
+ while String.unsafe_get str (i + !j) = String.unsafe_get sub !j do
+ incr j;
+ if !j = sublen then begin found := i; raise Exit; end;
+ done;
+ done;
+ raise Invalid_string
+ with
+ Exit -> !found
+
+let split str sep =
+ let p = find str sep in
+ let len = String.length sep in
+ let slen = String.length str in
+ String.sub str 0 p, String.sub str (p + len) (slen - p - len)
+
+let nsplit str sep =
+ if str = "" then []
+ else (
+ let rec nsplit str sep =
+ try
+ let s1 , s2 = split str sep in
+ s1 :: nsplit s2 sep
+ with
+ Invalid_string -> [str]
+ in
+ nsplit str sep
+ )
+
+type 'a mut_list = {
+ hd: 'a;
+ mutable tl: 'a list
+}
+external inj : 'a mut_list -> 'a list = "%identity"
+
+let dummy_node () = { hd = Obj.magic (); tl = [] }
+
+let rec drop n = function
+ | _ :: l when n > 0 -> drop (n-1) l
+ | l -> l
+
+let take n l =
+ let rec loop n dst = function
+ | h :: t when n > 0 ->
+ let r = { hd = h; tl = [] } in
+ dst.tl <- inj r;
+ loop (n-1) r t
+ | _ ->
+ ()
+ in
+ let dummy = dummy_node() in
+ loop n dummy l;
+ dummy.tl
+(*------------------------------ end of extlib code *)
+
+(* Parse column specs. *)
+type colspec = range list
+and range =
+ | Col of int (* 0 *)
+ | Range of int * int (* 2-5 *)
+ | ToEnd of int (* 7- *)
+
+let parse_colspec ~count_zero colspec =
+ let cols = nsplit colspec "," in
+ let cols = List.map (
+ fun col ->
+ try
+ (try
+ let first, second = split col "-" in
+ if second <> "" then
+ Range (int_of_string first, int_of_string second)
+ else
+ ToEnd (int_of_string first)
+ with
+ Invalid_string ->
+ Col (int_of_string col)
+ )
+ with
+ Failure "int_of_string" ->
+ failwith (colspec ^ ":" ^ col ^ ": invalid column-spec")
+ ) cols in
+
+ (* Adjust so columns always count from zero. *)
+ if not count_zero then
+ List.map (
+ function
+ | Col c -> Col (c-1)
+ | Range (s, e) -> Range (s-1, e-1)
+ | ToEnd e -> ToEnd (e-1)
+ ) cols
+ else
+ cols
+
+let rec width_of_colspec = function
+ | [] -> 0
+ | Col c :: rest -> 1 + width_of_colspec rest
+ | Range (s, e) :: rest -> (e-s+1) + width_of_colspec rest
+ | ToEnd _ :: _ ->
+ failwith "width_of_colspec: cannot calculate width of an open column spec (one which contains 'N-')"
+
+(* For closed column specs, this preserves the correct width in the
+ * result.
+ *)
+let cols_of_colspec colspec row =
+ let rec loop = function
+ | [] -> []
+ | Col c :: rest ->
+ (try List.nth row c
+ with Failure "nth" -> "") :: loop rest
+ | Range (s, e) :: rest ->
+ let width = e-s+1 in
+ let range = take width (drop s row) in
+ let range = List.hd (set_columns width [range]) in
+ List.append range (loop rest)
+ | ToEnd e :: rest ->
+ List.append (drop e row) (loop rest)
+ in
+ loop colspec
+
+(* The actual commands. *)
+let cmd_cols ~input_sep ~output_sep ~chan colspec files =
+ List.iter (
+ fun filename ->
+ let csv = load ~separator:input_sep filename in
+ let csv = List.map (cols_of_colspec colspec) csv in
+ save_out ~separator:output_sep chan csv
+ ) files
+
+let cmd_namedcols ~input_sep ~output_sep ~chan names files =
+ List.iter (
+ fun filename ->
+ let csv = load ~separator:input_sep filename in
+ let header, data =
+ match csv with
+ | [] -> failwith "no rows in this CSV file"
+ | h :: t -> h, t in
+ let data = associate header data in
+ let data = List.map (
+ fun row -> List.map (fun name -> List.assoc name row) names
+ ) data in
+ save_out ~separator:output_sep chan data
+ ) files
+
+let cmd_width ~input_sep ~chan files =
+ let width = List.fold_left (
+ fun width filename ->
+ let csv = load ~separator:input_sep filename in
+ let width = max width (columns csv) in
+ width
+ ) 0 files in
+ fprintf chan "%d\n" width
+
+let cmd_height ~input_sep ~chan files =
+ let height = List.fold_left (
+ fun height filename ->
+ let csv = load ~separator:input_sep filename in
+ let height = height + lines csv in
+ height
+ ) 0 files in
+ fprintf chan "%d\n" height
+
+let cmd_readable ~input_sep ~chan files =
+ let csv = List.concat (List.map (load ~separator:input_sep) files) in