1 (* 'diskzip' command for intelligently compressing disk images.
2 (C) Copyright 2007 Richard W.M. Jones, Red Hat Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 open Diskzip_gettext.Gettext
28 type output = File of string | Dir of string
29 type extcompress = BZip2 | GZip | External of string
32 let max_image_name = 256
35 (* Program name changes behaviour. *)
37 let name = Sys.argv.(0) in
38 let name = Filename.basename name in (* just the executable name *)
39 let name = (* remove .opt or .exe *)
40 try Filename.chop_extension name
41 with Invalid_argument("Filename.chop_extension") -> name in
42 let name = String.lowercase name in
48 (f_"diskzip: unknown executable name '%s', assuming 'diskzip'\n")
51 let compressing = ref compressing in
53 (* Command line argument parsing. *)
55 printf "diskzip\n"; (* XXX version XXX *)
59 let output = ref None in
61 if !output <> None then (
62 prerr_endline (s_"diskzip: '-o' option cannot appear more than once");
66 let statbuf = stat path in
67 if statbuf.st_kind = S_DIR then
68 output := Some (Dir path)
70 output := Some (File path)
72 (* No such file or directory, assume it's a file output. *)
73 | Unix_error (ENOENT, _, _) -> output := Some (File path)
76 (* By default we don't use any external compression program. *)
77 let extcompress = ref None in
78 let set_extcompress t () =
79 if !extcompress <> None then (
80 prerr_endline (s_"diskzip: '-z' or '-j' cannot appear more than once");
86 let force = ref false in
88 let argspec = Arg.align [
89 "-d", Arg.Clear compressing,
90 " " ^ s_ "Uncompress (default: depends on executable name)";
91 "--debug", Arg.Set Diskimage.debug,
92 " " ^ s_ "Debug mode (default: false)";
94 " " ^ s_"Force compress even if stdout looks like a tty";
95 "-j", Arg.Unit (set_extcompress BZip2),
96 " " ^ s_"Pipe the output/input through bzip2";
97 "-o", Arg.String set_output,
98 "path " ^ s_"Set the output filename or directory name";
99 "-p", Arg.String (fun prog -> set_extcompress (External prog) ()),
100 "prog " ^ s_"Pipe the output/input through external program";
101 "--version", Arg.Unit version,
102 " " ^ s_"Display version and exit";
103 "-z", Arg.Unit (set_extcompress GZip),
104 " " ^ s_"Pipe the output/input through gzip";
108 let anon_fun str = args := str :: !args in
109 let usage_msg = s_"diskzip: Intelligently compress disk images
112 diskzip [-options] disk.img [disk.img ...] > output.dz
113 diskzcat [-options] output.dz > disk.img
117 Arg.parse argspec anon_fun usage_msg;
119 (* Turn refs back into normal values. *)
120 let compressing = !compressing in
121 let extcompress = !extcompress in
122 let output = !output in
123 let force = !force in
126 (* Check the arguments make sense. *)
127 if compressing && output <> None then (
128 prerr_endline (s_"diskzip: '-o' option cannot be used when compressing");
131 if compressing && args = [] then (
132 prerr_endline (s_"diskzip: no input");
135 if compressing && not force && isatty stdout then (
136 prerr_endline (s_"diskzip: compressed data not written to a terminal, use '-f' to force");
140 (* Run the compression or decompression functions. *)
142 go_compress extcompress args
144 go_decompress ?output extcompress args
146 (* Do compression. *)
147 and go_compress extcompress images =
148 (* Create a Diskimage machine description from the requested images. This
149 * also checks that everything we need is readable.
152 Diskimage.open_machine "diskzip" (List.map (fun n -> (n,n)) images) in
154 (* Scan the images for filesystems. *)
155 let machine = Diskimage.scan_machine machine in
157 (* Create ownership tables. *)
158 let ownership = Diskimage.create_ownership machine in
160 (* Redirect output through external pipe if asked. *)
161 (match extcompress with
166 | BZip2 -> "bzip2", [|"bzip2"; "-c"|]
167 | GZip -> "gzip", [|"gzip"; "-c"|]
168 | External prog -> "sh", [|"sh"; "-c"; prog |] in
169 let rfd, wfd = pipe () in
171 if pid = 0 then ( (* child *)
176 ) else ( (* parent *)
183 (* Write the main header. *)
184 write_header machine.Diskimage.m_disks;
186 (* Iterate over the disks. *)
188 fun disknum { Diskimage.d_name = name; d_dev = disk } ->
189 let blocksize = disk#blocksize in
190 let size = disk#size in (* Size in bytes. *)
191 let nr_blocks = size /^ blocksize in (* Number of disk sectors. *)
193 if !Diskimage.debug then
194 eprintf "Writing disk %s (%s sectors) ...\n%!"
195 disk#name (Int63.to_string nr_blocks);
197 (* Get the lookup function for this disk. *)
199 Diskimage.get_owners_lookup machine ownership disk in
201 (* Convenience function to look up a block and test freeness. *)
202 let block_is_free blk =
203 let offset = blk *^ blocksize in
204 Diskimage.offset_is_free (lookup_offset offset)
207 (* Look up owners for each sector in turn. *)
208 let rec loop start_blk =
209 if start_blk < nr_blocks then (
210 (* The current sector (start_blk) is either free or not free.
211 * Look for a stretch of sectors which are the same.
213 let current_free = block_is_free start_blk in
214 let rec find_end blk =
215 if blk < nr_blocks then (
216 if block_is_free blk = current_free then
217 find_end (Int63.succ blk)
221 nr_blocks (* End of the disk image. *)
223 let end_blk = find_end (Int63.succ start_blk) in
225 let len_blks = end_blk -^ start_blk in
227 let start_offset = start_blk *^ blocksize in
228 let len_bytes = len_blks *^ blocksize in
230 (* Current stretch is from start_blk .. end_blk-1. *)
231 if !Diskimage.debug then
232 eprintf " %s stretch %s to %s-1 (%s bytes)\n%!"
233 (if current_free then "free" else "used")
234 (Int63.to_string start_blk) (Int63.to_string end_blk)
235 (Int63.to_string (len_blks *^ blocksize));
237 (* Write the stretch to stdout. *)
238 write_stretch_header disknum current_free start_offset len_bytes;
240 (* Write the data (note: we only need to write it if
241 * it's not marked as free!).
243 if not current_free then write_stretch disk start_offset len_bytes;
249 ) machine.Diskimage.m_disks;
253 and go_decompress ?output extcompress args =
254 (* Read the input, which may be a single named file, or a series of
255 * files (we just concatenate them). We may have to feed the input
256 * through an external program.
260 | [] -> () (* Reading from stdin. *)
261 | [file] -> (* Read the named file. *)
262 let fd = openfile file [O_RDONLY] 0 in
265 | files -> (* Concatenate files. *)
266 let rfd, wfd = pipe () in
268 if pid = 0 then ( (* child *)
272 execvp "cat" (Array.of_list ("cat" :: "--" :: files))
273 ) else ( (* parent *)
279 (match extcompress with
284 | BZip2 -> "bzip2", [|"bzip2"; "-cd"|]
285 | GZip -> "gzip", [|"gzip"; "-cd"|]
286 | External prog -> "sh", [|"sh"; "-c"; prog |] in
287 let rfd, wfd = pipe () in
289 if pid = 0 then ( (* child *)
294 ) else ( (* parent *)
302 let header = read_header () in
314 (* Since we have the wonderful pa_bitmatch, might as well use it to
315 * define a robust binary format for the compressed files.
317 * These functions are in matched pairs "write_foo" / "read_foo" so
318 * you can check that the write and read protocols agree.
320 and write_header disks =
321 let nr_disks = List.length disks in
322 assert (nr_disks > 0);
324 (* Don't allow large numbers of disks. *)
325 if nr_disks > max_disks then (
326 eprintf (f_"diskzip: maximum number of disk images is limited by the current image format to %d") nr_disks;
329 let names = List.map (
330 fun { Diskimage.d_name = name } ->
333 let i = 1 + String.rindex name '/' in
334 String.sub name i (String.length name - i)
336 Invalid_string | Not_found -> name in
338 let contains_dotdot =
339 try ignore (String.find name ".."); true
340 with Invalid_string | Not_found -> false in
341 if contains_dotdot then (
342 prerr_endline (s_"diskzip: disk image names cannot contain \"..\"");
346 (* Don't allow very long names. *)
347 if String.length name > max_image_name then (
348 eprintf (f_"diskzip: maximum length of disk image name is limited by the current image format to %d bytes") max_image_name;
355 (* Header followed by names. *)
359 0xD152 : 16; 0x01 : 8; 0x00 : 8; (* file magic, version 1.0 *)
360 nr_disks : 8 (* number of disks being packed *)
362 let len = Bitmatch.bitstring_length bs in
363 assert (len land 7 = 0);
364 Bitmatch.string_of_bitstring bs in
365 let names = List.map (
369 String.length name : 16;
372 let len = Bitmatch.bitstring_length bs in
373 assert (len land 7 = 0);
374 Bitmatch.string_of_bitstring bs
377 (* Construct the final header. *)
378 header ^ String.concat "" names in
380 ignore (write stdout header 0 (String.length header))
384 (* Diskzip headers are limited to overall max size of 1024 bytes. *)
385 let bs = Bitmatch.bitstring_of_file_descr_max stdin 1024 in
388 | { 0xD152 : 16; (* file magic *)
389 0x01 : 8; (_ as minor) : 8; (* major, minor versions *)
392 (* Is this a later version (major != 1)? *)
393 | { 0xD152 : 16; (* file magic *)
394 (_ as major) : 8; (_ as minor) : 8 } when major <> 1 ->
395 eprintf (f_"diskzip: archive version %d.%d, this program only understands version 1.x")
399 (* If it looks like gzip or bzip2, exit with an informative error. *)
400 | { 0o37 : 8; 0o213 : 8 } -> (* gzip *)
401 prerr_endline (s_"diskzip: This looks like a gzip archive. Did you mean to pass the '-z' option?");
403 | { "BZh" : 24 : string } -> (* bzip2 *)
404 prerr_endline (s_"diskzip: This looks like a bzip2 archive. Did you mean to pass the '-j' option?");
407 (* If it looks like a disk image (MBR), give an error. *)
408 | { _ : 4080 : bitstring; 0x55 : 8; 0xAA : 8 } ->
409 prerr_endline (s_"diskzip: This looks like a disk image. Did you mean to compress it?");
413 prerr_endline (s_"diskzip: Not a diskzip archive.");
417 and write_stretch_header disknum free start_offset len_bytes =
418 let start_offset = Int63.to_int64 start_offset in
419 let len_bytes = Int63.to_int64 len_bytes in
422 (* Stretch header magic. Allows us to find synchronization errors. *)
424 free : 1; 0 : 7; (* Flags. *)
425 disknum : 8; (* Disk number. *)
426 start_offset : 64; (* Start offset in disk image (bytes)*)
427 len_bytes : 64 (* Length in bytes. *)
430 let str = Bitmatch.string_of_bitstring bs in
431 ignore (write stdout str 0 (String.length str))
433 and write_stretch dev start_offset len_bytes =
434 (* Limit size of blocks that we write, since some implementations
435 * of device class cannot handle large blocks. In any case I tested
436 * this and writing large blocks isn't any faster.
438 let blocksize = ~^65536 in
439 let rec loop offset len =
441 let n = min blocksize len in
442 ignore (write stdout (dev#read offset n) 0 (Int63.to_int n));
443 loop (offset+^n) (len-^n)
446 loop start_offset len_bytes
448 and write_trailer () =
452 let str = Bitmatch.string_of_bitstring bs in
453 ignore (write stdout str 0 (String.length str))