(* 'diskzip' command for intelligently compressing disk images. (C) Copyright 2007 Richard W.M. Jones, Red Hat Inc. http://libvirt.org/ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *) open Unix open ExtList open ExtString open Printf open Int63.Operators open Diskzip_gettext.Gettext type output = File of string | Dir of string type extcompress = BZip2 | GZip | External of string let max_disks = 32 let max_image_name = 256 let rec main () = (* Program name changes behaviour. *) let compressing = let name = Sys.argv.(0) in let name = Filename.basename name in (* just the executable name *) let name = (* remove .opt or .exe *) try Filename.chop_extension name with Invalid_argument("Filename.chop_extension") -> name in let name = String.lowercase name in match name with | "diskzcat" -> false | "diskzip" -> true | _ -> eprintf (f_"diskzip: unknown executable name '%s', assuming 'diskzip'\n") name; true in let compressing = ref compressing in (* Command line argument parsing. *) let version () = printf "diskzip\n"; (* XXX version XXX *) exit 0 in let output = ref None in let set_output path = if !output <> None then ( prerr_endline (s_"diskzip: '-o' option cannot appear more than once"); exit 2 ); try let statbuf = stat path in if statbuf.st_kind = S_DIR then output := Some (Dir path) else output := Some (File path) with (* No such file or directory, assume it's a file output. *) | Unix_error (ENOENT, _, _) -> output := Some (File path) in (* By default we don't use any external compression program. *) let extcompress = ref None in let set_extcompress t () = if !extcompress <> None then ( prerr_endline (s_"diskzip: '-z' or '-j' cannot appear more than once"); exit 2 ); extcompress := Some t in let force = ref false in let argspec = Arg.align [ "-d", Arg.Clear compressing, " " ^ s_ "Uncompress (default: depends on executable name)"; "--debug", Arg.Set Diskimage.debug, " " ^ s_ "Debug mode (default: false)"; "-f", Arg.Set force, " " ^ s_"Force compress even if stdout looks like a tty"; "-j", Arg.Unit (set_extcompress BZip2), " " ^ s_"Pipe the output/input through bzip2"; "-o", Arg.String set_output, "path " ^ s_"Set the output filename or directory name"; "-p", Arg.String (fun prog -> set_extcompress (External prog) ()), "prog " ^ s_"Pipe the output/input through external program"; "--version", Arg.Unit version, " " ^ s_"Display version and exit"; "-z", Arg.Unit (set_extcompress GZip), " " ^ s_"Pipe the output/input through gzip"; ] in let args = ref [] in let anon_fun str = args := str :: !args in let usage_msg = s_"diskzip: Intelligently compress disk images SUMMARY diskzip [-options] disk.img [disk.img ...] > output.dz diskzcat [-options] output.dz > disk.img OPTIONS" in Arg.parse argspec anon_fun usage_msg; (* Turn refs back into normal values. *) let compressing = !compressing in let extcompress = !extcompress in let output = !output in let force = !force in let args = !args in (* Check the arguments make sense. *) if compressing && output <> None then ( prerr_endline (s_"diskzip: '-o' option cannot be used when compressing"); exit 2 ); if compressing && args = [] then ( prerr_endline (s_"diskzip: no input"); exit 2 ); if compressing && not force && isatty stdout then ( prerr_endline (s_"diskzip: compressed data not written to a terminal, use '-f' to force"); exit 2 ); (* Run the compression or decompression functions. *) if compressing then go_compress extcompress args else go_decompress ?output extcompress args (* Do compression. *) and go_compress extcompress images = (* Create a Diskimage machine description from the requested images. This * also checks that everything we need is readable. *) let machine = Diskimage.open_machine "diskzip" (List.map (fun n -> (n,n)) images) in (* Scan the images for filesystems. *) let machine = Diskimage.scan_machine machine in (* Create ownership tables. *) let ownership = Diskimage.create_ownership machine in (* Redirect output through external pipe if asked. *) (match extcompress with | None -> () | Some prog -> let prog, progargs = match prog with | BZip2 -> "bzip2", [|"bzip2"; "-c"|] | GZip -> "gzip", [|"gzip"; "-c"|] | External prog -> "sh", [|"sh"; "-c"; prog |] in let rfd, wfd = pipe () in let pid = fork () in if pid = 0 then ( (* child *) close wfd; dup2 rfd stdin; close rfd; execvp prog progargs ) else ( (* parent *) close rfd; dup2 wfd stdout; close wfd ) ); (* Write the main header. *) write_header machine.Diskimage.m_disks; (* Iterate over the disks. *) List.iteri ( fun disknum { Diskimage.d_name = name; d_dev = disk } -> let blocksize = disk#blocksize in let size = disk#size in (* Size in bytes. *) let nr_blocks = size /^ blocksize in (* Number of disk sectors. *) if !Diskimage.debug then eprintf "Writing disk %s (%s sectors) ...\n%!" disk#name (Int63.to_string nr_blocks); (* Get the lookup function for this disk. *) let lookup_offset = Diskimage.get_owners_lookup machine ownership disk in (* Convenience function to look up a block and test freeness. *) let block_is_free blk = let offset = blk *^ blocksize in Diskimage.offset_is_free (lookup_offset offset) in (* Look up owners for each sector in turn. *) let rec loop start_blk = if start_blk < nr_blocks then ( (* The current sector (start_blk) is either free or not free. * Look for a stretch of sectors which are the same. *) let current_free = block_is_free start_blk in let rec find_end blk = if blk < nr_blocks then ( if block_is_free blk = current_free then find_end (Int63.succ blk) else blk ) else nr_blocks (* End of the disk image. *) in let end_blk = find_end (Int63.succ start_blk) in let len_blks = end_blk -^ start_blk in let start_offset = start_blk *^ blocksize in let len_bytes = len_blks *^ blocksize in (* Current stretch is from start_blk .. end_blk-1. *) if !Diskimage.debug then eprintf " %s stretch %s to %s-1 (%s bytes)\n%!" (if current_free then "free" else "used") (Int63.to_string start_blk) (Int63.to_string end_blk) (Int63.to_string (len_blks *^ blocksize)); (* Write the stretch to stdout. *) write_stretch_header disknum current_free start_offset len_bytes; (* Write the data (note: we only need to write it if * it's not marked as free!). *) if not current_free then write_stretch disk start_offset len_bytes; loop end_blk ) in loop ~^0 ) machine.Diskimage.m_disks; write_trailer () and go_decompress ?output extcompress args = (* Read the input, which may be a single named file, or a series of * files (we just concatenate them). We may have to feed the input * through an external program. *) let () = match args with | [] -> () (* Reading from stdin. *) | [file] -> (* Read the named file. *) let fd = openfile file [O_RDONLY] 0 in dup2 fd stdin; close fd | files -> (* Concatenate files. *) let rfd, wfd = pipe () in let pid = fork () in if pid = 0 then ( (* child *) close rfd; dup2 wfd stdout; close wfd; execvp "cat" (Array.of_list ("cat" :: "--" :: files)) ) else ( (* parent *) close wfd; dup2 rfd stdin; close rfd ) in (match extcompress with | None -> () | Some prog -> let prog, progargs = match prog with | BZip2 -> "bzip2", [|"bzip2"; "-cd"|] | GZip -> "gzip", [|"gzip"; "-cd"|] | External prog -> "sh", [|"sh"; "-c"; prog |] in let rfd, wfd = pipe () in let pid = fork () in if pid = 0 then ( (* child *) close rfd; dup2 wfd stdout; close wfd; execvp prog progargs ) else ( (* parent *) close wfd; dup2 rfd stdin; close rfd ) ) (* let header = read_header () in XXX *) (* Since we have the wonderful pa_bitmatch, might as well use it to * define a robust binary format for the compressed files. * * These functions are in matched pairs "write_foo" / "read_foo" so * you can check that the write and read protocols agree. *) and write_header disks = let nr_disks = List.length disks in assert (nr_disks > 0); (* Don't allow large numbers of disks. *) if nr_disks > max_disks then ( eprintf (f_"diskzip: maximum number of disk images is limited by the current image format to %d") nr_disks; exit 2 ); let names = List.map ( fun { Diskimage.d_name = name } -> let name = try let i = 1 + String.rindex name '/' in String.sub name i (String.length name - i) with Invalid_string | Not_found -> name in let contains_dotdot = try ignore (String.find name ".."); true with Invalid_string | Not_found -> false in if contains_dotdot then ( prerr_endline (s_"diskzip: disk image names cannot contain \"..\""); exit 2 ); (* Don't allow very long names. *) if String.length name > max_image_name then ( eprintf (f_"diskzip: maximum length of disk image name is limited by the current image format to %d bytes") max_image_name; exit 2 ); name ) disks in (* Header followed by names. *) let header = let header = let bs = BITSTRING { 0xD152 : 16; 0x01 : 8; 0x00 : 8; (* file magic, version 1.0 *) nr_disks : 8 (* number of disks being packed *) } in let len = Bitmatch.bitstring_length bs in assert (len land 7 = 0); Bitmatch.string_of_bitstring bs in let names = List.map ( fun name -> let bs = BITSTRING { String.length name : 16; name : -1 : string } in let len = Bitmatch.bitstring_length bs in assert (len land 7 = 0); Bitmatch.string_of_bitstring bs ) names in (* Construct the final header. *) header ^ String.concat "" names in ignore (write stdout header 0 (String.length header)) (* and read_header () = (* Diskzip headers are limited to overall max size of 1024 bytes. *) let bs = Bitmatch.bitstring_of_file_descr_max stdin 1024 in bitmatch bs with | { 0xD152 : 16; (* file magic *) 0x01 : 8; (_ as minor) : 8; (* major, minor versions *) } -> (* Is this a later version (major != 1)? *) | { 0xD152 : 16; (* file magic *) (_ as major) : 8; (_ as minor) : 8 } when major <> 1 -> eprintf (f_"diskzip: archive version %d.%d, this program only understands version 1.x") major minor; exit 1 (* If it looks like gzip or bzip2, exit with an informative error. *) | { 0o37 : 8; 0o213 : 8 } -> (* gzip *) prerr_endline (s_"diskzip: This looks like a gzip archive. Did you mean to pass the '-z' option?"); exit 1 | { "BZh" : 24 : string } -> (* bzip2 *) prerr_endline (s_"diskzip: This looks like a bzip2 archive. Did you mean to pass the '-j' option?"); exit 1 (* If it looks like a disk image (MBR), give an error. *) | { _ : 4080 : bitstring; 0x55 : 8; 0xAA : 8 } -> prerr_endline (s_"diskzip: This looks like a disk image. Did you mean to compress it?"); exit 1 | { _ } -> prerr_endline (s_"diskzip: Not a diskzip archive."); exit 1 *) and write_stretch_header disknum free start_offset len_bytes = let start_offset = Int63.to_int64 start_offset in let len_bytes = Int63.to_int64 len_bytes in let bs = BITSTRING { (* Stretch header magic. Allows us to find synchronization errors. *) 0xD1525555_l : 32; free : 1; 0 : 7; (* Flags. *) disknum : 8; (* Disk number. *) start_offset : 64; (* Start offset in disk image (bytes)*) len_bytes : 64 (* Length in bytes. *) } in let str = Bitmatch.string_of_bitstring bs in ignore (write stdout str 0 (String.length str)) and write_stretch dev start_offset len_bytes = (* Limit size of blocks that we write, since some implementations * of device class cannot handle large blocks. In any case I tested * this and writing large blocks isn't any faster. *) let blocksize = ~^65536 in let rec loop offset len = if len > ~^0 then ( let n = min blocksize len in ignore (write stdout (dev#read offset n) 0 (Int63.to_int n)); loop (offset+^n) (len-^n) ) in loop start_offset len_bytes and write_trailer () = let bs = BITSTRING { 0xD152FFFF_l : 32 } in let str = Bitmatch.string_of_bitstring bs in ignore (write stdout str 0 (String.length str)) let () = main ()