From 806ddddc063eacdd8b4a8f989ab9caffea350ce8 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones"
Date: Mon, 21 Oct 2013 13:59:16 +0100
Subject: [PATCH] Single thread uncompress working, no sparse support.

---
 configure.ac |   3 +
 pxzcat.c     | 266 +++++++++++++++++++++++++++--------------------------------
 2 files changed, 125 insertions(+), 144 deletions(-)

diff --git a/configure.ac b/configure.ac
index 7bdce4d..c34e1a1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -16,6 +16,9 @@ test "x$U" != "x" && AC_MSG_ERROR([Compiler not ANSI compliant])
 
 AM_PROG_CC_C_O
 
+dnl Check support for 64 bit file offsets.
+AC_SYS_LARGEFILE
+
 PKG_CHECK_MODULES([LIBLZMA], [liblzma])
 
 AC_CONFIG_HEADERS([config.h])
diff --git a/pxzcat.c b/pxzcat.c
index 0ac0eb9..befc2c0 100644
--- a/pxzcat.c
+++ b/pxzcat.c
@@ -50,7 +50,7 @@
 #define DEBUG 1
 
 #if DEBUG
-#define debug(fs,...) fprintf (stderr, "pxzcat: debug: " fs, ## __VA_ARGS__)
+#define debug(fs,...) fprintf (stderr, "pxzcat: debug: " fs "\n", ## __VA_ARGS__)
 #else
 #define debug(fs,...) /* nothing */
 #endif
@@ -64,7 +64,7 @@ static void usage (int exitcode);
 static void xzfile_uncompress (const char *filename, const char *outputfile);
 static int check_header_magic (int fd);
 static lzma_index *parse_indexes (const char *filename, int fd);
-static void iter_indexes (lzma_index *idx);
+static void iter_blocks (lzma_index *idx, const char *filename, int fd, const char *outputfile, int ofd);
 
 static struct option long_options[] = {
   { "output", required_argument, 0, 'o' },
@@ -124,7 +124,7 @@ usage (int exitcode)
 static void
 xzfile_uncompress (const char *filename, const char *outputfile)
 {
-  int fd;
+  int fd, ofd;
   uint64_t size;
   lzma_index *idx;
 
@@ -140,8 +140,20 @@ xzfile_uncompress (const char *filename, const char *outputfile)
   /* Read and parse the indexes. */
   idx = parse_indexes (filename, fd);
 
-  /* Iterate over indexes and uncompress. */
-  iter_indexes (idx);
+  /* Get the file uncompressed size, create the output file. */
+  size = lzma_index_uncompressed_size (idx);
+  debug ("uncompressed size = %" PRIu64 " bytes", size);
+
+  ofd = open (outputfile, O_WRONLY|O_CREAT|O_TRUNC|O_NOCTTY, 0644);
+  if (ofd == -1)
+    error (EXIT_FAILURE, errno, "open: %s", outputfile);
+  if (ftruncate (ofd, size) == -1)
+    error (EXIT_FAILURE, errno, "ftruncate: %s", outputfile);
+
+  /* Iterate over blocks and uncompress. */
+  iter_blocks (idx, filename, fd, outputfile, ofd);
 
   close (fd);
+  if (close (ofd) == -1)
+    error (EXIT_FAILURE, errno, "close: %s", outputfile);
 }
@@ -325,166 +327,132 @@ parse_indexes (const char *filename, int fd)
   return combined_index;
 }
 
-/* Iterate over the indexes and uncompress.
- */
+/* Iterate over the blocks and uncompress.
+ *
+ * For every non-empty block listed in 'idx', seek to the block in the
+ * input file 'fd', decode its header, then decompress it and write
+ * the result sequentially to 'ofd'.  'filename' and 'outputfile' are
+ * only used in error messages.  Any failure is fatal: it is reported
+ * through error() with EXIT_FAILURE.
+ */
 static void
-iter_indexes (lzma_index *idx)
-{
-  lzma_index_iter iter;
-
-  lzma_index_iter_init (&iter, idx);
-  while (!lzma_index_iter_next (&iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK)) {
-    abort ();
-
-
-
-
-  }
-}
-
-#if 0
-char *
-xzfile_read_block (xzfile *xz, uint64_t offset,
-                   uint64_t *start_rtn, uint64_t *size_rtn)
+iter_blocks (lzma_index *idx,
+             const char *filename, int fd, const char *outputfile, int ofd)
 {
   lzma_index_iter iter;
   uint8_t header[LZMA_BLOCK_HEADER_SIZE_MAX];
+  ssize_t n;
   lzma_block block;
   lzma_filter filters[LZMA_FILTERS_MAX + 1];
   lzma_ret r;
   lzma_stream strm = LZMA_STREAM_INIT;
-  char *data;
-  ssize_t n;
+  /* The input buffer lives at function scope because lzma_code may
+   * leave unconsumed input in it from one loop iteration to the next.
+   */
+  uint8_t buf[BUFSIZ];
+  uint8_t outbuf[BUFSIZ];
   size_t i;
 
-  /* Locate the block containing the uncompressed offset. */
-  lzma_index_iter_init (&iter, xz->idx);
-  if (lzma_index_iter_locate (&iter, offset)) {
-    nbdkit_error ("cannot find offset %" PRIu64 " in the xz file", offset);
-    return NULL;
-  }
-
-  *start_rtn = iter.block.uncompressed_file_offset;
-  *size_rtn = iter.block.uncompressed_size;
-
-  nbdkit_debug ("seek: block number %d at file offset %" PRIu64,
-                (int) iter.block.number_in_file,
-                (uint64_t) iter.block.compressed_file_offset);
-
-  if (lseek (xz->fd, iter.block.compressed_file_offset, SEEK_SET) == -1) {
-    nbdkit_error ("lseek: %m");
-    return NULL;
-  }
-
-  /* Read the block header.  Start by reading a single byte which
-   * tell us how big the block header is.
-   */
-  n = read (xz->fd, header, 1);
-  if (n == 0) {
-    nbdkit_error ("read: unexpected end of file reading block header byte");
-    return NULL;
-  }
-  if (n == -1) {
-    nbdkit_error ("read: %m");
-    return NULL;
-  }
-
-  if (header[0] == '\0') {
-    nbdkit_error ("read: unexpected invalid block in file, header[0] = 0");
-    return NULL;
-  }
-
-  block.version = 0;
-  block.check = iter.stream.flags->check;
-  block.filters = filters;
-  block.header_size = lzma_block_header_size_decode (header[0]);
+  lzma_index_iter_init (&iter, idx);
+  while (!lzma_index_iter_next (&iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK)) {
+    /* Seek to the start of the block in the input file. */
+    if (lseek (fd, iter.block.compressed_file_offset, SEEK_SET) == -1)
+      error (EXIT_FAILURE, errno, "lseek");
 
-  /* Now read and decode the block header. */
-  n = read (xz->fd, &header[1], block.header_size-1);
-  if (n >= 0 && n != block.header_size-1) {
-    nbdkit_error ("read: unexpected end of file reading block header");
-    return NULL;
-  }
-  if (n == -1) {
-    nbdkit_error ("read: %m");
-    return NULL;
-  }
+    /* Read the block header.  Start by reading a single byte which
+     * tells us how big the block header is.
+     */
+    n = read (fd, header, 1);
+    if (n == 0)
+      error (EXIT_FAILURE, 0,
+             "%s: read: unexpected end of file reading block header byte",
+             filename);
+    if (n == -1)
+      error (EXIT_FAILURE, errno, "%s: read", filename);
+
+    if (header[0] == '\0')
+      error (EXIT_FAILURE, 0,
+             "%s: read: unexpected invalid block in file, header[0] = 0",
+             filename);
+
+    block.version = 0;
+    block.check = iter.stream.flags->check;
+    block.filters = filters;
+    block.header_size = lzma_block_header_size_decode (header[0]);
+
+    /* Now read and decode the block header. */
+    n = read (fd, &header[1], block.header_size-1);
+    if (n >= 0 && n != block.header_size-1)
+      error (EXIT_FAILURE, 0,
+             "%s: read: unexpected end of file reading block header",
+             filename);
+    if (n == -1)
+      error (EXIT_FAILURE, errno, "%s: read", filename);
 
-  r = lzma_block_header_decode (&block, NULL, header);
-  if (r != LZMA_OK) {
-    nbdkit_error ("invalid block header (error %d)", r);
-    return NULL;
-  }
+    r = lzma_block_header_decode (&block, NULL, header);
+    if (r != LZMA_OK)
+      error (EXIT_FAILURE, 0, "%s: invalid block header (error %d)",
+             filename, r);
 
-  /* What this actually does is it checks that the block header
-   * matches the index.
-   */
-  r = lzma_block_compressed_size (&block, iter.block.unpadded_size);
-  if (r != LZMA_OK) {
-    nbdkit_error ("cannot calculate compressed size (error %d)", r);
-    goto err1;
-  }
+    /* What this actually does is it checks that the block header
+     * matches the index.
+     */
+    r = lzma_block_compressed_size (&block, iter.block.unpadded_size);
+    if (r != LZMA_OK)
+      error (EXIT_FAILURE, 0,
+             "%s: cannot calculate compressed size (error %d)", filename, r);
 
-  /* Read the block data. */
-  r = lzma_block_decoder (&strm, &block);
-  if (r != LZMA_OK) {
-    nbdkit_error ("invalid block (error %d)", r);
-    goto err1;
-  }
+    /* Read the block data and uncompress it. */
+    r = lzma_block_decoder (&strm, &block);
+    if (r != LZMA_OK)
+      error (EXIT_FAILURE, 0, "%s: invalid block (error %d)", filename, r);
 
-  data = malloc (*size_rtn);
-  if (data == NULL) {
-    nbdkit_error ("malloc (%zu bytes): %m\n"
-                  "NOTE: If this error occurs, you need to recompress your xz files with a smaller block size.  Use: 'xz --block-size=16777216 ...'.",
-                  *size_rtn);
-    goto err1;
-  }
+    strm.next_in = NULL;
+    strm.avail_in = 0;
+    strm.next_out = outbuf;
+    strm.avail_out = sizeof outbuf;
 
-  strm.next_in = NULL;
-  strm.avail_in = 0;
-  strm.next_out = (uint8_t *) data;
-  strm.avail_out = block.uncompressed_size;
+    for (;;) {
+      lzma_action action = LZMA_RUN;
+
+      if (strm.avail_in == 0) {
+        strm.next_in = buf;
+        n = read (fd, buf, sizeof buf);
+        if (n == -1)
+          error (EXIT_FAILURE, errno, "%s: read", filename);
+        strm.avail_in = n;
+        if (n == 0)
+          action = LZMA_FINISH;
+      }
 
-  do {
-    uint8_t buf[BUFSIZ];
-    lzma_action action = LZMA_RUN;
+      r = lzma_code (&strm, action);
 
-    if (strm.avail_in == 0) {
-      strm.next_in = buf;
-      n = read (xz->fd, buf, sizeof buf);
-      if (n == -1) {
-        nbdkit_error ("read: %m");
-        goto err2;
+      if (strm.avail_out == 0 || r == LZMA_STREAM_END) {
+        /* Flush the output buffer, retrying after short writes. */
+        const uint8_t *wp = outbuf;
+        size_t wsz = sizeof outbuf - strm.avail_out;
+
+        while (wsz > 0) {
+          ssize_t w = write (ofd, wp, wsz);
+          if (w == -1)
+            error (EXIT_FAILURE, errno, "%s: write", outputfile);
+          wp += w;
+          wsz -= (size_t) w;
+        }
+        strm.next_out = outbuf;
+        strm.avail_out = sizeof outbuf;
       }
-      strm.avail_in = n;
-      if (n == 0)
-        action = LZMA_FINISH;
+
+      if (r == LZMA_STREAM_END)
+        break;
+      if (r != LZMA_OK)
+        error (EXIT_FAILURE, 0,
+               "%s: could not parse block data (error %d)", filename, r);
     }
 
-    strm.avail_in = n;
-    strm.next_in = buf;
-    r = lzma_code (&strm, action);
-  } while (r == LZMA_OK);
+    lzma_end (&strm);
 
-  if (r != LZMA_OK && r != LZMA_STREAM_END) {
-    nbdkit_error ("could not parse block data (error %d)", r);
-    goto err2;
+    for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
+      free (filters[i].options);
   }
-
-  lzma_end (&strm);
-
-  for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
-    free (filters[i].options);
-
-  return data;
-
- err2:
-  free (data);
-  lzma_end (&strm);
- err1:
-  for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
-    free (filters[i].options);
-
-  return NULL;
 }
-#endif
-- 
1.8.3.1