/* pxzcat derived from nbdkit * Copyright (C) 2013 Red Hat Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * * Neither the name of Red Hat nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG 1 #if DEBUG #define debug(fs,...) fprintf (stderr, "pxzcat: debug: " fs, ## __VA_ARGS__) #else #define debug(fs,...) /* nothing */ #endif #define XZ_HEADER_MAGIC "\xfd" "7zXZ\0" #define XZ_HEADER_MAGIC_LEN 6 #define XZ_FOOTER_MAGIC "YZ" #define XZ_FOOTER_MAGIC_LEN 2 static void usage (int exitcode); static void xzfile_uncompress (const char *filename, const char *outputfile); static int check_header_magic (int fd); static lzma_index *parse_indexes (const char *filename, int fd); static void iter_indexes (lzma_index *idx); static struct option long_options[] = { { "output", required_argument, 0, 'o' }, { "help", 0, 0, '?' }, { NULL, 0, 0, 0 } }; static const char *options = "o:"; int main (int argc, char *argv[]) { int c; int longopt_index; const char *outputfile = NULL; for (;;) { c = getopt_long (argc, argv, options, long_options, &longopt_index); if (c == -1) break; switch (c) { /* Long option with no short opt equivalent. */ case 0: abort (); case 'o': outputfile = optarg; break; case '?': usage (EXIT_SUCCESS); default: usage (EXIT_FAILURE); } } if (outputfile == NULL) error (EXIT_FAILURE, 0, "you must give the -o (output file) option\n"); if (optind != argc - 1) usage (EXIT_FAILURE); xzfile_uncompress (argv[optind], outputfile); exit (EXIT_SUCCESS); } static void usage (int exitcode) { printf ("usage: pxzcat -o output input.xz\n"); exit (exitcode); } static void xzfile_uncompress (const char *filename, const char *outputfile) { int fd; uint64_t size; lzma_index *idx; /* Open the file. */ fd = open (filename, O_RDONLY); if (fd == -1) error (EXIT_FAILURE, errno, "open: %s", filename); /* Check file magic. */ if (!check_header_magic (fd)) error (EXIT_FAILURE, 0, "%s: not an xz file", filename); /* Read and parse the indexes. */ idx = parse_indexes (filename, fd); /* Iterate over indexes and uncompress. */ iter_indexes (idx); close (fd); } static int check_header_magic (int fd) { char buf[XZ_HEADER_MAGIC_LEN]; if (lseek (fd, 0, SEEK_SET) == -1) return 0; if (read (fd, buf, XZ_HEADER_MAGIC_LEN) != XZ_HEADER_MAGIC_LEN) return 0; if (memcmp (buf, XZ_HEADER_MAGIC, XZ_HEADER_MAGIC_LEN) != 0) return 0; return 1; } /* For explanation of this function, see src/xz/list.c:parse_indexes * in the xz sources. */ static lzma_index * parse_indexes (const char *filename, int fd) { lzma_ret r; off_t pos, index_size; uint8_t footer[LZMA_STREAM_HEADER_SIZE]; uint8_t header[LZMA_STREAM_HEADER_SIZE]; lzma_stream_flags footer_flags; lzma_stream_flags header_flags; lzma_stream strm = LZMA_STREAM_INIT; ssize_t n; lzma_index *combined_index = NULL; lzma_index *this_index = NULL; lzma_vli stream_padding = 0; size_t nr_streams = 0; /* Check file size is a multiple of 4 bytes. */ pos = lseek (fd, 0, SEEK_END); if (pos == (off_t) -1) error (EXIT_FAILURE, errno, "%s: lseek", filename); if ((pos & 3) != 0) error (EXIT_FAILURE, 0, "%s: not an xz file: size is not a multiple of 4 bytes", filename); /* Jump backwards through the file identifying each stream. */ while (pos > 0) { debug ("looping through streams: pos = %" PRIu64, (uint64_t) pos); if (pos < LZMA_STREAM_HEADER_SIZE) error (EXIT_FAILURE, 0, "%s: corrupted file at %" PRIu64, filename, (uint64_t) pos); if (lseek (fd, -LZMA_STREAM_HEADER_SIZE, SEEK_CUR) == -1) error (EXIT_FAILURE, errno, "%s: lseek", filename); if (read (fd, footer, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE) error (EXIT_FAILURE, errno, "%s: read stream footer", filename); /* Skip stream padding. */ if (footer[8] == 0 && footer[9] == 0 && footer[10] == 0 && footer[11] == 0) { stream_padding += 4; pos -= 4; continue; } pos -= LZMA_STREAM_HEADER_SIZE; nr_streams++; debug ("decode stream footer at pos = %" PRIu64, (uint64_t) pos); /* Does the stream footer look reasonable? */ r = lzma_stream_footer_decode (&footer_flags, footer); if (r != LZMA_OK) error (EXIT_FAILURE, 0, "%s: invalid stream footer (error %d)", filename, r); debug ("backward_size = %" PRIu64, (uint64_t) footer_flags.backward_size); index_size = footer_flags.backward_size; if (pos < index_size + LZMA_STREAM_HEADER_SIZE) error (EXIT_FAILURE, 0, "%s: invalid stream footer", filename); pos -= index_size; debug ("decode index at pos = %" PRIu64, (uint64_t) pos); /* Seek backwards to the index of this stream. */ if (lseek (fd, pos, SEEK_SET) == -1) error (EXIT_FAILURE, errno, "%s: lseek", filename); /* Decode the index. */ r = lzma_index_decoder (&strm, &this_index, UINT64_MAX); if (r != LZMA_OK) error (EXIT_FAILURE, 0, "%s: invalid stream index (error %d)", filename, r); do { uint8_t buf[BUFSIZ]; strm.avail_in = index_size; if (strm.avail_in > BUFSIZ) strm.avail_in = BUFSIZ; n = read (fd, &buf, strm.avail_in); if (n == -1) error (EXIT_FAILURE, errno, "%s: read", filename); index_size -= strm.avail_in; strm.next_in = buf; r = lzma_code (&strm, LZMA_RUN); } while (r == LZMA_OK); if (r != LZMA_STREAM_END) error (EXIT_FAILURE, 0, "%s: could not parse index (error %d)", filename, r); pos -= lzma_index_total_size (this_index) + LZMA_STREAM_HEADER_SIZE; debug ("decode stream header at pos = %" PRIu64, (uint64_t) pos); /* Read and decode the stream header. */ if (lseek (fd, pos, SEEK_SET) == -1) error (EXIT_FAILURE, errno, "%s: lseek", filename); if (read (fd, header, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE) error (EXIT_FAILURE, errno, "%s: read stream header", filename); r = lzma_stream_header_decode (&header_flags, header); if (r != LZMA_OK) error (EXIT_FAILURE, 0, "%s: invalid stream header (error %d)", filename, r); /* Header and footer of the stream should be equal. */ r = lzma_stream_flags_compare (&header_flags, &footer_flags); if (r != LZMA_OK) error (EXIT_FAILURE, 0, "%s: header and footer of stream are not equal (error %d)", filename, r); /* Store the decoded stream flags in this_index. */ r = lzma_index_stream_flags (this_index, &footer_flags); if (r != LZMA_OK) error (EXIT_FAILURE, 0, "%s: cannot read stream_flags from index (error %d)", filename, r); /* Store the amount of stream padding so far. Needed to calculate * compressed offsets correctly in multi-stream files. */ r = lzma_index_stream_padding (this_index, stream_padding); if (r != LZMA_OK) error (EXIT_FAILURE, 0, "%s: cannot set stream_padding in index (error %d)", filename, r); if (combined_index != NULL) { r = lzma_index_cat (this_index, combined_index, NULL); if (r != LZMA_OK) error (EXIT_FAILURE, 0, "%s: cannot combine indexes", filename); } combined_index = this_index; this_index = NULL; } lzma_end (&strm); return combined_index; } /* Iterate over the indexes and uncompress. */ static void iter_indexes (lzma_index *idx) { lzma_index_iter iter; lzma_index_iter_init (&iter, idx); while (!lzma_index_iter_next (&iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK)) { abort (); } } #if 0 char * xzfile_read_block (xzfile *xz, uint64_t offset, uint64_t *start_rtn, uint64_t *size_rtn) { lzma_index_iter iter; uint8_t header[LZMA_BLOCK_HEADER_SIZE_MAX]; lzma_block block; lzma_filter filters[LZMA_FILTERS_MAX + 1]; lzma_ret r; lzma_stream strm = LZMA_STREAM_INIT; char *data; ssize_t n; size_t i; /* Locate the block containing the uncompressed offset. */ lzma_index_iter_init (&iter, xz->idx); if (lzma_index_iter_locate (&iter, offset)) { nbdkit_error ("cannot find offset %" PRIu64 " in the xz file", offset); return NULL; } *start_rtn = iter.block.uncompressed_file_offset; *size_rtn = iter.block.uncompressed_size; nbdkit_debug ("seek: block number %d at file offset %" PRIu64, (int) iter.block.number_in_file, (uint64_t) iter.block.compressed_file_offset); if (lseek (xz->fd, iter.block.compressed_file_offset, SEEK_SET) == -1) { nbdkit_error ("lseek: %m"); return NULL; } /* Read the block header. Start by reading a single byte which * tell us how big the block header is. */ n = read (xz->fd, header, 1); if (n == 0) { nbdkit_error ("read: unexpected end of file reading block header byte"); return NULL; } if (n == -1) { nbdkit_error ("read: %m"); return NULL; } if (header[0] == '\0') { nbdkit_error ("read: unexpected invalid block in file, header[0] = 0"); return NULL; } block.version = 0; block.check = iter.stream.flags->check; block.filters = filters; block.header_size = lzma_block_header_size_decode (header[0]); /* Now read and decode the block header. */ n = read (xz->fd, &header[1], block.header_size-1); if (n >= 0 && n != block.header_size-1) { nbdkit_error ("read: unexpected end of file reading block header"); return NULL; } if (n == -1) { nbdkit_error ("read: %m"); return NULL; } r = lzma_block_header_decode (&block, NULL, header); if (r != LZMA_OK) { nbdkit_error ("invalid block header (error %d)", r); return NULL; } /* What this actually does is it checks that the block header * matches the index. */ r = lzma_block_compressed_size (&block, iter.block.unpadded_size); if (r != LZMA_OK) { nbdkit_error ("cannot calculate compressed size (error %d)", r); goto err1; } /* Read the block data. */ r = lzma_block_decoder (&strm, &block); if (r != LZMA_OK) { nbdkit_error ("invalid block (error %d)", r); goto err1; } data = malloc (*size_rtn); if (data == NULL) { nbdkit_error ("malloc (%zu bytes): %m\n" "NOTE: If this error occurs, you need to recompress your xz files with a smaller block size. Use: 'xz --block-size=16777216 ...'.", *size_rtn); goto err1; } strm.next_in = NULL; strm.avail_in = 0; strm.next_out = (uint8_t *) data; strm.avail_out = block.uncompressed_size; do { uint8_t buf[BUFSIZ]; lzma_action action = LZMA_RUN; if (strm.avail_in == 0) { strm.next_in = buf; n = read (xz->fd, buf, sizeof buf); if (n == -1) { nbdkit_error ("read: %m"); goto err2; } strm.avail_in = n; if (n == 0) action = LZMA_FINISH; } strm.avail_in = n; strm.next_in = buf; r = lzma_code (&strm, action); } while (r == LZMA_OK); if (r != LZMA_OK && r != LZMA_STREAM_END) { nbdkit_error ("could not parse block data (error %d)", r); goto err2; } lzma_end (&strm); for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) free (filters[i].options); return data; err2: free (data); lzma_end (&strm); err1: for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) free (filters[i].options); return NULL; } #endif