1 /* pxzcat derived from nbdkit
2 * Copyright (C) 2013 Red Hat Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Red Hat nor the names of its contributors may be
17 * used to endorse or promote products derived from this software without
18 * specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
24 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 #include <sys/types.h>
/* Debug tracing: print a printf-style message to stderr, prefixed with
 * "pxzcat: debug: " and terminated by a newline (callers pass messages
 * without one).
 *
 * 'fs' must be a string literal because it is concatenated with the
 * prefix at compile time.  The arguments are forwarded with
 * ', ##__VA_ARGS__' (GNU extension: the comma is dropped when there are
 * no further arguments); __VA_ARGS__ is only valid when the parameter
 * list ends in '...', so the macro is declared as (fs, ...).
 */
#define debug(fs, ...) fprintf (stderr, "pxzcat: debug: " fs "\n", ##__VA_ARGS__)
/* Debugging disabled: every debug () call expands to nothing, so its
 * arguments are never evaluated.  Declared as (fs, ...) -- the standard
 * C99 variadic form -- rather than the GNU-only named form 'fs...'.
 */
#define debug(fs, ...) /* nothing */
/* Magic byte sequences and their lengths.  The header magic is the byte
 * 0xFD, the characters 7zXZ and a NUL byte: six bytes, which is why the
 * length is spelled out instead of being measured with strlen.  It is
 * written as two adjacent literals so that the \xfd escape does not
 * absorb the following 7 as another hex digit.  The footer magic is the
 * two characters YZ.
 */
57 #define XZ_HEADER_MAGIC "\xfd" "7zXZ\0"
58 #define XZ_HEADER_MAGIC_LEN 6
59 #define XZ_FOOTER_MAGIC "YZ"
60 #define XZ_FOOTER_MAGIC_LEN 2
62 static void xzfile_uncompress (const char *filename, const char *outputfile);
63 static int check_header_magic (int fd);
64 static lzma_index *parse_indexes (const char *filename, int fd, size_t *);
65 static void iter_indexes (lzma_index *idx);
/* Long options for getopt_long: --output takes a required argument and
 * is reported as the short option o.
 */
67 static struct option long_options[] = {
68 { "output", required_argument, 0, 'o' },
/* Short option string: -o takes a required argument. */
72 static const char *options = "o:";
75 main (int argc, char *argv[])
79 const char *outputfile = NULL;
82 c = getopt_long (argc, argv, options, long_options, &optind);
87 /* Long option with no short opt equivalent. */
97 error (EXIT_FAILURE, 0, "usage: %s -o output file\n", argv[0]);
101 if (outputfile == NULL)
102 error (EXIT_FAILURE, 0, "%s: you must give the -o (output file) option\n",
105 if (optind != argc - 1)
106 error (EXIT_FAILURE, 0, "%s: input.xz\n", argv[0]);
108 xzfile_uncompress (argv[optind], outputfile);
/* Decompress the xz file named by filename.
 *
 * Visible steps: open the file read-only with close-on-exec, check the
 * xz header magic, then parse the stream indexes into idx.  A bad magic
 * is fatal (error with EXIT_FAILURE); presumably a failed open is too.
 *
 * NOTE(review): how outputfile is used, and the code that follows the
 * final comment, are not visible in this listing.
 */
114 xzfile_uncompress (const char *filename, const char *outputfile)
121 fd = open (filename, O_RDONLY|O_CLOEXEC);
123 error (EXIT_FAILURE, errno, "open: %s", filename);
125 /* Check file magic. */
126 if (!check_header_magic (fd))
127 error (EXIT_FAILURE, 0, "%s: not an xz file", filename);
129 /* Read and parse the indexes. */
130 idx = parse_indexes (filename, fd);
132 /* Iterate over indexes and uncompress. */
139 check_header_magic (int fd)
141 char buf[XZ_HEADER_MAGIC_LEN];
143 if (lseek (fd, 0, SEEK_SET) == -1)
145 if (read (fd, buf, XZ_HEADER_MAGIC_LEN) != XZ_HEADER_MAGIC_LEN)
147 if (memcmp (buf, XZ_HEADER_MAGIC, XZ_HEADER_MAGIC_LEN) != 0)
152 /* For explanation of this function, see src/xz/list.c:parse_indexes
156 parse_indexes (const char *filename, int fd)
159 off_t pos, index_size;
160 uint8_t footer[LZMA_STREAM_HEADER_SIZE];
161 uint8_t header[LZMA_STREAM_HEADER_SIZE];
162 lzma_stream_flags footer_flags;
163 lzma_stream_flags header_flags;
164 lzma_stream strm = LZMA_STREAM_INIT;
166 lzma_index *combined_index = NULL;
167 lzma_index *this_index = NULL;
168 lzma_vli stream_padding = 0;
169 size_t nr_streams = 0;
171 /* Check file size is a multiple of 4 bytes. */
172 pos = lseek (fd, 0, SEEK_END);
173 if (pos == (off_t) -1)
174 error (EXIT_FAILURE, errno, "%s: lseek", filename);
177 error (EXIT_FAILURE, 0,
178 "%s: not an xz file: size is not a multiple of 4 bytes",
181 /* Jump backwards through the file identifying each stream. */
183 debug ("looping through streams: pos = %" PRIu64, (uint64_t) pos);
185 if (pos < LZMA_STREAM_HEADER_SIZE)
186 error (EXIT_FAILURE, 0,
187 "%s: corrupted file at %" PRIu64, filename, (uint64_t) pos);
189 if (lseek (fd, -LZMA_STREAM_HEADER_SIZE, SEEK_CUR) == -1)
190 error (EXIT_FAILURE, errno, "%s: lseek", filename);
192 if (read (fd, footer, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE)
193 error (EXIT_FAILURE, errno, "%s: read stream footer", filename);
195 /* Skip stream padding. */
196 if (footer[8] == 0 && footer[9] == 0 &&
197 footer[10] == 0 && footer[11] == 0) {
203 pos -= LZMA_STREAM_HEADER_SIZE;
206 debug ("decode stream footer at pos = %" PRIu64, (uint64_t) pos);
208 /* Does the stream footer look reasonable? */
209 r = lzma_stream_footer_decode (&footer_flags, footer);
211 error (EXIT_FAILURE, 0,
212 "%s: invalid stream footer (error %d)", filename, r);
214 debug ("backward_size = %" PRIu64, (uint64_t) footer_flags.backward_size);
215 index_size = footer_flags.backward_size;
216 if (pos < index_size + LZMA_STREAM_HEADER_SIZE)
217 error (EXIT_FAILURE, 0, "%s: invalid stream footer", filename);
220 debug ("decode index at pos = %" PRIu64, (uint64_t) pos);
222 /* Seek backwards to the index of this stream. */
223 if (lseek (fd, pos, SEEK_SET) == -1)
224 error (EXIT_FAILURE, errno, "%s: lseek", filename);
226 /* Decode the index. */
227 r = lzma_index_decoder (&strm, &this_index, UINT64_MAX);
229 error (EXIT_FAILURE, 0,
230 "%s: invalid stream index (error %d)", filename, r);
235 strm.avail_in = index_size;
236 if (strm.avail_in > BUFSIZ)
237 strm.avail_in = BUFSIZ;
239 n = read (fd, &buf, strm.avail_in);
241 error (EXIT_FAILURE, errno, "%s: read", filename);
243 index_size -= strm.avail_in;
246 r = lzma_code (&strm, LZMA_RUN);
247 } while (r == LZMA_OK);
249 if (r != LZMA_STREAM_END) {
250 error (EXIT_FAILURE, 0, "%s: could not parse index (error %d)",
253 pos -= lzma_index_total_size (this_index) + LZMA_STREAM_HEADER_SIZE;
255 debug ("decode stream header at pos = %" PRIu64, (uint64_t) pos);
257 /* Read and decode the stream header. */
258 if (lseek (fd, pos, SEEK_SET) == -1)
259 error (EXIT_FAILURE, errno, "%s: lseek", filename);
261 if (read (fd, header, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE)
262 error (EXIT_FAILURE, errno, "%s: read stream header", filename);
264 r = lzma_stream_header_decode (&header_flags, header);
266 error (EXIT_FAILURE, 0,
267 "%s: invalid stream header (error %d)", filename, r);
269 /* Header and footer of the stream should be equal. */
270 r = lzma_stream_flags_compare (&header_flags, &footer_flags);
272 error (EXIT_FAILURE, 0,
273 "%s: header and footer of stream are not equal (error %d)",
276 /* Store the decoded stream flags in this_index. */
277 r = lzma_index_stream_flags (this_index, &footer_flags);
279 error (EXIT_FAILURE, 0,
280 "%s: cannot read stream_flags from index (error %d)",
283 /* Store the amount of stream padding so far. Needed to calculate
284 * compressed offsets correctly in multi-stream files.
286 r = lzma_index_stream_padding (this_index, stream_padding);
288 error (EXIT_FAILURE, 0,
289 "%s: cannot set stream_padding in index (error %d)",
292 if (combined_index != NULL) {
293 r = lzma_index_cat (this_index, combined_index, NULL);
295 error (EXIT_FAILURE, 0, "%s: cannot combine indexes", filename);
298 combined_index = this_index;
304 return combined_index;
/* NOTE(review): only the iterator set-up and the loop header are visible
 * in this listing.  The loop advances iter over the non-empty blocks of
 * idx until lzma_index_iter_next reports that none are left; what is
 * done with each block cannot be confirmed from here.
 */
307 /* Iterate over the indexes and uncompress.
310 iter_indexes (lzma_index *idx)
312 lzma_index_iter iter;
314 lzma_index_iter_init (&iter, idx);
315 while (!lzma_index_iter_next (&iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK)) {
/* Decompress the single xz block that contains the uncompressed offset
 * given by offset.  On the visible paths the uncompressed start offset
 * and size of that block are stored through start_rtn and size_rtn, and
 * the block is decoded into a buffer obtained from malloc.
 *
 * NOTE(review): this function reports through nbdkit_error and
 * nbdkit_debug and takes an xzfile handle, none of which are declared in
 * the visible part of this file.  It looks like code carried over from
 * nbdkit that has not been converted yet -- confirm before relying on it.
 * The end of the function is not visible in this listing.
 */
328 xzfile_read_block (xzfile *xz, uint64_t offset,
329 uint64_t *start_rtn, uint64_t *size_rtn)
331 lzma_index_iter iter;
332 uint8_t header[LZMA_BLOCK_HEADER_SIZE_MAX];
334 lzma_filter filters[LZMA_FILTERS_MAX + 1];
336 lzma_stream strm = LZMA_STREAM_INIT;
341 /* Locate the block containing the uncompressed offset. */
342 lzma_index_iter_init (&iter, xz->idx);
343 if (lzma_index_iter_locate (&iter, offset)) {
344 nbdkit_error ("cannot find offset %" PRIu64 " in the xz file", offset);
348 *start_rtn = iter.block.uncompressed_file_offset;
349 *size_rtn = iter.block.uncompressed_size;
351 nbdkit_debug ("seek: block number %d at file offset %" PRIu64,
352 (int) iter.block.number_in_file,
353 (uint64_t) iter.block.compressed_file_offset);
355 if (lseek (xz->fd, iter.block.compressed_file_offset, SEEK_SET) == -1) {
356 nbdkit_error ("lseek: %m");
360 /* Read the block header. Start by reading a single byte which
361 * tell us how big the block header is.
363 n = read (xz->fd, header, 1);
365 nbdkit_error ("read: unexpected end of file reading block header byte");
369 nbdkit_error ("read: %m");
373 if (header[0] == '\0') {
374 nbdkit_error ("read: unexpected invalid block in file, header[0] = 0");
379 block.check = iter.stream.flags->check;
380 block.filters = filters;
381 block.header_size = lzma_block_header_size_decode (header[0]);
383 /* Now read and decode the block header. */
384 n = read (xz->fd, &header[1], block.header_size-1);
385 if (n >= 0 && n != block.header_size-1) {
386 nbdkit_error ("read: unexpected end of file reading block header");
390 nbdkit_error ("read: %m");
394 r = lzma_block_header_decode (&block, NULL, header);
396 nbdkit_error ("invalid block header (error %d)", r);
400 /* What this actually does is it checks that the block header
403 r = lzma_block_compressed_size (&block, iter.block.unpadded_size);
405 nbdkit_error ("cannot calculate compressed size (error %d)", r);
409 /* Read the block data. */
410 r = lzma_block_decoder (&strm, &block);
412 nbdkit_error ("invalid block (error %d)", r);
/* NOTE(review): the buffer is sized from the index (*size_rtn) while
 * avail_out below comes from the decoded block header
 * (block.uncompressed_size).  Confirm the two always agree, otherwise
 * the decoder could be given more room than was allocated.
 */
416 data = malloc (*size_rtn);
418 nbdkit_error ("malloc (%zu bytes): %m\n"
419 "NOTE: If this error occurs, you need to recompress your xz files with a smaller block size. Use: 'xz --block-size=16777216 ...'.",
426 strm.next_out = (uint8_t *) data;
427 strm.avail_out = block.uncompressed_size;
431 lzma_action action = LZMA_RUN;
433 if (strm.avail_in == 0) {
435 n = read (xz->fd, buf, sizeof buf);
437 nbdkit_error ("read: %m");
442 action = LZMA_FINISH;
447 r = lzma_code (&strm, action);
448 } while (r == LZMA_OK);
/* NOTE(review): the loop above only exits once r differs from LZMA_OK,
 * so the first half of the test below is always true at this point.
 */
450 if (r != LZMA_OK && r != LZMA_STREAM_END) {
451 nbdkit_error ("could not parse block data (error %d)", r);
457 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
458 free (filters[i].options);
466 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
467 free (filters[i].options);