/* pxzcat derived from nbdkit
 * Copyright (C) 2013 Red Hat Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * * Neither the name of Red Hat nor the names of its contributors may be
 * used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>

#include <lzma.h>
/* Debug tracing.  Build with -DDEBUG=1 to print each decoding step on
 * stderr.  By default debug () expands to nothing, so its arguments
 * are not evaluated.
 */
#ifndef DEBUG
#define DEBUG 0
#endif

#if DEBUG
#define debug(fs,...) fprintf (stderr, "pxzcat: debug: " fs "\n", ## __VA_ARGS__)
#else
#define debug(fs,...) /* nothing */
#endif
/* Bytes expected at the very start of an xz file, and how many of
 * them check_header_magic compares.
 */
#define XZ_HEADER_MAGIC      "\xfd" "7zXZ\0"
#define XZ_HEADER_MAGIC_LEN  6

/* Footer counterpart of the header magic and its length. */
#define XZ_FOOTER_MAGIC      "YZ"
#define XZ_FOOTER_MAGIC_LEN  2
63 static void usage (int exitcode);
64 static void xzfile_uncompress (const char *filename, const char *outputfile);
65 static int check_header_magic (int fd);
66 static lzma_index *parse_indexes (const char *filename, int fd);
67 static void iter_blocks (lzma_index *idx, const char *filename, int fd, const char *outputfile, int ofd);
/* Long options accepted by getopt_long.  The array must end with an
 * all-zero element, which is how getopt_long finds the end of the table.
 */
static struct option long_options[] = {
  { "output", required_argument, 0, 'o' },
  { "help",   no_argument,       0, '?' },
  { NULL,     0,                 0, 0   }
};

/* Short options: -o takes an argument. */
static const char *options = "o:";
78 main (int argc, char *argv[])
82 const char *outputfile = NULL;
85 c = getopt_long (argc, argv, options, long_options, &longopt_index);
90 /* Long option with no short opt equivalent. */
102 usage (EXIT_FAILURE);
106 if (outputfile == NULL)
107 error (EXIT_FAILURE, 0, "you must give the -o (output file) option\n");
109 if (optind != argc - 1)
110 usage (EXIT_FAILURE);
112 xzfile_uncompress (argv[optind], outputfile);
/* Print a one-line usage summary on stdout and exit with EXITCODE.
 * Does not return.
 */
static void
usage (int exitcode)
{
  printf ("usage: pxzcat -o output input.xz\n");
  exit (exitcode);
}
125 xzfile_uncompress (const char *filename, const char *outputfile)
132 fd = open (filename, O_RDONLY);
134 error (EXIT_FAILURE, errno, "open: %s", filename);
136 /* Check file magic. */
137 if (!check_header_magic (fd))
138 error (EXIT_FAILURE, 0, "%s: not an xz file", filename);
140 /* Read and parse the indexes. */
141 idx = parse_indexes (filename, fd);
143 /* Get the file uncompressed size, create the output file. */
144 size = lzma_index_uncompressed_size (idx);
145 debug ("uncompressed size = %" PRIu64 " bytes", size);
147 ofd = open (outputfile, O_WRONLY|O_CREAT|O_TRUNC|O_NOCTTY, 0644);
149 error (EXIT_FAILURE, errno, "open: %s", outputfile);
150 if (ftruncate (ofd, size) == -1)
151 error (EXIT_FAILURE, errno, "ftruncate: %s", outputfile);
153 /* Iterate over blocks and uncompress. */
154 iter_blocks (idx, filename, fd, outputfile, ofd);
160 check_header_magic (int fd)
162 char buf[XZ_HEADER_MAGIC_LEN];
164 if (lseek (fd, 0, SEEK_SET) == -1)
166 if (read (fd, buf, XZ_HEADER_MAGIC_LEN) != XZ_HEADER_MAGIC_LEN)
168 if (memcmp (buf, XZ_HEADER_MAGIC, XZ_HEADER_MAGIC_LEN) != 0)
173 /* For explanation of this function, see src/xz/list.c:parse_indexes
177 parse_indexes (const char *filename, int fd)
180 off_t pos, index_size;
181 uint8_t footer[LZMA_STREAM_HEADER_SIZE];
182 uint8_t header[LZMA_STREAM_HEADER_SIZE];
183 lzma_stream_flags footer_flags;
184 lzma_stream_flags header_flags;
185 lzma_stream strm = LZMA_STREAM_INIT;
187 lzma_index *combined_index = NULL;
188 lzma_index *this_index = NULL;
189 lzma_vli stream_padding = 0;
190 size_t nr_streams = 0;
192 /* Check file size is a multiple of 4 bytes. */
193 pos = lseek (fd, 0, SEEK_END);
194 if (pos == (off_t) -1)
195 error (EXIT_FAILURE, errno, "%s: lseek", filename);
198 error (EXIT_FAILURE, 0,
199 "%s: not an xz file: size is not a multiple of 4 bytes",
202 /* Jump backwards through the file identifying each stream. */
204 debug ("looping through streams: pos = %" PRIu64, (uint64_t) pos);
206 if (pos < LZMA_STREAM_HEADER_SIZE)
207 error (EXIT_FAILURE, 0,
208 "%s: corrupted file at %" PRIu64, filename, (uint64_t) pos);
210 if (lseek (fd, -LZMA_STREAM_HEADER_SIZE, SEEK_CUR) == -1)
211 error (EXIT_FAILURE, errno, "%s: lseek", filename);
213 if (read (fd, footer, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE)
214 error (EXIT_FAILURE, errno, "%s: read stream footer", filename);
216 /* Skip stream padding. */
217 if (footer[8] == 0 && footer[9] == 0 &&
218 footer[10] == 0 && footer[11] == 0) {
224 pos -= LZMA_STREAM_HEADER_SIZE;
227 debug ("decode stream footer at pos = %" PRIu64, (uint64_t) pos);
229 /* Does the stream footer look reasonable? */
230 r = lzma_stream_footer_decode (&footer_flags, footer);
232 error (EXIT_FAILURE, 0,
233 "%s: invalid stream footer (error %d)", filename, r);
235 debug ("backward_size = %" PRIu64, (uint64_t) footer_flags.backward_size);
236 index_size = footer_flags.backward_size;
237 if (pos < index_size + LZMA_STREAM_HEADER_SIZE)
238 error (EXIT_FAILURE, 0, "%s: invalid stream footer", filename);
241 debug ("decode index at pos = %" PRIu64, (uint64_t) pos);
243 /* Seek backwards to the index of this stream. */
244 if (lseek (fd, pos, SEEK_SET) == -1)
245 error (EXIT_FAILURE, errno, "%s: lseek", filename);
247 /* Decode the index. */
248 r = lzma_index_decoder (&strm, &this_index, UINT64_MAX);
250 error (EXIT_FAILURE, 0,
251 "%s: invalid stream index (error %d)", filename, r);
256 strm.avail_in = index_size;
257 if (strm.avail_in > BUFSIZ)
258 strm.avail_in = BUFSIZ;
260 n = read (fd, &buf, strm.avail_in);
262 error (EXIT_FAILURE, errno, "%s: read", filename);
264 index_size -= strm.avail_in;
267 r = lzma_code (&strm, LZMA_RUN);
268 } while (r == LZMA_OK);
270 if (r != LZMA_STREAM_END)
271 error (EXIT_FAILURE, 0, "%s: could not parse index (error %d)",
274 pos -= lzma_index_total_size (this_index) + LZMA_STREAM_HEADER_SIZE;
276 debug ("decode stream header at pos = %" PRIu64, (uint64_t) pos);
278 /* Read and decode the stream header. */
279 if (lseek (fd, pos, SEEK_SET) == -1)
280 error (EXIT_FAILURE, errno, "%s: lseek", filename);
282 if (read (fd, header, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE)
283 error (EXIT_FAILURE, errno, "%s: read stream header", filename);
285 r = lzma_stream_header_decode (&header_flags, header);
287 error (EXIT_FAILURE, 0,
288 "%s: invalid stream header (error %d)", filename, r);
290 /* Header and footer of the stream should be equal. */
291 r = lzma_stream_flags_compare (&header_flags, &footer_flags);
293 error (EXIT_FAILURE, 0,
294 "%s: header and footer of stream are not equal (error %d)",
297 /* Store the decoded stream flags in this_index. */
298 r = lzma_index_stream_flags (this_index, &footer_flags);
300 error (EXIT_FAILURE, 0,
301 "%s: cannot read stream_flags from index (error %d)",
304 /* Store the amount of stream padding so far. Needed to calculate
305 * compressed offsets correctly in multi-stream files.
307 r = lzma_index_stream_padding (this_index, stream_padding);
309 error (EXIT_FAILURE, 0,
310 "%s: cannot set stream_padding in index (error %d)",
313 if (combined_index != NULL) {
314 r = lzma_index_cat (this_index, combined_index, NULL);
316 error (EXIT_FAILURE, 0, "%s: cannot combine indexes", filename);
319 combined_index = this_index;
325 return combined_index;
/* Size in bytes of the input and output buffers used by iter_blocks. */
#define BUFFER_SIZE (64 * 1024)
/* Return true iff the buffer is all zero bytes.
 *
 * An empty buffer (size == 0) counts as all zero.
 *
 * Note that gcc is smart enough to optimize this properly:
 * http://stackoverflow.com/questions/1493936/faster-means-of-checking-for-an-empty-buffer-in-c/1493989#1493989
 */
static int
is_zero (const char *buffer, size_t size)
{
  size_t i;

  for (i = 0; i < size; ++i) {
    if (buffer[i] != 0)
      return 0;
  }

  return 1;
}
348 /* Iterate over the blocks and uncompress. */
350 iter_blocks (lzma_index *idx,
351 const char *filename, int fd, const char *outputfile, int ofd)
353 lzma_index_iter iter;
354 uint8_t header[LZMA_BLOCK_HEADER_SIZE_MAX];
357 lzma_filter filters[LZMA_FILTERS_MAX + 1];
359 lzma_stream strm = LZMA_STREAM_INIT;
360 char outbuf[BUFFER_SIZE];
363 lzma_index_iter_init (&iter, idx);
364 while (!lzma_index_iter_next (&iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK)) {
365 /* Seek to the start of the block in the input file. */
366 if (lseek (fd, iter.block.compressed_file_offset, SEEK_SET) == -1)
367 error (EXIT_FAILURE, errno, "lseek");
369 /* Read the block header. Start by reading a single byte which
370 * tell us how big the block header is.
372 n = read (fd, header, 1);
374 error (EXIT_FAILURE, 0,
375 "%s: read: unexpected end of file reading block header byte",
378 error (EXIT_FAILURE, errno, "%s: read", filename);
380 if (header[0] == '\0')
381 error (EXIT_FAILURE, errno,
382 "%s: read: unexpected invalid block in file, header[0] = 0",
386 block.check = iter.stream.flags->check;
387 block.filters = filters;
388 block.header_size = lzma_block_header_size_decode (header[0]);
390 /* Now read and decode the block header. */
391 n = read (fd, &header[1], block.header_size-1);
392 if (n >= 0 && n != block.header_size-1)
393 error (EXIT_FAILURE, 0,
394 "%s: read: unexpected end of file reading block header",
397 error (EXIT_FAILURE, errno, "%s: read", filename);
399 r = lzma_block_header_decode (&block, NULL, header);
401 error (EXIT_FAILURE, errno, "%s: invalid block header (error %d)",
404 /* What this actually does is it checks that the block header
407 r = lzma_block_compressed_size (&block, iter.block.unpadded_size);
409 error (EXIT_FAILURE, errno,
410 "%s: cannot calculate compressed size (error %d)", filename, r);
412 /* Read the block data and uncompress it. */
413 r = lzma_block_decoder (&strm, &block);
415 error (EXIT_FAILURE, 0, "%s: invalid block (error %d)", filename, r);
419 strm.next_out = outbuf;
420 strm.avail_out = sizeof outbuf;
423 uint8_t buf[BUFFER_SIZE];
424 lzma_action action = LZMA_RUN;
426 if (strm.avail_in == 0) {
428 n = read (fd, buf, sizeof buf);
430 error (EXIT_FAILURE, errno, "%s: read", filename);
433 action = LZMA_FINISH;
436 r = lzma_code (&strm, action);
438 if (strm.avail_out == 0 || r == LZMA_STREAM_END) {
439 size_t wsz = sizeof outbuf - strm.avail_out;
441 if (is_zero (outbuf, wsz)) { /* Seek to preserve sparseness. */
442 if (lseek (ofd, wsz, SEEK_CUR) == (off_t) -1)
443 error (EXIT_FAILURE, errno, "%s: seek", filename);
445 if (write (ofd, outbuf, wsz) != wsz)
446 /* XXX Handle short writes. */
447 error (EXIT_FAILURE, errno, "%s: write", filename);
450 strm.next_out = outbuf;
451 strm.avail_out = sizeof outbuf;
454 if (r == LZMA_STREAM_END)
457 error (EXIT_FAILURE, 0,
458 "%s: could not parse block data (error %d)", filename, r);
463 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
464 free (filters[i].options);