1 /* pxzcat derived from nbdkit
2 * Copyright (C) 2013 Red Hat Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Red Hat nor the names of its contributors may be
17 * used to endorse or promote products derived from this software without
18 * specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
24 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 #include <sys/types.h>
54 #define debug(fs,...) fprintf (stderr, "pxzcat: debug: " fs "\n", ## __VA_ARGS__)
56 #define debug(fs,...) /* nothing */
59 /* Size of buffers used in decompression loop. */
60 #define BUFFER_SIZE (64*1024)
62 #define XZ_HEADER_MAGIC "\xfd" "7zXZ\0"
63 #define XZ_HEADER_MAGIC_LEN 6
64 #define XZ_FOOTER_MAGIC "YZ"
65 #define XZ_FOOTER_MAGIC_LEN 2
67 static void usage (int exitcode);
68 static void xzfile_uncompress (const char *filename, const char *outputfile, unsigned nr_threads);
69 static int check_header_magic (int fd);
70 static lzma_index *parse_indexes (const char *filename, int fd);
71 static void iter_blocks (lzma_index *idx, unsigned nr_threads, const char *filename, int fd, const char *outputfile, int ofd);
73 static struct option long_options[] = {
74 { "output", required_argument, 0, 'o' },
75 { "threads", required_argument, 0, 'T' },
76 { "help", 0, 0, '?' },
80 static const char *options = "o:T:";
83 main (int argc, char *argv[])
87 unsigned nr_threads = 0;
88 const char *outputfile = NULL;
91 c = getopt_long (argc, argv, options, long_options, &longopt_index);
96 /* Long option with no short opt equivalent. */
105 if (sscanf (optarg, "%u", &nr_threads) != 1)
106 error (EXIT_FAILURE, 0, "cannot parse -T option");
110 usage (EXIT_SUCCESS);
113 usage (EXIT_FAILURE);
117 if (optind != argc - 1)
118 usage (EXIT_FAILURE);
120 if (outputfile == NULL)
121 error (EXIT_FAILURE, 0, "you must give the -o (output file) option");
123 /* -T 0 (default) means use all cores. */
124 if (nr_threads == 0) {
125 long i = sysconf (_SC_NPROCESSORS_ONLN);
127 error (EXIT_FAILURE, errno, "could not get number of cores");
128 nr_threads = (unsigned) i;
130 debug ("nr_threads = %u", nr_threads);
132 xzfile_uncompress (argv[optind], outputfile, nr_threads);
140 printf ("usage: pxzcat -o output [-T #threads] input.xz\n");
145 xzfile_uncompress (const char *filename, const char *outputfile,
153 fd = open (filename, O_RDONLY);
155 error (EXIT_FAILURE, errno, "open: %s", filename);
157 /* Check file magic. */
158 if (!check_header_magic (fd))
159 error (EXIT_FAILURE, 0, "%s: not an xz file", filename);
161 /* Read and parse the indexes. */
162 idx = parse_indexes (filename, fd);
164 /* Get the file uncompressed size, create the output file. */
165 size = lzma_index_uncompressed_size (idx);
166 debug ("uncompressed size = %" PRIu64 " bytes", size);
168 /* Avoid annoying ext4 auto_da_alloc which causes a flush on close
169 * unless we are very careful about not truncating a regular file
170 * from non-zero size to zero size. (Thanks Eric Sandeen)
172 ofd = open (outputfile, O_WRONLY|O_CREAT|O_NOCTTY, 0644);
174 error (EXIT_FAILURE, errno, "open: %s", outputfile);
176 if (ftruncate (ofd, 1) == -1)
177 error (EXIT_FAILURE, errno, "ftruncate (1 byte): %s", outputfile);
179 if (lseek (ofd, 0, SEEK_SET) == -1)
180 error (EXIT_FAILURE, errno, "lseek: %s", outputfile);
182 if (write (ofd, "\0", 1) == -1)
183 error (EXIT_FAILURE, errno, "write: %s", outputfile);
185 if (ftruncate (ofd, size) == -1)
186 error (EXIT_FAILURE, errno, "ftruncate: %s", outputfile);
188 /* Tell the kernel we won't read the output file. */
189 posix_fadvise (fd, 0, 0, POSIX_FADV_RANDOM|POSIX_FADV_DONTNEED);
191 /* Iterate over blocks. */
192 iter_blocks (idx, nr_threads, filename, fd, outputfile, ofd);
198 check_header_magic (int fd)
200 char buf[XZ_HEADER_MAGIC_LEN];
202 if (lseek (fd, 0, SEEK_SET) == -1)
204 if (read (fd, buf, XZ_HEADER_MAGIC_LEN) != XZ_HEADER_MAGIC_LEN)
206 if (memcmp (buf, XZ_HEADER_MAGIC, XZ_HEADER_MAGIC_LEN) != 0)
211 /* For an explanation of this function, see src/xz/list.c:parse_indexes
215 parse_indexes (const char *filename, int fd)
218 off_t pos, index_size;
219 uint8_t footer[LZMA_STREAM_HEADER_SIZE];
220 uint8_t header[LZMA_STREAM_HEADER_SIZE];
221 lzma_stream_flags footer_flags;
222 lzma_stream_flags header_flags;
223 lzma_stream strm = LZMA_STREAM_INIT;
225 lzma_index *combined_index = NULL;
226 lzma_index *this_index = NULL;
227 lzma_vli stream_padding = 0;
228 size_t nr_streams = 0;
230 /* Check file size is a multiple of 4 bytes. */
231 pos = lseek (fd, 0, SEEK_END);
232 if (pos == (off_t) -1)
233 error (EXIT_FAILURE, errno, "%s: lseek", filename);
236 error (EXIT_FAILURE, 0,
237 "%s: not an xz file: size is not a multiple of 4 bytes",
240 /* Jump backwards through the file identifying each stream. */
242 debug ("looping through streams: pos = %" PRIu64, (uint64_t) pos);
244 if (pos < LZMA_STREAM_HEADER_SIZE)
245 error (EXIT_FAILURE, 0,
246 "%s: corrupted file at %" PRIu64, filename, (uint64_t) pos);
248 if (lseek (fd, -LZMA_STREAM_HEADER_SIZE, SEEK_CUR) == -1)
249 error (EXIT_FAILURE, errno, "%s: lseek", filename);
251 if (read (fd, footer, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE)
252 error (EXIT_FAILURE, errno, "%s: read stream footer", filename);
254 /* Skip stream padding. */
255 if (footer[8] == 0 && footer[9] == 0 &&
256 footer[10] == 0 && footer[11] == 0) {
262 pos -= LZMA_STREAM_HEADER_SIZE;
265 debug ("decode stream footer at pos = %" PRIu64, (uint64_t) pos);
267 /* Does the stream footer look reasonable? */
268 r = lzma_stream_footer_decode (&footer_flags, footer);
270 error (EXIT_FAILURE, 0,
271 "%s: invalid stream footer (error %d)", filename, r);
273 debug ("backward_size = %" PRIu64, (uint64_t) footer_flags.backward_size);
274 index_size = footer_flags.backward_size;
275 if (pos < index_size + LZMA_STREAM_HEADER_SIZE)
276 error (EXIT_FAILURE, 0, "%s: invalid stream footer", filename);
279 debug ("decode index at pos = %" PRIu64, (uint64_t) pos);
281 /* Seek backwards to the index of this stream. */
282 if (lseek (fd, pos, SEEK_SET) == -1)
283 error (EXIT_FAILURE, errno, "%s: lseek", filename);
285 /* Decode the index. */
286 r = lzma_index_decoder (&strm, &this_index, UINT64_MAX);
288 error (EXIT_FAILURE, 0,
289 "%s: invalid stream index (error %d)", filename, r);
294 strm.avail_in = index_size;
295 if (strm.avail_in > BUFSIZ)
296 strm.avail_in = BUFSIZ;
298 n = read (fd, &buf, strm.avail_in);
300 error (EXIT_FAILURE, errno, "%s: read", filename);
302 index_size -= strm.avail_in;
305 r = lzma_code (&strm, LZMA_RUN);
306 } while (r == LZMA_OK);
308 if (r != LZMA_STREAM_END)
309 error (EXIT_FAILURE, 0, "%s: could not parse index (error %d)",
312 pos -= lzma_index_total_size (this_index) + LZMA_STREAM_HEADER_SIZE;
314 debug ("decode stream header at pos = %" PRIu64, (uint64_t) pos);
316 /* Read and decode the stream header. */
317 if (lseek (fd, pos, SEEK_SET) == -1)
318 error (EXIT_FAILURE, errno, "%s: lseek", filename);
320 if (read (fd, header, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE)
321 error (EXIT_FAILURE, errno, "%s: read stream header", filename);
323 r = lzma_stream_header_decode (&header_flags, header);
325 error (EXIT_FAILURE, 0,
326 "%s: invalid stream header (error %d)", filename, r);
328 /* Header and footer of the stream should be equal. */
329 r = lzma_stream_flags_compare (&header_flags, &footer_flags);
331 error (EXIT_FAILURE, 0,
332 "%s: header and footer of stream are not equal (error %d)",
335 /* Store the decoded stream flags in this_index. */
336 r = lzma_index_stream_flags (this_index, &footer_flags);
338 error (EXIT_FAILURE, 0,
339 "%s: cannot read stream_flags from index (error %d)",
342 /* Store the amount of stream padding so far. Needed to calculate
343 * compressed offsets correctly in multi-stream files.
345 r = lzma_index_stream_padding (this_index, stream_padding);
347 error (EXIT_FAILURE, 0,
348 "%s: cannot set stream_padding in index (error %d)",
351 if (combined_index != NULL) {
352 r = lzma_index_cat (this_index, combined_index, NULL);
354 error (EXIT_FAILURE, 0, "%s: cannot combine indexes", filename);
357 combined_index = this_index;
363 return combined_index;
366 /* Return true iff the buffer is all zero bytes.
368 * Note that gcc is smart enough to optimize this properly:
369 * http://stackoverflow.com/questions/1493936/faster-means-of-checking-for-an-empty-buffer-in-c/1493989#1493989
372 is_zero (const char *buffer, size_t size)
376 for (i = 0; i < size; ++i) {
384 struct global_state {
385 /* Current iterator. Threads update this, but it is protected by a
386 * mutex, and each thread takes a copy of it when working on it.
388 lzma_index_iter iter;
389 lzma_bool iter_finished;
390 pthread_mutex_t iter_mutex;
392 /* Note that all threads are accessing these fds, so you have
393 * to use pread/pwrite instead of lseek!
397 const char *filename;
401 const char *outputfile;
405 struct per_thread_state {
407 struct global_state *global;
411 /* Create threads to iterate over the blocks and uncompress. */
412 static void *worker_thread (void *vp);
415 iter_blocks (lzma_index *idx, unsigned nr_threads,
416 const char *filename, int fd, const char *outputfile, int ofd)
418 struct global_state global;
419 struct per_thread_state per_thread[nr_threads];
420 pthread_t thread[nr_threads];
421 unsigned u, nr_errors;
425 lzma_index_iter_init (&global.iter, idx);
426 global.iter_finished = 0;
427 err = pthread_mutex_init (&global.iter_mutex, NULL);
429 error (EXIT_FAILURE, err, "pthread_mutex_init");
431 global.filename = filename;
433 global.outputfile = outputfile;
436 for (u = 0; u < nr_threads; ++u) {
437 per_thread[u].thread_num = u;
438 per_thread[u].global = &global;
441 /* Start the threads. */
442 for (u = 0; u < nr_threads; ++u) {
443 err = pthread_create (&thread[u], NULL, worker_thread, &per_thread[u]);
445 error (EXIT_FAILURE, err, "pthread_create (%u)", u);
448 /* Wait for the threads to exit. */
450 for (u = 0; u < nr_threads; ++u) {
451 err = pthread_join (thread[u], &status);
453 error (0, err, "pthread_join (%u)", u);
456 if (*(int *)status == -1)
464 /* Iterate over the blocks and uncompress. */
466 worker_thread (void *vp)
468 struct per_thread_state *state = vp;
469 struct global_state *global = state->global;
470 lzma_index_iter iter;
472 off_t position, oposition;
473 uint8_t header[LZMA_BLOCK_HEADER_SIZE_MAX];
476 lzma_filter filters[LZMA_FILTERS_MAX + 1];
478 lzma_stream strm = LZMA_STREAM_INIT;
479 uint8_t buf[BUFFER_SIZE];
480 char outbuf[BUFFER_SIZE];
482 lzma_bool iter_finished;
487 /* Get the next block. */
488 err = pthread_mutex_lock (&global->iter_mutex);
489 if (err != 0) abort ();
490 iter_finished = global->iter_finished;
491 if (!iter_finished) {
492 iter_finished = global->iter_finished =
493 lzma_index_iter_next (&global->iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK);
495 /* Take a local copy of this iterator since another thread will
496 * update the global version.
500 err = pthread_mutex_unlock (&global->iter_mutex);
501 if (err != 0) abort ();
505 /* Read the block header. Start by reading a single byte which
506 * tells us how big the block header is.
508 position = iter.block.compressed_file_offset;
509 n = pread (global->fd, header, 1, position);
512 "%s: read: unexpected end of file reading block header byte",
514 return &state->status;
517 error (0, errno, "%s: read", global->filename);
518 return &state->status;
522 if (header[0] == '\0') {
524 "%s: read: unexpected invalid block in file, header[0] = 0",
526 return &state->status;
530 block.check = iter.stream.flags->check;
531 block.filters = filters;
532 block.header_size = lzma_block_header_size_decode (header[0]);
534 /* Now read and decode the block header. */
535 n = pread (global->fd, &header[1], block.header_size-1, position);
536 if (n >= 0 && n != block.header_size-1) {
538 "%s: read: unexpected end of file reading block header",
540 return &state->status;
543 error (0, errno, "%s: read", global->filename);
544 return &state->status;
548 r = lzma_block_header_decode (&block, NULL, header);
550 error (0, errno, "%s: invalid block header (error %d)",
551 global->filename, r);
552 return &state->status;
555 /* What this actually does is it checks that the block header
558 r = lzma_block_compressed_size (&block, iter.block.unpadded_size);
561 "%s: cannot calculate compressed size (error %d)",
562 global->filename, r);
563 return &state->status;
566 /* Where we will start writing to. */
567 oposition = iter.block.uncompressed_file_offset;
569 /* Read the block data and uncompress it. */
570 r = lzma_block_decoder (&strm, &block);
572 error (0, 0, "%s: invalid block (error %d)", global->filename, r);
573 return &state->status;
578 strm.next_out = outbuf;
579 strm.avail_out = sizeof outbuf;
582 lzma_action action = LZMA_RUN;
584 if (strm.avail_in == 0) {
586 n = pread (global->fd, buf, sizeof buf, position);
588 error (0, errno, "%s: read", global->filename);
589 return &state->status;
594 action = LZMA_FINISH;
597 r = lzma_code (&strm, action);
599 if (strm.avail_out == 0 || r == LZMA_STREAM_END) {
600 size_t wsz = sizeof outbuf - strm.avail_out;
602 /* Don't write if the block is all zero, to preserve output file
603 * sparseness. However we have to update oposition.
605 if (!is_zero (outbuf, wsz)) {
606 if (pwrite (global->ofd, outbuf, wsz, oposition) != wsz) {
607 /* XXX Handle short writes. */
608 error (0, errno, "%s: write", global->filename);
609 return &state->status;
614 strm.next_out = outbuf;
615 strm.avail_out = sizeof outbuf;
618 if (r == LZMA_STREAM_END)
622 "%s: could not parse block data (error %d)",
623 global->filename, r);
624 return &state->status;
630 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
631 free (filters[i].options);
635 return &state->status;