pxzcat.c

   1 /* pxzcat derived from nbdkit
   2  * Copyright (C) 2013 Red Hat Inc.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions are
   7  * met:
   8  *
   9  * * Redistributions of source code must retain the above copyright
  10  * notice, this list of conditions and the following disclaimer.
  11  *
  12  * * Redistributions in binary form must reproduce the above copyright
  13  * notice, this list of conditions and the following disclaimer in the
  14  * documentation and/or other materials provided with the distribution.
  15  *
  16  * * Neither the name of Red Hat nor the names of its contributors may be
  17  * used to endorse or promote products derived from this software without
  18  * specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  22  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  23  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
  24  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  27  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  28  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33
#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <unistd.h>
#include <fcntl.h>
#include <getopt.h>
#include <sys/types.h>
#include <error.h>
#include <errno.h>

#include <lzma.h>
  48
  49 #define DEBUG 1
  50
  51 #if DEBUG
  52 #define debug(fs...) fprintf (stderr, "pxzcat: debug: " fs ##__VA_ARGS__)
  53 #else
  54 #define debug(fs...) /* nothing */
  55 #endif
  56
  57 #define XZ_HEADER_MAGIC     "\xfd" "7zXZ\0"
  58 #define XZ_HEADER_MAGIC_LEN 6
  59 #define XZ_FOOTER_MAGIC     "YZ"
  60 #define XZ_FOOTER_MAGIC_LEN 2
  61
  62 static void xzfile_uncompress (const char *filename, const char *outputfile);
  63 static int check_header_magic (int fd);
  64 static lzma_index *parse_indexes (const char *filename, int fd, size_t *);
  65 static void iter_indexes (lzma_index *idx);
  66
  67 static struct option long_options[] = {
  68   { "output",   required_argument,  0, 'o' },
  69   { NULL,       0,                  0, 0   }
  70 };
  71
  72 static const char *options = "o:";
  73
  74 int
  75 main (int argc, char *argv[])
  76 {
  77   int c;
  78   int optind;
  79   const char *outputfile = NULL;
  80
  81   for (;;) {
  82     c = getopt_long (argc, argv, options, long_options, &optind);
  83     if (c == -1)
  84       break;
  85
  86     switch (c) {
  87       /* Long option with no short opt equivalent. */
  88     case 0:
  89       abort ();
  90
  91     case 'o':
  92       outputfile = optarg;
  93       break;
  94
  95     case '?':
  96     default:
  97       error (EXIT_FAILURE, 0, "usage: %s -o output file\n", argv[0]);
  98     }
  99   }
 100
 101   if (outputfile == NULL)
 102     error (EXIT_FAILURE, 0, "%s: you must give the -o (output file) option\n",
 103            argv[0]);
 104
 105   if (optind != argc - 1)
 106     error (EXIT_FAILURE, 0, "%s: input.xz\n", argv[0]);
 107
 108   xzfile_uncompress (argv[optind], outputfile);
 109
 110   exit (EXIT_SUCCESS);
 111 }
 112
 113 static void
 114 xzfile_uncompress (const char *filename, const char *outputfile)
 115 {
 116   int fd;
 117   uint64_t size;
 118   lzma_index *idx;
 119
 120   /* Open the file. */
 121   fd = open (filename, O_RDONLY|O_CLOEXEC);
 122   if (fd == -1)
 123     error (EXIT_FAILURE, errno, "open: %s", filename);
 124
 125   /* Check file magic. */
 126   if (!check_header_magic (fd))
 127     error (EXIT_FAILURE, 0, "%s: not an xz file", filename);
 128
 129   /* Read and parse the indexes. */
 130   idx = parse_indexes (filename, fd);
 131
 132   /* Iterate over indexes and uncompress. */
 133   iter_indexes (idx);
 134
 135   close (fd);
 136 }
 137
 138 static int
 139 check_header_magic (int fd)
 140 {
 141   char buf[XZ_HEADER_MAGIC_LEN];
 142
 143   if (lseek (fd, 0, SEEK_SET) == -1)
 144     return 0;
 145   if (read (fd, buf, XZ_HEADER_MAGIC_LEN) != XZ_HEADER_MAGIC_LEN)
 146     return 0;
 147   if (memcmp (buf, XZ_HEADER_MAGIC, XZ_HEADER_MAGIC_LEN) != 0)
 148     return 0;
 149   return 1;
 150 }
 151
 152 /* For explanation of this function, see src/xz/list.c:parse_indexes
 153  * in the xz sources.
 154  */
 155 static lzma_index *
 156 parse_indexes (const char *filename, int fd)
 157 {
 158   lzma_ret r;
 159   off_t pos, index_size;
 160   uint8_t footer[LZMA_STREAM_HEADER_SIZE];
 161   uint8_t header[LZMA_STREAM_HEADER_SIZE];
 162   lzma_stream_flags footer_flags;
 163   lzma_stream_flags header_flags;
 164   lzma_stream strm = LZMA_STREAM_INIT;
 165   ssize_t n;
 166   lzma_index *combined_index = NULL;
 167   lzma_index *this_index = NULL;
 168   lzma_vli stream_padding = 0;
 169   size_t nr_streams = 0;
 170
 171   /* Check file size is a multiple of 4 bytes. */
 172   pos = lseek (fd, 0, SEEK_END);
 173   if (pos == (off_t) -1)
 174     error (EXIT_FAILURE, errno, "%s: lseek", filename);
 175
 176   if ((pos & 3) != 0)
 177     error (EXIT_FAILURE, 0,
 178            "%s: not an xz file: size is not a multiple of 4 bytes",
 179            filename);
 180
 181   /* Jump backwards through the file identifying each stream. */
 182   while (pos > 0) {
 183     debug ("looping through streams: pos = %" PRIu64, (uint64_t) pos);
 184
 185     if (pos < LZMA_STREAM_HEADER_SIZE)
 186       error (EXIT_FAILURE, 0,
 187              "%s: corrupted file at %" PRIu64, filename, (uint64_t) pos);
 188
 189     if (lseek (fd, -LZMA_STREAM_HEADER_SIZE, SEEK_CUR) == -1)
 190       error (EXIT_FAILURE, errno, "%s: lseek", filename);
 191
 192     if (read (fd, footer, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE)
 193       error (EXIT_FAILURE, errno, "%s: read stream footer", filename);
 194
 195     /* Skip stream padding. */
 196     if (footer[8] == 0 && footer[9] == 0 &&
 197         footer[10] == 0 && footer[11] == 0) {
 198       stream_padding += 4;
 199       pos -= 4;
 200       continue;
 201     }
 202
 203     pos -= LZMA_STREAM_HEADER_SIZE;
 204     nr_streams++;
 205
 206     debug ("decode stream footer at pos = %" PRIu64, (uint64_t) pos);
 207
 208     /* Does the stream footer look reasonable? */
 209     r = lzma_stream_footer_decode (&footer_flags, footer);
 210     if (r != LZMA_OK)
 211       error (EXIT_FAILURE, 0,
 212              "%s: invalid stream footer (error %d)", filename, r);
 213
 214     debug ("backward_size = %" PRIu64, (uint64_t) footer_flags.backward_size);
 215     index_size = footer_flags.backward_size;
 216     if (pos < index_size + LZMA_STREAM_HEADER_SIZE)
 217       error (EXIT_FAILURE, 0, "%s: invalid stream footer", filename);
 218
 219     pos -= index_size;
 220     debug ("decode index at pos = %" PRIu64, (uint64_t) pos);
 221
 222     /* Seek backwards to the index of this stream. */
 223     if (lseek (fd, pos, SEEK_SET) == -1)
 224       error (EXIT_FAILURE, errno, "%s: lseek", filename);
 225
 226     /* Decode the index. */
 227     r = lzma_index_decoder (&strm, &this_index, UINT64_MAX);
 228     if (r != LZMA_OK)
 229       error (EXIT_FAILURE, 0,
 230              "%s: invalid stream index (error %d)", filename, r);
 231
 232     do {
 233       uint8_t buf[BUFSIZ];
 234
 235       strm.avail_in = index_size;
 236       if (strm.avail_in > BUFSIZ)
 237         strm.avail_in = BUFSIZ;
 238
 239       n = read (fd, &buf, strm.avail_in);
 240       if (n == -1)
 241         error (EXIT_FAILURE, errno, "%s: read", filename);
 242
 243       index_size -= strm.avail_in;
 244
 245       strm.next_in = buf;
 246       r = lzma_code (&strm, LZMA_RUN);
 247     } while (r == LZMA_OK);
 248
 249     if (r != LZMA_STREAM_END) {
 250       error (EXIT_FAILURE, 0, "%s: could not parse index (error %d)",
 251              filename, r);
 252
 253     pos -= lzma_index_total_size (this_index) + LZMA_STREAM_HEADER_SIZE;
 254
 255     debug ("decode stream header at pos = %" PRIu64, (uint64_t) pos);
 256
 257     /* Read and decode the stream header. */
 258     if (lseek (fd, pos, SEEK_SET) == -1)
 259       error (EXIT_FAILURE, errno, "%s: lseek", filename);
 260
 261     if (read (fd, header, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE)
 262       error (EXIT_FAILURE, errno, "%s: read stream header", filename);
 263
 264     r = lzma_stream_header_decode (&header_flags, header);
 265     if (r != LZMA_OK)
 266       error (EXIT_FAILURE, 0,
 267              "%s: invalid stream header (error %d)", filename, r);
 268
 269     /* Header and footer of the stream should be equal. */
 270     r = lzma_stream_flags_compare (&header_flags, &footer_flags);
 271     if (r != LZMA_OK)
 272       error (EXIT_FAILURE, 0,
 273              "%s: header and footer of stream are not equal (error %d)",
 274              filename, r);
 275
 276     /* Store the decoded stream flags in this_index. */
 277     r = lzma_index_stream_flags (this_index, &footer_flags);
 278     if (r != LZMA_OK)
 279       error (EXIT_FAILURE, 0,
 280              "%s: cannot read stream_flags from index (error %d)",
 281              filename, r);
 282
 283     /* Store the amount of stream padding so far.  Needed to calculate
 284      * compressed offsets correctly in multi-stream files.
 285      */
 286     r = lzma_index_stream_padding (this_index, stream_padding);
 287     if (r != LZMA_OK)
 288       error (EXIT_FAILURE, 0,
 289              "%s: cannot set stream_padding in index (error %d)",
 290              filename, r);
 291
 292     if (combined_index != NULL) {
 293       r = lzma_index_cat (this_index, combined_index, NULL);
 294       if (r != LZMA_OK)
 295         error (EXIT_FAILURE, 0, "%s: cannot combine indexes", filename);
 296     }
 297
 298     combined_index = this_index;
 299     this_index = NULL;
 300   }
 301
 302   lzma_end (&strm);
 303
 304   return combined_index;
 305 }
 306
 307 /* Iterate over the indexes and uncompress.
 308  */
 309 static void
 310 iter_indexes (lzma_index *idx)
 311 {
 312   lzma_index_iter iter;
 313
 314   lzma_index_iter_init (&iter, idx);
 315   while (!lzma_index_iter_next (&iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK)) {
 316     abort ();
 317
 318
 319
 320
 321   }
 322
 323   return 0;
 324 }
 325
 326 #if 0
 327 char *
 328 xzfile_read_block (xzfile *xz, uint64_t offset,
 329                    uint64_t *start_rtn, uint64_t *size_rtn)
 330 {
 331   lzma_index_iter iter;
 332   uint8_t header[LZMA_BLOCK_HEADER_SIZE_MAX];
 333   lzma_block block;
 334   lzma_filter filters[LZMA_FILTERS_MAX + 1];
 335   lzma_ret r;
 336   lzma_stream strm = LZMA_STREAM_INIT;
 337   char *data;
 338   ssize_t n;
 339   size_t i;
 340
 341   /* Locate the block containing the uncompressed offset. */
 342   lzma_index_iter_init (&iter, xz->idx);
 343   if (lzma_index_iter_locate (&iter, offset)) {
 344     nbdkit_error ("cannot find offset %" PRIu64 " in the xz file", offset);
 345     return NULL;
 346   }
 347
 348   *start_rtn = iter.block.uncompressed_file_offset;
 349   *size_rtn = iter.block.uncompressed_size;
 350
 351   nbdkit_debug ("seek: block number %d at file offset %" PRIu64,
 352                 (int) iter.block.number_in_file,
 353                 (uint64_t) iter.block.compressed_file_offset);
 354
 355   if (lseek (xz->fd, iter.block.compressed_file_offset, SEEK_SET) == -1) {
 356     nbdkit_error ("lseek: %m");
 357     return NULL;
 358   }
 359
 360   /* Read the block header.  Start by reading a single byte which
 361    * tell us how big the block header is.
 362    */
 363   n = read (xz->fd, header, 1);
 364   if (n == 0) {
 365     nbdkit_error ("read: unexpected end of file reading block header byte");
 366     return NULL;
 367   }
 368   if (n == -1) {
 369     nbdkit_error ("read: %m");
 370     return NULL;
 371   }
 372
 373   if (header[0] == '\0') {
 374     nbdkit_error ("read: unexpected invalid block in file, header[0] = 0");
 375     return NULL;
 376   }
 377
 378   block.version = 0;
 379   block.check = iter.stream.flags->check;
 380   block.filters = filters;
 381   block.header_size = lzma_block_header_size_decode (header[0]);
 382
 383   /* Now read and decode the block header. */
 384   n = read (xz->fd, &header[1], block.header_size-1);
 385   if (n >= 0 && n != block.header_size-1) {
 386     nbdkit_error ("read: unexpected end of file reading block header");
 387     return NULL;
 388   }
 389   if (n == -1) {
 390     nbdkit_error ("read: %m");
 391     return NULL;
 392   }
 393
 394   r = lzma_block_header_decode (&block, NULL, header);
 395   if (r != LZMA_OK) {
 396     nbdkit_error ("invalid block header (error %d)", r);
 397     return NULL;
 398   }
 399
 400   /* What this actually does is it checks that the block header
 401    * matches the index.
 402    */
 403   r = lzma_block_compressed_size (&block, iter.block.unpadded_size);
 404   if (r != LZMA_OK) {
 405     nbdkit_error ("cannot calculate compressed size (error %d)", r);
 406     goto err1;
 407   }
 408
 409   /* Read the block data. */
 410   r = lzma_block_decoder (&strm, &block);
 411   if (r != LZMA_OK) {
 412     nbdkit_error ("invalid block (error %d)", r);
 413     goto err1;
 414   }
 415
 416   data = malloc (*size_rtn);
 417   if (data == NULL) {
 418     nbdkit_error ("malloc (%zu bytes): %m\n"
 419                   "NOTE: If this error occurs, you need to recompress your xz files with a smaller block size.  Use: 'xz --block-size=16777216 ...'.",
 420                   *size_rtn);
 421     goto err1;
 422   }
 423
 424   strm.next_in = NULL;
 425   strm.avail_in = 0;
 426   strm.next_out = (uint8_t *) data;
 427   strm.avail_out = block.uncompressed_size;
 428
 429   do {
 430     uint8_t buf[BUFSIZ];
 431     lzma_action action = LZMA_RUN;
 432
 433     if (strm.avail_in == 0) {
 434       strm.next_in = buf;
 435       n = read (xz->fd, buf, sizeof buf);
 436       if (n == -1) {
 437         nbdkit_error ("read: %m");
 438         goto err2;
 439       }
 440       strm.avail_in = n;
 441       if (n == 0)
 442         action = LZMA_FINISH;
 443     }
 444
 445     strm.avail_in = n;
 446     strm.next_in = buf;
 447     r = lzma_code (&strm, action);
 448   } while (r == LZMA_OK);
 449
 450   if (r != LZMA_OK && r != LZMA_STREAM_END) {
 451     nbdkit_error ("could not parse block data (error %d)", r);
 452     goto err2;
 453   }
 454
 455   lzma_end (&strm);
 456
 457   for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
 458     free (filters[i].options);
 459
 460   return data;
 461
 462  err2:
 463   free (data);
 464   lzma_end (&strm);
 465  err1:
 466   for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
 467     free (filters[i].options);
 468
 469   return NULL;
 470 }
 471 #endif