/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
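 *
 * <p>A minimal usage sketch (for illustration only; the archive name and the
 * NIO calls are placeholders, not part of this class):</p>
 * <pre>{@code
 * try (TarArchiveInputStream in =
 *          new TarArchiveInputStream(Files.newInputStream(Paths.get("archive.tar")))) {
 *     TarArchiveEntry entry;
 *     while ((entry = in.getNextTarEntry()) != null) {
 *         final byte[] content = new byte[(int) entry.getSize()];
 *         IOUtils.readFully(in, content); // drain the current entry's data
 *     }
 * }
 * }</pre>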
 *
 * @NotThreadSafe
 */
public class TarArchiveInputStream extends ArchiveInputStream {

    private static final int SMALL_BUFFER_SIZE = 256;

    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    /** The size of the TAR header */
    private final int recordSize;

    /** The buffer to store the TAR header */
    private final byte[] recordBuffer;

    /** The size of a block */
    private final int blockSize;

    /** True if stream has hit EOF */
    private boolean hasHitEOF;

    /** Size of the current entry */
    private long entrySize;

    /** How far into the entry the stream is */
    private long entryOffset;

    /** An input stream to read from */
    private final InputStream inputStream;

    /** Input streams for reading sparse entries */
    private List<InputStream> sparseInputStreams;

    /** The index of the current input stream being read when reading sparse entries */
    private int currentSparseInputStreamIndex;

    /** The meta-data about the current entry */
    private TarArchiveEntry currEntry;

    /** The encoding of the file */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    private final boolean lenient;

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     */
    public TarArchiveInputStream(final InputStream is) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final boolean lenient) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final String encoding) {
        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize,
                                 final String encoding) {
        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize) {
        this(is, blockSize, recordSize, null);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding) {
        this(is, blockSize, recordSize, encoding, false);
    }

    /**
     * Constructor for TarInputStream.
     * @param is the input stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use
     * @param encoding name of the encoding to use for file names
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @since 1.19
     */
    public TarArchiveInputStream(final InputStream is, final int blockSize, final int recordSize,
                                 final String encoding, final boolean lenient) {
        this.inputStream = is;
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordSize = recordSize;
        this.recordBuffer = new byte[recordSize];
        this.blockSize = blockSize;
        this.lenient = lenient;
    }

    /**
     * Closes this stream and all input streams it is currently using.
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        // Close all the input streams in sparseInputStreams
        if (sparseInputStreams != null) {
            for (final InputStream inputStream : sparseInputStreams) {
                inputStream.close();
            }
        }

        inputStream.close();
    }

    /**
     * Get the record size being used by this stream's buffer.
     *
     * @return The record size.
     */
    public int getRecordSize() {
        return recordSize;
    }

    /**
     * Get the available data that can be read from the current
     * entry in the archive. This does not indicate how much data
     * is left in the entire archive, only in the current entry.
     * This value is determined from the entry's size header field
     * and the amount of data already read from the current entry.
     * Integer.MAX_VALUE is returned in case more than Integer.MAX_VALUE
     * bytes are left in the current entry in the archive.
     *
     * @return The number of available bytes for the current entry.
     * @throws IOException only to satisfy the signature; never thrown here
     */
    @Override
    public int available() throws IOException {
        if (isDirectory()) {
            return 0;
        }

        if (currEntry.getRealSize() - entryOffset > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        return (int) (currEntry.getRealSize() - entryOffset);
    }

    /**
     * Skips over and discards <code>n</code> bytes of data from this input
     * stream. The <code>skip</code> method may, for a variety of reasons, end
     * up skipping over some smaller number of bytes, possibly <code>0</code>.
     * This may result from any of a number of conditions; reaching end of file
     * or end of entry before <code>n</code> bytes have been skipped are only
     * two possibilities. The actual number of bytes skipped is returned. If
     * <code>n</code> is negative, no bytes are skipped.
     *
     * @param n
     *            the number of bytes to be skipped.
     * @return the actual number of bytes skipped.
     * @throws IOException if a truncated tar archive is detected
     *                     or some other I/O error occurs
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n <= 0 || isDirectory()) {
            return 0;
        }

        final long availableOfInputStream = inputStream.available();
        final long available = currEntry.getRealSize() - entryOffset;
        final long numToSkip = Math.min(n, available);
        long skipped;

        if (!currEntry.isSparse()) {
            skipped = IOUtils.skip(inputStream, numToSkip);
            // for a non-sparse entry, validate the number of bytes actually skipped
            // against inputStream.available() if inputStream is an instance of FileInputStream
            skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
        } else {
            skipped = skipSparse(numToSkip);
        }

        count(skipped);
        entryOffset += skipped;
        return skipped;
    }

    /**
     * Skip n bytes from the current input stream. If the current input stream
     * doesn't have enough data to skip, jump to the next input stream and skip
     * the remaining bytes; keep doing this until n bytes in total are skipped
     * or all input streams are exhausted.
     *
     * @param n bytes of data to skip
     * @return actual bytes of data skipped
     * @throws IOException if an I/O error occurs
     */
    private long skipSparse(final long n) throws IOException {
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.skip(n);
        }

        long bytesSkipped = 0;

        while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
            final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
            bytesSkipped += currentInputStream.skip(n - bytesSkipped);

            if (bytesSkipped < n) {
                currentSparseInputStreamIndex++;
            }
        }

        return bytesSkipped;
    }

    /**
     * Since we do not support marking just yet, we return false.
     *
     * @return False.
     */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     *
     * @param markLimit The limit to mark.
     */
    @Override
    public synchronized void mark(final int markLimit) {
    }

    /**
     * Since we do not support marking just yet, we do nothing.
     */
    @Override
    public synchronized void reset() {
    }

    /**
     * Get the next entry in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, place the input stream at the header of the
     * next entry, read the header, instantiate a new
     * TarEntry from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
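     * <p>A typical drain loop (an illustrative sketch; {@code in} stands for this
     * stream and is not part of the API):</p>
     * <pre>{@code
     * TarArchiveEntry entry;
     * while ((entry = in.getNextTarEntry()) != null) {
     *     final byte[] buffer = new byte[8192];
     *     for (int n = in.read(buffer); n != -1; n = in.read(buffer)) {
     *         // process n bytes of the current entry
     *     }
     * }
     * }</pre>
     *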
     * @return The next TarEntry in the archive, or null.
     * @throws IOException on error
     */
    public TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            /* Skip will only go to the end of the current entry */
            IOUtils.skip(this, Long.MAX_VALUE);

            /* skip to the end of the last record */
            skipRecordPadding();
        }

        final byte[] headerBuf = getRecord();

        if (headerBuf == null) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            currEntry = new TarArchiveEntry(headerBuf, zipEncoding, lenient);
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        entryOffset = 0;
        entrySize = currEntry.getSize();

        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (final NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        // If the size of the next element in the archive has changed
        // due to a new size being reported in the posix header
        // information, we update entrySize here so that it contains
        // the correct value.
        entrySize = currEntry.getSize();

        return currEntry;
    }

    /**
     * The last record block should be written at the full size, so skip any
     * additional space used to fill a record after an entry.
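     *
     * <p>For example, with the default 512-byte record size, a 1000-byte entry
     * occupies two records (1024 bytes), so 24 bytes of padding are skipped here.
     * (Numbers are illustrative only.)</p>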
     *
     * @throws IOException if a truncated tar archive is detected
     */
    private void skipRecordPadding() throws IOException {
        if (!isDirectory() && this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
            final long available = inputStream.available();
            final long numRecords = (this.entrySize / this.recordSize) + 1;
            final long padding = (numRecords * this.recordSize) - this.entrySize;
            long skipped = IOUtils.skip(inputStream, padding);

            skipped = getActuallySkipped(available, skipped, padding);

            count(skipped);
        }
    }

    /**
     * For FileInputStream, skip may report having skipped the requested number
     * of bytes even beyond EOF, so we need the available byte count to determine
     * how many bytes were actually skipped.
     *
     * @param available available bytes returned by inputStream.available()
     * @param skipped skipped bytes returned by inputStream.skip()
     * @param expected bytes expected to skip
     * @return number of bytes actually skipped
     * @throws IOException if a truncated tar archive is detected
     */
    private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
        long actuallySkipped = skipped;
        if (inputStream instanceof FileInputStream) {
            actuallySkipped = Math.min(skipped, available);
        }

        if (actuallySkipped != expected) {
            throw new IOException("Truncated TAR archive");
        }

        return actuallySkipped;
    }

    /**
     * Get the next entry in this tar archive as longname data.
     *
     * @return The next entry in the archive as longname data, or null.
     * @throws IOException on error
     */
    protected byte[] getLongNameData() throws IOException {
        // read in the name
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length = 0;
        while ((length = read(smallBuf)) >= 0) {
            longName.write(smallBuf, 0, length);
        }
        getNextEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            final byte[] l = new byte[length];
            System.arraycopy(longNameData, 0, l, 0, length);
            longNameData = l;
        }
        return longNameData;
    }

    /**
     * Get the next record in this tar archive. This will skip
     * over any remaining data in the current entry, if there
     * is one, and place the input stream at the header of the
     * next entry.
     *
     * <p>If there are no more entries in the archive, null will be
     * returned to indicate that the end of the archive has been
     * reached. At the same time the {@code hasHitEOF} marker will be
     * set to true.</p>
     *
     * @return The next header in the archive, or null.
     * @throws IOException on error
     */
    private byte[] getRecord() throws IOException {
        byte[] headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Determine if an archive record indicates End of Archive. End of
     * archive is indicated by a record that consists entirely of null bytes.
     *
     * @param record The record data to check.
     * @return true if the record data is an End of Archive
     */
    protected boolean isEOFRecord(final byte[] record) {
        return record == null || ArchiveUtils.isArrayZero(record, recordSize);
    }

    /**
     * Read a record from the input stream and return the data.
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException on error
     */
    protected byte[] readRecord() throws IOException {
        final int readNow = IOUtils.readFully(inputStream, recordBuffer);
        count(readNow);
        if (readNow != recordSize) {
            return null;
        }

        return recordBuffer;
    }

    private void readGlobalPaxHeaders() throws IOException {
        globalPaxHeaders = TarUtils.parsePaxHeaders(this, globalSparseHeaders, globalPaxHeaders, entrySize);
        getNextEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * For PAX Format 0.0, the sparse headers (GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
     *
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     *
     * For PAX Format 0.1, the sparse headers are stored in a single variable: GNU.sparse.map
     *
     * <pre>
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated
     *    values "offset,size[,offset-1,size-1...]"
     * </pre>
     *
     * For PAX Format 1.X:
     * <br>
     * The sparse map itself is stored in the file data block, preceding the actual
     * file data. It consists of a series of decimal numbers delimited by newlines.
     * The map is padded with nulls to the nearest block boundary. The first number
     * gives the number of entries in the map. Following are map entries, each one
     * consisting of two numbers giving the offset and size of the data block it
     * describes.
     *
     * @throws IOException if an I/O error occurs
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers = TarUtils.parsePaxHeaders(this, sparseHeaders, globalPaxHeaders, entrySize);

        // for 0.1 PAX Headers
        if (headers.containsKey("GNU.sparse.map")) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get("GNU.sparse.map")));
        }
        getNextEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            sparseHeaders = TarUtils.parsePAX1XSparseHeaders(inputStream, recordSize);
            currEntry.setSparseHeaders(sparseHeaders);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
            throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * Adds the sparse chunks from the current entry to the current entry's sparse
     * chunk list, including any additional sparse entries following the current entry.
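     *
     * <p>(In the old GNU sparse layout the primary header carries only a fixed,
     * small number of sparse descriptors; when its extended flag is set, one or
     * more extension records follow, each carrying further descriptors; hence
     * the loop below.)</p>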
     *
     * @throws IOException on error
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final byte[] headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf);
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Returns the next Archive Entry in this Stream.
     *
     * @return the next entry,
     *         or {@code null} if there are no more entries
     * @throws IOException if the next entry could not be read
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextTarEntry();
    }

    /**
     * Tries to read the next record, rewinding the stream if it is not an EOF record.
     *
     * <p>This is meant to protect against cases where a tar
     * implementation has written only one EOF record when two are
     * expected. Actually this won't help since a non-conforming
     * implementation likely won't fill full blocks consisting of - by
     * default - twenty records either so we probably have already read
     * beyond the archive anyway.</p>
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        final boolean marked = inputStream.markSupported();
        if (marked) {
            inputStream.mark(recordSize);
        }
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset && marked) {
                pushedBackBytes(recordSize);
                inputStream.reset();
            }
        }
    }

    /**
     * Reads bytes from the current tar archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
        if (numToRead == 0) {
            return 0;
        }
        int totalRead = 0;

        if (isAtEOF() || isDirectory()) {
            return -1;
        }

        if (currEntry == null) {
            throw new IllegalStateException("No current tar entry");
        }

        if (entryOffset >= currEntry.getRealSize()) {
            return -1;
        }

        numToRead = Math.min(numToRead, available());

        if (currEntry.isSparse()) {
            // for sparse entries, we need to read them in another way
            totalRead = readSparse(buf, offset, numToRead);
        } else {
            totalRead = inputStream.read(buf, offset, numToRead);
        }

        if (totalRead == -1) {
            if (numToRead > 0) {
                throw new IOException("Truncated TAR archive");
            }
            setAtEOF(true);
        } else {
            count(totalRead);
            entryOffset += totalRead;
        }

        return totalRead;
    }

    /**
     * For sparse tar entries, there are many "holes" (consisting of all zeros) in the
     * file. Only the non-zero data is stored in tar files, and it is stored separately.
     * The structure of the non-zero data is described by the sparse headers: the offset,
     * where a block of non-zero data starts, and numbytes, the length of the non-zero
     * data block. When reading sparse entries, the actual data is read out with the
     * "holes" and the non-zero data combined together according to the sparse headers.
     *
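     * <p>Illustrative example (numbers invented for the sketch): sparse headers
     * (offset=0, numbytes=512) and (offset=10240, numbytes=512) for an entry with a
     * real size of 10752 mean that reading yields 512 bytes of stored data, then
     * 9728 zero bytes for the hole, then the final 512 bytes of stored data.</p>
     *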
     * @param buf The buffer into which to place bytes read.
     * @param offset The offset at which to place bytes read.
     * @param numToRead The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
        // if there are no actual input streams, just read from the original input stream
        if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
            return inputStream.read(buf, offset, numToRead);
        }

        if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
            return -1;
        }

        final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
        final int readLen = currentInputStream.read(buf, offset, numToRead);

        // if the current input stream is the last input stream,
        // just return the number of bytes read from current input stream
        if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
            return readLen;
        }

        // if EOF of the current input stream has been reached, open a new input
        // stream and recursively call read
        if (readLen == -1) {
            currentSparseInputStreamIndex++;
            return readSparse(buf, offset, numToRead);
        }

        // if the remaining data of the current input stream is not long enough, open
        // a new input stream and recursively call read
        if (readLen < numToRead) {
            currentSparseInputStreamIndex++;
            final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
            if (readLenOfNext == -1) {
                return readLen;
            }

            return readLen + readLenOfNext;
        }

        // if the remaining data of the current input stream is enough (which means
        // readLen == numToRead), just return readLen
        return readLen;
    }

    /**
     * Whether this class is able to read the given entry.
     *
     * @param ae the entry to test
     * @return The implementation will return true if the {@link ArchiveEntry} is an instance of {@link TarArchiveEntry}
     */
    @Override
    public boolean canReadEntryData(final ArchiveEntry ae) {
        return ae instanceof TarArchiveEntry;
    }

    /**
     * Get the current TAR Archive Entry that this input stream is processing
     *
     * @return The current Archive Entry
     */
    public TarArchiveEntry getCurrentEntry() {
        return currEntry;
    }

    protected final void setCurrentEntry(final TarArchiveEntry e) {
        currEntry = e;
    }

    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

    /**
     * This method is invoked once the end of the archive is hit; it
     * tries to consume the remaining bytes under the assumption that
     * the tool creating this archive has padded the last block.
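     *
     * <p>For example, with the default 10240-byte block size, if 5632 bytes of the
     * final block have been read, the remaining 4608 bytes are skipped here.
     * (Numbers are illustrative only.)</p>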
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = getBytesRead() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            final long skipped = IOUtils.skip(inputStream, blockSize - bytesReadOfLastBlock);
            count(skipped);
        }
    }

    /**
     * Checks if the signature matches what is expected for a tar file.
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is a tar archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        if (length < TarConstants.VERSION_OFFSET + TarConstants.VERSIONLEN) {
            return false;
        }

        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)) {
            return true;
        }
        if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && (ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
                || ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
                    signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN))) {
            return true;
        }
        // COMPRESS-107 - recognise Ant tar files
        return ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
                signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
            && ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
                signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN);
    }

    /**
     * Build the input streams consisting of all-zero input streams and non-zero
     * input streams. When reading from the non-zero input streams, the data is
     * actually read from the original input stream. The size of each input stream
     * is determined by the sparse headers.
     *
     * <p>NOTE: Some all-zero input streams and non-zero input streams would have
     * a size of 0. We DO NOT store these 0-size input streams because they are
     * meaningless.</p>
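     *
     * <p>Illustrative example (invented numbers): sparse headers (offset=0,
     * numbytes=512) and (offset=10240, numbytes=512) produce three streams: a
     * 512-byte slice of the real input stream, a 9728-byte bounded all-zero stream
     * for the hole, and a final 512-byte slice of the real input stream.</p>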
     */
    private void buildSparseInputStreams() throws IOException {
        currentSparseInputStreamIndex = -1;
        sparseInputStreams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside of the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }

            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                sparseInputStreams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
            }

            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                sparseInputStreams.add(new BoundedInputStream(inputStream, sparseHeader.getNumbytes()));
            }

            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }

        if (!sparseInputStreams.isEmpty()) {
            currentSparseInputStreamIndex = 0;
        }
    }
}