001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.BufferedInputStream;
021import java.io.ByteArrayInputStream;
022import java.io.Closeable;
023import java.io.EOFException;
024import java.io.File;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.SequenceInputStream;
028import java.nio.Buffer;
029import java.nio.ByteBuffer;
030import java.nio.channels.FileChannel;
031import java.nio.channels.SeekableByteChannel;
032import java.nio.file.Files;
033import java.nio.file.StandardOpenOption;
034import java.util.Arrays;
035import java.util.Collections;
036import java.util.Comparator;
037import java.util.Enumeration;
038import java.util.EnumSet;
039import java.util.HashMap;
040import java.util.LinkedList;
041import java.util.List;
042import java.util.Map;
043import java.util.zip.Inflater;
044import java.util.zip.ZipException;
045
046import org.apache.commons.compress.archivers.EntryStreamOffsets;
047import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
048import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
049import org.apache.commons.compress.utils.BoundedArchiveInputStream;
050import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
051import org.apache.commons.compress.utils.CountingInputStream;
052import org.apache.commons.compress.utils.IOUtils;
053import org.apache.commons.compress.utils.InputStreamStatistics;
054
055import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
056import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
057import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
058import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
059import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
060
061/**
062 * Replacement for <code>java.util.zip.ZipFile</code>.
063 *
064 * <p>This class adds support for file name encodings other than UTF-8
065 * (which is required to work on ZIP files created by native zip tools)
066 * and is able to skip a preamble like the one found in self
067 * extracting archives.  Furthermore it returns instances of
068 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
069 * instead of <code>java.util.zip.ZipEntry</code>.</p>
070 *
071 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
072 * have to reimplement all methods anyway.  Like
073 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
074 * covers and supports compressed and uncompressed entries.  As of
075 * Apache Commons Compress 1.3 it also transparently supports Zip64
076 * extensions and thus individual entries and archives larger than 4
077 * GB or with more than 65536 entries.</p>
078 *
079 * <p>The method signatures mimic the ones of
080 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
081 *
082 * <ul>
083 *   <li>There is no getName method.</li>
084 *   <li>entries has been renamed to getEntries.</li>
085 *   <li>getEntries and getEntry return
086 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
087 *   instances.</li>
088 *   <li>close is allowed to throw IOException.</li>
089 * </ul>
090 *
091 */
092public class ZipFile implements Closeable {
093    private static final int HASH_SIZE = 509; // initial capacity handed to the nameMap HashMap
094    static final int NIBLET_MASK = 0x0f; // keeps the low four bits; applied when deriving the platform from "version made by"
095    static final int BYTE_SHIFT = 8; // number of bits "version made by" is shifted right before masking
096    private static final int POS_0 = 0; // POS_0..POS_3: index constants, not used in the part of the file shown here
097    private static final int POS_1 = 1;
098    private static final int POS_2 = 2;
099    private static final int POS_3 = 3;
100    private static final byte[] ONE_ZERO_BYTE = new byte[1]; // single zero byte appended after DEFLATED data in getInputStream
101
102    /**
103     * List of entries in the order they appear inside the central
104     * directory.
105     */
106    private final List<ZipArchiveEntry> entries =
107        new LinkedList<>();
108
109    /**
110     * Maps an entry name to the list of all entries carrying that name.
111     */
112    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
113        new HashMap<>(HASH_SIZE);
114
115    /**
116     * The encoding to use for file names and the file comment, exactly as handed to the constructor.
117     *
118     * <p>For a list of possible values see <a
119     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
120     * Constructors without an encoding parameter pass UTF-8; {@code null} stands for the platform's default.</p>
121     */
122    private final String encoding;
123
124    /**
125     * The zip encoding to use for file names and the file comment, obtained from {@code ZipEncodingHelper} for {@link #encoding}.
126     */
127    private final ZipEncoding zipEncoding;
128
129    /**
130     * File name of actual source; appears in error messages and in the warning printed by {@code finalize()}.
131     */
132    private final String archiveName;
133
134    /**
135     * The actual data source.
136     */
137    private final SeekableByteChannel archive;
138
139    /**
140     * Whether to look for and use Unicode extra fields.
141     */
142    private final boolean useUnicodeExtraFields;
143
144    /**
145     * Whether the file is closed; starts as {@code true}, is cleared by the constructor on success and set again by {@code close()}.
146     */
147    private volatile boolean closed = true;
148
149    /**
150     * Whether the zip archive is a split zip archive, i.e. the channel is a {@code ZipSplitReadOnlySeekableByteChannel}.
151     */
152    private final boolean isSplitZipArchive;
153
154    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
155    private final byte[] dwordBuf = new byte[DWORD];
156    private final byte[] wordBuf = new byte[WORD]; // receives the record signature read in populateFromCentralDirectory
157    private final byte[] cfhBuf = new byte[CFH_LEN]; // receives the fixed-length part of one central directory entry
158    private final byte[] shortBuf = new byte[SHORT];
159    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); // ByteBuffer views sharing the arrays above, passed to IOUtils.readFully
160    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
161    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
162    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);
163
164    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset; // NOTE(review): not assigned in the visible part of the file - presumably set while locating the central directory; confirm
165    private long centralDirectoryStartOffset; // channel position recorded right after positionAtCentralDirectory()
166
167    /**
168     * Opens the given file for reading, assuming "UTF8" for file names.
169     * Delegates to {@link #ZipFile(File, String)} with {@code ZipEncodingHelper.UTF8}.
170     * @param f the archive.
171     *
172     * @throws IOException if an error occurs while reading the file.
173     */
174    public ZipFile(final File f) throws IOException {
175        this(f, ZipEncodingHelper.UTF8);
176    }
177
178    /**
179     * Opens the given file for reading, assuming "UTF8" for file names.
180     * The name is wrapped in a {@link File} and handed to {@link #ZipFile(File, String)}.
181     * @param name name of the archive.
182     *
183     * @throws IOException if an error occurs while reading the file.
184     */
185    public ZipFile(final String name) throws IOException {
186        this(new File(name), ZipEncodingHelper.UTF8);
187    }
188
189    /**
190     * Opens the given file for reading, assuming the specified
191     * encoding for file names, scanning unicode extra fields.
192     * The name is wrapped in a {@link File}; {@code useUnicodeExtraFields} is passed as {@code true}.
193     * @param name name of the archive.
194     * @param encoding the encoding to use for file names, use null
195     * for the platform's default encoding
196     *
197     * @throws IOException if an error occurs while reading the file.
198     */
199    public ZipFile(final String name, final String encoding) throws IOException {
200        this(new File(name), encoding, true);
201    }
202
203    /**
204     * Opens the given file for reading, assuming the specified
205     * encoding for file names and scanning for unicode extra fields.
206     * Delegates to {@link #ZipFile(File, String, boolean)} with {@code useUnicodeExtraFields} set to {@code true}.
207     * @param f the archive.
208     * @param encoding the encoding to use for file names, use null
209     * for the platform's default encoding
210     *
211     * @throws IOException if an error occurs while reading the file.
212     */
213    public ZipFile(final File f, final String encoding) throws IOException {
214        this(f, encoding, true);
215    }
216
217    /**
218     * Opens the given file for reading, assuming the specified
219     * encoding for file names.
220     * Local file headers are not ignored: {@code ignoreLocalFileHeader} is passed on as {@code false}.
221     * @param f the archive.
222     * @param encoding the encoding to use for file names, use null
223     * for the platform's default encoding
224     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
225     * Extra Fields (if present) to set the file names.
226     *
227     * @throws IOException if an error occurs while reading the file.
228     */
229    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
230        throws IOException {
231        this(f, encoding, useUnicodeExtraFields, false);
232    }
233
234    /**
235     * Opens the given file for reading, assuming the specified
236     * encoding for file names.
237     *
238     * <p>The file is opened through a read-only channel which is closed again if reading the archive fails.</p>
239     * <p>By default the central directory record and all local file headers of the archive will be read immediately
240     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
241     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
242     * may contain information not present inside of the central directory which will not be available when the argument
243     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
244     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also
245     * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
246     * true}.</p>
247     *
248     * @param f the archive.
249     * @param encoding the encoding to use for file names, use null
250     * for the platform's default encoding
251     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
252     * Extra Fields (if present) to set the file names.
253     * @param ignoreLocalFileHeader whether to ignore information
254     * stored inside the local file header (see the notes in this method's javadoc)
255     *
256     * @throws IOException if an error occurs while reading the file.
257     * @since 1.19
258     */
259    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields,
260                   final boolean ignoreLocalFileHeader)
261        throws IOException {
262        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
263             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader); // true = closeOnError, the channel was opened here
264    }
265
266    /**
267     * Opens the given channel for reading, assuming "UTF8" for file names.
268     * Error messages refer to the archive as "unknown archive".
269     * <p>{@link
270     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
271     * allows you to read from an in-memory archive.</p>
272     *
273     * @param channel the archive.
274     *
275     * @throws IOException if an error occurs while reading the file.
276     * @since 1.13
277     */
278    public ZipFile(final SeekableByteChannel channel)
279            throws IOException {
280        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
281    }
282
283    /**
284     * Opens the given channel for reading, assuming the specified
285     * encoding for file names.
286     * Error messages refer to the archive as "unknown archive"; Unicode extra fields are used.
287     * <p>{@link
288     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
289     * allows you to read from an in-memory archive.</p>
290     *
291     * @param channel the archive.
292     * @param encoding the encoding to use for file names, use null
293     * for the platform's default encoding
294     *
295     * @throws IOException if an error occurs while reading the file.
296     * @since 1.13
297     */
298    public ZipFile(final SeekableByteChannel channel, final String encoding)
299        throws IOException {
300        this(channel, "unknown archive", encoding, true);
301    }
302
303    /**
304     * Opens the given channel for reading, assuming the specified
305     * encoding for file names.
306     * Local file headers are read as well and the channel is left open if reading the archive fails.
307     * <p>{@link
308     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
309     * allows you to read from an in-memory archive.</p>
310     *
311     * @param channel the archive.
312     * @param archiveName name of the archive, used for error messages only.
313     * @param encoding the encoding to use for file names, use null
314     * for the platform's default encoding
315     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
316     * Extra Fields (if present) to set the file names.
317     *
318     * @throws IOException if an error occurs while reading the file.
319     * @since 1.13
320     */
321    public ZipFile(final SeekableByteChannel channel, final String archiveName,
322                   final String encoding, final boolean useUnicodeExtraFields)
323        throws IOException {
324        this(channel, archiveName, encoding, useUnicodeExtraFields, false, false); // closeOnError = false, ignoreLocalFileHeader = false
325    }
326
327    /**
328     * Opens the given channel for reading, assuming the specified
329     * encoding for file names.
330     * The channel is supplied by the caller and is left open if reading the archive fails.
331     * <p>{@link
332     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
333     * allows you to read from an in-memory archive.</p>
334     *
335     * <p>By default the central directory record and all local file headers of the archive will be read immediately
336     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
337     * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
338     * may contain information not present inside of the central directory which will not be available when the argument
339     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
340     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also
341     * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
342     * true}.</p>
343     *
344     * @param channel the archive.
345     * @param archiveName name of the archive, used for error messages only.
346     * @param encoding the encoding to use for file names, use null
347     * for the platform's default encoding
348     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
349     * Extra Fields (if present) to set the file names.
350     * @param ignoreLocalFileHeader whether to ignore information
351     * stored inside the local file header (see the notes in this method's javadoc)
352     *
353     * @throws IOException if an error occurs while reading the file.
354     * @since 1.19
355     */
356    public ZipFile(final SeekableByteChannel channel, final String archiveName,
357                   final String encoding, final boolean useUnicodeExtraFields,
358                   final boolean ignoreLocalFileHeader)
359        throws IOException {
360        this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); // false = closeOnError
361    }
362
363    private ZipFile(final SeekableByteChannel channel, final String archiveName, // constructor every public one ends up in
364                    final String encoding, final boolean useUnicodeExtraFields,
365                    final boolean closeOnError, final boolean ignoreLocalFileHeader)
366        throws IOException {
367        isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel); // split archives are recognised by the channel type
368
369        this.archiveName = archiveName;
370        this.encoding = encoding;
371        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
372        this.useUnicodeExtraFields = useUnicodeExtraFields;
373        archive = channel;
374        boolean success = false;
375        try {
376            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
377                populateFromCentralDirectory();
378            if (!ignoreLocalFileHeader) { // local file headers are only visited when the caller did not opt out
379                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
380            }
381            fillNameMap();
382            success = true;
383        } catch (final IOException e) {
384            throw new IOException("Error on ZipFile " + archiveName, e); // add the archive name, keep the original exception as cause
385        } finally {
386            closed = !success; // stays "closed" unless everything above succeeded
387            if (!success && closeOnError) { // only close the channel when the caller asked for it
388                IOUtils.closeQuietly(archive);
389            }
390        }
391    }
392
393    /**
394     * The encoding to use for file names and the file comment, as passed to the constructor.
395     *
396     * @return null if using the platform's default character encoding.
397     */
398    public String getEncoding() {
399        return encoding;
400    }
401
402    /**
403     * Closes the archive: marks this instance as closed, then closes the underlying channel.
404     * @throws IOException if an error occurs closing the archive.
405     */
406    @Override
407    public void close() throws IOException {
408        // apart from the constructor this flag is only written here and
409        // read in finalize() which can never be run in parallel.
410        // no synchronization needed.
411        closed = true;
412
413        archive.close();
414    }
415
416    /**
417     * Closes a zipfile quietly by handing it to {@code IOUtils.closeQuietly}:
418     * throws no I/O fault and does nothing on a null parameter.
419     * @param zipfile file to close, can be null
420     */
421    public static void closeQuietly(final ZipFile zipfile) {
422        IOUtils.closeQuietly(zipfile);
423    }
424
425    /**
426     * Returns all entries as an enumeration over the internal entry list.
427     *
428     * <p>Entries will be returned in the same order they appear
429     * within the archive's central directory.</p>
430     *
431     * @return all entries as {@link ZipArchiveEntry} instances
432     */
433    public Enumeration<ZipArchiveEntry> getEntries() {
434        return Collections.enumeration(entries);
435    }
436
437    /**
438     * Returns all entries in physical order.
439     *
440     * <p>Entries will be returned in the same order their contents
441     * appear within the archive.</p>
442     *
443     * @return all entries as {@link ZipArchiveEntry} instances
444     *
445     * @since 1.1
446     */
447    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
448        final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY);
449        Arrays.sort(allEntries, offsetComparator); // sorts the array only, the central directory order of 'entries' is kept
450        return Collections.enumeration(Arrays.asList(allEntries));
451    }
452
453    /**
454     * Looks up a single entry by its name.
455     *
456     *
457     * <p>Should several entries share the name, the one that comes
458     * first in the archive's central directory is the one handed
459     * out.</p>
460     *
461     * @param name name of the entry.
462     * @return the first ZipArchiveEntry registered under the given name - or
463     * {@code null} if no entry by that name exists.
464     */
465    public ZipArchiveEntry getEntry(final String name) {
466        final LinkedList<ZipArchiveEntry> candidates = nameMap.get(name);
467        return candidates == null ? null : candidates.getFirst();
468    }
469
470    /**
471     * Collects every entry registered under the given name, keeping the
472     * order of the archive's central directory.
473     *
474     * @param name name of the entry.
475     * @return the Iterable&lt;ZipArchiveEntry&gt; for the given name,
476     * empty if no entry carries that name
477     * @since 1.6
478     */
479    public Iterable<ZipArchiveEntry> getEntries(final String name) {
480        final List<ZipArchiveEntry> matches = nameMap.get(name);
481        return matches == null ? Collections.emptyList()
482            : matches;
483    }
484
485    /**
486     * Collects every entry registered under the given name, ordered the
487     * way their contents appear within the archive.
488     *
489     * @param name name of the entry.
490     * @return the Iterable&lt;ZipArchiveEntry&gt; for the given name,
491     * empty if no entry carries that name
492     * @since 1.6
493     */
494    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
495        if (!nameMap.containsKey(name)) {
496            return Arrays.asList(ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY);
497        }
498        final ZipArchiveEntry[] sorted = nameMap.get(name).toArray(ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY);
499        Arrays.sort(sorted, offsetComparator);
500        return Arrays.asList(sorted);
501    }
502
503    /**
504     * Whether this class is able to read the given entry; the decision is left to {@code ZipUtil.canHandleEntryData}.
505     *
506     * <p>May return false if it is set up to use encryption or a
507     * compression method that hasn't been implemented yet.</p>
508     * @since 1.1
509     * @param ze the entry
510     * @return whether this class is able to read the given entry.
511     */
512    public boolean canReadEntryData(final ZipArchiveEntry ze) {
513        return ZipUtil.canHandleEntryData(ze);
514    }
515
516    /**
517     * Exposes the raw, possibly still compressed bytes of an archive entry.
518     *
519     * <p>Whether the payload can be understood does not matter here, the
520     * stream is only meant to be moved on to somewhere else.</p>
521     * <p>{@code null} is returned for arguments that are not {@code Entry} instances or whose data offset is unknown.</p>
522     * @param ze The entry to get the stream for
523     * @return The raw input stream containing (possibly) compressed data.
524     * @since 1.11
525     */
526    public InputStream getRawInputStream(final ZipArchiveEntry ze) {
527        if (ze instanceof Entry) {
528            final long dataStart = ze.getDataOffset();
529            if (dataStart != EntryStreamOffsets.OFFSET_UNKNOWN) {
530                return createBoundedInputStream(dataStart, ze.getCompressedSize());
531            }
532        }
533        // not an Entry instance, or its data offset is not known
534        return null;
535    }
536
537
538    /**
539     * Copies the selected entries of this zipfile to the given #ZipArchiveOutputStream
540     * without touching their payload; compression and all other attributes stay as they are in this file.
541     * <p>The entries to copy are taken from the central directory of the zip file.</p>
542     *
543     * @param target The zipArchiveOutputStream to write the entries to
544     * @param predicate A predicate that selects which entries to write
545     * @throws IOException on error
546     */
547    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
548            throws IOException {
549        // visit the entries in the order their contents appear within the archive
550        for (final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); src.hasMoreElements();) {
551            final ZipArchiveEntry candidate = src.nextElement();
552            if (predicate.test(candidate)) {
553                target.addRawArchiveEntry(candidate, getRawInputStream(candidate));
554            }
555        }
556    }
557
558    /**
559     * Returns an InputStream for reading the contents of the given entry, decoding it according to the entry's method.
560     *
561     * @param ze the entry to get the stream for.
562     * @return a stream to read the entry from, or {@code null} if {@code ze} is not an {@code Entry} instance.
563     * The returned stream implements {@link InputStreamStatistics}.
564     * @throws IOException if unable to create an input stream from the zipentry
565     */
566    public InputStream getInputStream(final ZipArchiveEntry ze)
567        throws IOException {
568        if (!(ze instanceof Entry)) {
569            return null;
570        }
571        // ze is known to be an Entry instance from here on
572        ZipUtil.checkRequestedFeatures(ze);
573        final long start = getDataOffset(ze);
574
575        // doesn't get closed if the method is not supported - which
576        // should never happen because of the checkRequestedFeatures
577        // call above
578        final InputStream is =
579            new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR
580        switch (ZipMethod.getMethodByCode(ze.getMethod())) { // pick the decoder matching the entry's compression method
581            case STORED:
582                return new StoredStatisticsStream(is);
583            case UNSHRINKING:
584                return new UnshrinkingInputStream(is);
585            case IMPLODING:
586                try {
587                    return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
588                            ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
589                } catch (final IllegalArgumentException ex) { // reported to callers as an I/O problem with the archive
590                    throw new IOException("bad IMPLODE data", ex);
591                }
592            case DEFLATED:
593                final Inflater inflater = new Inflater(true);
594                // Inflater with nowrap=true has this odd contract for a zero padding
595                // byte following the data stream; this used to be zlib's requirement
596                // and has been fixed a long time ago, but the contract persists so
597                // we comply.
598                // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
599                return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
600                    inflater) {
601                    @Override
602                    public void close() throws IOException {
603                        try {
604                            super.close();
605                        } finally {
606                            inflater.end(); // the Inflater was created here, so it is released here even if close() fails
607                        }
608                    }
609                };
610            case BZIP2:
611                return new BZip2CompressorInputStream(is);
612            case ENHANCED_DEFLATED:
613                return new Deflate64CompressorInputStream(is);
614            case AES_ENCRYPTED: // every method listed below has no decoder here and ends in the exception
615            case EXPANDING_LEVEL_1:
616            case EXPANDING_LEVEL_2:
617            case EXPANDING_LEVEL_3:
618            case EXPANDING_LEVEL_4:
619            case JPEG:
620            case LZMA:
621            case PKWARE_IMPLODING:
622            case PPMD:
623            case TOKENIZATION:
624            case UNKNOWN:
625            case WAVPACK:
626            case XZ:
627            default:
628                throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(ze.getMethod()), ze);
629        }
630    }
631
632    /**
633     * <p>
634     * Convenience method that reads the content of an entry and returns it as a String,
635     * provided isUnixSymlink() returns true for the entry; for anything else null is returned.
636     * </p>
637     *
638     * <p>This method assumes the symbolic link's file name uses the
639     * same encoding that has been specified for this ZipFile.</p>
640     *
641     * @param entry ZipArchiveEntry object that represents the symbolic link
642     * @return entry's content as a String
643     * @throws IOException problem with content's input stream
644     * @since 1.5
645     */
646    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
647        if (entry == null || !entry.isUnixSymlink()) {
648            return null;
649        }
650        try (InputStream content = getInputStream(entry)) {
651            return zipEncoding.decode(IOUtils.toByteArray(content));
652        }
653    }
654
655    /**
656     * Ensures that the close method of this zipfile is called when
657     * there are no more references to it; a warning naming the archive is printed to System.err in that case.
658     * @see #close()
659     */
660    @Override
661    protected void finalize() throws Throwable {
662        try {
663            if (!closed) { // only instances that were opened successfully and never closed get here
664                System.err.println("Cleaning up unclosed ZipFile for archive "
665                                   + archiveName);
666                close();
667            }
668        } finally {
669            super.finalize();
670        }
671    }
672
673    /**
674     * Length of a "central directory" entry structure without file
675     * name, extra fields or comment; also the size of the cfhBuf read buffer.
676     */
677    private static final int CFH_LEN =
678        /* version made by                 */ SHORT
679        /* version needed to extract       */ + SHORT
680        /* general purpose bit flag        */ + SHORT
681        /* compression method              */ + SHORT
682        /* last mod file time              */ + SHORT
683        /* last mod file date              */ + SHORT
684        /* crc-32                          */ + WORD
685        /* compressed size                 */ + WORD
686        /* uncompressed size               */ + WORD
687        /* file name length                */ + SHORT
688        /* extra field length              */ + SHORT
689        /* file comment length             */ + SHORT
690        /* disk number start               */ + SHORT
691        /* internal file attributes        */ + SHORT
692        /* external file attributes        */ + WORD
693        /* relative offset of local header */ + WORD;
694
695    private static final long CFH_SIG =
696        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); // numeric form of the signature populateFromCentralDirectory compares against
697
698    /**
699     * Reads the central directory of the given archive and populates
700     * the internal tables with ZipArchiveEntry instances.
701     *
702     * <p>The ZipArchiveEntrys will know all data that can be obtained from
703     * the central directory alone, but not the data that requires the
704     * local file header or additional data to be read.</p>
705     *
706     * @return a map of zipentries that didn't have the language
707     * encoding flag set when read.
708     * @throws IOException if reading fails or the archive starts with a local file header but has no central directory entry.
709     */
709    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
710        throws IOException {
711        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
712            new HashMap<>();
713
714        positionAtCentralDirectory();
715        centralDirectoryStartOffset = archive.position(); // remember where the channel stands after positioning
716
717        ((Buffer)wordBbuf).rewind(); // the cached buffer is reused, so start filling it from the beginning again
718        IOUtils.readFully(archive, wordBbuf);
719        long sig = ZipLong.getValue(wordBuf);
720
721        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
722            throw new IOException("Central directory is empty, can't expand"
723                                  + " corrupt archive.");
724        }
725
726        while (sig == CFH_SIG) { // one iteration per central directory entry, ends at the first different signature
727            readCentralDirectoryEntry(noUTF8Flag);
728            ((Buffer)wordBbuf).rewind();
729            IOUtils.readFully(archive, wordBbuf);
730            sig = ZipLong.getValue(wordBuf);
731        }
732        return noUTF8Flag;
733    }
734
735    /**
736     * Reads an individual entry of the central directory, creates a
737     * ZipArchiveEntry from it and adds it to the global maps.
738     *
739     * @param noUTF8Flag map used to collect entries that don't have
740     * their UTF-8 flag set and whose name will be set by data read
741     * from the local file header later.  The current entry may be
742     * added to this map.
743     */
744    private void
745        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
746        throws IOException {
747        ((Buffer)cfhBbuf).rewind();
748        IOUtils.readFully(archive, cfhBbuf);
749        int off = 0;
750        final Entry ze = new Entry();
751
752        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
753        off += SHORT;
754        ze.setVersionMadeBy(versionMadeBy);
755        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
756
757        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
758        off += SHORT; // version required
759
760        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
761        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
762        final ZipEncoding entryEncoding =
763            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
764        if (hasUTF8Flag) {
765            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
766        }
767        ze.setGeneralPurposeBit(gpFlag);
768        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
769
770        off += SHORT;
771
772        //noinspection MagicConstant
773        ze.setMethod(ZipShort.getValue(cfhBuf, off));
774        off += SHORT;
775
776        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
777        ze.setTime(time);
778        off += WORD;
779
780        ze.setCrc(ZipLong.getValue(cfhBuf, off));
781        off += WORD;
782
783        long size = ZipLong.getValue(cfhBuf, off);
784        if (size < 0) {
785            throw new IOException("broken archive, entry with negative compressed size");
786        }
787        ze.setCompressedSize(size);
788        off += WORD;
789
790        size = ZipLong.getValue(cfhBuf, off);
791        if (size < 0) {
792            throw new IOException("broken archive, entry with negative size");
793        }
794        ze.setSize(size);
795        off += WORD;
796
797        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
798        off += SHORT;
799        if (fileNameLen < 0) {
800            throw new IOException("broken archive, entry with negative fileNameLen");
801        }
802
803        final int extraLen = ZipShort.getValue(cfhBuf, off);
804        off += SHORT;
805        if (extraLen < 0) {
806            throw new IOException("broken archive, entry with negative extraLen");
807        }
808
809        final int commentLen = ZipShort.getValue(cfhBuf, off);
810        off += SHORT;
811        if (commentLen < 0) {
812            throw new IOException("broken archive, entry with negative commentLen");
813        }
814
815        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
816        off += SHORT;
817
818        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
819        off += SHORT;
820
821        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
822        off += WORD;
823
824        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
825        if (fileName.length < fileNameLen) {
826            throw new EOFException();
827        }
828        ze.setName(entryEncoding.decode(fileName), fileName);
829
830        // LFH offset,
831        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
832        // data offset will be filled later
833        entries.add(ze);
834
835        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
836        if (cdExtraData.length < extraLen) {
837            throw new EOFException();
838        }
839        try {
840            ze.setCentralDirectoryExtra(cdExtraData);
841        } catch (RuntimeException ex) {
842            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
843            z.initCause(ex);
844            throw z;
845        }
846
847        setSizesAndOffsetFromZip64Extra(ze);
848        sanityCheckLFHOffset(ze);
849
850        final byte[] comment = IOUtils.readRange(archive, commentLen);
851        if (comment.length < commentLen) {
852            throw new EOFException();
853        }
854        ze.setComment(entryEncoding.decode(comment));
855
856        if (!hasUTF8Flag && useUnicodeExtraFields) {
857            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
858        }
859
860        ze.setStreamContiguous(true);
861    }
862
863    private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException {
864        if (ze.getDiskNumberStart() < 0) {
865            throw new IOException("broken archive, entry with negative disk number");
866        }
867        if (ze.getLocalHeaderOffset() < 0) {
868            throw new IOException("broken archive, entry with negative local file header offset");
869        }
870        if (isSplitZipArchive) {
871            if (ze.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
872                throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory");
873            }
874            if (ze.getDiskNumberStart() == centralDirectoryStartDiskNumber
875                && ze.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
876                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
877            }
878        } else {
879            if (ze.getLocalHeaderOffset() > centralDirectoryStartOffset) {
880                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
881            }
882        }
883    }
884
885    /**
886     * If the entry holds a Zip64 extended information extra field,
887     * read sizes from there if the entry's sizes are set to
888     * 0xFFFFFFFFF, do the same for the offset of the local file
889     * header.
890     *
891     * <p>Ensures the Zip64 extra either knows both compressed and
892     * uncompressed size or neither of both as the internal logic in
893     * ExtraFieldUtils forces the field to create local header data
894     * even if they are never used - and here a field with only one
895     * size would be invalid.</p>
896     */
897    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze)
898        throws IOException {
899        final ZipExtraField extra =
900            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
901        if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) {
902            throw new ZipException("archive contains unparseable zip64 extra field");
903        }
904        final Zip64ExtendedInformationExtraField z64 =
905            (Zip64ExtendedInformationExtraField) extra;
906        if (z64 != null) {
907            final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
908            final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
909            final boolean hasRelativeHeaderOffset =
910                ze.getLocalHeaderOffset() == ZIP64_MAGIC;
911            final boolean hasDiskStart = ze.getDiskNumberStart() == ZIP64_MAGIC_SHORT;
912            z64.reparseCentralDirectoryData(hasUncompressedSize,
913                                            hasCompressedSize,
914                                            hasRelativeHeaderOffset,
915                                            hasDiskStart);
916
917            if (hasUncompressedSize) {
918                final long size = z64.getSize().getLongValue();
919                if (size < 0) {
920                    throw new IOException("broken archive, entry with negative size");
921                }
922                ze.setSize(size);
923            } else if (hasCompressedSize) {
924                z64.setSize(new ZipEightByteInteger(ze.getSize()));
925            }
926
927            if (hasCompressedSize) {
928                final long size = z64.getCompressedSize().getLongValue();
929                if (size < 0) {
930                    throw new IOException("broken archive, entry with negative compressed size");
931                }
932                ze.setCompressedSize(size);
933            } else if (hasUncompressedSize) {
934                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
935            }
936
937            if (hasRelativeHeaderOffset) {
938                ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
939            }
940
941            if (hasDiskStart) {
942                ze.setDiskNumberStart(z64.getDiskStartNumber().getValue());
943            }
944        }
945    }
946
    /**
     * Length of the "End of central directory record" - which is
     * supposed to be the last structure of the archive - without file
     * comment.
     *
     * <p>Used as the smallest distance from the end of the archive
     * at which the search for the record's signature starts.</p>
     */
    static final int MIN_EOCD_SIZE =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD
        /* offset of start of central      */
        /* directory with respect to       */
        /* the starting disk number        */ + WORD
        /* zipfile comment length          */ + SHORT;
966
    /**
     * Maximum length of the "End of central directory record" with a
     * file comment.
     *
     * <p>Used as the largest distance from the end of the archive
     * that is searched for the record's signature.</p>
     */
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
973
    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "End of central directory
     * record" relative to the start of the "End of central directory
     * record".
     *
     * <p>Skipped by {@code positionAtCentralDirectory32} for archives
     * that are not split.</p>
     */
    private static final int CFD_LOCATOR_OFFSET =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD;
990
    /**
     * Offset of the field that holds the disk number of the first
     * central directory entry inside the "End of central directory
     * record" relative to the start of the "End of central directory
     * record".
     *
     * <p>Skipped by {@code positionAtCentralDirectory32} for split
     * archives before the disk number is read.</p>
     */
    private static final int CFD_DISK_OFFSET =
            /* end of central dir signature    */ WORD
            /* number of this disk             */ + SHORT;
1000
    /**
     * Number of bytes inside the "End of central directory record"
     * that lie between the end of the "number of the disk with the
     * start of the central directory" field and the field that holds
     * the location of the first central directory entry.
     *
     * <p>Skipped by {@code positionAtCentralDirectory32} for split
     * archives right after the disk number has been read.</p>
     */
    private static final int CFD_LOCATOR_RELATIVE_OFFSET =
            /* total number of entries in      */
            /* the central dir on this disk    */ + SHORT
            /* total number of entries in      */
            /* the central dir                 */ + SHORT
            /* size of the central directory   */ + WORD;
1013
    /**
     * Length of the "Zip64 end of central directory locator" - which
     * should be right in front of the "end of central directory
     * record" if one is present at all.
     *
     * <p>{@code positionAtCentralDirectory} steps back by this many
     * bytes from the "end of central directory record" in order to
     * look for the locator's signature.</p>
     */
    private static final int ZIP64_EOCDL_LENGTH =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD
        /* relative offset of the zip64         */
        /* end of central directory record      */ + DWORD
        /* total number of disks                */ + WORD;
1027
    /**
     * Offset of the field that holds the location of the "Zip64 end
     * of central directory record" inside the "Zip64 end of central
     * directory locator" relative to the start of the "Zip64 end of
     * central directory locator".
     *
     * <p>Used by {@code positionAtCentralDirectory64} for archives
     * that are not split.</p>
     */
    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD;
1039
    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "Zip64 end of central
     * directory record" relative to the start of the "Zip64 end of
     * central directory record".
     *
     * <p>Used by {@code positionAtCentralDirectory64} for archives
     * that are not split.</p>
     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
        /* zip64 end of central dir        */
        /* signature                       */ WORD
        /* size of zip64 end of central    */
        /* directory record                */ + DWORD
        /* version made by                 */ + SHORT
        /* version needed to extract       */ + SHORT
        /* number of this disk             */ + WORD
        /* number of the disk with the     */
        /* start of the central directory  */ + WORD
        /* total number of entries in the  */
        /* central directory on this disk  */ + DWORD
        /* total number of entries in the  */
        /* central directory               */ + DWORD
        /* size of the central directory   */ + DWORD;
1061
    /**
     * Offset of the field that holds the disk number of the first
     * central directory entry inside the "Zip64 end of central
     * directory record" relative to the start of the "Zip64 end of
     * central directory record".
     *
     * <p>Used by {@code positionAtCentralDirectory64} for split
     * archives before the disk number is read.</p>
     */
    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
            /* zip64 end of central dir        */
            /* signature                       */ WORD
            /* size of zip64 end of central    */
            /* directory record                */ + DWORD
            /* version made by                 */ + SHORT
            /* version needed to extract       */ + SHORT
            /* number of this disk             */ + WORD;
1076
    /**
     * Number of bytes inside the "Zip64 end of central directory
     * record" that lie between the end of the "number of the disk
     * with the start of the central directory" field and the field
     * that holds the location of the first central directory entry.
     *
     * <p>Skipped by {@code positionAtCentralDirectory64} for split
     * archives right after the disk number has been read.</p>
     */
    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
            /* total number of entries in the  */
            /* central directory on this disk  */ DWORD
            /* total number of entries in the  */
            /* central directory               */ + DWORD
            /* size of the central directory   */ + DWORD;
1089
1090    /**
1091     * Searches for either the &quot;Zip64 end of central directory
1092     * locator&quot; or the &quot;End of central dir record&quot;, parses
1093     * it and positions the stream at the first central directory
1094     * record.
1095     */
1096    private void positionAtCentralDirectory()
1097        throws IOException {
1098        positionAtEndOfCentralDirectoryRecord();
1099        boolean found = false;
1100        final boolean searchedForZip64EOCD =
1101            archive.position() > ZIP64_EOCDL_LENGTH;
1102        if (searchedForZip64EOCD) {
1103            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
1104            ((Buffer)wordBbuf).rewind();
1105            IOUtils.readFully(archive, wordBbuf);
1106            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
1107                                  wordBuf);
1108        }
1109        if (!found) {
1110            // not a ZIP64 archive
1111            if (searchedForZip64EOCD) {
1112                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
1113            }
1114            positionAtCentralDirectory32();
1115        } else {
1116            positionAtCentralDirectory64();
1117        }
1118    }
1119
1120    /**
1121     * Parses the &quot;Zip64 end of central directory locator&quot;,
1122     * finds the &quot;Zip64 end of central directory record&quot; using the
1123     * parsed information, parses that and positions the stream at the
1124     * first central directory record.
1125     *
1126     * Expects stream to be positioned right behind the &quot;Zip64
1127     * end of central directory locator&quot;'s signature.
1128     */
1129    private void positionAtCentralDirectory64()
1130        throws IOException {
1131        if (isSplitZipArchive) {
1132            ((Buffer)wordBbuf).rewind();
1133            IOUtils.readFully(archive, wordBbuf);
1134            final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);
1135
1136            ((Buffer)dwordBbuf).rewind();
1137            IOUtils.readFully(archive, dwordBbuf);
1138            final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
1139            ((ZipSplitReadOnlySeekableByteChannel) archive)
1140                .position(diskNumberOfEOCD, relativeOffsetOfEOCD);
1141        } else {
1142            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
1143                    - WORD /* signature has already been read */);
1144        ((Buffer)dwordBbuf).rewind();
1145            IOUtils.readFully(archive, dwordBbuf);
1146            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
1147        }
1148
1149        ((Buffer)wordBbuf).rewind();
1150        IOUtils.readFully(archive, wordBbuf);
1151        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
1152            throw new ZipException("Archive's ZIP64 end of central "
1153                                   + "directory locator is corrupt.");
1154        }
1155
1156        if (isSplitZipArchive) {
1157            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET
1158                    - WORD /* signature has already been read */);
1159            ((Buffer)wordBbuf).rewind();
1160            IOUtils.readFully(archive, wordBbuf);
1161            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);
1162
1163            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);
1164
1165            ((Buffer)dwordBbuf).rewind();
1166            IOUtils.readFully(archive, dwordBbuf);
1167            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1168            ((ZipSplitReadOnlySeekableByteChannel) archive)
1169                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1170        } else {
1171            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
1172                    - WORD /* signature has already been read */);
1173            ((Buffer)dwordBbuf).rewind();
1174            IOUtils.readFully(archive, dwordBbuf);
1175            centralDirectoryStartDiskNumber = 0;
1176            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
1177            archive.position(centralDirectoryStartRelativeOffset);
1178        }
1179    }
1180
1181    /**
1182     * Parses the &quot;End of central dir record&quot; and positions
1183     * the stream at the first central directory record.
1184     *
1185     * Expects stream to be positioned at the beginning of the
1186     * &quot;End of central dir record&quot;.
1187     */
1188    private void positionAtCentralDirectory32()
1189        throws IOException {
1190        if (isSplitZipArchive) {
1191            skipBytes(CFD_DISK_OFFSET);
1192            ((Buffer)shortBbuf).rewind();
1193            IOUtils.readFully(archive, shortBbuf);
1194            centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf);
1195
1196            skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);
1197
1198            ((Buffer)wordBbuf).rewind();
1199            IOUtils.readFully(archive, wordBbuf);
1200            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1201            ((ZipSplitReadOnlySeekableByteChannel) archive)
1202                .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
1203        } else {
1204            skipBytes(CFD_LOCATOR_OFFSET);
1205        ((Buffer)wordBbuf).rewind();
1206            IOUtils.readFully(archive, wordBbuf);
1207            centralDirectoryStartDiskNumber = 0;
1208            centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf);
1209            archive.position(centralDirectoryStartRelativeOffset);
1210        }
1211    }
1212
1213    /**
1214     * Searches for the and positions the stream at the start of the
1215     * &quot;End of central dir record&quot;.
1216     */
1217    private void positionAtEndOfCentralDirectoryRecord()
1218        throws IOException {
1219        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
1220                                             ZipArchiveOutputStream.EOCD_SIG);
1221        if (!found) {
1222            throw new ZipException("Archive is not a ZIP archive");
1223        }
1224    }
1225
1226    /**
1227     * Searches the archive backwards from minDistance to maxDistance
1228     * for the given signature, positions the RandomaccessFile right
1229     * at the signature if it has been found.
1230     */
1231    private boolean tryToLocateSignature(final long minDistanceFromEnd,
1232                                         final long maxDistanceFromEnd,
1233                                         final byte[] sig) throws IOException {
1234        boolean found = false;
1235        long off = archive.size() - minDistanceFromEnd;
1236        final long stopSearching =
1237            Math.max(0L, archive.size() - maxDistanceFromEnd);
1238        if (off >= 0) {
1239            for (; off >= stopSearching; off--) {
1240                archive.position(off);
1241                try {
1242                    ((Buffer)wordBbuf).rewind();
1243                    IOUtils.readFully(archive, wordBbuf);
1244                    ((Buffer)wordBbuf).flip();
1245                } catch (final EOFException ex) { // NOSONAR
1246                    break;
1247                }
1248                int curr = wordBbuf.get();
1249                if (curr == sig[POS_0]) {
1250                    curr = wordBbuf.get();
1251                    if (curr == sig[POS_1]) {
1252                        curr = wordBbuf.get();
1253                        if (curr == sig[POS_2]) {
1254                            curr = wordBbuf.get();
1255                            if (curr == sig[POS_3]) {
1256                                found = true;
1257                                break;
1258                            }
1259                        }
1260                    }
1261                }
1262            }
1263        }
1264        if (found) {
1265            archive.position(off);
1266        }
1267        return found;
1268    }
1269
1270    /**
1271     * Skips the given number of bytes or throws an EOFException if
1272     * skipping failed.
1273     */
1274    private void skipBytes(final int count) throws IOException {
1275        final long currentPosition = archive.position();
1276        final long newPosition = currentPosition + count;
1277        if (newPosition > archive.size()) {
1278            throw new EOFException();
1279        }
1280        archive.position(newPosition);
1281    }
1282
    /**
     * Number of bytes in local file header up to the &quot;length of
     * file name&quot; entry.
     *
     * <p>Added to an entry's local file header offset by
     * {@code setDataOffset} in order to reach the two length fields
     * that precede the file name.</p>
     */
    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
        /* local file header signature     */ WORD
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + (long) WORD;
1297
1298    /**
1299     * Walks through all recorded entries and adds the data available
1300     * from the local file header.
1301     *
1302     * <p>Also records the offsets for the data to read from the
1303     * entries.</p>
1304     */
1305    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1306                                            entriesWithoutUTF8Flag)
1307        throws IOException {
1308        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1309            // entries is filled in populateFromCentralDirectory and
1310            // never modified
1311            final Entry ze = (Entry) zipArchiveEntry;
1312            final int[] lens = setDataOffset(ze);
1313            final int fileNameLen = lens[0];
1314            final int extraFieldLen = lens[1];
1315            skipBytes(fileNameLen);
1316            final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen);
1317            if (localExtraData.length < extraFieldLen) {
1318                throw new EOFException();
1319            }
1320            try {
1321                ze.setExtra(localExtraData);
1322            } catch (RuntimeException ex) {
1323                final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
1324                z.initCause(ex);
1325                throw z;
1326            }
1327
1328            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1329                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1330                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1331                                                         nc.comment);
1332            }
1333        }
1334    }
1335
1336    private void fillNameMap() {
1337        for (final ZipArchiveEntry ze : entries) {
1338            // entries is filled in populateFromCentralDirectory and
1339            // never modified
1340            final String name = ze.getName();
1341            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>());
1342            entriesOfThatName.addLast(ze);
1343        }
1344    }
1345
1346    private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException {
1347        long offset = ze.getLocalHeaderOffset();
1348        if (isSplitZipArchive) {
1349            ((ZipSplitReadOnlySeekableByteChannel) archive)
1350                .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1351            // the offset should be updated to the global offset
1352            offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
1353        } else {
1354            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1355        }
1356        ((Buffer)wordBbuf).rewind();
1357        IOUtils.readFully(archive, wordBbuf);
1358        ((Buffer)wordBbuf).flip();
1359        wordBbuf.get(shortBuf);
1360        final int fileNameLen = ZipShort.getValue(shortBuf);
1361        wordBbuf.get(shortBuf);
1362        final int extraFieldLen = ZipShort.getValue(shortBuf);
1363        ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1364                         + SHORT + SHORT + fileNameLen + extraFieldLen);
1365        if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) {
1366            throw new IOException("data for " + ze.getName() + " overlaps with central directory.");
1367        }
1368        return new int[] { fileNameLen, extraFieldLen };
1369    }
1370
1371    private long getDataOffset(final ZipArchiveEntry ze) throws IOException {
1372        final long s = ze.getDataOffset();
1373        if (s == EntryStreamOffsets.OFFSET_UNKNOWN) {
1374            setDataOffset(ze);
1375            return ze.getDataOffset();
1376        }
1377        return s;
1378    }
1379
1380    /**
1381     * Checks whether the archive starts with a LFH.  If it doesn't,
1382     * it may be an empty archive.
1383     */
1384    private boolean startsWithLocalFileHeader() throws IOException {
1385        archive.position(0);
1386        ((Buffer)wordBbuf).rewind();
1387        IOUtils.readFully(archive, wordBbuf);
1388        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1389    }
1390
1391    /**
1392     * Creates new BoundedInputStream, according to implementation of
1393     * underlying archive channel.
1394     */
1395    private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) {
1396        if (start < 0 || remaining < 0 || start + remaining < start) {
1397            throw new IllegalArgumentException("Corrupted archive, stream boundaries"
1398                + " are out of range");
1399        }
1400        return archive instanceof FileChannel ?
1401            new BoundedFileChannelInputStream(start, remaining) :
1402            new BoundedSeekableByteChannelInputStream(start, remaining, archive);
1403    }
1404
1405    /**
1406     * Lock-free implementation of BoundedInputStream. The
1407     * implementation uses positioned reads on the underlying archive
1408     * file channel and therefore performs significantly faster in
1409     * concurrent environment.
1410     */
1411    private class BoundedFileChannelInputStream extends BoundedArchiveInputStream {
1412        private final FileChannel archive;
1413
1414        BoundedFileChannelInputStream(final long start, final long remaining) {
1415            super(start, remaining);
1416            archive = (FileChannel) ZipFile.this.archive;
1417        }
1418
1419        @Override
1420        protected int read(final long pos, final ByteBuffer buf) throws IOException {
1421            final int read = archive.read(buf, pos);
1422            ((Buffer)buf).flip();
1423            return read;
1424        }
1425    }
1426
1427    private static final class NameAndComment {
1428        private final byte[] name;
1429        private final byte[] comment;
1430        private NameAndComment(final byte[] name, final byte[] comment) {
1431            this.name = name;
1432            this.comment = comment;
1433        }
1434    }
1435
1436    /**
1437     * Compares two ZipArchiveEntries based on their offset within the archive.
1438     *
1439     * <p>Won't return any meaningful results if one of the entries
1440     * isn't part of the archive at all.</p>
1441     *
1442     * @since 1.1
1443     */
1444    private final Comparator<ZipArchiveEntry> offsetComparator =
1445        Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
1446            .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
1447
1448    /**
1449     * Extends ZipArchiveEntry to store the offset within the archive.
1450     */
1451    private static class Entry extends ZipArchiveEntry {
1452
1453        Entry() {
1454        }
1455
1456        @Override
1457        public int hashCode() {
1458            return 3 * super.hashCode()
1459                + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1460        }
1461
1462        @Override
1463        public boolean equals(final Object other) {
1464            if (super.equals(other)) {
1465                // super.equals would return false if other were not an Entry
1466                final Entry otherEntry = (Entry) other;
1467                return getLocalHeaderOffset()
1468                        == otherEntry.getLocalHeaderOffset()
1469                    && super.getDataOffset()
1470                        == otherEntry.getDataOffset()
1471                    && super.getDiskNumberStart()
1472                        == otherEntry.getDiskNumberStart();
1473            }
1474            return false;
1475        }
1476    }
1477
1478    private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics {
1479        StoredStatisticsStream(final InputStream in) {
1480            super(in);
1481        }
1482
1483        @Override
1484        public long getCompressedCount() {
1485            return super.getBytesRead();
1486        }
1487
1488        @Override
1489        public long getUncompressedCount() {
1490            return getCompressedCount();
1491        }
1492    }
1493}