
/*
 * A simple algorithm for analysing the contents of a Jar file
 * (nested Jar in a War file or a directory)
 *
 * 2006-01-09
 * @(#) JarFileReader.java 1.00 2004/09/20
 *
 * FILENAME : JarFileReader.java
 *
 * PACKAGE : org.opensource.study.jar
 *
 * CREATE DATE : 2004/09/20
 *
 * AUTHOR : Humx (humx_hn@hotmail.com)
 *
 * DESCRIPTION : a class that reads all entries from a given file
 *
 * You are free to use and modify it.
 */

package org.opensource.study.jar.util;

import java.io.IOException;

import java.io.File;

import java.io.InputStream;

import java.io.BufferedInputStream;

import java.io.OutputStream;

import java.io.BufferedOutputStream;

import java.io.FileOutputStream;

import java.io.FileWriter;

import java.util.Enumeration;

import java.util.Map;

import java.util.HashMap;

import java.util.Set;

import java.util.HashSet;

import java.util.Iterator;

import java.util.jar.JarEntry;

import java.util.jar.JarFile;


/**
 * A simple class that collects entry names from a jar | war file or from a
 * directory.
 *
 * <p>By default the entries of nested jar | war files are read as well. Which
 * files count as nested archives is decided by a set of file-name suffixes,
 * see {@link #FileReader(Set)}. All entry names end up in one set.</p>
 *
 * <p><b>Attention:</b> a duplicated entry name is silently ignored, because
 * the result is a {@link Set}.</p>
 *
 * <p>Raw collection types are kept on purpose so that the public signatures
 * stay identical for existing callers.</p>
 *
 * @author Hu mingxin
 * @version 1.0
 */
public final class FileReader {

    /** Size in bytes of the buffer used while extracting a nested archive. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Suffixes (as Strings) of the files that are opened as archives and read
     * recursively; may be <code>null</code>, in which case nothing is
     * treated as an archive by {@link #isRecursiveZip(String)}.
     */
    private final Set suffix;

    /**
     * Default constructor without argument. Files whose name ends with
     * ".jar" or ".war" will be read recursively.
     */
    public FileReader() {
        suffix = new HashSet();
        suffix.add(".jar");
        suffix.add(".war");
    }

    /**
     * Specifies the suffixes of the files that are read recursively.
     *
     * @param suffix the set of suffixes of the zip files which should be
     *               opened and analysed recursively; the elements should be
     *               String instances, for example
     *               <code>{".jar", ".war"}</code>. The set is stored by
     *               reference, not copied.
     */
    public FileReader(Set suffix) {
        this.suffix = suffix;
    }

    /**
     * Reads all the entries of the given file. The file may be a jar | war
     * file or a directory; files whose suffix was registered through the
     * constructor are read recursively.
     *
     * @param file the file or directory to process
     * @return a set containing the collected entry names, including the
     *         entries of nested jar | war files
     * @throws java.io.IOException if an I/O error has occurred
     * @see #FileReader(Set suffix)
     */
    public Set read(File file) throws IOException {
        Set entries = new HashSet();
        read(file, entries, false);
        return entries;
    }

    /**
     * Reads the entries of the given file, optionally restricted to the
     * surface.
     *
     * <p>If <code>surface</code> is false this behaves exactly like
     * {@link #read(File)}. If it is true, a directory is still scanned
     * recursively but archives found in it are only recorded by name, and any
     * other file is opened as a jar whose own entries are listed without
     * descending into nested jar | war entries.</p>
     *
     * @param file    the file or directory to process
     * @param surface if true, entries of nested jar | war files are not
     *                collected
     * @return a set containing the collected entry names
     * @throws java.io.IOException if an I/O error has occurred, including
     *                             the case where a non-directory file cannot
     *                             be opened as a jar
     * @see #read(File file)
     */
    public Set read(File file, boolean surface) throws IOException {
        if (!surface) {
            return read(file);
        }
        Set entries = new HashSet();
        if (file.isDirectory()) {
            // Every file is listed, but archives are recorded by name only.
            read(file, entries, true);
        } else {
            JarFile jarFile = new JarFile(file);
            try {
                read(jarFile, entries, false);
            } finally {
                // Release the file handle even when reading fails.
                jarFile.close();
            }
        }
        return entries;
    }

    /**
     * Reads the entries of a file or directory into <code>entrySet</code>.
     *
     * @param file      the file to analyse
     * @param entrySet  the set receiving the entry names
     * @param ignoreJar if true, an encountered jar | war file is only
     *                  recorded by name; otherwise its entries are retrieved
     *                  recursively
     * @throws java.io.IOException if an I/O error has occurred or a
     *                             directory cannot be listed
     */
    private void read(File file, Set entrySet, boolean ignoreJar)
            throws IOException {
        String name = file.getName();
        if (file.isDirectory()) {
            File[] files = file.listFiles();
            if (files == null) {
                // listFiles() signals an I/O failure with null, not an exception.
                throw new IOException("Cannot list directory: " + file);
            }
            for (int i = 0; i < files.length; i++) {
                read(files[i], entrySet, ignoreJar);
            }
        } else if (!ignoreJar && isRecursiveZip(name)) {
            JarFile jarFile = new JarFile(file);
            try {
                read(jarFile, entrySet, true);
            } finally {
                jarFile.close();
            }
        } else {
            // NOTE(review): the statement of this branch was missing from the
            // recovered source; recording the simple file name is a
            // reconstruction - confirm against the original.
            entrySet.add(name);
        }
    }

    /**
     * Reads the entries of a JarFile into <code>entrySet</code>. Directory
     * entries are skipped.
     *
     * @param jarFile   the jar file to analyse
     * @param entrySet  the set receiving the entry names
     * @param recursive if true, the entries of nested jar | war files are
     *                  retrieved instead of the nested file's own name
     * @throws java.io.IOException if an I/O error has occurred
     */
    private void read(JarFile jarFile, Set entrySet, boolean recursive)
            throws IOException {
        for (Enumeration e = jarFile.entries(); e.hasMoreElements(); ) {
            JarEntry jarEntry = (JarEntry) e.nextElement();
            if (jarEntry.isDirectory()) {
                continue;
            }
            String entryName = jarEntry.getName();
            if (recursive && isRecursiveZip(entryName)) {
                readNested(jarFile, jarEntry, entrySet);
            } else {
                // A duplicated entry name is ignored by the set itself.
                entrySet.add(entryName);
            }
        }
    }

    /**
     * Reads the entries of a jar | war entry nested inside another jar. The
     * nested archive is extracted to a temporary file, read recursively and
     * removed again, so no temporary jar files pile up.
     *
     * @param jarFile  the jar file containing the nested archive
     * @param jarEntry the nested jar | war entry
     * @param entrySet the set receiving the entry names
     * @throws java.io.IOException if an I/O error has occurred
     */
    private void readNested(JarFile jarFile, JarEntry jarEntry, Set entrySet)
            throws IOException {
        File temporary = extract(jarFile, jarEntry);
        try {
            JarFile innerJarFile = new JarFile(temporary);
            try {
                read(innerJarFile, entrySet, true);
            } finally {
                // Must be closed before the temporary file can be deleted.
                innerJarFile.close();
            }
        } finally {
            if (!temporary.delete()) {
                // Fall back to removal at JVM exit when deletion fails now.
                temporary.deleteOnExit();
            }
        }
    }

    /**
     * Copies the content of a jar entry into a newly created temporary file.
     *
     * @param jarFile  the jar file containing the entry
     * @param jarEntry the entry to extract
     * @return the temporary file holding the entry's bytes; the caller is
     *         responsible for deleting it
     * @throws java.io.IOException if an I/O error has occurred
     */
    private File extract(JarFile jarFile, JarEntry jarEntry)
            throws IOException {
        InputStream in =
                new BufferedInputStream(jarFile.getInputStream(jarEntry));
        try {
            File temporary = File.createTempFile("clazzjarfile", ".jar");
            boolean complete = false;
            try {
                OutputStream out = new BufferedOutputStream(
                        new FileOutputStream(temporary));
                try {
                    byte[] buf = new byte[BUFFER_SIZE];
                    int len;
                    // read() reports end of stream with -1.
                    while ((len = in.read(buf)) != -1) {
                        out.write(buf, 0, len);
                    }
                } finally {
                    out.close();
                }
                complete = true;
                return temporary;
            } finally {
                if (!complete) {
                    // Do not leave a half-written file behind on failure.
                    temporary.delete();
                }
            }
        } finally {
            in.close();
        }
    }

    /**
     * Tells whether a file is a jar | war file we should care about.
     *
     * @param name the file's name
     * @return true if the name ends with one of the registered suffixes,
     *         false otherwise or when no suffix set was supplied
     */
    private boolean isRecursiveZip(String name) {
        if (suffix == null) {
            return false;
        }
        for (Iterator i = suffix.iterator(); i.hasNext(); ) {
            Object candidate = i.next();
            if (candidate != null && name.endsWith(candidate.toString())) {
                return true;
            }
        }
        return false;
    }
}

