// This program is taken from the book "Java Tools for Extreme Programming".
import com.meterware.httpunit.*;
import java.util.HashSet;
import java.util.Set;
/**
 * Crawls a web site with HttpUnit, starting at {@code http://<host>} and following
 * every link whose URL points at that same host.
 *
 * <p>Each distinct link string found on an HTML page is recorded once; links to other
 * hosts are recorded and reported but not fetched. Any exception raised while fetching
 * or parsing a page propagates to the caller and ends the crawl.
 */
public class CheckSite {
    /** Host crawled when the caller does not supply one. */
    private static final String DEFAULT_HOST = "www.sohu.com";

    /** Media type of the only responses whose links are followed. */
    private static final String HTML_CONTENT_TYPE = "text/html";

    // Initialised eagerly so testEntireSite() works even if setUp() was never called.
    private WebConversation conversation = new WebConversation();
    private Set<String> checkedLinks = new HashSet<String>();
    private final String host;

    /** Creates a checker for the default host. */
    public CheckSite() {
        this(DEFAULT_HOST);
    }

    /**
     * Creates a checker for the given host.
     *
     * @param host host name (without scheme or path) of the site to crawl
     * @throws IllegalArgumentException if {@code host} is null or blank
     */
    public CheckSite(String host) {
        if (host == null || host.trim().length() == 0) {
            throw new IllegalArgumentException("host must not be null or blank");
        }
        this.host = host.trim();
    }

    /**
     * Runs a full check of one site and prints progress to standard output.
     *
     * @param args optional; {@code args[0]} is the host to crawl, otherwise the
     *             default host is used
     * @throws Exception if any page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        String targetHost = args.length > 0 ? args[0] : DEFAULT_HOST;
        CheckSite cs = new CheckSite(targetHost);
        cs.setUp();
        cs.testEntireSite();
    }

    /** Resets the checker: starts a fresh conversation and forgets all recorded links. */
    public void setUp() {
        conversation = new WebConversation();
        checkedLinks = new HashSet<String>();
    }

    /**
     * Fetches the site's root page, follows its links, and prints a summary of every
     * link that was recorded.
     *
     * @throws Exception if any page cannot be fetched or parsed
     */
    public void testEntireSite() throws Exception {
        WebResponse response = conversation.getResponse("http://" + host);
        checkAllLinks(response);
        System.out.println("Site check finished. Link's checked: "
                + checkedLinks.size() + " : " + checkedLinks);
    }

    /**
     * Records every link on the given page and checks each one not seen before.
     * Non-HTML responses are ignored.
     *
     * @param response the page whose links are examined
     * @throws Exception if the page cannot be parsed or a followed link fails
     */
    private void checkAllLinks(WebResponse response) throws Exception {
        if (!isHtml(response)) {
            return;
        }
        WebLink[] links = response.getLinks();
        System.out.println(response.getTitle() + " -- links found = "
                + links.length);
        for (WebLink link : links) {
            // Set.add returns false for a string already recorded, which is what stops
            // pages that link to each other from being visited forever.
            // NOTE(review): the key is the link's URL string as HttpUnit reports it; if
            // that can be a relative href, identical hrefs on different pages would be
            // treated as one link -- confirm against the HttpUnit documentation.
            boolean newLink = checkedLinks.add(link.getURLString());
            if (newLink) {
                System.out.println("Total links checked so far: "
                        + checkedLinks.size());
                // NOTE(review): checkLink recurses back into this method, so the call
                // depth grows with the length of the link chain being followed.
                checkLink(link);
            }
        }
    }

    /**
     * Tells whether the response is an HTML document.
     *
     * @param response the response to inspect
     * @return {@code true} if the response's content type is {@code text/html}
     */
    private boolean isHtml(WebResponse response) {
        return isHtmlContentType(response.getContentType());
    }

    /**
     * Compares a content-type value with {@code text/html}, ignoring letter case and
     * any parameters after a semicolon (for example {@code ;charset=UTF-8}).
     *
     * @param contentType the content-type value; may be null
     * @return {@code true} if the media type is {@code text/html}
     */
    private static boolean isHtmlContentType(String contentType) {
        if (contentType == null) {
            return false;
        }
        int paramStart = contentType.indexOf(';');
        String mediaType = paramStart >= 0 ? contentType.substring(0, paramStart) : contentType;
        return HTML_CONTENT_TYPE.equalsIgnoreCase(mediaType.trim());
    }

    /**
     * Reports the link and, when it points at the crawled host, fetches it and checks
     * the links on the resulting page.
     *
     * @param link the link to check
     * @throws Exception if the link's URL cannot be built or the page cannot be fetched
     */
    private void checkLink(WebLink link) throws Exception {
        WebRequest request = link.getRequest();
        java.net.URL url = request.getURL();
        System.out.println("checking link: " + url);
        // Host names are case-insensitive, so "WWW.Example.com" is the same site.
        if (host.equalsIgnoreCase(url.getHost())) {
            WebResponse response = conversation.getResponse(request);
            checkAllLinks(response);
        }
    }
}