输入网址即可获取服务器返回的源码(返回内容中保留了HTTP协议头信息,没有去掉)。
使用URI解析域名,并通过Socket连接服务器;可自定义每次读取多少KB的数据,循环读取直到响应全部返回为止。可作为网络爬虫的简单引擎。
源码地址:http://www.foryears.com/HttpClient.rar
图片如下:
此主题相关图片如下:
核心代码如下:
import java.io.*;
import java.net.*;
import javax.net.*;
import javax.net.ssl.*;
import java.security.cert.*;
/**
 * Minimal HTTP/HTTPS client built directly on sockets.
 *
 * <p>It sends a single {@code GET} request using HTTP/1.0 with
 * {@code Connection: close} and returns everything the server sends back
 * (status line, headers and body) as one string. The protocol headers are
 * intentionally NOT stripped from the result.
 *
 * <p>Requires Java 7 or later (TLS endpoint identification API).
 */
public class HttpClient {
  /** Number of bytes requested from the socket per {@code read} call. */
  private static final int CHUNK_SIZE = 8 * 1024;

  /** Parameter name that introduces the charset inside a Content-Type value. */
  private static final String CHARSET_KEY = "charset=";

  /**
   * Fetches the given URL and returns the raw response text.
   *
   * <p>If {@code url} starts with neither {@code http://} nor
   * {@code https://}, {@code http://} is prepended. Failures are not thrown
   * to the caller: for an unknown host, an I/O (including TLS) failure or a
   * malformed URL the exception is printed to {@code System.err} and a
   * human-readable error message is returned instead of the response.
   *
   * @param url the address to fetch; must not be {@code null}
   * @return the complete response (headers followed by body), or an error
   *         message describing why the request failed
   */
  public String getResponseStr(String url) {
    String responseStr = "";
    Socket socket = null;
    try {
      if ((!url.startsWith("http://")) && (!url.startsWith("https://"))) {
        url = "http://" + url;
      }
      URI uri = new URI(url);
      String protocol = uri.getScheme();
      String host = uri.getHost();
      if (host == null) {
        // A null host would make new Socket(...) silently connect to the
        // loopback address, so reject the URL instead.
        throw new URISyntaxException(url, "URL does not contain a valid host");
      }
      int explicitPort = uri.getPort();
      String path = uri.getRawPath();
      if (path == null || path.length() == 0) {
        path = "/";
      }
      String query = uri.getRawQuery();
      if (query != null && query.length() > 0) {
        path += "?" + query;
      }

      int defaultPort;
      if (protocol.equals("http")) {
        defaultPort = 80;
      }
      else if (protocol.equals("https")) {
        defaultPort = 443;
      }
      else {
        throw new IllegalArgumentException("URL must use http: or " +
                                           "https: protocol");
      }
      int port = (explicitPort == -1) ? defaultPort : explicitPort;
      if (defaultPort == 443) {
        socket = openSecureSocket(host, port);
      }
      else {
        socket = new Socket(host, port);
      }

      // The Host header must carry the port whenever it is not the
      // scheme's default, otherwise virtual hosts on odd ports break.
      String hostHeader = (port == defaultPort) ? host : host + ":" + port;
      String request = "GET " + path + " HTTP/1.0\r\n" + "Host: " + hostHeader +
          "\r\n" + "Connection: close\r\n\r\n";
      OutputStream toServer = socket.getOutputStream();
      // Encode explicitly so the bytes on the wire do not depend on the
      // platform default charset.
      toServer.write(request.getBytes("UTF-8"));
      toServer.flush();

      byte[] response = readFully(socket.getInputStream());
      int headerEnd = indexOfHeaderEnd(response);
      if (headerEnd == -1) {
        throw new IOException("End of headers not found in first " +
                              response.length + " bytes");
      }
      responseStr = decode(response, headerEnd);
    }
    catch (UnknownHostException e) {
      responseStr = "域名解析失败,请检查网络设置:" + e.toString();
      System.err.println(e);
    }
    catch (IOException e) {
      responseStr = "文件传输失败:" + e.toString();
      System.err.println(e);
    }
    catch (URISyntaxException e) {
      responseStr = "URI构造语法出错:" + e.toString();
      System.err.println(e);
      System.err.println("Usage: java HttpClient <URL> [<filename>]");
    }
    finally {
      // Release the connection on every path, not only on success.
      closeQuietly(socket);
    }
    return responseStr;
  }

  /**
   * Opens a TLS connection, verifies the peer and logs its certificate.
   *
   * <p>Any TLS problem is reported as an {@link IOException} subclass so the
   * caller can turn it into an error message; the JVM is never terminated.
   */
  private static Socket openSecureSocket(String host, int port)
      throws IOException {
    SocketFactory factory = SSLSocketFactory.getDefault();
    SSLSocket ssock = (SSLSocket) factory.createSocket(host, port);
    boolean handedOver = false;
    try {
      // A bare SSLSocket validates the certificate chain but not that the
      // certificate belongs to the requested host; enable HTTPS-style
      // hostname checking explicitly.
      SSLParameters params = ssock.getSSLParameters();
      params.setEndpointIdentificationAlgorithm("HTTPS");
      ssock.setSSLParameters(params);
      // Handshake explicitly so the real failure reason is thrown rather
      // than being hidden behind an invalid session.
      ssock.startHandshake();

      SSLSession session = ssock.getSession();
      X509Certificate cert = (X509Certificate) session.getPeerCertificates()[0];
      System.out.println(session.getPeerHost() +
                         " has presented a certificate belonging t\t" + "[" +
                         cert.getSubjectDN() + "]\n" +
                         "The certificate was issued by: \t" + "[" +
                         cert.getIssuerDN() + "]");
      handedOver = true;
      return ssock;
    }
    finally {
      if (!handedOver) {
        closeQuietly(ssock);
      }
    }
  }

  /** Reads the stream in CHUNK_SIZE pieces until end of stream. */
  private static byte[] readFully(InputStream in) throws IOException {
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    byte[] chunk = new byte[CHUNK_SIZE];
    int count;
    while ((count = in.read(chunk)) != -1) {
      collected.write(chunk, 0, count);
    }
    return collected.toByteArray();
  }

  /**
   * Returns the index just past the first CR LF CR LF sequence, or -1 if
   * the data contains none.
   */
  private static int indexOfHeaderEnd(byte[] data) {
    for (int i = 0; i + 3 < data.length; i++) {
      if (data[i] == '\r' && data[i + 1] == '\n' &&
          data[i + 2] == '\r' && data[i + 3] == '\n') {
        return i + 4;
      }
    }
    return -1;
  }

  /**
   * Converts the response bytes to text: headers as ISO-8859-1 (one char
   * per byte), body with the charset named in Content-Type when that
   * charset is usable, otherwise with the platform default charset.
   */
  private static String decode(byte[] response, int headerEnd)
      throws IOException {
    String headers = new String(response, 0, headerEnd, "ISO-8859-1");
    int bodyLength = response.length - headerEnd;
    String charsetName = findCharset(headers);
    if (charsetName != null) {
      try {
        return headers + new String(response, headerEnd, bodyLength, charsetName);
      }
      catch (UnsupportedEncodingException ignored) {
        // Unknown or malformed charset label: fall back to the default.
      }
    }
    return headers + new String(response, headerEnd, bodyLength);
  }

  /**
   * Extracts the charset parameter of the Content-Type header, or returns
   * {@code null} when the header or the parameter is absent.
   */
  private static String findCharset(String headers) {
    String[] lines = headers.split("\r\n");
    // lines[0] is the status line, header fields start at index 1.
    for (int i = 1; i < lines.length; i++) {
      String line = lines[i];
      int colon = line.indexOf(':');
      if (colon <= 0 ||
          !line.substring(0, colon).trim().equalsIgnoreCase("Content-Type")) {
        continue;
      }
      String value = line.substring(colon + 1);
      for (int j = 0; j + CHARSET_KEY.length() <= value.length(); j++) {
        if (value.regionMatches(true, j, CHARSET_KEY, 0, CHARSET_KEY.length())) {
          String name = value.substring(j + CHARSET_KEY.length());
          int semicolon = name.indexOf(';');
          if (semicolon != -1) {
            name = name.substring(0, semicolon);
          }
          name = name.trim();
          if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
            name = name.substring(1, name.length() - 1);
          }
          return (name.length() == 0) ? null : name;
        }
      }
      return null;
    }
    return null;
  }

  /** Closes the socket if it is open, ignoring any failure while closing. */
  private static void closeQuietly(Socket socket) {
    if (socket != null) {
      try {
        socket.close();
      }
      catch (IOException ignored) {
        // Nothing useful can be done about a failed close.
      }
    }
  }
}