现在的位置: 首页 > 综合 > 正文

HttpClient读取页面的使用例子

2013年03月09日 ⁄ 综合 ⁄ 共 4681字 ⁄ 字号 评论关闭
一个更真实的例子看这里吧:
HttpClient使用例子:读取CSDN的投票列表并正则解析
处理的结果

截至2009年01月06日,CSDN参与人数最多的投票列表。

  1. package com.laozizhu.apache.httpclient;
  2. import java.net.Socket;
  3. import org.apache.http.ConnectionReuseStrategy;
  4. import org.apache.http.Header;
  5. import org.apache.http.HttpHost;
  6. import org.apache.http.HttpResponse;
  7. import org.apache.http.HttpVersion;
  8. import org.apache.http.impl.DefaultConnectionReuseStrategy;
  9. import org.apache.http.impl.DefaultHttpClientConnection;
  10. import org.apache.http.message.BasicHttpRequest;
  11. import org.apache.http.params.BasicHttpParams;
  12. import org.apache.http.params.HttpParams;
  13. import org.apache.http.params.HttpProtocolParams;
  14. import org.apache.http.protocol.BasicHttpContext;
  15. import org.apache.http.protocol.BasicHttpProcessor;
  16. import org.apache.http.protocol.ExecutionContext;
  17. import org.apache.http.protocol.HttpContext;
  18. import org.apache.http.protocol.HttpRequestExecutor;
  19. import org.apache.http.protocol.RequestConnControl;
  20. import org.apache.http.protocol.RequestContent;
  21. import org.apache.http.protocol.RequestExpectContinue;
  22. import org.apache.http.protocol.RequestTargetHost;
  23. import org.apache.http.protocol.RequestUserAgent;
  24. import org.apache.http.util.EntityUtils;
  25. /**
  26.  * HttpClient读取页面的使用例子
  27.  * @author 老紫竹(java2000.net)
  28.  *
  29.  */
  30. public class HttpGet {
  31.   public static void main(String[] args) throws Exception {
  32.     HttpParams params = new BasicHttpParams();
  33.     // HTTP 协议的版本,1.1/1.0/0.9
  34.     HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
  35.     // 字符集
  36.     HttpProtocolParams.setContentCharset(params, "UTF-8");
  37.     // 伪装的浏览器类型
  38.     // IE7 是 
  39.     // Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0)
  40.     //
  41.     // Firefox3.03
  42.     // Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3
  43.     //
  44.     HttpProtocolParams.setUserAgent(params, "HttpComponents/1.1");
  45.     HttpProtocolParams.setUseExpectContinue(params, true);
  46.     BasicHttpProcessor httpproc = new BasicHttpProcessor();
  47.     httpproc.addInterceptor(new RequestContent());
  48.     httpproc.addInterceptor(new RequestTargetHost());
  49.     httpproc.addInterceptor(new RequestConnControl());
  50.     httpproc.addInterceptor(new RequestUserAgent());
  51.     httpproc.addInterceptor(new RequestExpectContinue());
  52.     HttpRequestExecutor httpexecutor = new HttpRequestExecutor();
  53.     HttpContext context = new BasicHttpContext(null);
  54.     HttpHost host = new HttpHost("www.java2000.net"80);
  55.     DefaultHttpClientConnection conn = new DefaultHttpClientConnection();
  56.     ConnectionReuseStrategy connStrategy = new DefaultConnectionReuseStrategy();
  57.     context.setAttribute(ExecutionContext.HTTP_CONNECTION, conn);
  58.     context.setAttribute(ExecutionContext.HTTP_TARGET_HOST, host);
  59.     try {
  60.       String[] targets = { "/""/help.jsp" };
  61.       for (int i = 0; i < targets.length; i++) {
  62.         if (!conn.isOpen()) {
  63.           Socket socket = new Socket(host.getHostName(), host.getPort());
  64.           conn.bind(socket, params);
  65.         }
  66.         BasicHttpRequest request = new BasicHttpRequest("GET", targets[i]);
  67.         System.out.println(">> Request URI: " + request.getRequestLine().getUri());
  68.         context.setAttribute(ExecutionContext.HTTP_REQUEST, request);
  69.         request.setParams(params);
  70.         httpexecutor.preProcess(request, httpproc, context);
  71.         HttpResponse response = httpexecutor.execute(request, conn, context);
  72.         response.setParams(params);
  73.         httpexecutor.postProcess(response, httpproc, context);
  74.         // 返回码
  75.         System.out.println("<< Response: " + response.getStatusLine());
  76.         // 返回的文件头信息
  77.         Header[] hs = response.getAllHeaders();
  78.         for (Header h : hs) {
  79.           System.out.println(h.getName() + ":" + h.getValue());
  80.         }
  81.         // 输出主体信息
  82.         System.out.println(EntityUtils.toString(response.getEntity()));
  83.         System.out.println("==============");
  84.         if (!connStrategy.keepAlive(response, context)) {
  85.           conn.close();
  86.         } else {
  87.           System.out.println("Connection kept alive...");
  88.         }
  89.       }
  90.     } finally {
  91.       conn.close();
  92.     }
  93.   }
  94. }

这段代码是 HttpClient 自带的例子,可以借鉴的地方很多。我简单地改造了一下,把响应头也输出了,大家随便看一下结果:

>> Request URI: /
<< Response: HTTP/1.1 200 OK
Proxy-Connection:Keep-Alive
Connection:Keep-Alive
Transfer-Encoding:chunked
Via:1.1 GDATAISASERVER
Date:Tue, 06 Jan 2009 05:07:54 GMT
Content-Type:text/html;charset=UTF-8
Server:Apache/2.2.4 (Win32) mod_jk/1.2.26
Set-Cookie:JSESSIONID=AAF2386712151447598F72716A64F847; Path=/
Set-Cookie:JAVA2000_STYLE_ID=1; Domain=www.java2000.net; Expires=Thu, 08-Mar-2012 14:54:33 GMT; Path=/
Vary:Accept-Encoding
Keep-Alive:timeout=5, max=100

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-CN" dir="ltr">

后面的我就不写了... 那个 keep-alive对性能的影响还是很大的。

抱歉!评论已关闭.