一个使用代理读取网页的方法

JASONOSC 发布于 2014/11/14 15:24
阅读 516
收藏 1

    /**
     * User-Agent header value used to make requests look like they come from
     * an ordinary desktop browser (the string identifies as MSIE 7.0 on Windows NT 5.1).
     */
    public final static String USERAGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; GTB5; .NET CLR 2.0.50727; CIBA)";



/**
 * Fetches a page with an HTTP GET sent through the given proxy and returns
 * the response body as text.
 *
 * <p>The body is decoded as GBK and read line by line; a {@code "\n"} is
 * written <em>before</em> every line, so a non-empty result starts with a
 * newline and the original line terminators are not preserved.
 *
 * @param uri       address of the page to request
 * @param host      host name of the proxy to route the request through
 * @param port      port of the proxy; unboxed when the proxy is built, so it
 *                  must not be {@code null}
 * @param userAgent value to send as the User-Agent of the request
 * @return the decoded response body, each line prefixed with {@code "\n"}
 * @throws NetWorkConnectException if the response status is not 200 (OK) or
 *                                 the HTTP client reports a protocol error
 * @throws IOException             if the response body cannot be read; the
 *                                 underlying exception is attached as the cause
 */
public String getHttpRequest(String uri, String host, Integer port,
        String userAgent)
        throws NetWorkConnectException, IOException {

    /* Local accumulator only, so the unsynchronized builder is sufficient. */
    StringBuilder result = new StringBuilder();

    /* 1. Create the HttpClient and route it through the proxy. */
    HttpClient httpClient = new HttpClient();
    ProxyHost proxy = new ProxyHost(host, port);
    httpClient.getHostConfiguration().setProxyHost(proxy);

    /* Send the caller-supplied User-Agent. */
    httpClient.getParams().setParameter(HttpMethodParams.USER_AGENT, userAgent);

    /* Socket (read) timeout configured on the connection manager. */
    httpClient.getHttpConnectionManager().getParams().setSoTimeout(70000);

    /* Timeout for establishing the connection. */
    httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(70000);

    /* 2. Create the GET request and configure it. */
    GetMethod getMethod = new GetMethod(uri);

    /*
     * NOTE(review): this parameter is documented for HEAD requests and
     * presumably has no effect on a GET - confirm before removing.
     */
    getMethod.getParams().setParameter(HttpMethodParams.HEAD_BODY_CHECK_TIMEOUT, 70000);

    /*
     * Per-request socket timeout: 2 seconds. The original code first stored
     * 70000 under HttpMethodParams.SO_TIMEOUT and then immediately replaced
     * it with this call; only the value that actually took effect is kept.
     */
    getMethod.getParams().setSoTimeout(2000);

    /* Use the library's default retry handler for recoverable failures. */
    getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());

    /* 3. Execute the request and read the body. */
    try {
        int statusCode = httpClient.executeMethod(getMethod);
        if (statusCode != HttpStatus.SC_OK) {
            throw new NetWorkConnectException("读取地址返回代码错误");
        }

        InputStream response = getMethod.getResponseBodyAsStream();
        /* NOTE(review): charset is fixed to GBK regardless of what the server declares. */
        BufferedReader in = new BufferedReader(new InputStreamReader(response, "GBK"));
        String line;
        while ((line = in.readLine()) != null) {
            result.append("\n").append(line);
        }
    } catch (HttpException e) {
        /*
         * NOTE(review): the cause is not attached here because the
         * constructors of NetWorkConnectException are not visible from
         * this snippet; pass 'e' along if a (String, Throwable) one exists.
         */
        throw new NetWorkConnectException("网络连接异常");
    } catch (IOException e) {
        /* Keep the original exception as the cause so the real failure stays diagnosable. */
        throw new IOException("页面内容读取异常", e);
    } finally {
        /* Always hand the connection back, whether the read succeeded or failed. */
        getMethod.releaseConnection();
    }
    return result.toString();
}






加载中
返回顶部
顶部