3
回答
java抓取淘宝数据,但要先用httpclient自动登录淘宝网

要抓取淘宝的部分数据,但前提是必须先登录,在Google上搜到的都是下面这段代码:

这段代码根据"_tb_token_"来截取字符串,但response的内容里根本就没有这个字符串,哪位大侠帮忙看看。程序在还没登录的时候就已经挂掉了,所以不用考虑用户名密码是否错误。

// Host and port handed to HttpClient's host configuration in main().
// NOTE(review): LOGON_SITE includes the "http://" scheme but is passed to
// setHost(host, port); that overload presumably expects a bare host name
// such as "www.taobao.com" — confirm against the HttpClient documentation.
private static final String LOGON_SITE = "http://www.taobao.com";
private static final int LOGON_PORT = 80;
// Regular member login
// (Disabled alternative URL, kept for reference.)
//private static final String TAOBAO_BASE_LOGIN_BEFORE = "http://shu.taobao.com/trendindex?spm=0.0.0.0.vPrmim&query=%E9%92%88%E7%BB%87%E8%A1%AB";
// Login page URL; main() requests it with a GET and searches the response body for "_tb_token_".
private static final String TAOBAO_BASE_LOGIN = "http://login.taobao.com/member/login.jhtml";


public static void main(String args[]) throws HttpException, IOException {
String taobaoUser="woshigoojje@163.com";
//下面2个值从httpwatch中得到
String taobaoPwd="3DES_2_000000000000000000000000000000_61F0B8BE021BBBDD020919017B6816F5";
String taobaoTid="XOR_1_000000000000000000000000000000_63584054400B0F717B750370";

HttpClient client = new HttpClient();
client.getHostConfiguration().setHost(LOGON_SITE, LOGON_PORT);

//取得_tb_token_值
String _tb_token_Value="";
Cookie[] cookies = client.getState().getCookies();        
        String responseString = processGet(client,null,TAOBAO_BASE_LOGIN,cookies,true,true);   
        System.out.println(responseString);


        //取第二个_tb_token_为,现在的登录方式
        responseString=responseString.substring(responseString.indexOf("_tb_token_")+"_tb_token_".length());
        System.out.println(responseString);
        responseString=responseString.substring(responseString.indexOf("_tb_token_")+"_tb_token_".length());
        System.out.println(responseString);
        _tb_token_Value=responseString.substring(responseString.indexOf("value=")+"value='".length(),responseString.indexOf(">")-1);

举报
os6101
发帖于5年前 3回/7K+阅
顶部