python爬虫

小小猪pig 发布于 2018/01/03 16:32
阅读 734
收藏 0
def _finance_geturl_roll():
    """Collect article URLs from the Sina roll-news interface.

    Issues one GET request per page for pages 1 through 21 (with ``col=43``)
    and pulls the ``url`` field out of every entry matched in the response
    text.  The number of collected URLs is printed before returning.

    Returns:
        list: The non-empty URL strings, in the order they were matched.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (e.g. ``ConnectionError`` when the host cannot
            be resolved or reached).
    """
    # The parentheses are required: without them the second literal is a
    # separate no-op expression statement and the fixed query string is
    # silently dropped from the request URL.
    baseurl = ('http://roll.news.sina.com.cn/interface/rollnews_ch_out_interface.php?'
               'spec=&type=&ch=03&k=&offset_page=0&offset_num=0&num=60&asc=&r=0.9330196594434315')
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
                             "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
               "Accept": "*/*"}

    # Loop-invariant, so compile once.  Groups: (channel title, title, url, time).
    entry_pattern = re.compile(
        r'{channel : {title : (.+?),id : .*},title : "(.*)",url : "(.*)",type.*,time :(.*)},')

    p_url = []
    for page in range(1, 22):
        # requests appends these to the query string already present in baseurl.
        post_param = {'col': '43', 'page': page}
        return_data = requests.get(baseurl, params=post_param, headers=headers, verify=False)
        data = return_data.text

        # Group index 2 is the url capture; skip entries whose url is empty.
        p_url.extend(entry[2] for entry in entry_pattern.findall(data) if entry[2])

    # Single parenthesized argument: prints the same under Python 2 and 3.
    print(len(p_url))
    return p_url


# Run the crawl once when this file is executed directly as a script.
if __name__ == '__main__':
    _finance_geturl_roll()

这个是写的代码

D:\A\Python\python.exe D:/A/workspace/pycharm/littlepy/icore/base/unittest/hel.py
Traceback (most recent call last):
  File "D:/A/workspace/pycharm/littlepy/icore/base/unittest/hel.py", line 40, in <module>
    _finance_geturl_roll() 
  File "D:/A/workspace/pycharm/littlepy/icore/base/unittest/hel.py", line 23, in _finance_geturl_roll
    return_data = requests.get(baseurl, params=post_param, headers=headers, verify=False)
  File "D:\A\Python\lib\site-packages\requests-2.18.4-py2.7.egg\requests\api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "D:\A\Python\lib\site-packages\requests-2.18.4-py2.7.egg\requests\api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\A\Python\lib\site-packages\requests-2.18.4-py2.7.egg\requests\sessions.py", line 508, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\A\Python\lib\site-packages\requests-2.18.4-py2.7.egg\requests\sessions.py", line 618, in send
    r = adapter.send(request, **kwargs)
  File "D:\A\Python\lib\site-packages\requests-2.18.4-py2.7.egg\requests\adapters.py", line 508, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='roll.news.sina.com.cn', port=80): Max retries exceeded with url: /interface/rollnews_ch_out_interface.php?col=43&page=1 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000000000304B470>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))

Process finished with exit code 1
 

这个是报错信息

 

有谁知道是什么情况啊?已经研究了好久,requests 等依赖包都安装了,但还是报错。

 

 

 

 

 

 

加载中
0
李复唐

环境变量是否正确

小小猪pig
小小猪pig
回复@李复唐 : 是网络问题,这台机器不能访问外网(所以域名解析失败,报 getaddrinfo failed),醉了
返回顶部
顶部