疑难python3列表合并问题

李复唐 发布于 2018/01/31 08:56
阅读 250
收藏 1

每一页抓取的内容,都单独成一个列表了。用append或extend没成功,不知道应该写在哪个位置?

原码:

#!/usr/bin/env python
# encoding=utf-8
from bs4 import BeautifulSoup
import requests
import codecs
from lxml import etree
DOWNLOAD_URL = 'http://movie.douban.com/top250'

def download_page(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36'
    }
    return requests.get(url,headers=headers).content

def parse_html(html):
    tree=etree.HTML(html)
    movie_name_list=[]
    
    evaluate=tree.xpath('body/div/div/div/div[1]/ol/li/div/div[2]/div[2]/div/span[4]/text()')
    film_name=tree.xpath('body/div/div/div/div[1]/ol/li/div/div[2]/div[1]/a/span[1]/text()')
    movie=(list(zip(film_name,evaluate)))
    for i in movie:
        movie_name_list.append(i)
    next_page1=tree.xpath('.//span[@class="next"]/a/@href ')
    next_page=".".join(next_page1)
    if next_page:
        return movie_name_list,DOWNLOAD_URL+next_page
    return movie_name_list,None

def main():
    url=DOWNLOAD_URL
    with codecs.open('a.txt','wb',encoding='utf-8')as fp:
        while url:
            html=download_page(url)
            a,url=parse_html(html)
            print(a)
if __name__ == '__main__':
    main()

 

加载中
0
李复唐

请各位老铁指点一二

返回顶部
顶部