爬虫 爬取饿了么数据出现问题

OSC_bhdOFV 发布于 2018/12/13 16:28
阅读 148
收藏 0

import requests
import json
import time
from bs4 import BeautifulSoup
import xlwt
from  openpyxl  import Workbook
#from pyExcelerator import *
id_list = []#店铺的id列表
name_list = []#店铺的名称列表
address_list = []#店铺的地址列表
lat_list = []#店铺的纬度
long_list = []#店铺的经度
olt_list = []#店铺的平均送达时间
paf_list = []#店铺的配送费
phone_list = []#店铺的电话
oph_list = []#店铺的营业时间
def get_all_id():
    for offset in range(0,985,24):
        url='https://www.ele.me/restapi/shopping/restaurants?extras%5B%5D=activities&geohash=wk3je75zd60f&latitude=24.890825&limit=24&longitude=102.80432&offset=0&terminal=web'.format(offset)
        web_data = requests.get(url)
        soup=BeautifulSoup(web_data.text,'lxml')
        content = soup.text
        json_obj = json.loads(content)
        for item in json_obj:
            restaurant_address = item.get('address')
            address_list.append(restaurant_address)
            restaurant_name = item.get('name')
            name_list.append(restaurant_name)
            restaurant_id = item.get('id')
            id_list.append(restaurant_id)
            restaurant_lat = item.get('latitude')
            lat_list.append(restaurant_lat)
            restaurant_long = item.get('longitude')
            long_list.append(restaurant_long)
            restaurant_olt = item.get('order_lead_time')
            olt_list.append(restaurant_olt)
            restaurant_paf = item.get('piecewise_agent_fee')
            paf_list.append(restaurant_paf)
            restaurant_phone = item.get('phone')
            phone_list.append(restaurant_phone)
            restaurant_oph = item.get('opening_hours')
            oph_list.append(restaurant_oph)
            
    return name_list,address_list,id_list,lat_list,long_list,olt_list,paf_list,phone_list,oph_list
get_all_id()
i=0
w=Workbook()
ws=w.create_sheet('my')
ws.column_dimensions["A"].width =10.0
ws.append(["ID","店名","地址","纬度","经度","平均送达时间","电话"])
ws.column_dimensions["A"].width =30.0
ws.column_dimensions["B"].width =30.0
ws.column_dimensions["C"].width =18.0
ws.column_dimensions["D"].width =20.0
ws.column_dimensions["E"].width =20.0
ws.column_dimensions["F"].width =15.0
ws.column_dimensions["G"].width =30.0
#ws.column_dimensions["H"].width =10.0
#ws.column_dimensions["I"].width =25.0
#ws.column_dimensions["J"].width =40.0
 
for i in range(len(id_list)):
    ws.append([id_list[i],name_list[i],address_list[i],lat_list[i],long_list[i],olt_list[i],phone_list[i]])
w.save('min.xls')
m=0#用来计数,第几个店铺
n=0#用来记录数据,第几条数据
for id in id_list:
    m=m+1
    restaurant_url = 'https://mainsite-restapi.ele.me/shopping/v2/menu?restaurant_id='+str(id)
    print('*************************这里是店铺分界线******第{}个店铺*********************************************'.format(m))
 
    print(name_list[m])
    print(address_list[m])
    print(lat_list[m])
    print(long_list[m])
    print(olt_list[m])
    print(paf_list[m])
    print(phone_list[m])
    print(oph_list[m])
    headers = {'User-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6788.400 QQBrowser/10.3.2854.400'}
    web_data = requests.get(restaurant_url,headers=headers)
    #time.sleep(3)
    content = web_data.text
    json_obj = json.loads(content)
    try:
        for item in json_obj:
            for food in item.get('foods'):
                n +=1
                print('第%d条数据:' % n)
                print(food.get('name'),food.get('tips'),'评分',food.get('rating'))
    except AttributeError as e :
        pass
    except IndexError as e1:
        pass
想爬取piecewise_agent_fee里的fee,但是爬不出来,还有就是oph_list = []存不进excel里面。

加载中
返回顶部
顶部