结果返回 None，请问是不是淘宝把我屏蔽了？
来源:2-11 爬取淘宝网的数据

翻版郭富城
2019-02-27
import requests
import re
import json
def spider(sn, book_list=None):
    """Search Taobao for ``sn`` and collect the items found on the result page.

    The result page embeds its data as a JavaScript assignment of the form
    ``g_page_config = {...};``. This function extracts that JSON object with
    a regular expression and reads the listings from
    ``data['mods']['itemlist']['data']['auctions']``.

    Args:
        sn: Search keyword (the caller passes an ISBN string).
        book_list: Optional list to append results to. A new list is created
            when omitted.

    Returns:
        The list of collected items, each a dict with the keys ``title``,
        ``price``, ``link`` and ``store``. The list is unchanged (or empty)
        when the page does not contain ``g_page_config``.
    """
    if book_list is None:
        book_list = []

    # Fetch the HTML. The query goes through ``params`` so it is URL-encoded,
    # and a timeout keeps the call from hanging forever.
    text = requests.get(
        'https://s.taobao.com/search',
        params={'q': sn},
        headers={
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/68.0.3440.106 Safari/537.36',
            # NOTE: placeholder value; replace it with a real cookie string.
            'cookie': '我的cookies',
        },
        timeout=10,
    ).text

    # Locate the embedded JSON object with a regular expression.
    p = re.compile(r'g_page_config = (\{.+\});\s*', re.M)
    rest = p.search(text)
    print(rest)
    if not rest:
        # No embedded config in the response, so there is nothing to parse.
        return book_list

    print(rest.group(1))
    data = json.loads(rest.group(1))
    try:
        bk_list = data['mods']['itemlist']['data']['auctions']
    except (KeyError, TypeError):
        # The listing section is missing from the config: treat as no items.
        bk_list = []
    print(len(bk_list))

    # Collect name, price, link and store for every listing.
    for bk in bk_list:
        title = bk['raw_title']
        price = bk['view_price']
        link = bk['detail_url']
        store = bk['nick']
        book_list.append({'title': title, 'price': price, 'link': link, 'store': store})
        print('{title}:{price}:{link}:{store}'.format(title=title, price=price, link=link, store=store))

    return book_list
# Script entry point: run one search for a hard-coded ISBN when executed directly.
if __name__ == "__main__":
    sn = "9787115428028"
    spider(sn)
写回答
1回答
-
翻版郭富城
提问者
2019-02-27
突然又正常抓取数据了
012019-02-28
相似问题