把商品数据往__init__的list中添加,添加失败
来源:2-13 代码优化

慕仰8543635
2019-07-25
from typing import NamedTuple import requests from lxml import html class Main(NamedTuple): """ 信息格式 """ name: str price: float link: str def __str__(self): return '价格:{self.price};名称:{self.name};链接:{self.link}'.format(self=self) class Spider(object): """ 爬虫程序体 """ def __init__(self, goods): self.goods = goods self.goodslist = [] def jd(self): url = 'https://search.jd.com/Search?keyword={0}'.format(self.goods) # 获取HTML文档 resp = requests.get(url, headers={ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36' }) resp.encoding = 'utf-8' html_data = resp.text # 提取xpath对象 selector = html.fromstring(html_data) # 获取数据列表 ul_list = selector.xpath('//div[@id="J_goodsList"]/ul/li') # 解析对应内容 n = 0 for li in ul_list: n += 1 # print('这是第%d条数据' % n) try: # 获取商品名称 goods = li.xpath('div/div[@class="p-name p-name-type-2"]/a/em/text()') # print(goods) goods2 = li.xpath('div/div[@class="p-name p-name-type-2"]/a/em/font[@class="skcolor_ljg"]/text()') goods = goods[0] + goods2[0] + goods[1] # 组合商品名称 # 获取商品价格 price = li.xpath('div/div[@class="p-price"]/strong/i/text()') # 获取购买链接 link = li.xpath('div/div[@class="p-name p-name-type-2"]/a/@href') link = 'http:' + link[0] # 链接前加'http:'成为完成的链接 # print(goods) # print(price[0]) # print(link) g_list = Main( name=goods, link=link, price=price[0] ) # print(g_list) self.goodslist.append(g_list) print(self.goodslist) except Exception as e: print('There has an Error:', e) def info_sorted(self): self.jd() a_list = sorted(self.goodslist, key=lambda item: float(item.price), reverse=True) for goods in a_list: # print(a_list) pass if __name__ == '__main__': # user_input = str(input('请输入您要查询的内容:')) a = Spider('2060') a.info_sorted()
输出结果
提问:
老师,代码中我的g_list打印出来的数据是正常的,但是往goodslist添加会重复添加第一条数据,请问是哪里出了问题?
写回答
1回答
-
NavCat
2019-07-26
是你代码的第74行在循环里面打印了整个列表造成的视觉错觉吧。改成这样试试:
for goods in a_list: print(goods) pass
没看都有重复数据呀
00
相似问题
https 的网址数据爬取不到????
回答 1
打开mysql的admin失败
回答 1