把商品数据往__init__的list中添加,添加失败
来源:2-13 代码优化
慕仰8543635
2019-07-25
from typing import NamedTuple
import requests
from lxml import html
class Main(NamedTuple):
""" 信息格式 """
name: str
price: float
link: str
def __str__(self):
return '价格:{self.price};名称:{self.name};链接:{self.link}'.format(self=self)
class Spider(object):
""" 爬虫程序体 """
def __init__(self, goods):
self.goods = goods
self.goodslist = []
def jd(self):
url = 'https://search.jd.com/Search?keyword={0}'.format(self.goods)
# 获取HTML文档
resp = requests.get(url, headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
})
resp.encoding = 'utf-8'
html_data = resp.text
# 提取xpath对象
selector = html.fromstring(html_data)
# 获取数据列表
ul_list = selector.xpath('//div[@id="J_goodsList"]/ul/li')
# 解析对应内容
n = 0
for li in ul_list:
n += 1
# print('这是第%d条数据' % n)
try:
# 获取商品名称
goods = li.xpath('div/div[@class="p-name p-name-type-2"]/a/em/text()')
# print(goods)
goods2 = li.xpath('div/div[@class="p-name p-name-type-2"]/a/em/font[@class="skcolor_ljg"]/text()')
goods = goods[0] + goods2[0] + goods[1] # 组合商品名称
# 获取商品价格
price = li.xpath('div/div[@class="p-price"]/strong/i/text()')
# 获取购买链接
link = li.xpath('div/div[@class="p-name p-name-type-2"]/a/@href')
link = 'http:' + link[0] # 链接前加'http:'成为完成的链接
# print(goods)
# print(price[0])
# print(link)
g_list = Main(
name=goods,
link=link,
price=price[0]
)
# print(g_list)
self.goodslist.append(g_list)
print(self.goodslist)
except Exception as e:
print('There has an Error:', e)
def info_sorted(self):
self.jd()
a_list = sorted(self.goodslist, key=lambda item: float(item.price), reverse=True)
for goods in a_list:
# print(a_list)
pass
if __name__ == '__main__':
# user_input = str(input('请输入您要查询的内容:'))
a = Spider('2060')
a.info_sorted()输出结果

提问:
老师,代码中我的g_list打印出来的数据是正常的,但是往goodslist添加会重复添加第一条数据,请问是哪里出了问题?
写回答
1回答
-
NavCat
2019-07-26
是你代码的第74行在循环里面打印了整个列表造成的视觉错觉吧。改成这样试试:
for goods in a_list: print(goods) pass

没看都有重复数据呀
00
相似问题
打开mysql的admin失败
回答 1
https 的网址数据爬取不到????
回答 1
