13-9小爬虫排序问题
来源:13-10 sorted 排序
天上掉下个小馅饼
2018-07-19
老师,我的排序的时候总有几个排在前面,除了这几个以外呢,其他都是正常的,我自己调试不出来,所以来问下老师
from urllib import request
import re
class Spider():
    """Scrape streamer names and viewer counts from a listing page and print them ranked.

    The pipeline is: fetch the HTML, extract one ``{'name', 'number'}`` dict
    per ``video-info`` block, sort by the numeric viewer count, and print.
    """

    url = "https://www.panda.tv/cate/lol?pdt=1.24.s1.3.7qn4br8ij3u"
    # Non-greedy so each block ends at the first closing </div>.
    root_pattern = '<div class="video-info">.*?</div>'
    # Non-greedy captures: a greedy (.*) runs to the LAST quote / </span>
    # on the line and drags neighbouring markup into the captured value.
    name_pattern = '<span class="video-nickname" title="(.*?)"'
    number_pattern = '<span class="video-number">(.*?)</span>'

    def __fetch_content(self):
        """Download ``Spider.url`` and return the page decoded as UTF-8 text."""
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(Spider.url) as response:
            raw = response.read()
        return str(raw, encoding="utf-8")

    def __analysis(self, htmls):
        """Extract a list of ``{'name': str, 'number': str}`` dicts from the HTML.

        Blocks that lack either the nickname or the number tag are skipped
        instead of raising ``IndexError``.
        """
        name_numbers = []
        for block in re.findall(Spider.root_pattern, htmls, re.S):
            name = re.search(Spider.name_pattern, block, re.S)
            number = re.search(Spider.number_pattern, block, re.S)
            if name is None or number is None:
                continue
            name_numbers.append({
                'name': name.group(1).strip(),
                'number': number.group(1).strip(),
            })
        return name_numbers

    def __sort(self, name_numbers):
        """Return a new list ordered by numeric viewer count (ascending).

        ``sorted`` does not sort in place; it returns a new list, so its
        result must be returned. Discarding it leaves the data unsorted.
        Pass ``reverse=True`` to ``sorted`` for a most-popular-first ranking.
        """
        return sorted(name_numbers, key=self.__sort_seed)

    def __sort_seed(self, name_number):
        """Convert a viewer-count string such as ``'1.5万'`` or ``'300'`` to a float.

        Returns ``0.0`` when the string contains no number at all.
        """
        text = name_number['number']
        # Match the full decimal number; r'\d*' would stop at the '.' and
        # turn '1.5万' into 1, which mis-ranks every fractional count.
        match = re.search(r'\d+(?:\.\d+)?', text)
        if match is None:
            return 0.0
        number = float(match.group())
        if '万' in text:
            # '万' means ten thousand.
            number *= 10000
        return number

    def __show(self, name_numbers):
        """Print one ``name-----------number`` line per entry."""
        for x in name_numbers:
            print(x['name'] + "-----------" + x['number'])

    def go(self):
        """Run the whole pipeline: fetch, parse, sort, print."""
        htmls = self.__fetch_content()
        name_numbers = self.__analysis(htmls)
        name_numbers = self.__sort(name_numbers)
        self.__show(name_numbers)
# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger a network request.
if __name__ == "__main__":
    spider = Spider()
    spider.go()
写回答
1回答
-
7七月
2018-07-20
排序函数是不是没有按纯数字排序?检测下
00
相似问题