报错: list index out of range
来源:13-9 数据精炼
归零删人
2018-02-04
import re  # regular-expression module
from urllib import request


class Spider():
    """Scrape streamer names and viewer counts from a bilibili live-area page.

    The page at ``url`` is downloaded, split into per-streamer cards with
    ``root_pattern``, and each card is searched for a name and a viewer
    count.  Cards that lack either field are kept and filled with the
    ``missing_*`` placeholders instead of raising ``IndexError``.
    """

    url = 'https://live.bilibili.com/pages/area/draw'

    # ``[\s\S]`` matches any character (newlines included), ``*`` allows any
    # length and the trailing ``?`` makes the match non-greedy so that each
    # match stops at the first closing tag.
    root_pattern = r'<div class="live-au">([\s\S]*?)</div>'
    name_pattern = r'<span class="name">([\s\S]*?)</span>'
    counting_pattern = r'<span class="look v-middle">([\s\S]*?)</span>'

    # Placeholders used when a card has no name / no viewer-count span.
    missing_name = ''
    missing_counting = '无人'

    def __fetch_content(self):
        """Download ``Spider.url`` and return the body decoded as UTF-8 text."""
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(Spider.url) as response:
            raw = response.read()  # bytes of the HTML document
        return raw.decode('utf-8')

    def __analysis(self, htmls):
        """Split the page into cards and collect the raw regex matches.

        Returns a list of dicts ``{'name': [...], 'counting': [...]}`` where
        each value is the (possibly empty) list returned by ``re.findall``.
        """
        anchors = []
        for html in re.findall(Spider.root_pattern, htmls):
            anchors.append({
                'name': re.findall(Spider.name_pattern, html),
                'counting': re.findall(Spider.counting_pattern, html),
            })
        return anchors

    def __refine(self, anchors):
        """Reduce each raw match list to a single cleaned-up value.

        Returns a lazy ``map`` iterator of ``{'name': str, 'counting': str}``
        dicts.  Because the iterator is lazy, the per-card work only runs
        when the caller consumes it (e.g. with ``list``).
        """
        def first(matches, default):
            # findall yields [] when the span is absent; indexing [0] on that
            # was the source of "list index out of range".
            return matches[0] if matches else default

        def refine_one(anchor):
            return {
                'name': first(anchor['name'], Spider.missing_name).strip(),
                'counting': first(anchor['counting'], Spider.missing_counting),
            }

        return map(refine_one, anchors)

    def run(self):
        """Fetch the page, extract and refine the anchors, then print them."""
        htmls = self.__fetch_content()
        anchors = self.__analysis(htmls)
        anchors = list(self.__refine(anchors))
        print(anchors)


# Only hit the network when executed as a script, not when imported.
if __name__ == '__main__':
    spider = Spider()
    spider.run()
调试到这一行都没问题:anchors = list(self.__refine(anchors))
运行到print(anchors)就会报错: list index out of range
写回答
1回答
-
归零删人
提问者
2018-02-04
解决了,赋值counting后面加了个if语句:
if counting == []:
    counting = ['无人']
00
相似问题