TypeError: 'NoneType' object is not iterable错误,debug发现执行不到parse里面的代码逻辑
来源:6-14 item loder方式提取question - 1

WittChen
2019-10-04
def start_requests(self):
code = 'null’
while code == ‘null’:
chrome_option = Options()
chrome_option.add_argument("–disable-extensions")
chrome_option.add_experimental_option(“debuggerAddress”,“127.0.0.1:9222”)
browser = webdriver.Chrome(“E:\开发工具\Chromedriver\chromedriver.exe”, chrome_options=chrome_option)
#把我们的窗口最大化,不然后面验证左边会有问题
# 但是如果 我们窗口已经是最大化则会报错,因此我们要try它,让程序继续执行下去
try:
browser.maximize_window()
except:
pass
browser.get("https://www.zhihu.com")
if browser.find_elements_by_css_selector("svg.Zi--Bell"):
break
browser.find_elements_by_css_selector("div.SignFlow-tab")[1].click()
browser.find_elements_by_css_selector(".SignFlow-account input[name='username']")[0].send_keys(Keys.CONTROL + "a")
browser.find_elements_by_css_selector(".SignFlow-account input[name='username']")[0].send_keys("15967191468")
browser.find_elements_by_css_selector("input[name='password']")[0].send_keys(Keys.CONTROL + "a")
browser.find_elements_by_css_selector("input[name='password']")[0].send_keys("546372819.")
browser.find_elements_by_css_selector("button.SignFlow-submitButton")[0].click()
try:
english_captcha_element = browser.find_element_by_class_name("Captcha-englishImg")
except:
english_captcha_element = None
try:
chinese_captcha_element = browser.find_element_by_class_name("Captcha-chineseImg")
except:
chinese_captcha_element = None
file_path = os.path.dirname(os.path.dirname(__file__)) + r"\yzm_images\yzm_cn.jpeg"
#中文验证
if chinese_captcha_element:
x_relative = chinese_captcha_element.location["x"]
y_relative = chinese_captcha_element.location["y"]
# y_relative += 70
#分析网站可知,验证码图片十base64编码,我们需要获取编码
#并把编码保存成图片,然后通过zheye去解析它
base64_text = chinese_captcha_element.get_attribute("src")
code = base64_text.replace("data:image/jpg;base64,","").replace("%0A","")
# file_path = "E:/Envs/ArticleSpider/ArticleSpider/yzm_images/yzm_cn.jpeg"
if code != 'null':
fh = open(file_path,"wb")
fh.write(base64.b64decode(code))
fh.close()
z = zheye()
positions = z.Recognize(file_path)
last_position = []
if len(positions) > 0 or len(positions) >= 2:
for position in positions:
if position[1] < position[0]:
last_position.append(position)
else:
last_position.append((position[1], position[0]))
last_position.sort(key=lambda position: position[0])
for position in last_position:
x = x_relative+int(position[0]/2)
y = y_relative+43+int(position[1]/2)
move(x,y)
click()
#英文验证
if english_captcha_element:
base64_text = english_captcha_element.get_attribute("src")
code = base64_text.replace("data:image/jpg;base64,", "").replace("%0A", "")
# file_path = "E:/Envs/ArticleSpider/ArticleSpider/yzm_images/yzm_cn.jpeg"
if code != 'null':
fh = open(file_path, "wb")
fh.write(base64.b64decode(code))
fh.close()
yundama = YDMHttp("ww774882345","ww546372819","9042","900ae07b52508aac571a1edb25e2887c")
cid,result = yundama.decode(file_path,5000,60)
while True:
if result == "":
cid,result = yundama.decode(file_path, 5000, 60)
else:
break
browser.find_elements_by_css_selector("input[name='captcha']")[0].send_keys(result)
browser.find_elements_by_css_selector("button.SignFlow-submitButton")[0].click()
if browser.find_elements_by_css_selector("svg.Zi--Bell"):
break
以上是我的报错代码,但是在最后假如yield scrapy.Request(“https://www.zhihu.com”, headers=self.headers)后,就不会报错,但是得到的页面是登入时的页面我就无法对登入后的页面进行提取数据
写回答
1回答
-
WittChen
提问者
2019-10-04
问题已经解决,我用了cookie 最后添加了yield scrapy.FormRequest(self.start_urls[0], cookies=cookie_dict),然后获取到了知乎登入后的首页
022019-11-14
相似问题