After simulating login to Lagou with Selenium, Scrapy doesn't save the cookies
Source: 9-4 Integrating Selenium into Scrapy
pythoner_
2017-11-12
I put the cookies into the headers and passed them to Scrapy:
import time

import requests
import scrapy
from scrapy import signals
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapy.xlib.pydispatch import dispatcher
from selenium import webdriver


class LagouSpider(CrawlSpider):
    name = 'lagou'
    allowed_domains = ['www.lagou.com']
    start_urls = ['https://www.lagou.com']
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0",
    }
    custom_settings = {
        "COOKIES_ENABLED": True,
        "DOWNLOAD_DELAY": 0,
    }
    rules = (
        Rule(LinkExtractor(allow=(r"zhaopin/.*",)), follow=True),
        Rule(LinkExtractor(allow=(r"gongsi/j\d+.html",)), follow=True),
        Rule(LinkExtractor(allow=r'jobs/\d+.html'), callback='parse_job', follow=True),
    )

    def __init__(self):
        # Raw string so the backslashes in the Windows path are not treated as escapes
        self.browser = webdriver.Chrome(executable_path=r"F:\python项目开发\软件\chromedriver.exe")
        super(LagouSpider, self).__init__()
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_closed(self, spider):
        # Quit Chrome when the spider exits
        print("spider closed")
        self.browser.quit()

    def start_requests(self):
        # Log in to Lagou with Selenium, then hand the session cookies to Scrapy
        self.browser.get("https://passport.lagou.com/login/login.html")
        self.browser.find_element_by_css_selector(".active input[placeholder='请输入常用手机号/邮箱']").send_keys("18229536448")
        self.browser.find_element_by_css_selector(".active input[placeholder='请输入密码']").send_keys("zdj5dsd5158.")
        self.browser.find_element_by_css_selector(".active input.btn_green").click()
        time.sleep(10)  # wait for the login redirect to finish
        # Flatten Selenium's cookies into a single "name=value;..." header string
        cookie = [item["name"] + "=" + item["value"] for item in self.browser.get_cookies()]
        self.headers["cookie"] = ';'.join(cookie)
        # Sanity check: the same header does work with a plain requests call
        text = requests.get("https://www.lagou.com/s/subscribe.html", headers=self.headers)
        return [scrapy.Request(url=self.start_urls[0], headers=self.headers, callback=self.parse)]

    def parse_job(self, response):
        i = {}
        i['domain_id'] = response.xpath('//input[@id="sid"]/@value').extract()
        i['name'] = response.xpath('//div[@id="name"]').extract()
        i['description'] = response.xpath('//div[@id="description"]').extract()
        return i
1 Answer
-
pythoner_
(Asker)
2017-11-12
Through experimentation I found that cookies passed inside the headers don't get saved; they only persist when passed through the cookies parameter.
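A minimal sketch of that fix, assuming the same Selenium login flow as in the question: instead of writing a raw Cookie header, convert get_cookies() into a plain name-to-value dict and pass it through scrapy.Request's cookies argument, so the cookie middleware (with COOKIES_ENABLED set to True) stores it in its cookiejar and resends it on the follow-up requests generated by the Rules:

    def start_requests(self):
        # ... Selenium login exactly as in the question ...
        # Build a plain {name: value} dict from Selenium's cookie objects
        cookie_dict = {item["name"]: item["value"] for item in self.browser.get_cookies()}
        # cookies= goes through Scrapy's CookiesMiddleware, which persists and
        # resends these cookies on every request the Rules generate afterwards
        return [scrapy.Request(url=self.start_urls[0], cookies=cookie_dict,
                               headers=self.headers, callback=self.parse)]

This matches Scrapy's documented behavior: when cookie handling is enabled, CookiesMiddleware manages the Cookie header itself, so a Cookie header set by hand is not merged into the cookiejar and never reaches the requests the crawler generates afterwards.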