After simulating a Lagou login with Selenium, Scrapy doesn't save the cookies
Source: 9-4 Integrating Selenium into Scrapy
pythoner_
2017-11-12
I joined the cookies into a string, put it into the headers, and passed that to Scrapy:
import time

import requests
import scrapy
from scrapy import signals
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapy.xlib.pydispatch import dispatcher
from selenium import webdriver


class LagouSpider(CrawlSpider):
    name = 'lagou'
    allowed_domains = ['www.lagou.com']
    start_urls = ['https://www.lagou.com']
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) "
                      "Gecko/20100101 Firefox/56.0",
    }
    custom_settings = {
        "COOKIES_ENABLED": True,
        "DOWNLOAD_DELAY": 0,
    }
    rules = (
        Rule(LinkExtractor(allow=(r"zhaopin/.*",)), follow=True),
        Rule(LinkExtractor(allow=(r"gongsi/j\d+.html",)), follow=True),
        Rule(LinkExtractor(allow=r'jobs/\d+.html'), callback='parse_job', follow=True),
    )

    def __init__(self):
        self.browser = webdriver.Chrome(
            executable_path=r"F:\python项目开发\软件\chromedriver.exe")
        super(LagouSpider, self).__init__()
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def spider_closed(self, spider):
        # Quit Chrome when the spider exits
        print("spider closed")
        self.browser.quit()

    def start_requests(self):
        # Log in through Selenium, then hand the session over to Scrapy
        self.browser.get("https://passport.lagou.com/login/login.html")
        self.browser.find_element_by_css_selector(
            ".active input[placeholder='请输入常用手机号/邮箱']").send_keys("18229536448")
        self.browser.find_element_by_css_selector(
            ".active input[placeholder='请输入密码']").send_keys("zdj5dsd5158.")
        self.browser.find_element_by_css_selector(".active input.btn_green").click()
        time.sleep(10)
        # Join the Selenium cookies into a single "cookie" request header
        cookie = [item["name"] + "=" + item["value"]
                  for item in self.browser.get_cookies()]
        self.headers["cookie"] = ';'.join(cookie)
        # Sanity check: the joined header does work for a plain requests call
        text = requests.get("https://www.lagou.com/s/subscribe.html",
                            headers=self.headers)
        return [scrapy.Request(url=self.start_urls[0], headers=self.headers,
                               callback=self.parse)]

    def parse_job(self, response):
        i = {}
        i['domain_id'] = response.xpath('//input[@id="sid"]/@value').extract()
        i['name'] = response.xpath('//div[@id="name"]').extract()
        i['description'] = response.xpath('//div[@id="description"]').extract()
        return i
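One way to see where the header gets lost is to print, inside a callback, the Cookie header Scrapy actually sent. A minimal diagnostic sketch, reusing the parse_job callback from the spider above (response.request.headers is standard Scrapy API):

    def parse_job(self, response):
        # With COOKIES_ENABLED=True, Scrapy's CookiesMiddleware drops any
        # manually set "cookie" header and rebuilds the Cookie header from
        # its own cookiejar, so the hand-made header may be gone here.
        print(response.request.headers.get('Cookie'))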
1 Answer
pythoner_
Asker (original poster)
2017-11-12
Through experimentation I found that cookies passed inside the headers don't get saved; they only persist when passed as actual cookies on the Request.
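For reference, a minimal sketch of that fix, assuming the same spider as above: build a plain {name: value} dict from Selenium's get_cookies() and hand it to scrapy.Request through the cookies argument, so the CookiesMiddleware stores it and re-sends it on follow-up requests:

    def start_requests(self):
        # ... Selenium login exactly as before ...
        # Build a {name: value} dict instead of a joined "cookie" header string
        cookie_dict = {c["name"]: c["value"] for c in self.browser.get_cookies()}
        # With COOKIES_ENABLED=True, the CookiesMiddleware puts these into its
        # cookiejar and attaches them to the requests generated by the rules too
        return [scrapy.Request(url=self.start_urls[0], cookies=cookie_dict,
                               headers=self.headers, callback=self.parse)]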