TypeError: 'str' object does not support item assignment

Source: 4-15 Defining and using items - 2

暮烟归雨

2020-06-25

# -*- coding: utf-8 -*-
import re
import json
from urllib import parse

import scrapy
import requests
from scrapy import Request

from ArticleSpider.items import CnblogsArticleItem
from ArticleSpider.utils import common


class CnblogsSpider(scrapy.Spider):
   name = 'cnblogs'
   allowed_domains = ['news.cnblogs.com']
   start_urls = ['https://news.cnblogs.com/']

   def parse(self, response):
       lines = response.xpath("//div[@class='content']")
       for line in lines[1:2]:
           content_url = line.xpath("//h2/a/@href").extract_first("")
           image_url = line.xpath("//div[@class='entry_summary']/a/img/@src").extract_first("")
           yield Request(url=parse.urljoin(response.url, content_url), meta={"image_url": image_url},
                         callback=self.parse_detail)

   def parse_detail(self, response):
       url_re = re.search("\d+", response.url)
       if url_re:
           article_item = CnblogsArticleItem()
           title = response.xpath("//div[@id='news_title']/a/text()").extract_first("")

           release_date = response.xpath("//span[@class='time']/text()").extract_first("")
           date_re = re.search("(\d+.*)", release_date)
           if date_re:
               release_date = date_re.group()

           content = response.xpath("//div[@id='news_body']").extract_first("")
           tag_list = response.xpath("//div[@class='news_tags']/a/text()").extract()
           tags = ",".join(tag_list)

           article_item['title'] = title
           article_item['release_date'] = release_date
           article_item['content'] = content
           article_item['tags'] = tags
           article_item['url'] = response.url
           article_item['image_url'] = response.meta.get("image_url", "")

           json_url = parse.urljoin(response.url, "/NewsAjax/GetAjaxNewsInfo?contentId={}".format(url_re.group()))
           yield Request(url=json_url, meta=article_item, callback=self.parse_nums)

   def parse_nums(self, response):
       article_item = response.meta.get("article_item", "")

       json_data = json.loads(response.text)
       comment_nums = json_data['CommentCount']
       view_nums = json_data['TotalView']

       article_item['comment_nums'] = comment_nums
       article_item['view_nums'] = view_nums
       article_item['md5_url'] = common.get_md5(article_item['url'])

       yield article_item


2 Answers

暮烟归雨

Original poster

2020-06-27


Traceback (most recent call last):
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\utils\defer.py", line 117, in iter_errback
    yield next(it)
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\utils\python.py", line 345, in __next__
    return next(self.data)
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\utils\python.py", line 345, in __next__
    return next(self.data)
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 29, in process_spider_output
    for x in result:
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 338, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "E:\Code\untitled\venv\lib\site-packages\scrapy\core\spidermw.py", line 64, in _evaluate_iterable
    for r in iterable:
  File "E:\Code\untitled\ArticleSpider\ArticleSpider\spiders\cnblogs.py", line 59, in parse_nums
    article_item['comment_nums'] = CommentCount
TypeError: 'str' object does not support item assignment
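
[Editor's note] The last frames of the traceback point at parse_nums in cnblogs.py: the item assignment fails because article_item is a plain str there rather than a CnblogsArticleItem. In the spider above, the follow-up request is created with meta=article_item, so response.meta holds the item's field values rather than a key named "article_item"; the lookup response.meta.get("article_item", "") therefore falls back to its empty-string default. A minimal illustration of that failure mode (hypothetical values, not taken from the actual run):

# Inside parse_nums, when the request was yielded with meta=article_item:
# the key "article_item" is missing, so the default "" is returned.
article_item = response.meta.get("article_item", "")  # -> "" (a str)
article_item['comment_nums'] = 5                      # TypeError: 'str' object does not support item assignment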



bobby

2020-06-27

Post a screenshot of the full error stack trace and I'll take a look; it shows exactly which line of code caused the problem.

bobby replied to 暮烟归雨 (2020-06-28): OK.

(3 replies in total)
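
[Editor's note] A minimal sketch of the likely fix, assuming the spider code in the question (one editorial way to resolve the error, not necessarily the course's official solution): pass the item under an explicit key in meta so parse_nums can retrieve it; Scrapy's cb_kwargs would serve the same purpose.

# Editorial sketch. In parse_detail, wrap the item in a dict:
yield Request(url=json_url,
              meta={"article_item": article_item},
              callback=self.parse_nums)

# In parse_nums, the lookup now returns the CnblogsArticleItem itself,
# so item-style assignment works again:
def parse_nums(self, response):
    article_item = response.meta.get("article_item")

    json_data = json.loads(response.text)
    article_item['comment_nums'] = json_data['CommentCount']
    article_item['view_nums'] = json_data['TotalView']
    article_item['md5_url'] = common.get_md5(article_item['url'])

    yield article_item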
