MySQL async insertion fails, I've racked my brains with no solution, asking the instructor for guidance (continued)

Source: 6-19 Saving data into MySQL - 1

慕侠2131314

2018-11-25

import scrapy


class ZhilianSpiderItem(scrapy.Item):
    job_pubDate = scrapy.Field()
    job_refresh = scrapy.Field()
    job_endDate = scrapy.Field()
    job_tag = scrapy.Field()
    job_id = scrapy.Field()
    job_detailUrl = scrapy.Field()
    company_name = scrapy.Field()
    company_industry = scrapy.Field()
    company_property = scrapy.Field()
    company_scale = scrapy.Field()
    company_location = scrapy.Field()
    company_detailUrl = scrapy.Field()
    job_name = scrapy.Field()
    job_salary = scrapy.Field()
    job_location = scrapy.Field()
    job_experience = scrapy.Field()
    job_education = scrapy.Field()
    job_quantity = scrapy.Field()
    job_jd = scrapy.Field()
    flag = scrapy.Field()
    crawl_date = scrapy.Field()


    def get_insert_sql(self):

        insert_sql = """
                   insert into jobspider_zhilian(job_pubDate, job_refresh, job_endDate, job_tag, job_id, job_detailUrl, company_name,
                     company_industry, company_property, company_scale, company_location, company_detailUrl, job_name, job_salary,
                     job_location, job_experience, job_education, job_quantity, job_jd, flag, crawl_date
                     ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                     ON DUPLICATE KEY UPDATE job_pubDate=VALUES(job_pubDate), job_refresh=VALUES(job_refresh), job_endDate=VALUES(job_endDate),
                     job_tag=VALUES(job_tag), job_detailUrl=VALUES(job_detailUrl), company_name=VALUES(company_name),
                     company_industry=VALUES(company_industry), company_property=VALUES(company_property), company_scale=VALUES(company_scale),
                     company_location=VALUES(company_location), company_detailUrl=VALUES(company_detailUrl), job_name=VALUES(job_name),
                     job_salary=VALUES(job_salary), job_location=VALUES(job_location), job_experience=VALUES(job_experience),
                     job_education=VALUES(job_education), job_quantity=VALUES(job_quantity), job_jd=VALUES(job_jd),
                     flag=VALUES(flag), crawl_date=VALUES(crawl_date)
               """
        params = (
            self["job_pubDate"], self["job_refresh"], self["job_endDate"],self["job_tag"],self["job_id"],
            self["job_detailUrl"], self["company_name"], self["company_industry"],self["company_property"],self["company_scale"],
            self["company_location"],self["company_detailUrl"],self["job_name"],self["job_salary"],self["job_location"],
            self["job_experience"],self["job_education"],self["job_quantity"],self["job_jd"],self["flag"],self["crawl_date"],
        )
        return insert_sql, params

class JobPositionSpiderItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    job_id = scrapy.Field()
    job_pubDate = scrapy.Field()
    job_location = scrapy.Field()
    job_detailUrl = scrapy.Field()
    company_name = scrapy.Field()
    company_industry = scrapy.Field()
    company_property = scrapy.Field()
    company_scale = scrapy.Field()
    company_location = scrapy.Field()
    company_detailUrl = scrapy.Field()
    job_salary = scrapy.Field()
    job_name = scrapy.Field()
    job_tag = scrapy.Field()
    job_jd = scrapy.Field()
    crawl_date = scrapy.Field()
    flag = scrapy.Field()
    job_experience = scrapy.Field()
    job_education = scrapy.Field()
    job_quantity = scrapy.Field()

    def get_insert_sql(self):
        insert_sql = """
                       insert into jobspider_51job(job_id, job_pubDate, job_location, job_detailUrl, company_name, company_industry, 
                       company_property, company_scale, company_location, company_detailUrl, job_salary, job_name, job_tag, job_jd, 
                       crawl_date, flag, job_experience, job_education, job_quantity)
                       VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                       ON DUPLICATE KEY UPDATE job_pubDate=VALUES(job_pubDate), job_location=VALUES(job_location), 
                        job_detailUrl=VALUES(job_detailUrl), company_name=VALUES(company_name), company_industry=VALUES(company_industry), 
                        company_property=VALUES(company_property), company_scale=VALUES(company_scale), 
                        company_location=VALUES(company_location), company_detailUrl=VALUES(company_detailUrl), 
                        job_salary=VALUES(job_salary), job_name=VALUES(job_name),job_tag=VALUES(job_tag), 
                        job_jd=VALUES(job_jd), crawl_date=VALUES(crawl_date), flag=VALUES(flag),
                         job_experience=VALUES(job_experience), job_education=VALUES(job_education),job_quantity=VALUES(job_quantity)
                       """

        params = (
            self["job_id"], self["job_pubDate"], self["job_location"], self["job_detailUrl"], self["company_name"],
            self["company_industry"], self["company_property"], self["company_scale"], self["company_location"],
            self["company_detailUrl"], self["job_salary"], self["job_name"], self["job_tag"],
            self["job_jd"], self["crawl_date"], self["flag"], self["job_experience"],
            self["job_education"], self["job_quantity"]
        )
        return insert_sql, params

These are the two item classes defined in items.py; each one generates the insert statement for its own table.
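As a quick self-check (a sketch of my own, not part of the course code), each item can be filled with dummy strings to confirm that the number of %s placeholders in its statement matches the length of its params tuple:

# Sanity-check sketch: assumes it sits at the bottom of items.py, where both
# item classes are defined. Every %s placeholder should have a matching param.
if __name__ == "__main__":
    for item_cls in (ZhilianSpiderItem, JobPositionSpiderItem):
        item = item_cls()
        for field in item.fields:          # fill every declared field with a dummy string
            item[field] = "test"
        sql, params = item.get_insert_sql()
        print(item_cls.__name__, sql.count("%s"), len(params))  # the two counts should be equal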

import pymysql
from twisted.enterprise import adbapi


class MysqlTwistedPipline():
    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        dbparms = dict(
            host=settings["MYSQL_HOST"],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWORD'],
            charset=settings['MYSQL_CHARSET'],
            port=settings['MYSQL_PORT'],
            use_unicode=True,
            cursorclass=pymysql.cursors.DictCursor
        )

        dbpool = adbapi.ConnectionPool("pymysql", **dbparms)
        return cls(dbpool)

    def process_item(self, item, spider):
        query = self.dbpool.runInteraction(self.do_insert, item)
        query.addErrback(self.handle_error,)

    def handle_error(self, failure,):
        # handle exceptions raised by the asynchronous insert
        print (failure)

    def do_insert(self, cursor, item):
        insert_sql, params = item.get_insert_sql()
        cursor.execute(insert_sql, params)

This is the generic asynchronous insertion pipeline written in pipelines.py.
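One detail worth noting about this pipeline: adbapi.ConnectionPool("pymysql", **dbparms) forwards these keyword arguments verbatim to pymysql.connect each time it opens a connection, so any value pymysql expects as an integer (port above all) has to already be an int. A sketch of the same dict with an explicit cast, assuming MYSQL_PORT is defined in settings.py:

        # Sketch only: identical dbparms, but with the port cast explicitly in case
        # the setting comes back as a string (settings.getint("MYSQL_PORT") would
        # work as well). adbapi forwards these kwargs as-is to pymysql.connect.
        dbparms = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DBNAME"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWORD"],
            charset=settings["MYSQL_CHARSET"],
            port=int(settings["MYSQL_PORT"]),   # pymysql needs an int here
            use_unicode=True,
            cursorclass=pymysql.cursors.DictCursor,
        )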

The same insert statements work fine when inserting synchronously, but the asynchronous insertion raises the error from before. My table schema doesn't define any int-typed columns either. Teacher, please advise.
Error traceback:
C:\Users\AppData\Local\Programs\Python\Python35\lib\threading.py:914:_bootstrap_inner
C:\Users\AppData\Local\Programs\Python\Python35\lib\threading.py:862:run
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\_threads\_threadworker.py:46:work
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\_threads\_team.py:190:doWork
--- <exception caught here> ---
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\python\threadpool.py:250:inContext
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\python\threadpool.py:266:<lambda>
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\python\context.py:122:callWithContext
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\python\context.py:85:callWithContext
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\enterprise\adbapi.py:462:runInteraction
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\enterprise\adbapi.py:36:__init__
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\enterprise\adbapi.py:76:reconnect
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\enterprise\adbapi.py:429:connect
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\__init__.py:94:Connect
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py:327:__init__
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py:588:connect
]
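Reading the bottom of this traceback, every frame from adbapi.py:76:reconnect down to pymysql\connections.py:588:connect is connection setup, so the failure happens while the pool is opening a MySQL connection, before any insert SQL is executed. As far as I can tell, pymysql's connect() builds a "host:port" info string with a %d format, so a port value that arrives as a string is one plausible cause of a "%d format: a number is required, not str" TypeError at exactly that point. The same connection parameters can be tried on their own, outside Twisted (the host/db/user values below are placeholders standing in for whatever is in settings.py):

import pymysql

# Standalone connection test sketch; the literal values are placeholders for the
# real MYSQL_* settings. If this raises the same TypeError, the problem is in the
# connection kwargs (typically the port type), not in the insert SQL.
conn = pymysql.connect(
    host="127.0.0.1",
    db="jobspider",
    user="root",
    passwd="password",
    charset="utf8",
    port=3306,              # must be an int, not "3306"
    use_unicode=True,
    cursorclass=pymysql.cursors.DictCursor,
)
print("connected")
conn.close()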


1 Answer

慕侠2131314

Original poster

2018-11-25

Error message: [Failure instance: Traceback: <class 'TypeError'>: %d format: a number is required, not str

bobby
Add me on QQ, 442421039, and I'll take a look.
2018-11-26
