Async MySQL insertion fails and I'm stumped; asking the teacher for guidance (continued)
Source: 6-19 Saving data to MySQL - 1
慕侠2131314
2018-11-25
import scrapy

class ZhilianSpiderItem(scrapy.Item):
    job_pubDate = scrapy.Field()
    job_refresh = scrapy.Field()
    job_endDate = scrapy.Field()
    job_tag = scrapy.Field()
    job_id = scrapy.Field()
    job_detailUrl = scrapy.Field()
    company_name = scrapy.Field()
    company_industry = scrapy.Field()
    company_property = scrapy.Field()
    company_scale = scrapy.Field()
    company_location = scrapy.Field()
    company_detailUrl = scrapy.Field()
    job_name = scrapy.Field()
    job_salary = scrapy.Field()
    job_location = scrapy.Field()
    job_experience = scrapy.Field()
    job_education = scrapy.Field()
    job_quantity = scrapy.Field()
    job_jd = scrapy.Field()
    flag = scrapy.Field()
    crawl_date = scrapy.Field()

    def get_insert_sql(self):
        # Build an upsert statement plus the matching parameter tuple;
        # the pipeline passes both straight to cursor.execute().
        insert_sql = """
            insert into jobspider_zhilian(job_pubDate, job_refresh, job_endDate, job_tag, job_id, job_detailUrl, company_name,
                company_industry, company_property, company_scale, company_location, company_detailUrl, job_name, job_salary,
                job_location, job_experience, job_education, job_quantity, job_jd, flag, crawl_date)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE job_pubDate=VALUES(job_pubDate), job_refresh=VALUES(job_refresh), job_endDate=VALUES(job_endDate),
                job_tag=VALUES(job_tag), job_detailUrl=VALUES(job_detailUrl), company_name=VALUES(company_name),
                company_industry=VALUES(company_industry), company_property=VALUES(company_property), company_scale=VALUES(company_scale),
                company_location=VALUES(company_location), company_detailUrl=VALUES(company_detailUrl), job_name=VALUES(job_name),
                job_salary=VALUES(job_salary), job_location=VALUES(job_location), job_experience=VALUES(job_experience),
                job_education=VALUES(job_education), job_quantity=VALUES(job_quantity), job_jd=VALUES(job_jd),
                flag=VALUES(flag), crawl_date=VALUES(crawl_date)
        """
        params = (
            self["job_pubDate"], self["job_refresh"], self["job_endDate"], self["job_tag"], self["job_id"],
            self["job_detailUrl"], self["company_name"], self["company_industry"], self["company_property"], self["company_scale"],
            self["company_location"], self["company_detailUrl"], self["job_name"], self["job_salary"], self["job_location"],
            self["job_experience"], self["job_education"], self["job_quantity"], self["job_jd"], self["flag"], self["crawl_date"],
        )
        return insert_sql, params
class JobPositionSpiderItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    job_id = scrapy.Field()
    job_pubDate = scrapy.Field()
    job_location = scrapy.Field()
    job_detailUrl = scrapy.Field()
    company_name = scrapy.Field()
    company_industry = scrapy.Field()
    company_property = scrapy.Field()
    company_scale = scrapy.Field()
    company_location = scrapy.Field()
    company_detailUrl = scrapy.Field()
    job_salary = scrapy.Field()
    job_name = scrapy.Field()
    job_tag = scrapy.Field()
    job_jd = scrapy.Field()
    crawl_date = scrapy.Field()
    flag = scrapy.Field()
    job_experience = scrapy.Field()
    job_education = scrapy.Field()
    job_quantity = scrapy.Field()

    def get_insert_sql(self):
        insert_sql = """
            insert into jobspider_51job(job_id, job_pubDate, job_location, job_detailUrl, company_name, company_industry,
                company_property, company_scale, company_location, company_detailUrl, job_salary, job_name, job_tag, job_jd,
                crawl_date, flag, job_experience, job_education, job_quantity)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE job_pubDate=VALUES(job_pubDate), job_location=VALUES(job_location),
                job_detailUrl=VALUES(job_detailUrl), company_name=VALUES(company_name), company_industry=VALUES(company_industry),
                company_property=VALUES(company_property), company_scale=VALUES(company_scale),
                company_location=VALUES(company_location), company_detailUrl=VALUES(company_detailUrl),
                job_salary=VALUES(job_salary), job_name=VALUES(job_name), job_tag=VALUES(job_tag),
                job_jd=VALUES(job_jd), crawl_date=VALUES(crawl_date), flag=VALUES(flag),
                job_experience=VALUES(job_experience), job_education=VALUES(job_education), job_quantity=VALUES(job_quantity)
        """
        params = (
            self["job_id"], self["job_pubDate"], self["job_location"], self["job_detailUrl"], self["company_name"],
            self["company_industry"], self["company_property"], self["company_scale"], self["company_location"],
            self["company_detailUrl"], self["job_salary"], self["job_name"], self["job_tag"],
            self["job_jd"], self["crawl_date"], self["flag"], self["job_experience"],
            self["job_education"], self["job_quantity"]
        )
        return insert_sql, params
These are the two item classes defined in items.py, one per target table; each builds its own insert statement. (A standalone check of them is sketched below.)
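Since the post says these same statements succeed when run synchronously, a quick standalone check outside Twisted can confirm the SQL/params pairing on its own. This is only a sketch: the connection values below are placeholder assumptions, not taken from the post, and it assumes every column accepts a string (which the post's schema description suggests).

# Standalone synchronous check of get_insert_sql(); all connection values
# here are placeholder assumptions, not taken from the post.
import pymysql

item = ZhilianSpiderItem()
for field in item.fields:
    item[field] = "test"  # dummy string for every column

conn = pymysql.connect(host="localhost", user="root", passwd="password",
                       db="jobspider", charset="utf8", port=3306)
try:
    insert_sql, params = item.get_insert_sql()
    with conn.cursor() as cursor:
        cursor.execute(insert_sql, params)
    conn.commit()
finally:
    conn.close()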
import pymysql
import pymysql.cursors
from twisted.enterprise import adbapi

class MysqlTwistedPipline():
    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        # Every value here is handed to pymysql.connect() unchanged
        dbparms = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DBNAME"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWORD"],
            charset=settings["MYSQL_CHARSET"],
            port=settings["MYSQL_PORT"],
            use_unicode=True,
            cursorclass=pymysql.cursors.DictCursor,
        )
        dbpool = adbapi.ConnectionPool("pymysql", **dbparms)
        return cls(dbpool)

    def process_item(self, item, spider):
        # Run the insert on a pooled connection in a worker thread
        query = self.dbpool.runInteraction(self.do_insert, item)
        query.addErrback(self.handle_error)
        return item  # keep the item flowing to any later pipelines

    def handle_error(self, failure):
        # Handle exceptions raised by the async insert
        print(failure)

    def do_insert(self, cursor, item):
        insert_sql, params = item.get_insert_sql()
        cursor.execute(insert_sql, params)
This is the generic asynchronous insertion pipeline in pipelines.py.
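For reference, the pipeline reads its connection parameters from settings.py, which the post does not show; the values below are therefore assumptions. Everything here is passed to pymysql.connect() unchanged, so the types matter:

# settings.py -- assumed values, not taken from the post
MYSQL_HOST = "127.0.0.1"
MYSQL_DBNAME = "jobspider"
MYSQL_USER = "root"
MYSQL_PASSWORD = "password"
MYSQL_CHARSET = "utf8"
MYSQL_PORT = 3306  # pymysql expects an int; a quoted "3306" would be a str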
The very same insert statements work without a problem when inserting synchronously, yet the asynchronous version raises the error from my previous post. My table schema doesn't define any int-typed columns either. Teacher, please advise.
Error output:
C:\Users\AppData\Local\Programs\Python\Python35\lib\threading.py:914:_bootstrap_inner
C:\Users\AppData\Local\Programs\Python\Python35\lib\threading.py:862:run
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\_threads\_threadworker.py:46:work
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\_threads\_team.py:190:doWork
--- <exception caught here> ---
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\python\threadpool.py:250:inContext
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\python\threadpool.py:266:<lambda>
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\python\context.py:122:callWithContext
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\python\context.py:85:callWithContext
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\enterprise\adbapi.py:462:runInteraction
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\enterprise\adbapi.py:36:__init__
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\enterprise\adbapi.py:76:reconnect
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\twisted\enterprise\adbapi.py:429:connect
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\__init__.py:94:Connect
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py:327:__init__
C:\Users\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py:588:connect
]
1 Answer

慕侠2131314 (original poster)
2018-11-25
Error line: [Failure instance: Traceback: <class 'TypeError'>: %d format: a number is required, not str
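Not a confirmed answer, but one observation: the traceback above ends inside pymysql's connect path (connections.py), not inside cursor.execute(), so the connection itself is failing before any INSERT runs. During connect(), pymysql formats the port into a string with %d, so a MYSQL_PORT defined as a string in settings.py would raise exactly this TypeError. A minimal sketch of a guard, assuming that is the cause:

import pymysql
import pymysql.cursors
from twisted.enterprise import adbapi

# Hedged sketch of the pipeline above: identical except that the port is
# coerced to int, assuming MYSQL_PORT was written as a string in settings.py
# (the settings themselves are not shown in the post).
class MysqlTwistedPipline():
    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        dbparms = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DBNAME"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWORD"],
            charset=settings["MYSQL_CHARSET"],
            port=int(settings["MYSQL_PORT"]),  # pymysql formats this with %d at connect time
            use_unicode=True,
            cursorclass=pymysql.cursors.DictCursor,
        )
        dbpool = adbapi.ConnectionPool("pymysql", **dbparms)
        return cls(dbpool)

Alternatively, writing MYSQL_PORT = 3306 (unquoted) in settings.py avoids the coercion entirely; either way the number reaches pymysql as an int.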