1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import json
import logging

from twisted.enterprise import adbapi
# NOTE: MySQLdb (the Python MySQL driver) must be installed before these
# pipelines can be used.
import MySQLdb

_logger = logging.getLogger(__name__)

# Item fields written to the database, in the column order of the INSERT.
_ITEM_FIELDS = ('Url', 'Title', 'Image_Url', 'Student', 'Introduction')

# The statement relies on the positional column order of the `books` table.
# NOTE(review): the column names are not visible from this file, so the
# column list is left implicit -- confirm against the table schema.
_INSERT_SQL = 'INSERT INTO books VALUES (%s,%s,%s,%s,%s)'


def _mysql_params(settings):
    """Build the MySQLdb connection keyword arguments from spider settings.

    Every value falls back to the default that was previously hard-coded, so
    a project that defines none of these settings connects exactly as before.
    ``MYSQL_PORT`` is optional and defaults to 3306.
    """
    return {
        'host': settings.get('MYSQL_HOST', 'localhost'),
        'port': int(settings.get('MYSQL_PORT', 3306)),
        'db': settings.get('MYSQL_DB_NAME', 'python_data'),
        'user': settings.get('MYSQL_DB_USER', 'root'),
        'passwd': settings.get('MYSQL_DB_PASSWORD', 'root'),
        'charset': 'utf8',
    }


def _item_values(item):
    """Return the item's field values as a tuple matching ``_INSERT_SQL``.

    Raises:
        KeyError: if the item lacks one of ``_ITEM_FIELDS``.
    """
    return tuple(item[field] for field in _ITEM_FIELDS)


class MysqlPipeline(object):
    """Item pipeline that inserts items through a Twisted connection pool.

    Inserts are scheduled on the pool via ``runInteraction`` and the item is
    handed on to the next pipeline stage without waiting for the insert.
    """

    def open_spider(self, spider):
        """Create the connection pool when the spider is opened."""
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb', **_mysql_params(spider.settings))

    def close_spider(self, spider):
        """Shut the connection pool down when the spider is closed."""
        self.dbpool.close()

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and pass the item on.

        Returns:
            The unmodified ``item``. Returning it is required so that later
            pipeline stages and feed exporters still receive it.
        """
        deferred = self.dbpool.runInteraction(self.insert_db, item)
        # Without an errback a failed insert would only surface as an
        # unhandled error in the Deferred.
        deferred.addErrback(self._log_insert_failure)
        return item

    def insert_db(self, tx, item):
        """Execute the INSERT for ``item`` on the pool-provided cursor ``tx``."""
        tx.execute(_INSERT_SQL, _item_values(item))

    def _log_insert_failure(self, failure):
        """Log a failed insert; the failure is consumed, not re-raised."""
        _logger.error('MySQL insert failed: %s', failure.getErrorMessage())


class MysqlPipeline1(object):
    """Item pipeline that inserts items over one plain MySQLdb connection."""

    def open_spider(self, spider):
        """Open the connection and a cursor when the spider is opened."""
        self.db_conn = MySQLdb.connect(**_mysql_params(spider.settings))
        self.db_cur = self.db_conn.cursor()

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider is closed."""
        self.db_cur.close()
        self.db_conn.close()

    def process_item(self, item, spider):
        """Insert ``item`` and commit; on any error log it and roll back.

        Insertion is best-effort: a failure never stops the crawl and the
        item is always returned to the next pipeline stage.
        """
        try:
            self.insert_db(item)
            self.db_conn.commit()
        except Exception:
            _logger.exception('MySQL insert failed')
            self._rollback_quietly()
        return item

    def insert_db(self, item):
        """Execute the INSERT for ``item`` on the pipeline's own cursor."""
        self.db_cur.execute(_INSERT_SQL, _item_values(item))

    def _rollback_quietly(self):
        """Roll back the open transaction, logging a failed rollback."""
        try:
            self.db_conn.rollback()
        except MySQLdb.Error:
            # A broken connection must not turn best-effort into a crash.
            _logger.exception('MySQL rollback failed')
|