# -*- coding: utf-8 -*-
import random
import sys
import threading

import pymongo
import pymysql
import redis
from openpyxl import Workbook
from twisted.enterprise import adbapi

import scrapy
from scrapy import log
from scrapy.conf import settings
from scrapy.exceptions import DropItem
from scrapy.pipelines.images import ImagesPipeline

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html

# Create MongoPipeline as a singleton
Lock = threading.Lock()

class MongoPipeline(object):
    # Static variable that holds the singleton instance
    __instance = None

    def __init__(self):
        pass

    def __new__(cls, *args, **kwargs):
        # Double-checked locking: test once without the lock, then again
        # inside it, so only the first caller pays for the lock and the
        # MongoClient construction.
        if not cls.__instance:
            try:
                Lock.acquire()
                if not cls.__instance:
                    cls.client = pymongo.MongoClient(settings['MONGO_URI'])
                    cls.db = cls.client[settings['MONGO_DATABASE']]
                    # object.__new__() rejects extra arguments, so *args and
                    # **kwargs must not be forwarded here
                    cls.__instance = super(MongoPipeline, cls).__new__(cls)
            finally:
                Lock.release()
        return cls.__instance
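    # Illustrative check (a sketch, not part of the original pipeline):
    # every construction returns the same shared instance, so all callers
    # reuse one MongoClient connection pool. Assumes MONGO_URI and
    # MONGO_DATABASE are defined in the project's settings.py.
    #
    #   p1 = MongoPipeline()
    #   p2 = MongoPipeline()
    #   assert p1 is p2          # same object
    #   assert p1.db is p2.db    # same Database handle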
    def drop_collection(self, db_name):
        # Drop the whole collection (despite the parameter name, db_name
        # selects a collection on the shared database)
        return self.db[db_name].drop()
    def ensure_index(self, db_name, unique_id):
        # Create a unique index on the given field
        return self.db[db_name].ensure_index(unique_id, unique=True)

    def insert(self, items, db_name):
        # Insert a single document or a list of documents
        self.db[db_name].insert(items)

    def find(self, db_name, conditions, return_range):
        # Query with a filter dict and a projection of the fields to return
        return self.db[db_name].find(conditions, return_range)

    def update(self, db_name, conditions, info):
        # Update the first matching document; do nothing if none matches
        return self.db[db_name].update(conditions, info, upsert=False)

    def upsert(self, db_name, conditions, info):
        # Update the first matching document, inserting it if none matches
        return self.db[db_name].update(conditions, info, upsert=True)

    def update_many(self, db_name, conditions, info):
        # Update every document that matches the filter
        return self.db[db_name].update_many(conditions, info, upsert=False)
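
# Usage sketch (an illustration, not from the original file): because the
# pipeline is a singleton, other pipelines or spiders can reuse the shared
# Mongo handle directly. The collection name 'news' and the field 'url'
# below are assumptions for illustration only.
#
#   mongo = MongoPipeline()
#   mongo.ensure_index('news', 'url')                        # unique key per article URL
#   mongo.upsert('news', {'url': item['url']}, dict(item))   # insert or refresh the item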