# python / xueqiu
							# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import os

from pymongo import MongoClient


class XueqiuPipeline(object):
    """Scrapy item pipeline that stores scraped items in MongoDB.

    The client, database and collection handles are created once, when the
    class body is executed, and are shared by every pipeline instance as
    class attributes.  Settings are read from environment variables; an
    unset or empty variable falls back to the default shown:

    * ``MONGO_HOST`` -- server host (default ``'localhost'``)
    * ``MONGO_PORT`` -- server port (default ``27017``)
    * ``MONGO_DB`` -- database name (default ``'crawlab_test'``)
    * ``CRAWLAB_COLLECTION`` -- collection name
      (default ``'results_xueqiu'``)
    """

    mongo = MongoClient(
        host=os.environ.get('MONGO_HOST') or 'localhost',
        port=int(os.environ.get('MONGO_PORT') or 27017)
    )
    db = mongo[os.environ.get('MONGO_DB') or 'crawlab_test']
    col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'results_xueqiu')

    # create indexes
    col.create_index('stocks')
    col.create_index('id')
    col.create_index('url')

    def process_item(self, item, spider):
        """Tag *item* with the current task id and store it if it is new.

        An item counts as new when the collection holds no document with the
        same ``id`` value.

        Args:
            item: The scraped item; must provide an ``'id'`` key and accept
                a ``'task_id'`` key.
            spider: The spider that produced the item (unused).

        Returns:
            The item itself, whether or not it was written, so that later
            pipeline stages always receive an item instead of ``None``.

        Raises:
            KeyError: If *item* has no ``'id'`` key.
        """
        item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
        if self.col.find_one({'id': item['id']}) is None:
            # Collection.save() was deprecated in PyMongo 3.0 and removed in
            # 4.0.  A plain insert is sufficient here because the lookup
            # above found no existing document with this id.
            self.col.insert_one(item)
        # Return the item on both paths: Scrapy expects process_item to
        # return an item (or raise DropItem), never None.
        return item