Python scrapy.crawler module: Crawler() source code examples

The following 4 code examples, extracted from open-source Python projects, illustrate how to use scrapy.crawler.Crawler().
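
Before the project excerpts, here is a minimal, self-contained sketch of constructing a Crawler directly; the MySpider class is a placeholder invented for illustration, not taken from any of the projects below:

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.settings import Settings

class MySpider(Spider):
    # placeholder spider for illustration only
    name = 'example'

# A Crawler binds a spider class to a Settings object; stats and
# signals are exposed as attributes of the crawler instance.
crawler = Crawler(MySpider, settings=Settings())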

Project: scrappy    Author: DormyMo    | Project source | File source
import datetime
import decimal
import json

from twisted.internet import defer
from scrapy.crawler import Crawler
from scrapy.http import Request, Response
from scrapy.item import BaseItem


class ScrapyJSONEncoder(json.JSONEncoder):
    # format constants as defined in scrapy.utils.serialize
    DATE_FORMAT = '%Y-%m-%d'
    TIME_FORMAT = '%H:%M:%S'

    def default(self, o):
        # serialize the Scrapy and Twisted types that the stock
        # JSONEncoder cannot handle
        if isinstance(o, datetime.datetime):
            return o.strftime("%s %s" % (self.DATE_FORMAT, self.TIME_FORMAT))
        elif isinstance(o, datetime.date):
            return o.strftime(self.DATE_FORMAT)
        elif isinstance(o, datetime.time):
            return o.strftime(self.TIME_FORMAT)
        elif isinstance(o, decimal.Decimal):
            return str(o)
        elif isinstance(o, defer.Deferred):
            return str(o)
        elif isinstance(o, BaseItem):
            return dict(o)
        elif isinstance(o, Request):
            return "<%s %s %s>" % (type(o).__name__, o.method, o.url)
        elif isinstance(o, Response):
            return "<%s %s %s>" % (type(o).__name__, o.status, o.url)
        elif isinstance(o, Crawler):
            # a Crawler is rendered as the dict of its collected stats
            return o.stats.get_stats()
        else:
            return super(ScrapyJSONEncoder, self).default(o)
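
As a quick usage sketch (assuming the encoder above is in scope), it plugs into the standard json API through the cls argument:

import json
import datetime

payload = {'fetched_at': datetime.datetime(2017, 1, 1, 12, 0)}
print(json.dumps(payload, cls=ScrapyJSONEncoder))
# {"fetched_at": "2017-01-01 12:00:00"}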
Project: Charlie    Author: nxintech    | Project source | File source
from twisted.internet import reactor
from scrapy import signals
from scrapy.crawler import Crawler
from scrapy.settings import Settings
from scrapy.utils.log import configure_logging

# JenkinsJobSpider, JsonWriterPipeline and callback are defined
# elsewhere in the project

def run_spider():
    settings = Settings()
    settings.set('ITEM_PIPELINES', {
        '__main__.JsonWriterPipeline': 100
    })

    # enable remote server certificate verification
    # see http://doc.scrapy.org/en/latest/topics/settings.html#downloader-clientcontextfactory
    settings.set('DOWNLOADER_CLIENTCONTEXTFACTORY',
                 'scrapy.core.downloader.contextfactory.BrowserLikeContextFactory')

    # uncomment the line below to enable logging for debugging
    # configure_logging()

    crawler = Crawler(JenkinsJobSpider, settings)
    crawler.signals.connect(callback, signal=signals.spider_closed)
    crawler.crawl()
    reactor.run()
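
The callback wired to spider_closed is not shown in the excerpt; a typical implementation (an assumption, not taken from the project) simply stops the Twisted reactor so that run_spider() returns once the spider finishes:

from twisted.internet import reactor

def callback(spider):
    # spider_closed handlers receive the closing spider; stopping the
    # reactor ends the reactor.run() call in run_spider()
    reactor.stop()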
Project: scrapy_rss    Author: woxcab    | Project source | File source
def test_spider_output_handling(self):
    # Scraper comes from scrapy.core.scraper; self.MySpider is defined
    # on the enclosing test case. Note that Crawler() normally takes a
    # spider *class*; this test passes the spider instance directly.
    spider = self.MySpider()
    scraper = Scraper(Crawler(spider))
    scraper.open_spider(spider)
    # feed one item of each supported type through the spider
    # middleware output chain
    scraper._process_spidermw_output(RssItem(), None, None, None)
    scraper._process_spidermw_output(ExtendableItem(), None, None, None)
    scraper._process_spidermw_output(RssedItem(), None, None, None)
    scraper.close_spider(spider)
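
The excerpt references self.MySpider, which lives on the enclosing test case; a minimal sketch of that scaffolding (names assumed, not from the project) could be:

import unittest
from scrapy import Spider

class ScraperTestCase(unittest.TestCase):
    class MySpider(Spider):
        # hypothetical stand-in for the project's test spider
        name = 'rss_test_spider'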
Project: domain-discovery-crawler    Author: TeamHG-Memex    | Project source | File source
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.utils.log import configure_logging

# BaseRequestQueue and SCHEDULER_QUEUE_KEY are project-specific,
# defined elsewhere in domain-discovery-crawler

logging_configured = False


def make_queue(redis_server, cls: type, slots=None, skip_cache=True,
               settings=None, hints=None) -> BaseRequestQueue:
    # configure logging only once per process
    global logging_configured
    if not logging_configured:
        configure_logging(settings=settings)
        logging_configured = True
    # build a throwaway crawler so the spider can be created through
    # the standard from_crawler path
    crawler = Crawler(Spider, settings=settings)
    if slots is None:
        slots = {}
    spider = Spider.from_crawler(crawler, 'test_dd_spider')
    if hints:
        spider.hint_urls = hints
    return cls(server=redis_server, spider=spider, key=SCHEDULER_QUEUE_KEY,
               slots_mock=slots, skip_cache=skip_cache)
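
The excerpt relies on Spider.from_crawler, Scrapy's standard way to build a spider instance bound to a crawler; a minimal, self-contained illustration of that binding:

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.settings import Settings

crawler = Crawler(Spider, settings=Settings())
spider = Spider.from_crawler(crawler, 'demo_spider')
# from_crawler both instantiates the spider and attaches the crawler
assert spider.name == 'demo_spider'
assert spider.crawler is crawler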