Here is a solution that collects all of the spider's output/results in a list:
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from scrapy.signalmanager import dispatcher
def spider_results():
    """Run ``MySpider`` to completion and return every item it scraped.

    A handler is connected to the ``item_scraped`` signal and appends each
    item it receives to a list; that list is returned once
    ``process.start()`` has returned.

    Returns:
        list: The scraped items, in the order the handler received them.
    """
    results = []

    # NOTE(review): the dispatcher presumably matches handler arguments by
    # name, so keep these parameter names exactly as they are - confirm
    # before renaming or trimming the unused ones.
    def crawler_results(signal, sender, item, response, spider):
        results.append(item)

    dispatcher.connect(crawler_results, signal=signals.item_scraped)

    process = CrawlerProcess(get_project_settings())
    # NOTE(review): MySpider is neither defined nor imported in the visible
    # part of this file; it must be brought into scope for this to run.
    process.crawl(MySpider)
    process.start()  # the script will block here until the crawling is finished
    return results
if __name__ == '__main__':
    # When executed as a script, run the crawl and print the collected items.
    print(spider_results())
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…