def _next_request(self, spider):
    """Run one scheduling pass for ``spider``.

    Returns immediately when there is no active slot or the engine is
    paused. Otherwise the pass has three steps:

    1. Call ``_next_request_from_scheduler`` repeatedly until it returns
       a falsy value or ``_needs_backout`` reports true.
    2. Pull at most one request from ``slot.start_requests`` and hand it
       to ``crawl``. When the iterator is exhausted or raises,
       ``slot.start_requests`` is reset to ``None``; any error other
       than ``StopIteration`` is also logged.
    3. Call ``_spider_idle`` when ``spider_is_idle`` is true and the
       slot has ``close_if_idle`` set.
    """
    active_slot = self.slot
    if not active_slot or self.paused:
        return

    # 1. Network download: keep pulling from the scheduler.
    # NOTE(review): per the original comment, _needs_backout is the
    # concurrency throttle (request/response limits) -- confirm against
    # its definition.
    while True:
        if self._needs_backout(spider) or not self._next_request_from_scheduler(spider):
            break

    # 2. Feed the spider's own start_requests, one request per pass.
    if active_slot.start_requests and not self._needs_backout(spider):
        try:
            seed_request = next(active_slot.start_requests)
        except Exception as err:
            # Exhausted or broken either way: stop consulting the iterator.
            active_slot.start_requests = None
            # Plain exhaustion (StopIteration) is expected and stays silent.
            if not isinstance(err, StopIteration):
                logger.error('Error while obtaining start requests',
                             exc_info=True, extra={'spider': spider})
        else:
            self.crawl(seed_request, spider)

    # 3. Nothing left to do for this spider: run the idle handling.
    # NOTE(review): the original comment says this is where new redis
    # scheduling gets started -- presumably by an idle handler; confirm.
    if self.spider_is_idle(spider) and active_slot.close_if_idle:
        self._spider_idle(spider)