Karim shoair committed on
Commit ·
168160d
1
Parent(s): 979d1e6
fix(spiders engine): Improve logging
Browse files
scrapling/spiders/engine.py
CHANGED
|
@@ -85,7 +85,7 @@ class CrawlerEngine:
|
|
| 85 |
retry_request.dont_filter = True
|
| 86 |
new_request = await self.spider.retry_blocked_request(retry_request, response)
|
| 87 |
await self.scheduler.enqueue(new_request)
|
| 88 |
-
log.
|
| 89 |
f"Scheduled blocked request for retry ({retry_request._retry_count}/{self.spider.max_blocked_retries}): {request.url}"
|
| 90 |
)
|
| 91 |
else:
|
|
@@ -108,6 +108,8 @@ class CrawlerEngine:
|
|
| 108 |
await self._item_stream.send(result)
|
| 109 |
await self.spider.on_scraped_item(result)
|
| 110 |
log.debug(f"Scraped from {str(response)}\n{result}")
|
|
|
|
|
|
|
| 111 |
except Exception as e:
|
| 112 |
await self.spider.on_error(request, e)
|
| 113 |
|
|
|
|
| 85 |
retry_request.dont_filter = True
|
| 86 |
new_request = await self.spider.retry_blocked_request(retry_request, response)
|
| 87 |
await self.scheduler.enqueue(new_request)
|
| 88 |
+
log.info(
|
| 89 |
f"Scheduled blocked request for retry ({retry_request._retry_count}/{self.spider.max_blocked_retries}): {request.url}"
|
| 90 |
)
|
| 91 |
else:
|
|
|
|
| 108 |
await self._item_stream.send(result)
|
| 109 |
await self.spider.on_scraped_item(result)
|
| 110 |
log.debug(f"Scraped from {str(response)}\n{result}")
|
| 111 |
+
elif result is not None:
|
| 112 |
+
log.error(f"Spider must return Request, dict or None, got '{type(result)}' in {request}")
|
| 113 |
except Exception as e:
|
| 114 |
await self.spider.on_error(request, e)
|
| 115 |
|