Karim shoair committed on
Commit ·
e251ff4
1
Parent(s): 337bce0
fix: make `retry_blocked_request` take the response as well
Browse files
scrapling/spiders/engine.py
CHANGED
|
@@ -82,7 +82,7 @@ class CrawlerEngine:
|
|
| 82 |
retry_request._retry_count += 1
|
| 83 |
retry_request.priority -= 1 # Don't retry immediately
|
| 84 |
retry_request.dont_filter = True
|
| 85 |
-
new_request = await self.spider.retry_blocked_request(retry_request)
|
| 86 |
await self.scheduler.enqueue(new_request)
|
| 87 |
log.debug(
|
| 88 |
f"Scheduled blocked request for retry ({retry_request._retry_count}/{self.spider.max_blocked_retries}): {request.url}"
|
|
|
|
| 82 |
retry_request._retry_count += 1
|
| 83 |
retry_request.priority -= 1 # Don't retry immediately
|
| 84 |
retry_request.dont_filter = True
|
| 85 |
+
new_request = await self.spider.retry_blocked_request(retry_request, response)
|
| 86 |
await self.scheduler.enqueue(new_request)
|
| 87 |
log.debug(
|
| 88 |
f"Scheduled blocked request for retry ({retry_request._retry_count}/{self.spider.max_blocked_retries}): {request.url}"
|
scrapling/spiders/spider.py
CHANGED
|
@@ -162,7 +162,7 @@ class Spider(ABC):
|
|
| 162 |
return True
|
| 163 |
return False
|
| 164 |
|
| 165 |
-
async def retry_blocked_request(self, request: Request) -> Request:
|
| 166 |
"""Users should override this to prepare the blocked request before retrying, if needed."""
|
| 167 |
return request
|
| 168 |
|
|
|
|
| 162 |
return True
|
| 163 |
return False
|
| 164 |
|
| 165 |
+
async def retry_blocked_request(self, request: Request, response: "Response") -> Request:
|
| 166 |
"""Users should override this to prepare the blocked request before retrying, if needed."""
|
| 167 |
return request
|
| 168 |
|