Karim shoair committed on
Commit ·
47633d8
1
Parent(s): 98a7a0d
fix(spiders): handle errors with manual pause during stream mode
Browse files
scrapling/spiders/engine.py
CHANGED
|
@@ -207,6 +207,7 @@ class CrawlerEngine:
|
|
| 207 |
self._items.clear()
|
| 208 |
self.paused = False
|
| 209 |
self._pause_requested = False
|
|
|
|
| 210 |
self.stats = CrawlStats(start_time=anyio.current_time())
|
| 211 |
|
| 212 |
# Check for existing checkpoint
|
|
|
|
| 207 |
self._items.clear()
|
| 208 |
self.paused = False
|
| 209 |
self._pause_requested = False
|
| 210 |
+
self._force_stop = False
|
| 211 |
self.stats = CrawlStats(start_time=anyio.current_time())
|
| 212 |
|
| 213 |
# Check for existing checkpoint
|
scrapling/spiders/spider.py
CHANGED
|
@@ -211,7 +211,9 @@ class Spider(ABC):
|
|
| 211 |
manager.add("default", FetcherSession())
|
| 212 |
|
| 213 |
def pause(self):
|
| 214 |
-
"""Pause the crawling process
|
|
|
|
|
|
|
| 215 |
if self._engine:
|
| 216 |
self._engine.request_pause()
|
| 217 |
else:
|
|
|
|
| 211 |
manager.add("default", FetcherSession())
|
| 212 |
|
| 213 |
def pause(self):
|
| 214 |
+
"""Pause the crawling process. Requires crawldir to be set for checkpoint system."""
|
| 215 |
+
if not self.crawldir:
|
| 216 |
+
raise RuntimeError("Cannot pause without crawldir - checkpoint system not enabled")
|
| 217 |
if self._engine:
|
| 218 |
self._engine.request_pause()
|
| 219 |
else:
|