Karim shoair committed on
Commit
47633d8
·
1 Parent(s): 98a7a0d

fix(spiders): handle errors with manual pause during stream mode

Browse files
scrapling/spiders/engine.py CHANGED
@@ -207,6 +207,7 @@ class CrawlerEngine:
207
  self._items.clear()
208
  self.paused = False
209
  self._pause_requested = False
 
210
  self.stats = CrawlStats(start_time=anyio.current_time())
211
 
212
  # Check for existing checkpoint
 
207
  self._items.clear()
208
  self.paused = False
209
  self._pause_requested = False
210
+ self._force_stop = False
211
  self.stats = CrawlStats(start_time=anyio.current_time())
212
 
213
  # Check for existing checkpoint
scrapling/spiders/spider.py CHANGED
@@ -211,7 +211,9 @@ class Spider(ABC):
211
  manager.add("default", FetcherSession())
212
 
213
  def pause(self):
214
- """Pause the crawling process if checkpoint system is enabled."""
 
 
215
  if self._engine:
216
  self._engine.request_pause()
217
  else:
 
211
  manager.add("default", FetcherSession())
212
 
213
  def pause(self):
214
+ """Pause the crawling process. Requires crawldir to be set for checkpoint system."""
215
+ if not self.crawldir:
216
+ raise RuntimeError("Cannot pause without crawldir - checkpoint system not enabled")
217
  if self._engine:
218
  self._engine.request_pause()
219
  else: