Spaces:
Running
Running
Remove timed incomplete flush config
Browse files - crawler/config.py +0 -4
crawler/config.py
CHANGED
|
@@ -52,7 +52,6 @@ class CrawlerConfig:
|
|
| 52 |
|
| 53 |
enable_hf_upload: bool = False
|
| 54 |
upload_incomplete_shards: bool = False
|
| 55 |
-
incomplete_shard_flush_seconds: float = 30.0
|
| 56 |
hf_repo_id: str = ""
|
| 57 |
hf_token: str = ""
|
| 58 |
hf_repo_type: str = "dataset"
|
|
@@ -92,9 +91,6 @@ class CrawlerConfig:
|
|
| 92 |
self.hf_repo_id = self.hf_repo_id.strip()
|
| 93 |
self.hf_token = self.hf_token.strip()
|
| 94 |
self.hf_path_prefix = self.hf_path_prefix.strip() or "crawl_shards"
|
| 95 |
-
self.incomplete_shard_flush_seconds = float(self.incomplete_shard_flush_seconds)
|
| 96 |
-
if self.incomplete_shard_flush_seconds <= 0:
|
| 97 |
-
raise ValueError("incomplete_shard_flush_seconds must be > 0.")
|
| 98 |
|
| 99 |
if self.enable_hf_upload:
|
| 100 |
if not self.hf_repo_id:
|
|
|
|
| 52 |
|
| 53 |
enable_hf_upload: bool = False
|
| 54 |
upload_incomplete_shards: bool = False
|
|
|
|
| 55 |
hf_repo_id: str = ""
|
| 56 |
hf_token: str = ""
|
| 57 |
hf_repo_type: str = "dataset"
|
|
|
|
| 91 |
self.hf_repo_id = self.hf_repo_id.strip()
|
| 92 |
self.hf_token = self.hf_token.strip()
|
| 93 |
self.hf_path_prefix = self.hf_path_prefix.strip() or "crawl_shards"
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
if self.enable_hf_upload:
|
| 96 |
if not self.hf_repo_id:
|