Spaces:
Running
Running
elfsong committed on
Commit ·
5e842ff
1
Parent(s): 99527b8
feat: add --force flag to allow re-computing and overwriting existing snapshots
Browse files
src/lifecycle_retrieve.py
CHANGED
|
@@ -286,10 +286,11 @@ def push_lifecycle_to_hf(lifecycle_en: dict, lifecycle_zh: dict,
|
|
| 286 |
# Run one snapshot
|
| 287 |
# ---------------------------------------------------------------------------
|
| 288 |
def run_snapshot(snapshot_month: str, all_papers: list[dict],
|
| 289 |
-
existing_splits: set[str], no_push: bool = False):
|
|
|
|
| 290 |
split_name = _snapshot_to_split(snapshot_month)
|
| 291 |
|
| 292 |
-
if split_name in existing_splits:
|
| 293 |
print(f" {_GRAY}⊘ {snapshot_month} — already on HF, skipping{_RESET}")
|
| 294 |
return
|
| 295 |
|
|
@@ -332,6 +333,8 @@ def main():
|
|
| 332 |
help="Compute all missing bimonthly snapshots")
|
| 333 |
parser.add_argument("--no-push", action="store_true",
|
| 334 |
help="Skip pushing results to HuggingFace")
|
|
|
|
|
|
|
| 335 |
args = parser.parse_args()
|
| 336 |
|
| 337 |
print(f"\n {_BOLD}📊 Lifecycle Snapshot Retriever{_RESET}\n")
|
|
@@ -386,7 +389,8 @@ def main():
|
|
| 386 |
print(f" {_DIM}Snapshots to process: {len(snapshots)}{_RESET}\n")
|
| 387 |
|
| 388 |
for snapshot in snapshots:
|
| 389 |
-
run_snapshot(snapshot, all_papers, existing_splits, no_push=args.no_push)
|
|
|
|
| 390 |
|
| 391 |
print(f"\n {_GREEN}{_BOLD}✓{_RESET} Done\n")
|
| 392 |
|
|
|
|
| 286 |
# Run one snapshot
|
| 287 |
# ---------------------------------------------------------------------------
|
| 288 |
def run_snapshot(snapshot_month: str, all_papers: list[dict],
|
| 289 |
+
existing_splits: set[str], no_push: bool = False,
|
| 290 |
+
force: bool = False):
|
| 291 |
split_name = _snapshot_to_split(snapshot_month)
|
| 292 |
|
| 293 |
+
if split_name in existing_splits and not force:
|
| 294 |
print(f" {_GRAY}⊘ {snapshot_month} — already on HF, skipping{_RESET}")
|
| 295 |
return
|
| 296 |
|
|
|
|
| 333 |
help="Compute all missing bimonthly snapshots")
|
| 334 |
parser.add_argument("--no-push", action="store_true",
|
| 335 |
help="Skip pushing results to HuggingFace")
|
| 336 |
+
parser.add_argument("--force", action="store_true",
|
| 337 |
+
help="Re-compute and overwrite existing snapshots")
|
| 338 |
args = parser.parse_args()
|
| 339 |
|
| 340 |
print(f"\n {_BOLD}📊 Lifecycle Snapshot Retriever{_RESET}\n")
|
|
|
|
| 389 |
print(f" {_DIM}Snapshots to process: {len(snapshots)}{_RESET}\n")
|
| 390 |
|
| 391 |
for snapshot in snapshots:
|
| 392 |
+
run_snapshot(snapshot, all_papers, existing_splits,
|
| 393 |
+
no_push=args.no_push, force=args.force)
|
| 394 |
|
| 395 |
print(f"\n {_GREEN}{_BOLD}✓{_RESET} Done\n")
|
| 396 |
|