elfsong committed on
Commit
5e842ff
·
1 Parent(s): 99527b8

feat: add --force flag to allow re-computing and overwriting existing snapshots

Browse files
Files changed (1) hide show
  1. src/lifecycle_retrieve.py +7 -3
src/lifecycle_retrieve.py CHANGED
@@ -286,10 +286,11 @@ def push_lifecycle_to_hf(lifecycle_en: dict, lifecycle_zh: dict,
286
  # Run one snapshot
287
  # ---------------------------------------------------------------------------
288
  def run_snapshot(snapshot_month: str, all_papers: list[dict],
289
- existing_splits: set[str], no_push: bool = False):
 
290
  split_name = _snapshot_to_split(snapshot_month)
291
 
292
- if split_name in existing_splits:
293
  print(f" {_GRAY}⊘ {snapshot_month} — already on HF, skipping{_RESET}")
294
  return
295
 
@@ -332,6 +333,8 @@ def main():
332
  help="Compute all missing bimonthly snapshots")
333
  parser.add_argument("--no-push", action="store_true",
334
  help="Skip pushing results to HuggingFace")
 
 
335
  args = parser.parse_args()
336
 
337
  print(f"\n {_BOLD}📊 Lifecycle Snapshot Retriever{_RESET}\n")
@@ -386,7 +389,8 @@ def main():
386
  print(f" {_DIM}Snapshots to process: {len(snapshots)}{_RESET}\n")
387
 
388
  for snapshot in snapshots:
389
- run_snapshot(snapshot, all_papers, existing_splits, no_push=args.no_push)
 
390
 
391
  print(f"\n {_GREEN}{_BOLD}✓{_RESET} Done\n")
392
 
 
286
  # Run one snapshot
287
  # ---------------------------------------------------------------------------
288
  def run_snapshot(snapshot_month: str, all_papers: list[dict],
289
+ existing_splits: set[str], no_push: bool = False,
290
+ force: bool = False):
291
  split_name = _snapshot_to_split(snapshot_month)
292
 
293
+ if split_name in existing_splits and not force:
294
  print(f" {_GRAY}⊘ {snapshot_month} — already on HF, skipping{_RESET}")
295
  return
296
 
 
333
  help="Compute all missing bimonthly snapshots")
334
  parser.add_argument("--no-push", action="store_true",
335
  help="Skip pushing results to HuggingFace")
336
+ parser.add_argument("--force", action="store_true",
337
+ help="Re-compute and overwrite existing snapshots")
338
  args = parser.parse_args()
339
 
340
  print(f"\n {_BOLD}📊 Lifecycle Snapshot Retriever{_RESET}\n")
 
389
  print(f" {_DIM}Snapshots to process: {len(snapshots)}{_RESET}\n")
390
 
391
  for snapshot in snapshots:
392
+ run_snapshot(snapshot, all_papers, existing_splits,
393
+ no_push=args.no_push, force=args.force)
394
 
395
  print(f"\n {_GREEN}{_BOLD}✓{_RESET} Done\n")
396