Spaces:
Running
Running
| """Safe snapshot update: add match results without modifying historical data. | |
| Usage: | |
| # Add a single result | |
| python scripts/update_snapshot.py --group A --home Mexico --away "South Africa" --home-goals 2 --away-goals 0 | |
| # Update the information cutoff | |
| python scripts/update_snapshot.py --cutoff "2026-06-13T19:00:00Z" | |
| # Preview changes without writing | |
| python scripts/update_snapshot.py --group A --home Mexico --away "South Africa" --home-goals 2 --away-goals 0 --dry-run | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import copy | |
| import json | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| from underdog_lab.config import DATA_DIR | |
| from underdog_lab.world_cup.data import TournamentRepository | |
| SNAPSHOT_PATH = DATA_DIR / "world_cup_2026" / "snapshot.json" | |
| BACKUP_DIR = DATA_DIR / "world_cup_2026" / "backups" | |
| def load_snapshot() -> dict: | |
| return json.loads(SNAPSHOT_PATH.read_text(encoding="utf-8")) | |
| def save_snapshot(data: dict) -> None: | |
| SNAPSHOT_PATH.write_text( | |
| json.dumps(data, indent=2, ensure_ascii=True) + "\n", | |
| encoding="utf-8", | |
| ) | |
| def create_backup(data: dict) -> Path: | |
| BACKUP_DIR.mkdir(parents=True, exist_ok=True) | |
| timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") | |
| path = BACKUP_DIR / f"snapshot-{timestamp}.json" | |
| path.write_text( | |
| json.dumps(data, indent=2, ensure_ascii=True) + "\n", | |
| encoding="utf-8", | |
| ) | |
| return path | |
| def add_result( | |
| snapshot: dict, | |
| group: str, | |
| home: str, | |
| away: str, | |
| home_goals: int, | |
| away_goals: int, | |
| *, | |
| allow_correction: bool = False, | |
| ) -> dict: | |
| updated = copy.deepcopy(snapshot) | |
| results = updated.setdefault("results", []) | |
| # Update existing or append new | |
| for result in results: | |
| if result["group"] == group and result["home"] == home and result["away"] == away: | |
| unchanged = ( | |
| result["home_goals"] == home_goals | |
| and result["away_goals"] == away_goals | |
| ) | |
| if unchanged: | |
| return updated | |
| if not allow_correction: | |
| raise ValueError( | |
| f"Result correction requires --allow-corrections: " | |
| f"{home} {result['home_goals']}-{result['away_goals']} " | |
| f"{away} -> {home_goals}-{away_goals}" | |
| ) | |
| result["home_goals"] = home_goals | |
| result["away_goals"] = away_goals | |
| return updated | |
| results.append({ | |
| "group": group, | |
| "home": home, | |
| "away": away, | |
| "home_goals": home_goals, | |
| "away_goals": away_goals, | |
| }) | |
| return updated | |
| def add_results( | |
| snapshot: dict, | |
| rows: list[dict], | |
| *, | |
| allow_corrections: bool = False, | |
| ) -> dict: | |
| updated = snapshot | |
| for row in rows: | |
| updated = add_result( | |
| updated, | |
| row["group"], | |
| row["home"], | |
| row["away"], | |
| int(row["home_goals"]), | |
| int(row["away_goals"]), | |
| allow_correction=allow_corrections, | |
| ) | |
| return updated | |
| def validate_result_rows(rows: list[dict]) -> None: | |
| repository = TournamentRepository() | |
| fixtures = { | |
| (fixture.group, fixture.home, fixture.away) | |
| for fixture in repository.fixtures | |
| } | |
| for row in rows: | |
| key = (row["group"], row["home"], row["away"]) | |
| if key not in fixtures: | |
| raise ValueError(f"Unknown tournament fixture: {key}") | |
| if int(row["home_goals"]) < 0 or int(row["away_goals"]) < 0: | |
| raise ValueError(f"Goals must be non-negative: {key}") | |
| def update_cutoff(snapshot: dict, cutoff: str) -> dict: | |
| updated = copy.deepcopy(snapshot) | |
| updated["information_cutoff"] = cutoff | |
| return updated | |
| def main() -> None: | |
| parser = argparse.ArgumentParser( | |
| description="Safely update the World Cup 2026 tournament snapshot." | |
| ) | |
| parser.add_argument("--group", help="Group letter (A-L)") | |
| parser.add_argument("--home", help="Home team name") | |
| parser.add_argument("--away", help="Away team name") | |
| parser.add_argument("--home-goals", type=int, help="Home goals scored") | |
| parser.add_argument("--away-goals", type=int, help="Away goals scored") | |
| parser.add_argument( | |
| "--cutoff", | |
| help="Update information_cutoff timestamp (ISO 8601).", | |
| ) | |
| parser.add_argument( | |
| "--results-file", | |
| type=Path, | |
| help=( | |
| "JSON file containing {'results': [...], 'information_cutoff': ...}. " | |
| "This is an audited ingestion boundary; it performs no scraping." | |
| ), | |
| ) | |
| parser.add_argument( | |
| "--dry-run", | |
| action="store_true", | |
| help="Preview changes without writing.", | |
| ) | |
| parser.add_argument( | |
| "--allow-corrections", | |
| action="store_true", | |
| help="Apply explicitly reported score corrections for human review.", | |
| ) | |
| args = parser.parse_args() | |
| original = load_snapshot() | |
| updated = copy.deepcopy(original) | |
| actions = [] | |
| if args.results_file: | |
| payload = json.loads(args.results_file.read_text(encoding="utf-8")) | |
| rows = [ | |
| *payload.get("results", []), | |
| *payload.get("corrections", []), | |
| ] | |
| validate_result_rows(rows) | |
| updated = add_results( | |
| updated, | |
| rows, | |
| allow_corrections=args.allow_corrections, | |
| ) | |
| actions.append( | |
| f"Import {len(payload.get('results', []))} result(s) and " | |
| f"{len(payload.get('corrections', []))} correction(s) from " | |
| f"{args.results_file}" | |
| ) | |
| if payload.get("information_cutoff"): | |
| updated = update_cutoff(updated, payload["information_cutoff"]) | |
| actions.append( | |
| f"Update cutoff to {payload['information_cutoff']}" | |
| ) | |
| sources = updated.setdefault("sources", []) | |
| for source in payload.get("sources", []): | |
| if source not in sources: | |
| sources.append(source) | |
| if args.cutoff: | |
| updated = update_cutoff(updated, args.cutoff) | |
| actions.append(f"Update cutoff to {args.cutoff}") | |
| if args.group and args.home and args.away: | |
| if args.home_goals is None or args.away_goals is None: | |
| parser.error("--home-goals and --away-goals are required when adding a result.") | |
| updated = add_result( | |
| updated, args.group, args.home, args.away, | |
| args.home_goals, args.away_goals, | |
| ) | |
| actions.append( | |
| f"Group {args.group}: {args.home} {args.home_goals}-{args.away_goals} {args.away}" | |
| ) | |
| if not actions: | |
| parser.error("No action specified. Use --cutoff or --group/--home/--away/--home-goals/--away-goals.") | |
| if args.dry_run: | |
| print("DRY RUN — no changes written.") | |
| print("Actions that would be taken:") | |
| for action in actions: | |
| print(f" • {action}") | |
| print() | |
| print("Resulting snapshot diff:") | |
| orig_json = json.dumps(original, indent=2, sort_keys=True) | |
| upd_json = json.dumps(updated, indent=2, sort_keys=True) | |
| if orig_json == upd_json: | |
| print(" (no change)") | |
| else: | |
| import difflib | |
| diff = difflib.unified_diff( | |
| orig_json.splitlines(), | |
| upd_json.splitlines(), | |
| fromfile="original", | |
| tofile="updated", | |
| ) | |
| for line in diff: | |
| print(f" {line}") | |
| return | |
| if updated == original: | |
| print("No snapshot changes required.") | |
| return | |
| # Back up before writing | |
| backup_path = create_backup(original) | |
| print(f"Backup saved to {backup_path}") | |
| save_snapshot(updated) | |
| for action in actions: | |
| print(f"✓ {action}") | |
| # Verify round-trip | |
| reloaded = load_snapshot() | |
| assert reloaded == updated, "Snapshot round-trip verification failed." | |
| print("✓ Round-trip verification passed.") | |
| if __name__ == "__main__": | |
| main() | |