Spaces:
Runtime error
Runtime error
| from datetime import date, datetime, timezone | |
| from ingest.sofascore.event_helpers import match_date_from_event, resolve_match_date | |
| from ingest.sofascore.stats_ingest import backfill_match_dates, build_match_stats_payload | |
| from tests.test_sofascore_stats import FakeStatsClient | |
def test_match_date_from_event_start_timestamp():
    """A UTC ``startTimestamp`` is rendered as an ISO-8601 string with offset."""
    kickoff = datetime(2024, 6, 15, 20, 0, tzinfo=timezone.utc)
    payload = {"id": 99, "startTimestamp": int(kickoff.timestamp())}

    expected = "2024-06-15T20:00:00+00:00"
    assert match_date_from_event(payload) == expected
def test_resolve_match_date_prefers_explicit_date():
    """An explicitly supplied date wins over the event's ``startTimestamp``."""
    explicit = date(2026, 6, 10)
    payload = {"startTimestamp": 1_700_000_000}

    # The timestamp above falls on a different day than the explicit date,
    # so a match on "2026-06-10" can only come from the explicit argument.
    assert resolve_match_date(payload, explicit) == "2026-06-10"
def test_build_match_stats_payload_extracts_date_from_event():
    """The event's kickoff time appears on the result and inside its stats."""
    kickoff = datetime(2025, 3, 20, 18, 30, tzinfo=timezone.utc)
    expected_date = "2025-03-20T18:30:00+00:00"

    # Team-map keys and event team names differ for the home side
    # ("Brasil" vs "Brazil"); only the numeric ids line up.
    teams = {
        "Brasil": {"sofascore_id": 4748},
        "Argentina": {"sofascore_id": 4819},
    }
    payload = {
        "id": 42,
        "startTimestamp": int(kickoff.timestamp()),
        "homeTeam": {"id": 4748, "name": "Brazil"},
        "awayTeam": {"id": 4819, "name": "Argentina"},
    }

    # Stub client fed with the event plus empty statistics/incidents bodies.
    stub = FakeStatsClient(payload, {"statistics": []}, {"incidents": []})
    built = build_match_stats_payload(event_id=42, client=stub, team_map=teams)

    assert built.match_date == expected_date
    assert built.stats["match_date"] == expected_date
def test_backfill_match_dates_updates_parquet(tmp_path):
    """Backfilling fills the missing ``match_date`` in the stored parquet file.

    The fixture file holds one row without a date (event 10) and one row that
    already has a date (event 11). The report is expected to count a single
    processed row, and event 10 must end up with a non-null date.
    """
    import pandas as pd

    from ingest.sofascore.paths import MATCH_STATS_PARQUET

    kickoff_ts = int(datetime(2023, 11, 16, 21, 0, tzinfo=timezone.utc).timestamp())

    seed_rows = [
        {"event_id": 10, "home_team": "Brasil", "away_team": "Argentina", "match_date": None},
        {"event_id": 11, "home_team": "França", "away_team": "Alemanha", "match_date": "2024-01-01"},
    ]
    target = tmp_path / MATCH_STATS_PARQUET
    pd.DataFrame(seed_rows).to_parquet(target, index=False)

    class FakeClient:
        """Minimal client stub: only event 10 carries a start timestamp."""

        def event(self, event_id: int) -> dict:
            payload = {"id": event_id}
            if event_id == 10:
                payload["startTimestamp"] = kickoff_ts
            return payload

    summary = backfill_match_dates(
        client=FakeClient(),
        output_dir=tmp_path,
        update_json=False,
    )
    reloaded = pd.read_parquet(target)

    assert summary == {"total": 1, "updated": 1, "skipped": 0, "failed": 0}

    is_event_10 = reloaded["event_id"] == 10
    backfilled = reloaded.loc[is_event_10, "match_date"].iloc[0]
    assert pd.notna(backfilled)