# Source: amarorn / tests/test_sofascore_match_date.py
# Revision 16c19b8 (verified), by beAnalytic:
#   "feat: sync main with feature/superbet-live-inplay"
# File size: 2.49 kB
from datetime import date, datetime, timezone
from ingest.sofascore.event_helpers import match_date_from_event, resolve_match_date
from ingest.sofascore.stats_ingest import backfill_match_dates, build_match_stats_payload
from tests.test_sofascore_stats import FakeStatsClient
def test_match_date_from_event_start_timestamp():
    """An event's ``startTimestamp`` is rendered as a UTC ISO-8601 string."""
    kickoff = datetime(2024, 6, 15, 20, 0, tzinfo=timezone.utc)
    payload = {"id": 99, "startTimestamp": int(kickoff.timestamp())}

    assert match_date_from_event(payload) == "2024-06-15T20:00:00+00:00"
def test_resolve_match_date_prefers_explicit_date():
    """An explicitly supplied date wins over the event's own timestamp."""
    payload = {"startTimestamp": 1_700_000_000}
    explicit_day = date(2026, 6, 10)

    assert resolve_match_date(payload, explicit_day) == "2026-06-10"
def test_build_match_stats_payload_extracts_date_from_event():
    """The built payload exposes the event's kickoff time as its match date.

    The date must appear both on the result object itself and inside its
    ``stats`` mapping.
    """
    expected_iso = "2025-03-20T18:30:00+00:00"
    kickoff = datetime(2025, 3, 20, 18, 30, tzinfo=timezone.utc)

    teams = {
        "Brasil": {"sofascore_id": 4748},
        "Argentina": {"sofascore_id": 4819},
    }
    payload = {
        "id": 42,
        "startTimestamp": int(kickoff.timestamp()),
        "homeTeam": {"id": 4748, "name": "Brazil"},
        "awayTeam": {"id": 4819, "name": "Argentina"},
    }
    # The fake client is fed the event plus empty statistics/incidents bodies.
    stub_client = FakeStatsClient(payload, {"statistics": []}, {"incidents": []})

    built = build_match_stats_payload(
        event_id=42,
        client=stub_client,
        team_map=teams,
    )

    assert built.match_date == expected_iso
    assert built.stats["match_date"] == expected_iso
def test_backfill_match_dates_updates_parquet(tmp_path):
    """Backfilling writes a date into the parquet row that was missing one.

    The seeded file holds one row without ``match_date`` (event 10) and one
    that already has it (event 11); the report is expected to count a single
    processed and updated row.
    """
    import pandas as pd

    from ingest.sofascore.paths import MATCH_STATS_PARQUET

    parquet_path = tmp_path / MATCH_STATS_PARQUET
    seed_rows = [
        {"event_id": 10, "home_team": "Brasil", "away_team": "Argentina", "match_date": None},
        {"event_id": 11, "home_team": "França", "away_team": "Alemanha", "match_date": "2024-01-01"},
    ]
    pd.DataFrame(seed_rows).to_parquet(parquet_path, index=False)

    kickoff_ts = int(datetime(2023, 11, 16, 21, 0, tzinfo=timezone.utc).timestamp())

    class FakeClient:
        """Minimal client stub: only event 10 carries a start timestamp."""

        def event(self, event_id: int) -> dict:
            payload = {"id": event_id}
            if event_id == 10:
                payload["startTimestamp"] = kickoff_ts
            return payload

    summary = backfill_match_dates(
        client=FakeClient(),
        output_dir=tmp_path,
        update_json=False,
    )
    stored = pd.read_parquet(parquet_path)

    assert summary == {"total": 1, "updated": 1, "skipped": 0, "failed": 0}
    backfilled = stored.loc[stored["event_id"] == 10, "match_date"]
    assert pd.notna(backfilled.iloc[0])