| import json |
| import os |
| from datetime import datetime |
|
|
| import pandas as pd |
| import pytest |
| from py.path import local |
| from pytest import fixture |
| from socceraction.data.base import MissingDataError |
| from socceraction.data.opta import ( |
| OptaEventSchema, |
| OptaGameSchema, |
| OptaPlayerSchema, |
| OptaTeamSchema, |
| ) |
| from socceraction.data.opta.parsers import WhoScoredParser |
|
|
|
|
@fixture()
def whoscored_parser() -> WhoScoredParser:
    """Provide a parser for the bundled WhoScored game file ``1005916.json``.

    The file is looked up in ``datasets/whoscored``, three directory levels
    above the directory that contains this module.
    """
    module_dir = os.path.dirname(__file__)
    three_levels_up = [os.pardir] * 3
    datafile = os.path.join(
        module_dir, *three_levels_up, "datasets", "whoscored", "1005916.json"
    )
    return WhoScoredParser(
        str(datafile),
        competition_id=5,
        season_id=1516,
        game_id=1005916,
    )
|
|
|
|
def test_extract_competition_id(tmpdir: local) -> None:
    """Check how the parser resolves ``competition_id``.

    An explicit ID must be kept, ``None`` must resolve to 5 for the bundled
    game file, and ``None`` combined with an empty JSON document must raise
    ``MissingDataError``.
    """
    module_dir = os.path.dirname(__file__)
    datafile = os.path.join(
        module_dir, os.pardir, os.pardir, os.pardir, "datasets", "whoscored", "1005916.json"
    )
    # The IDs that are not under test stay fixed for every parser built here.
    other_ids = {"season_id": 1516, "game_id": 1005916}

    explicit = WhoScoredParser(datafile, competition_id=1234, **other_ids)
    assert explicit.competition_id == 1234

    inferred = WhoScoredParser(datafile, competition_id=None, **other_ids)
    assert inferred.competition_id == 5

    # An empty JSON object offers nothing to resolve the ID from.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), competition_id=None, **other_ids)
|
|
|
|
def test_extract_season_id(tmpdir: local) -> None:
    """Check how the parser resolves ``season_id``.

    An explicit ID must be kept, ``None`` must resolve to 1516 for the
    bundled game file, and ``None`` combined with an empty JSON document
    must raise ``MissingDataError``.
    """
    module_dir = os.path.dirname(__file__)
    datafile = os.path.join(
        module_dir, os.pardir, os.pardir, os.pardir, "datasets", "whoscored", "1005916.json"
    )
    # The IDs that are not under test stay fixed for every parser built here.
    other_ids = {"competition_id": 5, "game_id": 1005916}

    explicit = WhoScoredParser(datafile, season_id=1234, **other_ids)
    assert explicit.season_id == 1234

    inferred = WhoScoredParser(datafile, season_id=None, **other_ids)
    assert inferred.season_id == 1516

    # An empty JSON object offers nothing to resolve the ID from.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), season_id=None, **other_ids)
|
|
|
|
def test_extract_game_id(tmpdir: local) -> None:
    """Check how the parser resolves ``game_id``.

    An explicit ID must be kept, ``None`` must resolve to 1005916 for the
    bundled game file, and ``None`` combined with an empty JSON document
    must raise ``MissingDataError``.
    """
    module_dir = os.path.dirname(__file__)
    datafile = os.path.join(
        module_dir, os.pardir, os.pardir, os.pardir, "datasets", "whoscored", "1005916.json"
    )
    # The IDs that are not under test stay fixed for every parser built here.
    other_ids = {"competition_id": 5, "season_id": 1516}

    explicit = WhoScoredParser(datafile, game_id=1234, **other_ids)
    assert explicit.game_id == 1234

    inferred = WhoScoredParser(datafile, game_id=None, **other_ids)
    assert inferred.game_id == 1005916

    # An empty JSON object offers nothing to resolve the ID from.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), game_id=None, **other_ids)
|
|
|
|
def test_extract_games(whoscored_parser: WhoScoredParser) -> None:
    """Check the single game record extracted from the bundled file.

    The record is compared field by field against the expected values and
    the full result is then validated against ``OptaGameSchema``.
    """
    expected_game = {
        "game_id": 1005916,
        "season_id": 1516,
        "competition_id": 5,
        "game_day": None,
        "game_date": datetime(2015, 8, 23, 19, 45),
        "home_team_id": 272,
        "away_team_id": 267,
        "home_score": 1,
        "away_score": 3,
        "duration": 96,
        "venue": "Carlo Castellani",
        "attendance": 7309,
        "referee": "Maurizio Mariani",
        "home_manager": "Marco Giampaolo",
        "away_manager": "Rolando Maran",
    }

    games = whoscored_parser.extract_games()

    assert len(games) == 1
    assert games[1005916] == expected_game

    # One row per extracted game, indexed by the dictionary key.
    games_frame = pd.DataFrame.from_dict(games, orient="index")
    OptaGameSchema.validate(games_frame)
|
|
|
|
def test_extract_teams(whoscored_parser: WhoScoredParser) -> None:
    """Check that both teams are extracted with the expected IDs and names.

    The full result is then validated against ``OptaTeamSchema``.
    """
    expected_names = ((272, "Empoli"), (267, "Chievo"))

    teams = whoscored_parser.extract_teams()

    assert len(teams) == 2
    for team_id, team_name in expected_names:
        assert teams[team_id] == {
            "team_id": team_id,
            "team_name": team_name,
        }

    # One row per extracted team, indexed by the dictionary key.
    teams_frame = pd.DataFrame.from_dict(teams, orient="index")
    OptaTeamSchema.validate(teams_frame)
|
|
|
|
def test_extract_players(whoscored_parser: WhoScoredParser) -> None:
    """Check the player count and one complete player record.

    The full result is then validated against ``OptaPlayerSchema``.
    """
    # Kept as a sum, as in the original expectation (presumably one term
    # per squad -- confirm against the data file).
    expected_count = 21 + 23
    sample_key = (1005916, 4444)
    expected_player = {
        "game_id": 1005916,
        "team_id": 267,
        "player_id": 4444,
        "player_name": "Albano Bizzarri",
        "is_starter": True,
        "minutes_played": 96,
        "jersey_number": 1,
        "starting_position": "GK",
    }

    players = whoscored_parser.extract_players()

    assert len(players) == expected_count
    assert players[sample_key] == expected_player

    players_frame = pd.DataFrame.from_dict(players, orient="index")
    OptaPlayerSchema.validate(players_frame)
|
|
|
|
def test_extract_events(whoscored_parser: WhoScoredParser) -> None:
    """Check the event count and one complete event record.

    The full result, with a placeholder ``type_name`` column added, is then
    validated against ``OptaEventSchema``.
    """
    sample_key = (1005916, 832925173)
    expected_event = {
        "game_id": 1005916,
        "event_id": 832925173,
        "period_id": 1,
        "team_id": 272,
        "player_id": 128778,
        "type_id": 1,
        "timestamp": datetime(2015, 8, 23, 19, 45, 1),
        "minute": 0,
        "second": 1,
        "outcome": True,
        "start_x": 50.9,
        "start_y": 48.8,
        "end_x": 35.9,
        "end_y": 49.8,
        "qualifiers": {56: "Back", 140: "35.9", 141: "49.8", 212: "15.8", 213: "3.1"},
        "related_player_id": None,
        "goal": False,
        "shot": False,
        "touch": True,
    }

    events = whoscored_parser.extract_events()

    assert len(events) == 1562
    assert events[sample_key] == expected_event

    events_frame = pd.DataFrame.from_dict(events, orient="index")
    # The parser output has no "type_name" column; a placeholder is filled in
    # before validation.
    events_frame["type_name"] = "Added later"
    OptaEventSchema.validate(events_frame)
|
|
|
|
def test_extract_substitutions(whoscored_parser: WhoScoredParser) -> None:
    """Check the substitution count and one complete substitution record."""
    sample_key = (1005916, 294162)
    expected_substitution = {
        "game_id": 1005916,
        "team_id": 272,
        "period_id": 2,
        "period_milliseconds": 1693000,
        "player_in_id": 294162,
        "player_out_id": 260588,
    }

    substitutions = whoscored_parser.extract_substitutions()

    assert len(substitutions) == 6
    assert substitutions[sample_key] == expected_substitution
|
|
|
|
def test_extract_positions(whoscored_parser: WhoScoredParser) -> None:
    """Check the position-record count and one complete position record."""
    sample_key = (1005916, 4444, 0)
    expected_position = {
        "game_id": 1005916,
        "team_id": 267,
        "player_id": 4444,
        "period_id": 1,
        "period_milliseconds": 0,
        "start_milliseconds": 0,
        "end_milliseconds": 2520000,
        "formation_scheme": "442",
        "player_position": "GK",
        "player_position_x": 0.0,
        "player_position_y": 5.0,
    }

    positions = whoscored_parser.extract_positions()

    assert len(positions) == 88
    assert positions[sample_key] == expected_position
|
|
|
|
def test_extract_teamgamestats(whoscored_parser: WhoScoredParser) -> None:
    """Check selected fields of the per-team game statistics.

    Only the record for team 272 is inspected; ``aerials_success`` is
    expected to be absent from it.
    """
    teamgamestats = whoscored_parser.extract_teamgamestats()
    assert len(teamgamestats) == 2

    # Look the record up once and reuse it for every field check.
    home_stats = teamgamestats[(1005916, 272)]
    assert home_stats["game_id"] == 1005916
    assert home_stats["team_id"] == 272
    assert home_stats["side"] == "home"
    assert home_stats["score"] == 1
    assert home_stats["shootout_score"] is None
    assert home_stats["aerials_total"] == 34
    assert home_stats["aerials_won"] == 10
    assert "aerials_success" not in home_stats
|
|
|
|
def test_extract_playergamestats(whoscored_parser: WhoScoredParser) -> None:
    """Check selected fields of the per-player game statistics.

    Only the record for player 90878 is inspected; ``pass_success`` is
    expected to be absent from it.
    """
    playergamestats = whoscored_parser.extract_playergamestats()
    assert len(playergamestats) == 21 + 23

    # Look the record up once and reuse it for every field check.
    player_stats = playergamestats[(1005916, 90878)]
    assert player_stats["game_id"] == 1005916
    assert player_stats["team_id"] == 272
    assert player_stats["player_id"] == 90878
    assert player_stats["mvp"] is False
    assert player_stats["minute_start"] == 0
    assert player_stats["minute_end"] == 96
    assert player_stats["minutes_played"] == 96
    assert player_stats["passes_total"] == 47
    assert player_stats["passes_accurate"] == 37
    assert "pass_success" not in player_stats
|
|