| import os |
| from typing import NamedTuple |
|
|
| import pandas as pd |
| import pytest |
| from kloppy import opta, statsbomb, wyscout |
| from kloppy.domain import Orientation |
| from pandas.testing import assert_frame_equal |
| from socceraction.data.opta import OptaLoader |
| from socceraction.data.statsbomb import StatsBombLoader |
| from socceraction.data.wyscout import PublicWyscoutLoader, WyscoutLoader |
| from socceraction.spadl import config as spadl |
| from socceraction.spadl import kloppy as kl |
| from socceraction.spadl import opta as spadl_opta |
| from socceraction.spadl import statsbomb as sb |
| from socceraction.spadl import wyscout as spadl_wyscout |
|
|
# Lift pandas' display limits so that frames printed by the tests below are
# shown with every column and every row instead of being truncated.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
|
|
|
|
class Dataset(NamedTuple):
    """Pair of action frames produced from the same match by two code paths.

    The fixtures in this module build one instance per data provider and the
    tests compare its two members against each other.
    """

    # Actions obtained through ``socceraction.spadl.kloppy.convert_to_actions``.
    kloppy: pd.DataFrame
    # Actions obtained through the provider-specific socceraction converter.
    socceraction: pd.DataFrame
|
|
|
|
@pytest.fixture(scope="session")
def statsbomb_actions() -> Dataset:
    """Convert StatsBomb game 7584 to actions through both code paths.

    The raw files are read from ``datasets/statsbomb/raw`` under the parent of
    this file's directory. The fixture is session-scoped, so the conversions
    run once per test session.

    Returns
    -------
    Dataset
        The kloppy-based actions followed by the socceraction-based actions.
    """
    raw_root = os.path.join(os.path.dirname(__file__), os.pardir, "datasets", "statsbomb", "raw")

    # Path 1: parse the raw files with kloppy, then convert the parsed dataset.
    events_file = os.path.join(raw_root, "events", "7584.json")
    lineups_file = os.path.join(raw_root, "lineups", "7584.json")
    parsed_match = statsbomb.load(event_data=events_file, lineup_data=lineups_file)
    kloppy_actions = kl.convert_to_actions(parsed_match, game_id=7584)

    # Path 2: load the same game with socceraction's local loader and convert
    # it with the StatsBomb-specific converter.
    # NOTE(review): 782 is the converter's second positional argument,
    # presumably the home team id -- confirm against the socceraction API.
    local_loader = StatsBombLoader(root=raw_root, getter="local")
    raw_events = local_loader.events(7584)
    socceraction_actions = sb.convert_to_actions(raw_events, 782)

    return Dataset(kloppy_actions, socceraction_actions)
|
|
|
|
@pytest.mark.parametrize("actiontype", spadl.actiontypes)
def test_kloppy_to_actions_statsbomb(statsbomb_actions: Dataset, actiontype: str) -> None:
    """Check that kloppy-based StatsBomb actions equal socceraction's.

    The comparison is done for one action type at a time and only on the
    columns listed in ``cols``. Both selections are indexed by
    ``original_event_id`` before being compared with ``assert_frame_equal``.

    Parameters
    ----------
    statsbomb_actions : Dataset
        Actions of the same game converted through both code paths.
    actiontype : str
        Name of the action type to compare; one of ``spadl.actiontypes``.
    """
    cols = [
        "game_id",
        "original_event_id",
        "period_id",
        "time_seconds",
        "team_id",
        "player_id",
        "type_id",
        "result_id",
        "bodypart_id",
    ]
    type_id = spadl.actiontypes.index(actiontype)

    # Take an explicit copy: columns of this selection are reassigned below,
    # and assigning into a bare ``.loc`` selection is the chained-assignment
    # pattern that triggers pandas' SettingWithCopyWarning.
    sa_actions = statsbomb_actions.socceraction
    sel_actions_sa = sa_actions.loc[sa_actions.type_id == type_id, cols].copy()

    # Strip the "interception-" marker from the kloppy event ids so that they
    # can be matched against the socceraction event ids.
    kl_actions = statsbomb_actions.kloppy
    sel_actions_kl = kl_actions.loc[kl_actions.type_id == type_id, cols].replace(
        {"original_event_id": {"interception-": ""}}, regex=True
    )

    # Bring the socceraction identifiers to the string representation that is
    # compared against the kloppy side.
    sel_actions_sa["team_id"] = sel_actions_sa["team_id"].astype(str)
    sel_actions_sa["player_id"] = sel_actions_sa["player_id"].astype("Int64").astype(str)
    if actiontype in ["keeper_save", "keeper_punch"]:
        # NOTE(review): the socceraction result is overwritten with "success"
        # for these two types, so their results are effectively only checked
        # on the kloppy side -- confirm this is the intended expectation.
        sel_actions_sa["result_id"] = spadl.results.index("success")

    # Diagnostics; pytest shows captured output only when the test fails.
    print(
        "These events should not be included",
        set(sel_actions_kl.original_event_id) - set(sel_actions_sa.original_event_id),
    )
    print(
        "These events are missing",
        set(sel_actions_sa.original_event_id) - set(sel_actions_kl.original_event_id),
    )

    assert_frame_equal(
        sel_actions_kl.set_index("original_event_id"),
        sel_actions_sa.set_index("original_event_id"),
    )
|
|
|
|
@pytest.fixture(scope="session")
def opta_actions() -> Dataset:
    """Convert Opta game 1009316 to actions through both code paths.

    The F7 and F24 XML feeds are read from ``datasets/opta`` under the parent
    of this file's directory. The fixture is session-scoped, so the
    conversions run once per test session.

    Returns
    -------
    Dataset
        The kloppy-based actions followed by the socceraction-based actions.
    """
    opta_root = os.path.join(os.path.dirname(__file__), os.pardir, "datasets", "opta")

    # Path 1: parse both feeds with kloppy, then convert the parsed dataset.
    f7_file = os.path.join(opta_root, "f7-23-2018-1009316-matchresults.xml")
    f24_file = os.path.join(opta_root, "f24-23-2018-1009316-eventdetails.xml")
    parsed_match = opta.load(f7_data=f7_file, f24_data=f24_file)
    kloppy_actions = kl.convert_to_actions(parsed_match, game_id=1009316)

    # Path 2: load the same game with socceraction's XML loader; the feed
    # templates tell the loader how the files are named.
    feed_templates = {
        "f7": "f7-{competition_id}-{season_id}-{game_id}-matchresults.xml",
        "f24": "f24-{competition_id}-{season_id}-{game_id}-eventdetails.xml",
    }
    xml_loader = OptaLoader(root=opta_root, parser="xml", feeds=feed_templates)
    # NOTE(review): 174 is the converter's second positional argument,
    # presumably the home team id -- confirm against the socceraction API.
    raw_events = xml_loader.events(1009316)
    socceraction_actions = spadl_opta.convert_to_actions(raw_events, 174)

    return Dataset(kloppy_actions, socceraction_actions)
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
@pytest.mark.skip(reason="not yet supported")
@pytest.mark.parametrize("actiontype", spadl.actiontypes)
def test_kloppy_to_actions_opta(opta_actions: Dataset, actiontype: str) -> None:
    """Check that kloppy-based Opta actions equal socceraction's.

    The comparison is done for one action type at a time and only on the
    columns listed in ``cols``. Both selections are indexed by
    ``original_event_id`` before being compared with ``assert_frame_equal``.
    The test is currently skipped.

    Parameters
    ----------
    opta_actions : Dataset
        Actions of the same game converted through both code paths.
    actiontype : str
        Name of the action type to compare; one of ``spadl.actiontypes``.
    """
    cols = [
        "game_id",
        "original_event_id",
        "period_id",
        "team_id",
        "player_id",
        "type_id",
        "result_id",
        "bodypart_id",
    ]
    type_id = spadl.actiontypes.index(actiontype)

    # Take explicit copies: columns of both selections are reassigned below,
    # and assigning into a bare ``.loc`` selection is the chained-assignment
    # pattern that triggers pandas' SettingWithCopyWarning.
    sa_actions = opta_actions.socceraction
    sel_actions_sa = sa_actions.loc[sa_actions.type_id == type_id, cols].copy()

    kl_actions = opta_actions.kloppy
    sel_actions_kl = kl_actions.loc[kl_actions.type_id == type_id, cols].copy()

    # Align the identifier dtypes of the two frames before comparing them.
    sel_actions_kl["team_id"] = sel_actions_kl["team_id"].astype(int)
    sel_actions_kl["player_id"] = sel_actions_kl["player_id"].astype(float)
    sel_actions_sa["original_event_id"] = sel_actions_sa["original_event_id"].astype(str)

    # Diagnostics; pytest shows captured output only when the test fails.
    print(
        "These events should not be included",
        set(sel_actions_kl.original_event_id) - set(sel_actions_sa.original_event_id),
    )
    print(
        "These events are missing",
        set(sel_actions_sa.original_event_id) - set(sel_actions_kl.original_event_id),
    )
    print("These events are different")
    combined = pd.concat(
        [
            sel_actions_kl.set_index("original_event_id"),
            sel_actions_sa.set_index("original_event_id"),
        ]
    ).reset_index(drop=False)
    # Rows that occur exactly once across both frames are the mismatches.
    # ``drop_duplicates(keep=False)`` treats missing values as equal, whereas
    # grouping on every column would silently drop rows containing NaN.
    print(combined.drop_duplicates(keep=False))

    assert_frame_equal(
        sel_actions_kl.set_index("original_event_id"),
        sel_actions_sa.set_index("original_event_id"),
    )
|
|
|
|
@pytest.fixture(scope="session")
def wyscout_actions() -> Dataset:
    """Convert Wyscout API game 2852835 to actions through both code paths.

    The event file is read from ``datasets/wyscout_api`` under the parent of
    this file's directory. The fixture is session-scoped, so the conversions
    run once per test session.

    Returns
    -------
    Dataset
        The kloppy-based actions followed by the socceraction-based actions.
    """
    api_root = os.path.join(os.path.dirname(__file__), os.pardir, "datasets", "wyscout_api")

    # Path 1: parse the event file with kloppy, then convert the parsed dataset.
    events_file = os.path.join(api_root, "events_2852835.json")
    parsed_match = wyscout.load(event_data=events_file)
    kloppy_actions = kl.convert_to_actions(parsed_match, game_id=2852835)

    # Path 2: load the same game with socceraction's local loader; the feed
    # templates tell the loader how the files are named.
    feed_templates = {
        "competitions": "competitions.json",
        "seasons": "seasons_{competition_id}.json",
        "events": "events_{game_id}.json",
    }
    local_loader = WyscoutLoader(root=api_root, getter="local", feeds=feed_templates)
    # NOTE(review): 3166 is the converter's second positional argument,
    # presumably the home team id -- confirm against the socceraction API.
    raw_events = local_loader.events(2852835)
    socceraction_actions = spadl_wyscout.convert_to_actions(raw_events, 3166)

    return Dataset(kloppy_actions, socceraction_actions)
|
|
|
|
@pytest.fixture(scope="session")
def public_wyscout_actions() -> Dataset:
    """Convert public Wyscout game 2058007 to actions through both code paths.

    The kloppy side is obtained with ``wyscout.load_open_data``; the
    socceraction side is read from ``datasets/wyscout_public/raw`` under the
    parent of this file's directory, with downloading disabled. The fixture
    is session-scoped, so the conversions run once per test session.

    Returns
    -------
    Dataset
        The kloppy-based actions followed by the socceraction-based actions.
        The annotation is ``Dataset`` (a tuple subclass) to match what is
        actually returned and how the consuming test annotates this fixture.
    """
    data_dir = os.path.join(
        os.path.dirname(__file__), os.pardir, "datasets", "wyscout_public", "raw"
    )

    # Path 1: kloppy's open-data loader, then the kloppy-based conversion.
    kloppy_dataset = wyscout.load_open_data(match_id="2058007")
    # The orientation recorded in the metadata is overridden before converting.
    # NOTE(review): presumably needed so the converter interprets the
    # coordinates correctly -- confirm why the loaded value is not used.
    kloppy_dataset.metadata.orientation = Orientation.ACTION_EXECUTING_TEAM
    df_actions_kl = kl.convert_to_actions(kloppy_dataset, game_id=2058007)

    # Path 2: socceraction's public Wyscout loader and Wyscout converter.
    # NOTE(review): 5629 is the converter's second positional argument,
    # presumably the home team id -- confirm against the socceraction API.
    WSL = PublicWyscoutLoader(root=data_dir, download=False)
    df_actions_sa = spadl_wyscout.convert_to_actions(WSL.events(2058007), 5629)

    return Dataset(df_actions_kl, df_actions_sa)
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
@pytest.mark.skip(reason="not yet supported")
@pytest.mark.parametrize("actiontype", spadl.actiontypes)
def test_kloppy_to_actions_wyscout(public_wyscout_actions: Dataset, actiontype: str) -> None:
    """Check that kloppy-based public Wyscout actions equal socceraction's.

    The comparison is done for one action type at a time and only on the
    columns listed in ``cols``. Both selections are indexed by
    ``original_event_id`` before being compared with ``assert_frame_equal``.
    The test is currently skipped.

    Parameters
    ----------
    public_wyscout_actions : Dataset
        Actions of the same game converted through both code paths.
    actiontype : str
        Name of the action type to compare; one of ``spadl.actiontypes``.
    """
    cols = [
        "game_id",
        "original_event_id",
        "period_id",
        "time_seconds",
        "team_id",
        "player_id",
        "start_x",
        "start_y",
        "end_x",
        "end_y",
        "type_id",
        "result_id",
        "bodypart_id",
    ]
    type_id = spadl.actiontypes.index(actiontype)

    # Take an explicit copy: a column of this selection is reassigned below,
    # and assigning into a bare ``.loc`` selection is the chained-assignment
    # pattern that triggers pandas' SettingWithCopyWarning.
    sa_actions = public_wyscout_actions.socceraction
    sel_actions_sa = sa_actions.loc[sa_actions.type_id == type_id, cols].copy()

    # Strip the "interception-" marker from the kloppy event ids so that they
    # can be matched against the socceraction event ids. ``replace`` returns
    # a new frame, so the assignments below do not touch the fixture data.
    kl_actions = public_wyscout_actions.kloppy
    sel_actions_kl = kl_actions.loc[kl_actions.type_id == type_id, cols].replace(
        {"original_event_id": {"interception-": ""}}, regex=True
    )

    # Align the identifier dtypes of the two frames before comparing them.
    sel_actions_kl["team_id"] = sel_actions_kl["team_id"].astype(int)
    sel_actions_kl["player_id"] = sel_actions_kl["player_id"].astype(int)
    sel_actions_sa["original_event_id"] = sel_actions_sa["original_event_id"].astype(str)

    # Diagnostics; pytest shows captured output only when the test fails.
    print(
        "These events should not be included",
        set(sel_actions_kl.original_event_id) - set(sel_actions_sa.original_event_id),
    )
    print(
        "These events are missing",
        set(sel_actions_sa.original_event_id) - set(sel_actions_kl.original_event_id),
    )

    assert_frame_equal(
        sel_actions_kl.set_index("original_event_id"),
        sel_actions_sa.set_index("original_event_id"),
    )
|
|