| """SPADL schema for Opta data.""" | |
| from typing import Optional | |
| import pandas as pd | |
| import pandera as pa | |
| from pandera.typing import DateTime, Object, Series | |
| from socceraction.data.schema import ( | |
| CompetitionSchema, | |
| EventSchema, | |
| GameSchema, | |
| PlayerSchema, | |
| TeamSchema, | |
| ) | |
class OptaCompetitionSchema(CompetitionSchema):
    """Schema for a dataframe listing competitions and their seasons.

    Declares no columns of its own; everything comes from ``CompetitionSchema``.
    """
class OptaGameSchema(GameSchema):
    """Schema for a dataframe listing games.

    Extends ``GameSchema`` with columns that are all declared ``Optional`` and
    nullable, so a dataframe may omit any of them or leave values missing.
    """

    # NOTE(review): the integer columns below are nullable but typed as plain
    # ``int``, whereas ``OptaEventSchema.related_player_id`` uses
    # ``pd.Int64Dtype`` -- confirm that missing values validate as intended.

    home_score: Optional[Series[int]] = pa.Field(nullable=True)
    """Final score of the home team."""

    away_score: Optional[Series[int]] = pa.Field(nullable=True)
    """Final score of the away team."""

    duration: Optional[Series[int]] = pa.Field(nullable=True)
    """Total duration of the game, in minutes."""

    referee: Optional[Series[str]] = pa.Field(nullable=True)
    """Name of the referee."""

    venue: Optional[Series[str]] = pa.Field(nullable=True)
    """Name of the stadium in which the game was played."""

    attendance: Optional[Series[int]] = pa.Field(nullable=True)
    """Number of people who attended the game."""

    home_manager: Optional[Series[str]] = pa.Field(nullable=True)
    """Name of the home team's manager."""

    away_manager: Optional[Series[str]] = pa.Field(nullable=True)
    """Name of the away team's manager."""
class OptaPlayerSchema(PlayerSchema):
    """Schema for a dataframe listing the players of a game.

    Adds one string column to those inherited from ``PlayerSchema``.
    """

    starting_position: Series[str]
    """Position in which the player started."""
class OptaTeamSchema(TeamSchema):
    """Schema for a dataframe listing the teams of a game.

    Declares no columns of its own; everything comes from ``TeamSchema``.
    """
class OptaEventSchema(EventSchema):
    """Schema for a dataframe holding the event stream of a game.

    Extends ``EventSchema`` with timing, outcome, start/end coordinates, the
    raw qualifiers, and a set of optional per-event flags.
    """

    timestamp: Series[DateTime]
    """Time in the match at which the event occurs, recorded to the millisecond."""

    minute: Series[int]
    """Minutes on the clock when the event happened."""

    second: Series[int] = pa.Field(ge=0, le=59)
    """Seconds part of the timestamp; restricted to the range 0-59."""

    outcome: Series[bool]
    """Whether the event ended successfully."""

    start_x: Series[float] = pa.Field(nullable=True)
    """X coordinate of the location where the event started."""

    start_y: Series[float] = pa.Field(nullable=True)
    """Y coordinate of the location where the event started."""

    end_x: Series[float] = pa.Field(nullable=True)
    """X coordinate of the location where the event ended."""

    end_y: Series[float] = pa.Field(nullable=True)
    """Y coordinate of the location where the event ended."""

    qualifiers: Series[Object]
    """JSON object holding the Opta qualifiers of the event."""

    assist: Optional[Series[bool]]
    """Whether the event was an assist."""

    keypass: Optional[Series[bool]]
    """Whether the event was a keypass."""

    goal: Optional[Series[bool]]
    """Whether the event was a goal."""

    shot: Optional[Series[bool]]
    """Whether the event was a shot."""

    touch: Optional[Series[bool]]
    """Whether the event was an on-the-ball action."""

    related_player_id: Optional[Series[pd.Int64Dtype]] = pa.Field(nullable=True)
    """ID of a second player involved in this event."""