Spaces:
Sleeping
Sleeping
| from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Text, Float, Boolean, UniqueConstraint | |
| from sqlalchemy.orm import declarative_base, sessionmaker | |
| import pandas as pd | |
| import os | |
| import json | |
| import dotenv | |
| dotenv.load_dotenv() | |
| db_uri = os.getenv('DATABASE_PATH') | |
| db_uri = f"sqlite:///{db_uri}" | |
| engine = create_engine(db_uri, echo=False) | |
| Base = declarative_base() | |
| class Game(Base): | |
| __tablename__ = 'games' | |
| id = Column(Integer, primary_key=True) | |
| timestamp = Column(String) | |
| score = Column(String) | |
| goal_home = Column(Integer) | |
| goal_away = Column(Integer) | |
| round = Column(String) | |
| home_team_id = Column(Integer, ForeignKey('teams.id')) | |
| away_team_id = Column(Integer, ForeignKey('teams.id')) | |
| venue = Column(String) | |
| referee = Column(String) | |
| attendance = Column(String) | |
| date = Column(String) | |
| season = Column(String) | |
| league_id = Column(Integer, ForeignKey('leagues.id')) | |
| class GameLineup(Base): | |
| __tablename__ = 'game_lineup' | |
| id = Column(Integer, primary_key=True) | |
| game_id = Column(Integer, ForeignKey('games.id')) | |
| team_id = Column(Integer, ForeignKey('teams.id')) | |
| player_id = Column(Integer, ForeignKey('players.hash')) | |
| shirt_number = Column(String) | |
| position = Column(String) | |
| starting = Column(Boolean) | |
| captain = Column(Boolean) | |
| coach = Column(Boolean) | |
| tactics = Column(String) | |
| # Add a unique constraint on game_id and player_id | |
| __table_args__ = (UniqueConstraint('game_id', 'player_id', name='uc_game_id_player_id'),) | |
| class Team(Base): | |
| __tablename__ = 'teams' | |
| id = Column(Integer, primary_key=True) | |
| name = Column(String) | |
| class Player(Base): | |
| __tablename__ = 'players' | |
| hash = Column(String, primary_key=True) | |
| name = Column(String) | |
| country = Column(String) | |
| class Caption(Base): | |
| __tablename__ = 'captions' | |
| id = Column(Integer, primary_key=True) | |
| game_id = Column(Integer, ForeignKey('games.id')) | |
| game_time = Column(String) | |
| period = Column(Integer) | |
| label = Column(String) | |
| description = Column(Text) | |
| important = Column(Boolean) | |
| visibility = Column(Boolean) | |
| frame_stamp = Column(Integer) | |
| class Commentary(Base): | |
| __tablename__ = 'commentary' | |
| id = Column(Integer, primary_key=True) | |
| game_id = Column(Integer, ForeignKey('games.id')) | |
| period = Column(Integer) | |
| event_time_start = Column(Float) | |
| event_time_end = Column(Float) | |
| description = Column(Text) | |
| class League(Base): | |
| __tablename__ = 'leagues' | |
| id = Column(Integer, primary_key=True) | |
| name = Column(String) | |
| class Event(Base): | |
| __tablename__ = 'events' | |
| id = Column(Integer, primary_key=True) | |
| game_id = Column(Integer, ForeignKey('games.id')) | |
| period = Column(Integer) | |
| # half = Column(Integer) | |
| game_time = Column(Integer) | |
| team_id = Column(Integer, ForeignKey('teams.id')) | |
| frame_stamp = Column(Integer) | |
| label = Column(String) | |
| visibility = Column(Boolean) | |
| class Augmented_Team(Base): | |
| __tablename__ = 'augmented_teams' | |
| id = Column(Integer, primary_key=True) | |
| team_id = Column(Integer, ForeignKey('teams.id')) | |
| augmented_name = Column(String) | |
| class Augmented_League(Base): | |
| __tablename__ = 'augmented_leagues' | |
| id = Column(Integer, primary_key=True) | |
| league_id = Column(Integer, ForeignKey('leagues.id')) | |
| augmented_name = Column(String) | |
| class Player_Event_Label(Base): | |
| __tablename__ = 'player_event_labels' | |
| id = Column(Integer, primary_key=True) | |
| label = Column(String) | |
| class Player_Event(Base): | |
| __tablename__ = 'player_events' | |
| id = Column(Integer, primary_key=True) | |
| game_id = Column(Integer, ForeignKey('games.id')) | |
| player_id = Column(Integer, ForeignKey('players.hash')) | |
| time = Column(String) # Time in minutes of the game | |
| type = Column(Integer, ForeignKey('player_event_labels.id')) | |
| linked_player = Column(Integer, ForeignKey( | |
| 'players.hash')) # If the event is linked to another player, for example a substitution | |
| # Create Tables | |
| Base.metadata.create_all(engine) | |
| # Session setup | |
| Session = sessionmaker(bind=engine) | |
| def extract_time_from_player_event(time: str) -> str: | |
| # Extract the time from the string | |
| time = time.split("'")[0] # Need to keep it str because of overtime eg. (45+2) | |
| return time | |
| def get_or_create(session, model, **kwargs): | |
| instance = session.query(model).filter_by(**kwargs).first() | |
| if instance: | |
| return instance | |
| else: | |
| instance = model(**kwargs) | |
| session.add(instance) | |
| session.commit() | |
| return instance | |
| def process_game_data(data, data2, league, season): | |
| session = Session() | |
| # Caption = d and v2 = d2 | |
| home_team = data["gameHomeTeam"] | |
| away_team = data["gameAwayTeam"] | |
| score = data["score"] | |
| home_score = score[0] | |
| away_score = score[-1] | |
| round_ = data["round"] | |
| venue = data["venue"][0] | |
| referee = data.get("referee_found", None) | |
| referee = referee[0] if referee else data.get("referee", None) | |
| date = data["gameDate"] | |
| timestamp = data["timestamp"] | |
| attendance = data.get("attendance", []) | |
| attendance = attendance[0] if attendance else None | |
| home_team = get_or_create(session, Team, name=home_team) | |
| away_team = get_or_create(session, Team, name=away_team) | |
| # Check if the game already exists | |
| game = session.query(Game).filter_by(timestamp=timestamp, home_team_id=home_team.id).first() | |
| # Check if league exists | |
| league = get_or_create(session, League, name=league) | |
| if not game: | |
| game = Game(timestamp=timestamp, score=score, goal_home=home_score, goal_away=away_score, round=round_, | |
| home_team_id=home_team.id, away_team_id=away_team.id, | |
| venue=venue, date=date, attendance=attendance, season=season, league_id=league.id, referee=referee) | |
| session.add(game) | |
| session.commit() | |
| teams = ["home", "away"] | |
| # Lets add lineup data | |
| for team in teams: | |
| if team == "home": | |
| team_id = home_team.id | |
| else: | |
| team_id = away_team.id | |
| team_lineup = data["lineup"][team] | |
| tactic = team_lineup["tactic"] | |
| for player_data in team_lineup["players"]: | |
| player_hash = player_data["hash"] | |
| name = player_data["long_name"] | |
| if " " not in name: # Since some players are missing their first name, do this to help with the search | |
| name = "NULL " + name | |
| number = player_data["shirt_number"] | |
| captain = player_data["captain"] == "(C)" | |
| starting = player_data["starting"] | |
| country = player_data["country"] | |
| position = player_data["lineup"] | |
| facts = player_data.get("facts", None) # Facts might be empty | |
| player = get_or_create(session, Player, hash=player_hash, name=name, country=country) | |
| game_lineup = GameLineup(game_id=game.id, team_id=team_id, player_id=player.hash, | |
| shirt_number=number, position=position, starting=starting, captain=captain, | |
| coach=False, tactics=tactic) | |
| if facts: | |
| for fact in facts: | |
| type = fact["type"] | |
| time = extract_time_from_player_event(fact["time"]) | |
| event = get_or_create(session, Player_Event_Label, id=int(type)) | |
| linked_player = fact.get("linked_player_hash", None) | |
| player_event = Player_Event(game_id=game.id, player_id=player.hash, time=time, type=event.id, | |
| linked_player=linked_player) | |
| session.add(player_event) | |
| session.add(game_lineup) | |
| # Get the coach | |
| coach = team_lineup["coach"][0] | |
| coach_hash = coach["hash"] | |
| coach_name = coach["long_name"] | |
| if " " not in coach_name: # Since some players are missing their first name, do this to help with the search | |
| name = "NULL " + coach_name | |
| coach_country = coach["country"] | |
| coach_player = get_or_create(session, Player, hash=coach_hash, name=coach_name, country=coach_country) | |
| game_lineup = GameLineup(game_id=game.id, team_id=team_id, player_id=coach_player.hash, | |
| shirt_number=None, position=None, starting=None, captain=False, coach=True, | |
| tactics=tactic) | |
| session.add(game_lineup) | |
| # Commit all changes at once | |
| session.commit() | |
| # Start parsing the events | |
| events = data["annotations"] | |
| for event in events: | |
| period, time = convert_to_seconds(event["gameTime"]) | |
| label = event["label"] | |
| # Renaming labels | |
| if label == "soccer-ball": | |
| label = "goal" | |
| elif label == "y-card": | |
| label = "yellow card" | |
| elif label == "r-card": | |
| label = "red card" | |
| description = event["description"] | |
| important = event["important"] == "true" | |
| visible = event["visibility"] | |
| # Convert to boolean | |
| # True if shown, False if not | |
| visible = visible == "shown" | |
| position = int(event["position"]) | |
| event = Caption(game_id=game.id, game_time=time, period=period, label=label, description=description, | |
| important=important, visibility=visible, frame_stamp=position) | |
| session.add(event) | |
| session.commit() | |
| return game.id, home_team.id, away_team.id | |
| def process_player_data(data): | |
| pass | |
| def process_ASR_data(data, game_id, period): | |
| session = Session() | |
| seg = data["segments"] | |
| commentary_events = [] # Store the events in a list | |
| for k, v in seg.items(): | |
| start = float(v[0]) | |
| end = float(v[1]) | |
| desc = v[2] | |
| event = Commentary(game_id=game_id, period=period, event_time_start=start, event_time_end=end, description=desc) | |
| commentary_events.append(event) | |
| # Bulk save objects | |
| session.bulk_save_objects(commentary_events) | |
| session.commit() | |
| session.close() | |
| def convert_to_seconds(time_str): | |
| # Split the string into its components | |
| period, time = time_str.split(" - ") | |
| minutes, seconds = time.split(":") | |
| # Convert the components to integers | |
| period = int(period) | |
| minutes = int(minutes) | |
| seconds = int(seconds) | |
| # Calculate the time in seconds | |
| total_seconds = (minutes * 60) + seconds | |
| return period, total_seconds | |
| def parse_labels_v2(data, session, home_team_id, away_team_id, game_id): | |
| annotations_data = data["annotations"] | |
| no_team = get_or_create(session, Team, name="not applicable") | |
| for annotation in annotations_data: | |
| period, game_time = convert_to_seconds(annotation["gameTime"]) | |
| # Determine which team the annotation belongs to | |
| if annotation["team"] == "home": | |
| team_id = home_team_id | |
| elif annotation["team"] == "away": | |
| team_id = away_team_id | |
| else: | |
| team_id = no_team.id | |
| position = annotation.get("position", None) # Assuming position can be null | |
| visibility = annotation["visibility"] == "visible" | |
| # Convert to boolean | |
| # True if visible, False if not | |
| visibility = visibility == "visible" | |
| label = annotation["label"] | |
| # Create and add the Annotations instance | |
| annotation_entry = Event( | |
| game_id=game_id, | |
| period=period, # periode | |
| game_time=game_time, # Already in seconds | |
| frame_stamp=position, # Make sure this is an integer or None | |
| team_id=team_id, # Integer ID of the team | |
| visibility=visibility, # Boolean | |
| label=label # String with information | |
| ) | |
| session.add(annotation_entry) | |
| session.commit() | |
| def process_json_files(directory): | |
| session = Session() | |
| fill_player_events(session) | |
| for root, dirs, files in os.walk(directory): | |
| print(root) | |
| labels_file = None | |
| asr_files = [] | |
| path_parts = root.split("\\") | |
| if len(path_parts) > 2: | |
| league = path_parts[-3].split("/")[-1] | |
| season = path_parts[-2] | |
| # Need the labels-v2 first as it contains the game ID | |
| for file in files: | |
| if 'Labels-caption.json' in file: | |
| labels_file = file | |
| elif file.endswith('.json'): | |
| asr_files.append(file) | |
| if labels_file: | |
| with open(os.path.join(root, labels_file), 'r') as f: | |
| lb_cap = json.load(f) | |
| with open(os.path.join(root, "Labels-v2.json"), 'r') as f: | |
| lb_v2 = json.load(f) | |
| game_id, home_team_id, away_team_id = process_game_data(lb_cap, lb_v2, league, season) | |
| for file in asr_files: | |
| with open(os.path.join(root, file), 'r') as f: | |
| asr = json.load(f) | |
| # Determine the type of file and process accordingly | |
| if 'Labels-v2' in file: | |
| parse_labels_v2(asr, session, home_team_id, away_team_id, game_id) | |
| elif '1_half-ASR' in file: | |
| period = 1 | |
| # Parse and commit the data | |
| process_ASR_data(data=asr, game_id=game_id, period=period) | |
| elif '2_half-ASR' in file: | |
| period = 2 | |
| # Parse and commit the data | |
| process_ASR_data(data=asr, game_id=game_id, period=period) | |
| session.commit() | |
| session.close() | |
| def fill_player_events(session): | |
| fact_id2label = { | |
| "1": "Yellow card", | |
| # Example: "time": "71' Ivanovic B. (Unsportsmanlike conduct)", "description": "Yellow Card" | |
| "2": "Red card", # Example: "time": "70' Matic N. (Unsportsmanlike conduct)", "description": "Red Card" | |
| "3": "Goal", # Example: "time": "14' Ivanovic B. (Hazard E.)", "description": "Goal" | |
| "4": "NA", | |
| "5": "NA 2", | |
| "6": "Substitution home", # Example: "time": "72'", "description": "Ramires" | |
| "7": "Substitution away", # Example: "time": "86'", "description": "Filipe Luis" | |
| "8": "Assistance" # Example: "time": "14' Ivanovic B. (Hazard E.)", "description": "Assistance" | |
| } | |
| for key, value in fact_id2label.items(): | |
| label = get_or_create(session, Player_Event_Label, label=value) | |
| session.commit() | |
| def fill_Augmented_Team(file_path): | |
| df = pd.read_csv(file_path) | |
| # the df should have two columns, team_name and augmented_name | |
| session = Session() | |
| teams = session.query(Team).all() | |
| # For each row, find the team_id and add the augmented name | |
| for index, row in df.iterrows(): | |
| team_name = row["name"] | |
| augmented_name = row["augmented_name"] | |
| # Strip leading and trailing whitespace | |
| augmented_name = augmented_name.strip() | |
| team = session.query(Team).filter_by(name=team_name).first() | |
| if team: | |
| augmented_team = get_or_create(session, Augmented_Team, team_id=team.id, augmented_name=augmented_name) | |
| session.commit() | |
| session.close() | |
| def fill_Augmented_League(file_path): | |
| # Read the csv file | |
| df = pd.read_csv(file_path) | |
| # the df should have two columns, team_name and augmented_name | |
| session = Session() | |
| leagues = session.query(League).all() | |
| # For each row, find the team_id and add the augmented name | |
| for index, row in df.iterrows(): | |
| league_name = row["name"] | |
| augmented_name = row["augmented_name"] | |
| # Strip leading and trailing whitespace | |
| augmented_name = augmented_name.strip() | |
| league = session.query(League).filter_by(name=league_name).first() | |
| if league: | |
| augmented_league = get_or_create(session, Augmented_League, league_id=league.id, | |
| augmented_name=augmented_name) | |
| session.commit() | |
| session.close() | |
| if __name__ == "__main__": | |
| # Example directory path | |
| process_json_files('../data/Dataset/SoccerNet/') | |
| fill_Augmented_Team('../data/Dataset/augmented_teams.csv') | |
| fill_Augmented_League('../data/Dataset/augmented_leagues.csv') | |
| # Rename the event/annotation table to something more descriptive. Events are fucking everything else over | |