Spaces:
Sleeping
Sleeping
| from collections import Counter | |
| import numpy as np | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| from concurrent.futures import ThreadPoolExecutor | |
| import os | |
# Directories that hold the weekly training CSV files. The defaults keep the
# original hard-coded locations; set NFL_TRAIN_INPUT_DIR / NFL_TRAIN_OUTPUT_DIR
# in the environment to run against data stored elsewhere without editing
# this file.
TRAIN_INPUT_FILE_PATH = os.environ.get('NFL_TRAIN_INPUT_DIR', 'C:/Users/vigne/nfl/train_input')
TRAIN_OUTPUT_FILE_PATH = os.environ.get('NFL_TRAIN_OUTPUT_DIR', 'C:/Users/vigne/nfl/train_output')
def get_train_file_paths():
    """Return the paths of the weekly training input and output CSV files.

    For weeks 1 through 18 the input file ``input_2023_wXX.csv`` is looked up
    in ``TRAIN_INPUT_FILE_PATH``. The matching output file has the same name
    with ``input`` replaced by ``output`` and is placed under
    ``TRAIN_OUTPUT_FILE_PATH``. Only the input file's existence is checked
    here; a missing output file surfaces when it is read.

    Returns:
        tuple: ``(input_file_paths, output_file_paths)``, two lists ordered
        by week so that equal indices refer to the same week.

    Raises:
        FileNotFoundError: if the input file of any week does not exist.
    """
    input_file_paths = []
    output_file_paths = []
    for week in range(1, 19):
        input_filename = f'input_2023_w{week:02d}.csv'
        # Build the path once so the existence check and the returned path
        # are guaranteed to refer to the same file.
        input_file_path = os.path.join(TRAIN_INPUT_FILE_PATH, input_filename)
        if not os.path.isfile(input_file_path):
            raise FileNotFoundError(f'input file for week {week} does not exist')
        output_filename = input_filename.replace('input', 'output')
        input_file_paths.append(input_file_path)
        output_file_paths.append(os.path.join(TRAIN_OUTPUT_FILE_PATH, output_filename))
    return (input_file_paths, output_file_paths)
def load_file(file_path):
    """Read the CSV file at ``file_path`` and return it as a DataFrame."""
    frame = pd.read_csv(file_path)
    return frame
def get_input_output_df():
    """Load every weekly training CSV and stack them into two DataFrames.

    Returns:
        tuple: ``(input_df, output_df)`` - the row-wise concatenation of all
        input files and of all output files, each re-indexed from 0.
    """
    def read_and_stack(paths):
        # Files are read on a pool of 8 threads; map() hands the frames back
        # in the order of ``paths``, so the stacked rows follow week order.
        with ThreadPoolExecutor(max_workers=8) as pool:
            weekly_frames = pool.map(load_file, paths)
            return pd.concat(weekly_frames, axis=0)

    train_input_paths, train_output_paths = get_train_file_paths()
    stacked_inputs = read_and_stack(train_input_paths)
    stacked_outputs = read_and_stack(train_output_paths)
    return stacked_inputs.reset_index(drop=True), stacked_outputs.reset_index(drop=True)
def plot_distribution_of_features(input_df, output_df):
    """Plot summary distributions for the players whose movement is predicted.

    For every ``(game_id, play_id)`` play in ``input_df`` the rows flagged by
    ``player_to_predict`` are selected. Each such player's last input row is
    used as the starting point, and the player's rows in ``output_df`` are
    walked in their stored order to measure frame-to-frame movement.

    Bar charts are shown for the counts of: player position, player role,
    player side, number of players to predict per play and number of frames
    to predict per play. Density plots are shown for: total distance per
    player per play, per-frame distance, and per-frame absolute change along
    x and along y.

    Args:
        input_df: DataFrame with at least ``game_id``, ``play_id``,
            ``nfl_id``, ``player_to_predict`` (used as a boolean mask),
            ``num_frames_output``, ``x``, ``y``, ``player_position``,
            ``player_role`` and ``player_side``.
        output_df: DataFrame with at least ``game_id``, ``play_id``,
            ``nfl_id``, ``x`` and ``y``.

    Returns:
        None. The figures are displayed with ``plt.show()``.
    """
    key_cols = ['game_id', 'play_id', 'nfl_id']

    predict_players_position = Counter()
    predict_players_role = Counter()
    predict_players_side = Counter()
    num_frames_to_predict = Counter()
    no_players_prediction_in_a_play = Counter()

    per_play_change_in_dists = []
    per_frame_change_in_dists = []
    per_frame_change_in_x_dists = []
    per_frame_change_in_y_dists = []

    # Index the output rows once per (game, play, player). The previous
    # implementation re-scanned the whole of output_df with boolean masks for
    # every player of every play. Row order inside a group is the order of
    # appearance in output_df, exactly as the mask-based selection produced.
    output_row_positions = output_df.groupby(key_cols, sort=False).indices
    output_xy = output_df[['x', 'y']].to_numpy(dtype=float)
    no_rows = np.empty((0, 2), dtype=float)

    for _, play in input_df.groupby(['game_id', 'play_id'], as_index=False):
        predict_players = play[play['player_to_predict']]
        no_players_prediction_in_a_play[predict_players['nfl_id'].nunique()] += 1
        num_frames_output = play['num_frames_output'].iloc[0].item()
        num_frames_to_predict[num_frames_output] += 1

        # last() keeps, per column, the last non-null value of each player.
        predict_players_last_frame = predict_players.groupby(['nfl_id'], as_index=False).last()
        for _, p_l in predict_players_last_frame.iterrows():
            key = (p_l['game_id'], p_l['play_id'], p_l['nfl_id'])
            positions = output_row_positions.get(key)
            path = output_xy[positions] if positions is not None else no_rows

            # Prepend the last input position so the first difference is the
            # move from the last observed frame to the first output frame.
            start = np.array([[p_l['x'], p_l['y']]], dtype=float)
            steps = np.diff(np.vstack([start, path]), axis=0)
            step_dists = np.linalg.norm(steps, axis=1)

            per_frame_change_in_dists.extend(step_dists.tolist())
            per_frame_change_in_x_dists.extend(np.abs(steps[:, 0]).tolist())
            per_frame_change_in_y_dists.extend(np.abs(steps[:, 1]).tolist())
            per_play_change_in_dists.append(float(step_dists.sum()))

            predict_players_position[p_l['player_position']] += 1
            predict_players_role[p_l['player_role']] += 1
            predict_players_side[p_l['player_side']] += 1

    def plot_bargraph(dict_items, name):
        """Show a bar chart of ``(value, count)`` pairs, largest count first."""
        plt.figure(figsize=(10, 6))
        df = pd.DataFrame(list(dict_items), columns=[name, 'count'])
        df = df.sort_values(by='count', ascending=False)
        sns.barplot(data=df, x=name, y='count')
        plt.show()

    plot_bargraph(predict_players_position.items(), 'predict_player position')
    plot_bargraph(predict_players_role.items(), 'predict_player role')
    plot_bargraph(predict_players_side.items(), 'predict_player side')
    plot_bargraph(no_players_prediction_in_a_play.items(), 'num of player to predict in a play')
    plot_bargraph(num_frames_to_predict.items(), 'num of frames to predict in a play')

    def plot_density_plot(data, title, x):
        """Show a filled kernel-density plot of ``data``."""
        plt.figure(figsize=(10, 6))
        sns.kdeplot(data, fill=True, color="dodgerblue")
        plt.title(title)
        plt.xlabel(x)
        plt.ylabel('Density')
        plt.show()

    plot_density_plot(per_play_change_in_dists, 'total distance moved by a player in a play', 'distance')
    plot_density_plot(per_frame_change_in_dists, 'distance moved by a player per frame', 'distance')
    plot_density_plot(per_frame_change_in_x_dists, 'distance moved by a player along x per frame', 'distance')
    plot_density_plot(per_frame_change_in_y_dists, 'distance moved by a player along y per frame', 'distance')
def get_last_frame(df):
    """Collapse tracking rows to one row per ``(game_id, play_id, nfl_id)``.

    Rows are ordered by ``frame_id`` within each player. Every kinematic
    column (``x``, ``y``, ``o``, ``dir``, ``s``, ``a``) gets a ``<col>_prev``
    companion holding the value from the player's preceding row. The final
    ``x`` and ``y`` are returned under the names ``x_last`` and ``y_last``.

    Args:
        df: DataFrame with the key columns, ``frame_id`` and the six
            kinematic columns. It is not modified.

    Returns:
        DataFrame with one row per player per play.
    """
    keys = ['game_id', 'play_id', 'nfl_id']
    kinematics = ['x', 'y', 'o', 'dir', 's', 'a']

    ordered = df.sort_values(keys + ['frame_id']).reset_index(drop=True)

    # Shift inside each player's group so the first frame has no predecessor.
    lagged = ordered.groupby(keys)[kinematics].shift(1)
    for column in kinematics:
        ordered[f'{column}_prev'] = lagged[column]

    # GroupBy.last() returns the last non-null entry of each column, so a
    # feature missing on the final frame falls back to the most recent frame
    # that has it.
    final_rows = ordered.groupby(keys, as_index=False).last()
    return final_rows.rename(columns={'x': 'x_last', 'y': 'y_last'})
def predict_physics_baseline(input_df, output_df):
    """Score a simple physics projection against the true output positions.

    Each player's last input frame (from ``get_last_frame``) supplies the
    starting position ``(x_last, y_last)``, the speed ``s``, the acceleration
    ``a`` and the heading ``dir`` in degrees. Every output frame, taken in
    ``frame_id`` order, advances the projection by one fixed step of
    ``s*dt + 0.5*a*dt**2`` along the heading, with ``sin(dir)`` applied to x
    and ``cos(dir)`` applied to y. Speed, acceleration and heading stay at
    their last-input values for the whole projection.

    The projection starts from the last *input* position. It previously
    started from the first ground-truth output row, which leaked the target
    into the prediction and shifted every projected point by one frame.

    Args:
        input_df: tracking rows accepted by ``get_last_frame``.
        output_df: rows with ``game_id``, ``play_id``, ``nfl_id``,
            ``frame_id`` and the true ``x`` / ``y``. Neither frame is
            modified.

    Returns:
        float: ``sqrt(sum((x - px)**2 + (y - py)**2) / (2 * n_rows))``, which
        is also printed. ``nan`` when ``output_df`` has no rows, or when any
        output row has no matching input player (missing values propagate).
    """
    # NOTE(review): presumably seconds per frame (10 Hz tracking) - confirm.
    dt = 0.1
    keys = ['game_id', 'play_id', 'nfl_id']

    last_frame = get_last_frame(input_df)[keys + ['x_last', 'y_last', 'dir', 's', 'a']]
    merged = output_df.merge(last_frame, on=keys, how='left')
    merged = merged.sort_values(keys + ['frame_id'])

    # 1-based position of each output frame within its player's sequence,
    # i.e. how many fixed steps separate it from the last input frame.
    steps_ahead = merged.groupby(keys).cumcount() + 1

    heading = np.deg2rad(merged['dir'])
    step_length = merged['s'] * dt + 0.5 * merged['a'] * dt ** 2
    proj_x = merged['x_last'] + steps_ahead * step_length * np.sin(heading)
    proj_y = merged['y_last'] + steps_ahead * step_length * np.cos(heading)

    squared_error = (merged['x'] - proj_x) ** 2 + (merged['y'] - proj_y) ** 2
    num_ele = len(merged) * 2
    if num_ele:
        # skipna=False keeps a missing projection visible as a nan score
        # instead of silently dropping the row from the sum.
        rmse = float(np.sqrt(squared_error.sum(skipna=False) / num_ele))
    else:
        rmse = float('nan')
    print(f'RMSE of the simple physics based model is {rmse}')
    return rmse
# Load and stack all weekly training inputs and outputs when the module runs.
input_df, output_df = get_input_output_df()
# Abbreviation --> full name for every player position, printed for reference.
POSITION_MAPPING = [
    "FS --> Free Safety",
    "SS --> Strong Safety",
    "CB --> Cornerback",
    "MLB --> Middle Linebacker",
    "WR --> Wide Receiver",
    "TE --> Tight End",
    "QB --> Quarterback",
    "OLB --> Outside Linebacker",
    "ILB --> Inside Linebacker",
    "RB --> Running Back",
    "DE --> Defensive End",
    "FB --> Fullback",
    "NT --> Nose Tackle",
    "DT --> Defensive Tackle",
    "S --> Safety",
    "T --> Tackle",
    "LB --> Linebacker",
    "P --> Punter",
    "K --> Kicker",
]
# The commas matter: adjacent string literals without them are concatenated
# into a single element.
PLAYER_ROLES = ['Defensive Coverage', 'Other Route Runner', 'Passer', 'Targeted Receiver']
PLAYER_SIDES = ['Defense', 'Offense']
print(f'player positions are {POSITION_MAPPING}')
print(f'player roles are {PLAYER_ROLES}')
print(f'player sides are {PLAYER_SIDES}')
# Exploratory plots over the first 100,000 input rows; left disabled.
# plot_distribution_of_features(input_df[:1_000_00], output_df)
# Print the RMSE of the physics baseline over the full loaded training data.
predict_physics_baseline(input_df, output_df)