sivapriya175
committed on
Commit
·
9f8cfb6
1
Parent(s):
e11191e
deploy backend files
Browse files
- models/train_model.py +22 -4
models/train_model.py
CHANGED
|
@@ -6,7 +6,7 @@ from sklearn.model_selection import train_test_split
|
|
| 6 |
from sklearn.metrics import mean_squared_error, accuracy_score, r2_score
|
| 7 |
from sklearn.preprocessing import StandardScaler
|
| 8 |
|
| 9 |
-
# Load datasets
|
| 10 |
ball_df = pd.read_csv('data/cleaned_ball_data.csv')
|
| 11 |
match_df = pd.read_csv('data/cleaned_match_data.csv')
|
| 12 |
|
|
@@ -14,7 +14,25 @@ match_df = pd.read_csv('data/cleaned_match_data.csv')
|
|
| 14 |
match_df['date'] = pd.to_datetime(match_df['date'], errors='coerce')
|
| 15 |
ball_df['start_date'] = pd.to_datetime(ball_df['start_date'], errors='coerce')
|
| 16 |
|
| 17 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def train_player_score_model():
|
| 19 |
player_runs = ball_df.groupby(['match_id', 'striker'])['runs_off_bat'].sum().reset_index()
|
| 20 |
player_runs.rename(columns={'runs_off_bat': 'player_total'}, inplace=True)
|
|
@@ -41,7 +59,7 @@ def train_player_score_model():
|
|
| 41 |
|
| 42 |
return model, scaler
|
| 43 |
|
| 44 |
-
# Train Team Performance Model
|
| 45 |
def train_team_performance_model():
|
| 46 |
data = match_df[['team1', 'team2', 'winner', 'team1_total', 'team2_total', 'venue', 'city', 'toss_winner', 'toss_decision']].dropna()
|
| 47 |
data['team1_index'] = data['team1'].astype('category').cat.codes
|
|
@@ -64,6 +82,6 @@ def train_team_performance_model():
|
|
| 64 |
|
| 65 |
return win_model, score_model
|
| 66 |
|
| 67 |
-
# Train
|
| 68 |
player_score_model, player_scaler = train_player_score_model()
|
| 69 |
team_win_model, team_score_model = train_team_performance_model()
|
|
|
|
| 6 |
from sklearn.metrics import mean_squared_error, accuracy_score, r2_score
|
| 7 |
from sklearn.preprocessing import StandardScaler
|
| 8 |
|
| 9 |
+
# 🔹 Load datasets
|
| 10 |
ball_df = pd.read_csv('data/cleaned_ball_data.csv')
|
| 11 |
match_df = pd.read_csv('data/cleaned_match_data.csv')
|
| 12 |
|
|
|
|
| 14 |
match_df['date'] = pd.to_datetime(match_df['date'], errors='coerce')
|
| 15 |
ball_df['start_date'] = pd.to_datetime(ball_df['start_date'], errors='coerce')
|
| 16 |
|
| 17 |
+
# 🔹 Compute team total scores and merge correctly
|
| 18 |
+
team_scores = ball_df.groupby(['match_id', 'batting_team'])['total_runs'].sum().reset_index()
|
| 19 |
+
team_scores.rename(columns={'total_runs': 'team_total'}, inplace=True)
|
| 20 |
+
|
| 21 |
+
# Merge team scores with match_df
|
| 22 |
+
match_df = match_df.merge(team_scores, left_on=['id', 'team1'], right_on=['match_id', 'batting_team'], how='left')
|
| 23 |
+
match_df.rename(columns={'team_total': 'team1_total'}, inplace=True)
|
| 24 |
+
|
| 25 |
+
match_df = match_df.merge(team_scores, left_on=['id', 'team2'], right_on=['match_id', 'batting_team'], how='left')
|
| 26 |
+
match_df.rename(columns={'team_total': 'team2_total'}, inplace=True)
|
| 27 |
+
|
| 28 |
+
# Fill missing team totals with 0 so those matches are not removed by the later dropna()
|
| 29 |
+
match_df['team1_total'] = match_df['team1_total'].fillna(0)
|
| 30 |
+
match_df['team2_total'] = match_df['team2_total'].fillna(0)
|
| 31 |
+
|
| 32 |
+
# Drop unnecessary columns
|
| 33 |
+
match_df.drop(columns=['batting_team', 'match_id'], errors='ignore', inplace=True)
|
| 34 |
+
|
| 35 |
+
# 🔹 Train Player Score Model
|
| 36 |
def train_player_score_model():
|
| 37 |
player_runs = ball_df.groupby(['match_id', 'striker'])['runs_off_bat'].sum().reset_index()
|
| 38 |
player_runs.rename(columns={'runs_off_bat': 'player_total'}, inplace=True)
|
|
|
|
| 59 |
|
| 60 |
return model, scaler
|
| 61 |
|
| 62 |
+
# 🔹 Train Team Performance Model
|
| 63 |
def train_team_performance_model():
|
| 64 |
data = match_df[['team1', 'team2', 'winner', 'team1_total', 'team2_total', 'venue', 'city', 'toss_winner', 'toss_decision']].dropna()
|
| 65 |
data['team1_index'] = data['team1'].astype('category').cat.codes
|
|
|
|
| 82 |
|
| 83 |
return win_model, score_model
|
| 84 |
|
| 85 |
+
# 🔹 Train models dynamically
|
| 86 |
player_score_model, player_scaler = train_player_score_model()
|
| 87 |
team_win_model, team_score_model = train_team_performance_model()
|