rohitbhatt-07's picture
Update app.py
848418f verified
import streamlit as st
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Configure the browser tab title/icon and use the centered page layout.
# NOTE(review): Streamlit has historically required set_page_config to run
# before other Streamlit commands — keep it first; confirm against the
# installed Streamlit version before reordering.
st.set_page_config(page_title="IPL Winner Predictor", page_icon="🏏", layout="centered")
# Page heading followed by a one-line description of the app.
st.title("🏏 IPL Winner Predictor")
st.write("Improved IPL winner predictor using team history features.")
@st.cache_data
def load_data():
    """Load ``matches.csv`` and return the cleaned match table.

    Keeps only the seven columns the app uses, drops rows with any missing
    value among them, maps historical franchise names onto the names used by
    the rest of the app, and removes every match in which ``team1``,
    ``team2`` or ``winner`` is one of the inactive franchises.

    Raises:
        FileNotFoundError: if ``matches.csv`` is not in the working directory.
    """
    wanted_columns = [
        "team1", "team2", "toss_winner", "toss_decision", "venue", "city", "winner",
    ]
    matches = pd.read_csv("matches.csv")[wanted_columns].dropna()

    # Old franchise name -> name it is folded into.
    legacy_names = {
        "Delhi Daredevils": "Delhi Capitals",
        "Deccan Chargers": "Sunrisers Hyderabad",
        "Kings XI Punjab": "Punjab Kings",
        "Rising Pune Supergiant": "Rising Pune Supergiants",
    }
    for team_column in ("team1", "team2", "toss_winner", "winner"):
        matches[team_column] = matches[team_column].replace(legacy_names)

    defunct = [
        "Gujarat Lions",
        "Rising Pune Supergiants",
        "Kochi Tuskers Kerala",
        "Pune Warriors",
    ]
    # A row is dropped as soon as any of the three columns names a defunct team.
    involves_defunct = matches[["team1", "team2", "winner"]].isin(defunct).any(axis=1)
    return matches[~involves_defunct]
def team_win_rate(df, team):
    """Return the share of ``team``'s matches in ``df`` that it won.

    A match counts as played when ``team`` appears in the ``team1`` or
    ``team2`` column; a win is any row whose ``winner`` equals ``team``.
    Returns ``0`` when the team has no matches in ``df``.
    """
    took_part = (df["team1"] == team) | (df["team2"] == team)
    played = int(took_part.sum())
    if played <= 0:
        return 0
    won = int((df["winner"] == team).sum())
    return won / played
def venue_win_rate(df, team, venue):
    """Return the share of ``team``'s matches at ``venue`` that it won.

    Only rows whose ``venue`` column equals ``venue`` are considered. Returns
    ``0`` when the team has no matches at that venue in ``df``.
    """
    at_venue = df["venue"] == venue
    took_part = (df["team1"] == team) | (df["team2"] == team)
    played = int((took_part & at_venue).sum())
    if played <= 0:
        return 0
    won = int(((df["winner"] == team) & at_venue).sum())
    return won / played
def head_to_head_rate(df, team1, team2):
    """Return the share of ``team1`` vs ``team2`` matches won by ``team1``.

    Matches are counted regardless of which side each team is listed on.
    Returns ``0`` when the two teams never meet in ``df``.
    """
    listed_in_order = (df["team1"] == team1) & (df["team2"] == team2)
    listed_swapped = (df["team1"] == team2) & (df["team2"] == team1)
    meeting_winners = df.loc[listed_in_order | listed_swapped, "winner"]

    meetings = len(meeting_winners)
    if meetings <= 0:
        return 0
    return int((meeting_winners == team1).sum()) / meetings
def _add_history_features(frame, history):
    """Return a copy of ``frame`` with win-rate features computed from ``history``.

    ``history`` is the only table whose match outcomes are consulted, so the
    rows of ``frame`` contribute their outcomes only when they are themselves
    part of ``history``.
    """
    frame = frame.copy()
    frame["team1_win_rate"] = frame["team1"].apply(
        lambda team: team_win_rate(history, team)
    )
    frame["team2_win_rate"] = frame["team2"].apply(
        lambda team: team_win_rate(history, team)
    )
    frame["team1_venue_win_rate"] = frame.apply(
        lambda row: venue_win_rate(history, row["team1"], row["venue"]), axis=1
    )
    frame["team2_venue_win_rate"] = frame.apply(
        lambda row: venue_win_rate(history, row["team2"], row["venue"]), axis=1
    )
    frame["head_to_head_team1"] = frame.apply(
        lambda row: head_to_head_rate(history, row["team1"], row["team2"]), axis=1
    )
    return frame


@st.cache_resource
def train_model(df):
    """Train the winner classifier and report its held-out accuracy.

    The match table is split into train/test rows *before* the historical
    win-rate features are derived, and those features are computed from the
    training rows only. Computing them over the whole table (as was done
    previously) puts every held-out match's own result into its features,
    which inflates the reported accuracy.

    Args:
        df: cleaned match table with the columns ``team1``, ``team2``,
            ``toss_winner``, ``toss_decision``, ``venue``, ``city`` and
            ``winner``.

    Returns:
        A ``(model, encoders, accuracy)`` tuple: the fitted
        ``RandomForestClassifier``, a dict mapping each categorical column
        name to its fitted ``LabelEncoder``, and the accuracy on the 20%
        held-out split.
    """
    df = df.copy()

    # Row-local features: they depend on nothing but the row itself.
    df["team1_toss_win"] = (df["team1"] == df["toss_winner"]).astype(int)
    df["toss_bat"] = (df["toss_decision"] == "bat").astype(int)

    # Encoders are fitted on every row so that any value present in the data
    # can be transformed later; label encoding does not look at match outcomes.
    encoders = {}
    for column in (
        "team1",
        "team2",
        "toss_winner",
        "toss_decision",
        "venue",
        "city",
        "winner",
    ):
        encoder = LabelEncoder()
        df[f"{column}_encoded"] = encoder.fit_transform(df[column])
        encoders[column] = encoder

    train_rows, test_rows = train_test_split(df, test_size=0.2, random_state=42)

    # History-based features use training outcomes only (see docstring).
    train_features = _add_history_features(train_rows, train_rows)
    test_features = _add_history_features(test_rows, train_rows)

    # Column order matters: callers build prediction inputs positionally.
    feature_columns = [
        "team1_encoded",
        "team2_encoded",
        "toss_winner_encoded",
        "toss_decision_encoded",
        "venue_encoded",
        "city_encoded",
        "team1_win_rate",
        "team2_win_rate",
        "team1_venue_win_rate",
        "team2_venue_win_rate",
        "head_to_head_team1",
        "team1_toss_win",
        "toss_bat",
    ]
    X_train = train_features[feature_columns]
    y_train = train_features["winner_encoded"]
    X_test = test_features[feature_columns]
    y_test = test_features["winner_encoded"]

    model = RandomForestClassifier(
        n_estimators=700,
        max_depth=15,
        min_samples_split=4,
        min_samples_leaf=2,
        random_state=42,
    )
    model.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, model.predict(X_test))
    return model, encoders, accuracy
# Main page flow: load data, train (cached), collect match details, predict.
try:
    df = load_data()
    model, encoders, accuracy = train_model(df)

    st.success("Dataset loaded and improved model trained successfully!")
    st.info(f"Model Accuracy: {accuracy * 100:.2f}%")

    # Sorted distinct values offered in each dropdown.
    choices = {
        column: sorted(df[column].unique())
        for column in ("team1", "venue", "city", "toss_decision")
    }

    st.subheader("Enter Match Details")
    team1 = st.selectbox("Select Team 1", choices["team1"])
    # Team 2 cannot be the same side as Team 1.
    opponents = [name for name in choices["team1"] if name != team1]
    team2 = st.selectbox("Select Team 2", opponents)
    toss_winner = st.selectbox("Select Toss Winner", [team1, team2])
    toss_decision = st.selectbox("Select Toss Decision", choices["toss_decision"])
    venue = st.selectbox("Select Venue", choices["venue"])
    city = st.selectbox("Select City", choices["city"])

    if st.button("Predict Winner"):
        # Encode the categorical selections in the order the model's first six
        # feature columns expect.
        picked = {
            "team1": team1,
            "team2": team2,
            "toss_winner": toss_winner,
            "toss_decision": toss_decision,
            "venue": venue,
            "city": city,
        }
        encoded_inputs = [
            encoders[field].transform([value])[0] for field, value in picked.items()
        ]

        team1_wr = team_win_rate(df, team1)
        team2_wr = team_win_rate(df, team2)
        team1_vwr = venue_win_rate(df, team1, venue)
        team2_vwr = venue_win_rate(df, team2, venue)
        h2h = head_to_head_rate(df, team1, team2)
        team1_toss_win = int(team1 == toss_winner)
        toss_bat = int(toss_decision == "bat")

        # Single-row feature matrix: encoded categories, then numeric features.
        input_data = [
            encoded_inputs
            + [team1_wr, team2_wr, team1_vwr, team2_vwr, h2h, team1_toss_win, toss_bat]
        ]
        prediction = model.predict(input_data)
        predicted_winner = encoders["winner"].inverse_transform(prediction)[0]

        st.success(f"🏆 Predicted Winner: {predicted_winner}")
        st.write("### Match Stats Used")
        st.write(f"{team1} win rate: {team1_wr:.2f}")
        st.write(f"{team2} win rate: {team2_wr:.2f}")
        st.write(f"{team1} venue win rate: {team1_vwr:.2f}")
        st.write(f"{team2} venue win rate: {team2_vwr:.2f}")
        st.write(f"Head-to-head rate for {team1}: {h2h:.2f}")

    # NOTE(review): the source's indentation was lost; this chart is assumed to
    # render on every run rather than only after a prediction — confirm.
    st.subheader("Top Winning Teams")
    st.bar_chart(df["winner"].value_counts())
except FileNotFoundError:
    st.error("matches.csv file not found. Please upload matches.csv in the same repo as app.py.")
except Exception as error:
    st.error("Something went wrong.")
    st.write(error)