import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# Configure the browser tab (title, icon) and use the centered page layout.
st.set_page_config(page_title="IPL Winner Predictor", page_icon="🏏", layout="centered")

# Static page header shown above the form.
st.title("🏏 IPL Winner Predictor")
st.write("Improved IPL winner predictor using team history features.")


@st.cache_data
def load_data():
    """Load ``matches.csv`` and return the cleaned match table.

    The frame is reduced to the columns the app uses, rows with a missing
    value in any of those columns are dropped, team names are normalised
    through a fixed rename map, and matches involving a team listed as
    inactive are removed.

    Returns:
        A DataFrame with the columns ``team1``, ``team2``, ``toss_winner``,
        ``toss_decision``, ``venue``, ``city`` and ``winner``.

    Raises:
        FileNotFoundError: If ``matches.csv`` is not in the working directory.
        KeyError: If the CSV lacks one of the required columns.
    """
    required_columns = [
        "team1", "team2", "toss_winner", "toss_decision", "venue", "city", "winner",
    ]
    # Old spelling -> name used everywhere else in the app.
    replace_names = {
        "Delhi Daredevils": "Delhi Capitals",
        "Deccan Chargers": "Sunrisers Hyderabad",
        "Kings XI Punjab": "Punjab Kings",
        "Rising Pune Supergiant": "Rising Pune Supergiants",
    }
    inactive_teams = [
        "Gujarat Lions",
        "Rising Pune Supergiants",
        "Kochi Tuskers Kerala",
        "Pune Warriors",
    ]

    df = pd.read_csv("matches.csv")
    df = df[required_columns].dropna()

    # Rename before filtering so both spellings of a team are caught below.
    for col in ["team1", "team2", "toss_winner", "winner"]:
        df[col] = df[col].replace(replace_names)

    involves_inactive = (
        df["team1"].isin(inactive_teams)
        | df["team2"].isin(inactive_teams)
        | df["winner"].isin(inactive_teams)
    )
    return df[~involves_inactive]


def team_win_rate(df, team):
    """Return the share of *team*'s matches in *df* that *team* won.

    Args:
        df: Match table with ``team1``, ``team2`` and ``winner`` columns.
        team: Team name to look up.

    Returns:
        ``wins / matches_played`` as a float, or ``0.0`` when *team* does not
        appear in ``team1`` or ``team2`` at all.
    """
    played = int(((df["team1"] == team) | (df["team2"] == team)).sum())
    if played == 0:
        return 0.0
    won = int((df["winner"] == team).sum())
    return won / played


def venue_win_rate(df, team, venue):
    """Return the share of *team*'s matches at *venue* that *team* won.

    Args:
        df: Match table with ``team1``, ``team2``, ``winner`` and ``venue``
            columns.
        team: Team name to look up.
        venue: Venue name to restrict the matches to.

    Returns:
        ``wins / matches_played`` at that venue as a float, or ``0.0`` when
        *team* has no match at *venue* in *df*.
    """
    at_venue = df["venue"] == venue
    took_part = (df["team1"] == team) | (df["team2"] == team)
    played = int((took_part & at_venue).sum())
    if played == 0:
        return 0.0
    won = int(((df["winner"] == team) & at_venue).sum())
    return won / played


def head_to_head_rate(df, team1, team2):
    """Return the share of *team1* vs *team2* matches that *team1* won.

    Matches are counted regardless of which side is listed in the ``team1``
    or ``team2`` column.

    Args:
        df: Match table with ``team1``, ``team2`` and ``winner`` columns.
        team1: Team whose win share is returned.
        team2: The opponent.

    Returns:
        ``team1 wins / meetings`` as a float, or ``0.0`` when the two teams
        never meet in *df*.
    """
    listed_first, listed_second = df["team1"], df["team2"]
    met = (
        ((listed_first == team1) & (listed_second == team2))
        | ((listed_first == team2) & (listed_second == team1))
    )
    meetings = int(met.sum())
    if meetings == 0:
        # NOTE(review): "no history" is reported the same as "team1 never won";
        # callers that need a neutral value have to special-case this.
        return 0.0
    team1_wins = int((met & (df["winner"] == team1)).sum())
    return team1_wins / meetings


@st.cache_resource
def train_model(df):
    """Build the feature table from *df* and fit the random-forest classifier.

    Features are the label-encoded categorical columns plus historical rates
    (overall win rate, win rate at the venue, head-to-head rate) and two toss
    flags. The target is the label-encoded ``winner`` column.

    Args:
        df: Cleaned match table with ``team1``, ``team2``, ``toss_winner``,
            ``toss_decision``, ``venue``, ``city`` and ``winner`` columns.
            It is copied, not modified.

    Returns:
        A ``(model, encoders, accuracy)`` tuple: the fitted
        ``RandomForestClassifier``, a dict mapping each categorical column
        name to its fitted ``LabelEncoder``, and the accuracy on the 20 %
        hold-out split.
    """
    df = df.copy()

    # Each rate depends only on its key (team, team+venue, team pair), so it is
    # computed once per distinct key instead of re-scanning the frame per row.
    all_teams = set(df["team1"]) | set(df["team2"])
    overall_rate = {team: team_win_rate(df, team) for team in all_teams}
    df["team1_win_rate"] = df["team1"].map(overall_rate)
    df["team2_win_rate"] = df["team2"].map(overall_rate)

    team1_at_venue = list(zip(df["team1"], df["venue"]))
    team2_at_venue = list(zip(df["team2"], df["venue"]))
    venue_rate = {
        (team, venue): venue_win_rate(df, team, venue)
        for team, venue in set(team1_at_venue) | set(team2_at_venue)
    }
    df["team1_venue_win_rate"] = [venue_rate[key] for key in team1_at_venue]
    df["team2_venue_win_rate"] = [venue_rate[key] for key in team2_at_venue]

    fixtures = list(zip(df["team1"], df["team2"]))
    fixture_rate = {
        (first, second): head_to_head_rate(df, first, second)
        for first, second in set(fixtures)
    }
    df["head_to_head_team1"] = [fixture_rate[key] for key in fixtures]

    df["team1_toss_win"] = (df["team1"] == df["toss_winner"]).astype(int)
    df["toss_bat"] = (df["toss_decision"] == "bat").astype(int)

    # One independent encoder per categorical column; callers look them up by
    # column name to encode user input and decode predictions.
    encoders = {}
    for column in (
        "team1", "team2", "toss_winner", "toss_decision", "venue", "city", "winner",
    ):
        encoder = LabelEncoder()
        df[f"{column}_encoded"] = encoder.fit_transform(df[column])
        encoders[column] = encoder

    feature_columns = [
        "team1_encoded",
        "team2_encoded",
        "toss_winner_encoded",
        "toss_decision_encoded",
        "venue_encoded",
        "city_encoded",
        "team1_win_rate",
        "team2_win_rate",
        "team1_venue_win_rate",
        "team2_venue_win_rate",
        "head_to_head_team1",
        "team1_toss_win",
        "toss_bat",
    ]
    X = df[feature_columns]
    y = df["winner_encoded"]

    # NOTE(review): the rate features above are computed from the whole table,
    # including the rows held out here (and each row's own result), so the
    # accuracy returned below is optimistic.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestClassifier(
        n_estimators=700,
        max_depth=15,
        min_samples_split=4,
        min_samples_leaf=2,
        random_state=42,
    )
    model.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, model.predict(X_test))

    return model, encoders, accuracy


# Script body: load data, train (both cached), render the form, and predict.
try:
    df = load_data()
    model, encoders, accuracy = train_model(df)

    st.success("Dataset loaded and improved model trained successfully!")
    st.info(f"Model Accuracy: {accuracy * 100:.2f}%")

    # Offer only teams that every team encoder has seen, so the transform()
    # calls below cannot fail on an unseen label.
    teams = sorted(
        str(team)
        for team in (
            set(encoders["team1"].classes_)
            & set(encoders["team2"].classes_)
            & set(encoders["toss_winner"].classes_)
        )
    )
    venues = sorted(df["venue"].unique())
    toss_decisions = sorted(df["toss_decision"].unique())

    st.subheader("Enter Match Details")

    team1 = st.selectbox("Select Team 1", teams)
    team2 = st.selectbox("Select Team 2", [team for team in teams if team != team1])
    toss_winner = st.selectbox("Select Toss Winner", [team1, team2])
    toss_decision = st.selectbox("Select Toss Decision", toss_decisions)
    venue = st.selectbox("Select Venue", venues)
    # Only cities recorded for the chosen venue, so the model is never fed a
    # venue/city combination that does not occur in the data.
    cities = sorted(df.loc[df["venue"] == venue, "city"].unique())
    city = st.selectbox("Select City", cities)

    if st.button("Predict Winner"):
        team1_num = encoders["team1"].transform([team1])[0]
        team2_num = encoders["team2"].transform([team2])[0]
        toss_winner_num = encoders["toss_winner"].transform([toss_winner])[0]
        toss_decision_num = encoders["toss_decision"].transform([toss_decision])[0]
        venue_num = encoders["venue"].transform([venue])[0]
        city_num = encoders["city"].transform([city])[0]

        team1_wr = team_win_rate(df, team1)
        team2_wr = team_win_rate(df, team2)
        team1_vwr = venue_win_rate(df, team1, venue)
        team2_vwr = venue_win_rate(df, team2, venue)
        h2h = head_to_head_rate(df, team1, team2)

        team1_toss_win = int(team1 == toss_winner)
        toss_bat = int(toss_decision == "bat")

        # Values are listed in the same order as the training feature columns.
        # Reusing the model's recorded column names keeps the input a named
        # DataFrame, matching how the model was fitted.
        input_data = pd.DataFrame(
            [[
                team1_num,
                team2_num,
                toss_winner_num,
                toss_decision_num,
                venue_num,
                city_num,
                team1_wr,
                team2_wr,
                team1_vwr,
                team2_vwr,
                h2h,
                team1_toss_win,
                toss_bat,
            ]],
            columns=getattr(model, "feature_names_in_", None),
        )

        # The classifier is multi-class over every team, so its top class may
        # be a team that is not playing. Compare only the two selected teams.
        probabilities = model.predict_proba(input_data)[0]
        class_names = encoders["winner"].inverse_transform(model.classes_)
        win_probability = dict(zip(class_names, probabilities))
        predicted_winner = max(
            (team1, team2), key=lambda team: win_probability.get(team, 0.0)
        )

        st.success(f"🏆 Predicted Winner: {predicted_winner}")

        st.write("### Match Stats Used")
        st.write(f"{team1} win rate: {team1_wr:.2f}")
        st.write(f"{team2} win rate: {team2_wr:.2f}")
        st.write(f"{team1} venue win rate: {team1_vwr:.2f}")
        st.write(f"{team2} venue win rate: {team2_vwr:.2f}")
        st.write(f"Head-to-head rate for {team1}: {h2h:.2f}")

    st.subheader("Top Winning Teams")
    st.bar_chart(df["winner"].value_counts())

except FileNotFoundError:
    st.error("matches.csv file not found. Please upload matches.csv in the same repo as app.py.")

except Exception as e:
    # Last-resort boundary: show the message instead of a raw crash page.
    st.error("Something went wrong.")
    st.write(e)