import streamlit as st
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

st.set_page_config(page_title="IPL Winner Predictor", page_icon="🏏", layout="centered")

st.title("🏏 IPL Winner Predictor")
st.write("Improved IPL winner predictor using team history features.")

# Columns fed to the model, in the order used for both training and prediction.
FEATURE_COLUMNS = [
    "team1_encoded",
    "team2_encoded",
    "toss_winner_encoded",
    "toss_decision_encoded",
    "venue_encoded",
    "city_encoded",
    "team1_win_rate",
    "team2_win_rate",
    "team1_venue_win_rate",
    "team2_venue_win_rate",
    "head_to_head_team1",
    "team1_toss_win",
    "toss_bat",
]

# Every column that holds a team name; they all share a single label encoder.
TEAM_COLUMNS = ["team1", "team2", "toss_winner", "winner"]


@st.cache_data
def load_data():
    """Load ``matches.csv`` and return the cleaned match table.

    Keeps only the columns the app uses, drops rows with any missing value,
    maps old franchise names to their replacement names, and removes every
    match that involves a team listed as inactive.

    Raises:
        FileNotFoundError: if ``matches.csv`` is not in the working directory.
    """
    df = pd.read_csv("matches.csv")
    df = df[["team1", "team2", "toss_winner", "toss_decision", "venue", "city", "winner"]]
    df = df.dropna()

    replace_names = {
        "Delhi Daredevils": "Delhi Capitals",
        "Deccan Chargers": "Sunrisers Hyderabad",
        "Kings XI Punjab": "Punjab Kings",
        "Rising Pune Supergiant": "Rising Pune Supergiants",
    }
    for col in TEAM_COLUMNS:
        df[col] = df[col].replace(replace_names)

    inactive_teams = [
        "Gujarat Lions",
        "Rising Pune Supergiants",
        "Kochi Tuskers Kerala",
        "Pune Warriors",
    ]
    involves_inactive = (
        df["team1"].isin(inactive_teams)
        | df["team2"].isin(inactive_teams)
        | df["winner"].isin(inactive_teams)
    )
    return df[~involves_inactive]


def team_win_rate(df, team):
    """Return the share of ``team``'s matches in ``df`` that it won (0 if none)."""
    played = int(((df["team1"] == team) | (df["team2"] == team)).sum())
    won = int((df["winner"] == team).sum())
    return won / played if played > 0 else 0


def venue_win_rate(df, team, venue):
    """Return the share of ``team``'s matches at ``venue`` that it won (0 if none)."""
    at_venue = df["venue"] == venue
    played = int((((df["team1"] == team) | (df["team2"] == team)) & at_venue).sum())
    won = int(((df["winner"] == team) & at_venue).sum())
    return won / played if played > 0 else 0


def head_to_head_rate(df, team1, team2):
    """Return the share of ``team1`` vs ``team2`` matches won by ``team1`` (0 if none)."""
    met = (
        ((df["team1"] == team1) & (df["team2"] == team2))
        | ((df["team1"] == team2) & (df["team2"] == team1))
    )
    played = int(met.sum())
    won = int((met & (df["winner"] == team1)).sum())
    return won / played if played > 0 else 0


def _fit_encoders(df):
    """Fit the label encoders on ``df`` and return them keyed by column name.

    All team-valued columns share ONE encoder fitted on the union of their
    values, so any team that appears anywhere in the data can be encoded in
    any team column. Separate per-column encoders raise ``ValueError`` for a
    team that is missing from one particular column.
    """
    team_encoder = LabelEncoder().fit(pd.concat([df[col] for col in TEAM_COLUMNS]))
    encoders = {col: team_encoder for col in TEAM_COLUMNS}
    for col in ("toss_decision", "venue", "city"):
        encoders[col] = LabelEncoder().fit(df[col])
    return encoders


def _build_features(rows, history, encoders):
    """Build the model feature frame for ``rows``.

    Args:
        rows: matches to featurize; needs team1, team2, toss_winner,
            toss_decision, venue and city columns.
        history: matches used to compute win rates. Passing a different frame
            than ``rows`` keeps the outcomes of ``rows`` out of their own
            features.
        encoders: dict returned by ``_fit_encoders``.

    Returns:
        DataFrame indexed like ``rows`` with the ``FEATURE_COLUMNS`` columns.
    """
    team1 = rows["team1"].tolist()
    team2 = rows["team2"].tolist()
    venue = rows["venue"].tolist()

    # Compute each rate once per distinct key rather than once per row; every
    # rate call scans ``history``, so per-row calls are quadratic in match count.
    overall = {t: team_win_rate(history, t) for t in set(team1) | set(team2)}
    at_venue = {
        key: venue_win_rate(history, *key)
        for key in set(zip(team1, venue)) | set(zip(team2, venue))
    }
    versus = {key: head_to_head_rate(history, *key) for key in set(zip(team1, team2))}

    features = pd.DataFrame(
        {
            "team1_encoded": encoders["team1"].transform(rows["team1"]),
            "team2_encoded": encoders["team2"].transform(rows["team2"]),
            "toss_winner_encoded": encoders["toss_winner"].transform(rows["toss_winner"]),
            "toss_decision_encoded": encoders["toss_decision"].transform(rows["toss_decision"]),
            "venue_encoded": encoders["venue"].transform(rows["venue"]),
            "city_encoded": encoders["city"].transform(rows["city"]),
            "team1_win_rate": [overall[t] for t in team1],
            "team2_win_rate": [overall[t] for t in team2],
            "team1_venue_win_rate": [at_venue[key] for key in zip(team1, venue)],
            "team2_venue_win_rate": [at_venue[key] for key in zip(team2, venue)],
            "head_to_head_team1": [versus[key] for key in zip(team1, team2)],
            "team1_toss_win": (rows["team1"] == rows["toss_winner"]).astype(int).tolist(),
            "toss_bat": (rows["toss_decision"] == "bat").astype(int).tolist(),
        },
        index=rows.index,
    )
    return features[FEATURE_COLUMNS]


def _pick_winner(model, winner_encoder, features, team1, team2):
    """Return whichever of ``team1``/``team2`` the model rates as more likely.

    The classifier is multi-class over every team seen as a winner, so its
    top class can be a team that is not playing. Only the probabilities of
    the two selected teams are compared; a team the model never saw as a
    winner counts as probability 0. Ties go to the higher overall win rate,
    then to ``team1``.
    """
    probabilities = dict(zip(model.classes_, model.predict_proba(features)[0]))
    code1, code2 = winner_encoder.transform([team1, team2])
    stats = features.iloc[0]
    score1 = (probabilities.get(code1, 0.0), stats["team1_win_rate"])
    score2 = (probabilities.get(code2, 0.0), stats["team2_win_rate"])
    return team1 if score1 >= score2 else team2


@st.cache_resource
def train_model(df):
    """Train the random-forest winner classifier on ``df``.

    The rows are split 80/20 before feature building, and the historical
    win-rate features of both parts are computed from the training rows only,
    so the reported accuracy is not inflated by held-out match outcomes.

    Returns:
        ``(model, encoders, accuracy)``: the fitted classifier, the label
        encoders keyed by column name, and the accuracy on the held-out rows.
    """
    encoders = _fit_encoders(df)

    train_rows, test_rows = train_test_split(df, test_size=0.2, random_state=42)

    X_train = _build_features(train_rows, train_rows, encoders)
    X_test = _build_features(test_rows, train_rows, encoders)
    y_train = encoders["winner"].transform(train_rows["winner"])
    y_test = encoders["winner"].transform(test_rows["winner"])

    model = RandomForestClassifier(
        n_estimators=700,
        max_depth=15,
        min_samples_split=4,
        min_samples_leaf=2,
        random_state=42,
    )
    model.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, model.predict(X_test))
    return model, encoders, accuracy


try:
    df = load_data()
    model, encoders, accuracy = train_model(df)

    st.success("Dataset loaded and improved model trained successfully!")
    st.info(f"Model Accuracy: {accuracy * 100:.2f}%")

    # Use both team columns so a team that only ever appears as team2 is offered.
    teams = sorted(set(df["team1"]) | set(df["team2"]))
    venues = sorted(df["venue"].unique())
    cities = sorted(df["city"].unique())
    toss_decisions = sorted(df["toss_decision"].unique())

    st.subheader("Enter Match Details")

    team1 = st.selectbox("Select Team 1", teams)
    team2 = st.selectbox("Select Team 2", [team for team in teams if team != team1])
    toss_winner = st.selectbox("Select Toss Winner", [team1, team2])
    toss_decision = st.selectbox("Select Toss Decision", toss_decisions)
    venue = st.selectbox("Select Venue", venues)
    city = st.selectbox("Select City", cities)

    if st.button("Predict Winner"):
        match = pd.DataFrame(
            [
                {
                    "team1": team1,
                    "team2": team2,
                    "toss_winner": toss_winner,
                    "toss_decision": toss_decision,
                    "venue": venue,
                    "city": city,
                }
            ]
        )
        # Same feature builder as training, so column names and order match
        # what the model was fitted on; the full dataset is the history here.
        input_data = _build_features(match, df, encoders)
        stats = input_data.iloc[0]
        team1_wr = stats["team1_win_rate"]
        team2_wr = stats["team2_win_rate"]
        team1_vwr = stats["team1_venue_win_rate"]
        team2_vwr = stats["team2_venue_win_rate"]
        h2h = stats["head_to_head_team1"]

        predicted_winner = _pick_winner(model, encoders["winner"], input_data, team1, team2)

        st.success(f"🏆 Predicted Winner: {predicted_winner}")

        st.write("### Match Stats Used")
        st.write(f"{team1} win rate: {team1_wr:.2f}")
        st.write(f"{team2} win rate: {team2_wr:.2f}")
        st.write(f"{team1} venue win rate: {team1_vwr:.2f}")
        st.write(f"{team2} venue win rate: {team2_vwr:.2f}")
        st.write(f"Head-to-head rate for {team1}: {h2h:.2f}")

    st.subheader("Top Winning Teams")
    st.bar_chart(df["winner"].value_counts())

except FileNotFoundError:
    st.error("matches.csv file not found. Please upload matches.csv in the same repo as app.py.")
except Exception as e:
    # Top-level UI boundary: show the failure on the page rather than crash.
    st.error("Something went wrong.")
    st.write(e)