import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from PIL import Image
import os
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
# from langchain_experimental.agents import create_pandas_dataframe_agent
import uuid
# Page setup: browser-tab title, wide layout, and the sidebar expanded on first load.
st.set_page_config(
    page_title="🏏 Ultimate Cricket Analytics",
    layout="wide",
    initial_sidebar_state="expanded",
)
# ---- Custom CSS for Styling ----
# NOTE(review): the markup payload visible here is just a newline, so nothing is
# actually styled -- presumably a stylesheet belongs in this string; confirm.
st.markdown("\n", unsafe_allow_html=True)
# ---- Sidebar ----
# Everything in this `with` block renders inside the sidebar container.
with st.sidebar:
    # Sidebar heading. The literal was split across physical lines (which does not
    # parse for a single-quoted string); it is rebuilt with explicit newlines so
    # the rendered text is unchanged.
    st.markdown("\nCricket Analytics Hub\n", unsafe_allow_html=True)
    # Page selector: `option` drives every page branch further down the script.
    # `format_func` only changes the displayed label; the returned value is the
    # plain page name.
    option = st.selectbox(
        "Choose Option",
        ["Main Page", "Team Info", "Team Stats Comparison", "Player Stats", "Player Comparison"],
        index=0,
        format_func=lambda x: f"🏏 {x}"
    )
# ---- Main Page ----
# Landing view: shown only while the sidebar selector is on "Main Page".
if option == "Main Page":
    # The banner text is kept character-for-character (including the leading and
    # trailing newline of the original triple-quoted literal).
    st.markdown(
        "\n"
        "🏏 Ultimate Cricket Analytics\n"
        "Unleash the Power of Cricket Data!\n"
        "Select an option from the sidebar to explore cricket insights! 📊🔥\n",
        unsafe_allow_html=True
    )
# Make sure the local "data" folder exists (no error if it is already there).
data_folder = "data"
os.makedirs(data_folder, exist_ok=True)

# Load every dataset from the working directory. The ".xls" files are parsed with
# read_csv, exactly like the ".csv" ones. Files are read in the same order as before.
odi_df, t20_df, test_df = (
    pd.read_csv(path) for path in ("odi.xls", "t20.xls", "test.xls")
)
test_teams_df, odi_teams_df, t20_teams_df = (
    pd.read_csv(path) for path in ("test-teams.xls", "odi-teams.xls", "t20-teams.xls")
)
batting_df, bowling_df = (
    pd.read_csv(path) for path in ("Batting.csv", "Bowling.csv")
)

# Load GenAI: the API key is read from Streamlit secrets under the key 'gai'
# (`.get` yields None when the secret is absent).
api_key = st.secrets.get('gai')
model = GoogleGenerativeAI(google_api_key=api_key, model="gemini-2.0-flash")
# Parser used as the last stage of each prompt | model | parser chain.
out_par = StrOutputParser()
# Sidebar UI based on selection
# Every selection variable starts as None so the page branches below can test
# them safely regardless of which page is active. `comparison_format` is
# initialised here too, for consistency with the others.
team_info = selected_teams_stats = selected_format = None
selected_team = selected_player = None
player1 = player2 = None
comparison_format = None

if option == "Team Info":
    team_info = st.sidebar.selectbox("Select Team", sorted(batting_df['Country'].unique()))
elif option == "Team Stats Comparison":
    # The chosen team count caps how many teams the multiselect accepts.
    num_teams = st.sidebar.selectbox("Select Number of Teams", [2, 3])
    selected_teams_stats = st.sidebar.multiselect(
        "Select Teams",
        sorted(batting_df['Country'].unique()),
        max_selections=num_teams,
    )
    selected_format = st.sidebar.selectbox("Select Format", ["ODI", "T20", "Test"])
elif option == "Player Stats":
    selected_team = st.sidebar.selectbox("Select Team", sorted(batting_df['Country'].unique()))
    # A player may appear in only one of the two tables, so offer the union of
    # batting and bowling names for the selected country.
    team_batters = batting_df[batting_df['Country'] == selected_team]['player_name']
    team_bowlers = bowling_df[bowling_df['Country'] == selected_team]['player_name']
    players_list = sorted(set(team_batters).union(team_bowlers))
    selected_player = st.sidebar.selectbox("Select Player", players_list)
elif option == "Player Comparison":
    all_players = sorted(set(batting_df['player_name']).union(bowling_df['player_name']))
    player1 = st.sidebar.selectbox("Select Player 1", all_players)
    # Player 1 is excluded from the second list so a player is never compared
    # with themselves.
    player2 = st.sidebar.selectbox("Select Player 2", [p for p in all_players if p != player1])
    comparison_format = st.sidebar.selectbox("Select Format", ["All", "ODI", "T20", "Test"])
# Helper function to get batting average column
def get_batting_avg_column(df: "pd.DataFrame") -> "str | None":
    """Return the name of the batting-average column present in *df*.

    The candidate names are checked in priority order: ``'Avg'``,
    ``'Average'``, ``'Batting_Avg'``, ``'Ave'``.

    Args:
        df: Any object exposing a ``columns`` collection that supports ``in``
            (in this file, a pandas DataFrame).

    Returns:
        The first candidate found in ``df.columns``, or ``None`` when none of
        them is present.
    """
    possible_cols = ('Avg', 'Average', 'Batting_Avg', 'Ave')
    return next((col for col in possible_cols if col in df.columns), None)
# Sidebar Query Agent (LLM-based Stats Assistant)
# with st.sidebar:
# st.markdown("---")
# st.markdown("### 🤖 Ask CricketStatBot")
# show_input = st.button("Start Query")
# if show_input:
# user_query = st.text_input("Ask a question about batting or bowling stats:", key="agent_query")
# if user_query:
# # Combine all dataframes into a context string
# def df_to_text(df, name, max_rows=100):
# return f"{name} Data:\n" + df.head(max_rows).to_csv(index=False)
# context = (
# df_to_text(batting_df, "Batting") + "\n" +
# df_to_text(bowling_df, "Bowling") + "\n" +
# df_to_text(odi_df, "ODI") + "\n" +
# df_to_text(t20_df, "T20") + "\n" +
# df_to_text(test_df, "Test") + "\n" +
# df_to_text(odi_teams_df, "ODI Teams") + "\n" +
# df_to_text(t20_teams_df, "T20 Teams") + "\n" +
# df_to_text(test_teams_df, "Test Teams")
# )
# # Agent Prompt
# agent_prompt = ChatPromptTemplate.from_messages([
# ("system",
# "You are a cricket analytics assistant. Use the below data to answer cricket-related questions in a detailed and insightful manner:\n\n{context}"),
# ("human", "{question}")
# ])
# agent_chain = agent_prompt | model | out_par
# with st.spinner("Analyzing your question..."):
# agent_response = agent_chain.invoke({"context": context, "question": user_query})
# st.markdown("#### 🧠 CricketStatBot Answer")
# st.markdown(f"{agent_response}", unsafe_allow_html=True)
# ---- Main Content ----
# Team Info page: LLM-written team bio, the team's per-format stats table and
# optional charts. The block structure was rebuilt from the statements' syntax
# because the original indentation was lost, and string literals that had been
# split across physical lines are written with explicit "\n" so their text is
# unchanged.
if option == "Team Info" and team_info:
    st.markdown(f"Team Bio - {team_info}\n", unsafe_allow_html=True)
    team_prompt = ChatPromptTemplate.from_messages([
        ("system",
         '''You are an AI cricket historian. Provide a brief overview of the team and its history in black text.
Then, provide a 'Debut Details' section with the following format:
- Add a heading **Debut Details**
- Under that, use subheadings **Test Debut**, **ODI Debut**, and **T20 Debut**
- For each debut format, include:
- Opponent team
- Date of debut
- Stadium or venue
Ensure a clear structure with headings and subheadings. Do not include performance stats.'''),
        ("human", "{team_name}")
    ])
    # prompt -> model -> plain-string parser
    team_chain = team_prompt | model | out_par
    # NOTE(review): the empty / newline-only markdown calls in this branch look
    # like wrappers whose markup is missing -- kept as-is; confirm intent.
    st.markdown("", unsafe_allow_html=True)
    st.write(team_chain.invoke({"team_name": team_info}))
    st.markdown("\n", unsafe_allow_html=True)

    # Combine all formats: tag each per-format team table, then stack them.
    # (This adds a 'Format' column to the module-level frames in place.)
    odi_teams_df['Format'] = 'ODI'
    t20_teams_df['Format'] = 'T20'
    test_teams_df['Format'] = 'Test'
    combined_stats_df = pd.concat([odi_teams_df, t20_teams_df, test_teams_df], ignore_index=True)

    # Show format-wise stats for selected team
    st.markdown(f"{team_info} Format-wise Statistics\n", unsafe_allow_html=True)
    team_stats = combined_stats_df[combined_stats_df['Team'] == team_info]
    st.markdown("", unsafe_allow_html=True)
    st.dataframe(team_stats.reset_index(drop=True), use_container_width=True)
    st.markdown("\n", unsafe_allow_html=True)

    if st.button("Show Format-wise Visualizations"):
        st.markdown("", unsafe_allow_html=True)
        # Bar Chart: matches played per format
        fig_bar = px.bar(team_stats, x='Format', y='Mat', color='Format', title="Matches by Format",
                         color_discrete_sequence=px.colors.qualitative.Vivid)
        fig_bar.update_layout(transition_duration=500)
        st.plotly_chart(fig_bar, use_container_width=True)

        # Donut Chart: share of wins per format; the first slice is pulled out.
        fig_donut = px.pie(team_stats, values='Won', names='Format', title="Win Distribution by Format",
                           hole=0.4, color_discrete_sequence=px.colors.qualitative.Bold)
        fig_donut.update_traces(textinfo='percent+label', pull=[0.1, 0, 0])
        st.plotly_chart(fig_donut, use_container_width=True)

        # Grouped Bar Chart: reshape to long form (one row per format/metric)
        # so each metric becomes its own bar colour.
        st.markdown("\nFormat-wise Metrics Comparison\n", unsafe_allow_html=True)
        metrics_df = team_stats[['Format', 'Mat', 'Won', 'Lost', 'W/L']].melt(
            id_vars='Format',
            var_name='Metric',
            value_name='Value',
        )
        fig_grouped_bar = px.bar(metrics_df, x='Format', y='Value', color='Metric', barmode='group',
                                 title="Team Metrics by Format",
                                 color_discrete_sequence=px.colors.qualitative.Set1,
                                 text_auto=True)
        fig_grouped_bar.update_layout(transition_duration=500, showlegend=True)
        st.plotly_chart(fig_grouped_bar, use_container_width=True)
        st.markdown("", unsafe_allow_html=True)
# Team Stats Comparison page: compares the selected teams on one statistic for
# the chosen format. Written as a standalone `if`: the page names tested by the
# sibling branches are mutually exclusive, so this is equivalent to the `elif`
# it replaces. Block structure was rebuilt from syntax (the original
# indentation was lost) and split string literals use explicit "\n".
if option == "Team Stats Comparison" and selected_teams_stats:
    st.markdown("Team Stats Comparison\n", unsafe_allow_html=True)

    # Tag each format table, stack them, then keep only the chosen teams/format.
    # (This adds a 'Format' column to the module-level frames in place.)
    odi_df['Format'] = 'ODI'
    t20_df['Format'] = 'T20'
    test_df['Format'] = 'Test'
    combined_df = pd.concat([odi_df, t20_df, test_df], ignore_index=True)
    selected_data = combined_df[
        (combined_df['Team'].isin(selected_teams_stats))
        & (combined_df['Format'] == selected_format)
    ]

    # Column name -> human-readable label shown in the selector and chart title.
    stat_options = {
        'Mat': 'Matches',
        'Won': 'Wins',
        'Lost': 'Losses',
        'Draw': 'Draws',
        'Tied': 'Ties',
        'W/L': 'Win/Loss Ratio',
        '%W': 'Win %',
        '%L': 'Loss %',
        '%D': 'Draw %'
    }
    stat_choice = st.selectbox(
        "Select Stat to Compare",
        list(stat_options.keys()),
        format_func=lambda x: stat_options[x],
    )
    # NOTE(review): the empty / newline-only markdown calls in this branch look
    # like wrappers whose markup is missing -- kept as-is; confirm intent.
    st.markdown("", unsafe_allow_html=True)

    # Bar Chart
    st.markdown("\nComparison Bar Chart\n", unsafe_allow_html=True)
    fig = px.bar(
        selected_data,
        x='Team',
        y=stat_choice,
        color='Team',
        barmode='group',
        title=f"{stat_options[stat_choice]} by Team in {selected_format}",
        color_discrete_sequence=px.colors.qualitative.Set2
    )
    fig.update_layout(transition_duration=500)
    st.plotly_chart(fig, use_container_width=True)

    # Donut Chart
    st.markdown("Win Percentage Donut Chart\n", unsafe_allow_html=True)
    pie_data = selected_data[['Team', '%W']]
    fig_pie = px.pie(pie_data, values='%W', names='Team', title='Win % Comparison',
                     hole=0.4, color_discrete_sequence=px.colors.qualitative.Pastel)
    fig_pie.update_traces(textinfo='percent+label', pull=[0.1, 0])
    st.plotly_chart(fig_pie, use_container_width=True)

    # Heatmap: teams as rows, count columns as heatmap columns.
    st.markdown("Performance Heatmap\n", unsafe_allow_html=True)
    heatmap_data = selected_data[['Team', 'Mat', 'Won', 'Lost', 'Draw']].set_index('Team')
    fig_heatmap = px.imshow(heatmap_data, text_auto=True, aspect="auto",
                            color_continuous_scale='Viridis', title="Team Stats Heatmap")
    st.plotly_chart(fig_heatmap, use_container_width=True)
    st.markdown("", unsafe_allow_html=True)

    if st.button("Show Raw Data"):
        st.markdown("", unsafe_allow_html=True)
        st.dataframe(selected_data.reset_index(drop=True), use_container_width=True)
        st.markdown("\n", unsafe_allow_html=True)
# Player Stats page: LLM-written bio plus batting/bowling cards and charts for
# one player. Written as a standalone `if`: the page names tested by the
# sibling branches are mutually exclusive, so this is equivalent to the `elif`
# it replaces. Block structure was rebuilt from syntax (the original
# indentation was lost) and split string literals use explicit "\n".
if option == "Player Stats" and selected_player:
    st.markdown(f"Player Dashboard - {selected_player}\n", unsafe_allow_html=True)

    # Rows for this player, restricted to the selected country.
    player_batting = batting_df[
        (batting_df['player_name'] == selected_player) & (batting_df['Country'] == selected_team)
    ]
    player_bowling = bowling_df[
        (bowling_df['player_name'] == selected_player) & (bowling_df['Country'] == selected_team)
    ]

    prompt = ChatPromptTemplate.from_messages([
        ("system", '''You are an AI cricket player information provider. Display the player's complete bio data in a
detailed table format with rows and columns, including personal information in black text.
Below the table, include debut details for all formats. Additionally, provide a brief description
of the player underneath. Only include player information, not their performance statistics.'''),
        ("human", "{player_name}")
    ])
    # prompt -> model -> plain-string parser
    chain = prompt | model | out_par
    # NOTE(review): the empty / newline-only markdown calls in this branch look
    # like wrappers whose markup is missing -- kept as-is; confirm intent.
    st.markdown("", unsafe_allow_html=True)
    st.write(chain.invoke({"player_name": selected_player}))
    st.markdown("\n", unsafe_allow_html=True)

    # Raw stat cards, shown on demand side by side.
    col1, col2 = st.columns(2)
    with col1:
        if st.button("Show Batting Card"):
            st.markdown("", unsafe_allow_html=True)
            # Only the first 16 columns of the batting table are displayed.
            st.dataframe(player_batting.iloc[:, :16], use_container_width=True)
            st.markdown("\n", unsafe_allow_html=True)
    with col2:
        if st.button("Show Bowling Card"):
            st.markdown("", unsafe_allow_html=True)
            # Only the first 15 columns of the bowling table are displayed.
            st.dataframe(player_bowling.iloc[:, :15], use_container_width=True)
            st.markdown("\n", unsafe_allow_html=True)

    if not player_batting.empty:
        st.markdown("Batting Visualizations\n", unsafe_allow_html=True)
        st.markdown("", unsafe_allow_html=True)
        col1, col2 = st.columns(2)
        with col1:
            fig_bar = px.bar(player_batting, x='Format', y='Runs', color='Format',
                             title="Runs by Format", color_discrete_sequence=px.colors.qualitative.D3)
            fig_bar.update_layout(transition_duration=500)
            st.plotly_chart(fig_bar, use_container_width=True)
        with col2:
            st.markdown("\nBatting Metrics Comparison\n", unsafe_allow_html=True)
            avg_col = get_batting_avg_column(player_batting)
            metric_cols = ['Runs', 'SR']
            if avg_col:
                # Fix: select the column that was actually detected. The
                # previous code appended the literal 'Average', which raised
                # KeyError whenever the data uses 'Avg', 'Ave' or 'Batting_Avg'.
                metric_cols.append(avg_col)
            metrics_frame = player_batting[metric_cols + ['Format']]
            if avg_col:
                # Show the metric under one stable label whatever the source name.
                metrics_frame = metrics_frame.rename(columns={avg_col: 'Average'})
            metrics_df = metrics_frame.melt(id_vars='Format',
                                            var_name='Metric',
                                            value_name='Value')
            fig_grouped_bar = px.bar(metrics_df, x='Format', y='Value', color='Metric', barmode='group',
                                     title="Batting Metrics by Format",
                                     color_discrete_sequence=px.colors.qualitative.Set1,
                                     text_auto=True)
            fig_grouped_bar.update_layout(transition_duration=500, showlegend=True)
            st.plotly_chart(fig_grouped_bar, use_container_width=True)
        # NOTE(review): nesting was lost in the source; the donut is placed
        # full-width below the two columns -- confirm against the intended layout.
        fig_donut = px.pie(player_batting, values='Runs', names='Format', title="Runs Distribution",
                           hole=0.4, color_discrete_sequence=px.colors.qualitative.T10)
        fig_donut.update_traces(textinfo='percent+label')
        st.plotly_chart(fig_donut, use_container_width=True)
        st.markdown("", unsafe_allow_html=True)

    if not player_bowling.empty:
        st.markdown("Bowling Visualizations\n", unsafe_allow_html=True)
        st.markdown("", unsafe_allow_html=True)
        col3, col4 = st.columns(2)
        with col3:
            fig_bar = px.bar(player_bowling, x='Format', y='Wickets', color='Format',
                             title="Wickets by Format", color_discrete_sequence=px.colors.qualitative.Set1)
            fig_bar.update_layout(transition_duration=500)
            st.plotly_chart(fig_bar, use_container_width=True)
        with col4:
            fig_line = px.line(player_bowling, x='Format', y='Eco', title="Economy Rate",
                               color_discrete_sequence=['#00cc96'])
            st.plotly_chart(fig_line, use_container_width=True)
        # Transposed so each metric is a heatmap row and each record a column.
        # NOTE(review): placed full-width below the columns, as with the batting
        # donut -- confirm.
        fig_heatmap = px.imshow(player_bowling[['Wickets', 'Eco', 'Avg']].T, text_auto=True,
                                color_continuous_scale='Plasma', title="Bowling Stats Heatmap")
        st.plotly_chart(fig_heatmap, use_container_width=True)
        st.markdown("\n", unsafe_allow_html=True)
# Player Comparison page: side-by-side batting/bowling charts for two players.
# Written as a standalone `if`: the page names tested by the sibling branches
# are mutually exclusive, so this is equivalent to the `elif` it replaces.
# Block structure was rebuilt from syntax (the original indentation was lost)
# and split string literals use explicit "\n".
if option == "Player Comparison" and player1 and player2:
    st.markdown(f"Player Comparison: {player1} vs {player2}\n", unsafe_allow_html=True)

    def get_player_data(name):
        """Return ``(batting_rows, bowling_rows)`` for the player called *name*.

        When the sidebar format is anything other than "All", both frames are
        further restricted to rows whose 'Format' equals that selection.
        """
        batting = batting_df[batting_df['player_name'] == name]
        bowling = bowling_df[bowling_df['player_name'] == name]
        if comparison_format != "All":
            batting = batting[batting['Format'] == comparison_format]
            bowling = bowling[bowling['Format'] == comparison_format]
        return batting, bowling

    bat1, bowl1 = get_player_data(player1)
    bat2, bowl2 = get_player_data(player2)

    # Check if data is available
    if bat1.empty and bowl1.empty and bat2.empty and bowl2.empty:
        st.markdown("No data available for the selected players in this format.\n", unsafe_allow_html=True)
    else:
        # NOTE(review): the empty / newline-only markdown calls in this branch
        # look like wrappers whose markup is missing -- kept as-is; confirm.
        st.markdown("", unsafe_allow_html=True)

        # Grouped Bar Chart for Batting
        # NOTE(review): the "no data" fallbacks below are attached to the outer
        # availability checks so they can actually be shown; under the inner
        # checks they would be unreachable. Confirm against the intended layout.
        if not bat1.empty or not bat2.empty:
            st.markdown("\nBatting Metrics Comparison\n", unsafe_allow_html=True)
            metrics_data = []
            for player, bat in [(player1, bat1), (player2, bat2)]:
                if not bat.empty:
                    # Per-player means over whatever rows survived the format filter.
                    player_data = {'Player': player, 'Runs': bat['Runs'].mean(), 'SR': bat['SR'].mean()}
                    avg_col = get_batting_avg_column(bat)
                    if avg_col:
                        player_data['Average'] = bat[avg_col].mean()
                    metrics_data.append(player_data)
            if metrics_data:
                metrics_df = pd.DataFrame(metrics_data).melt(id_vars='Player', var_name='Metric', value_name='Value')
                fig_grouped_bar = px.bar(metrics_df, x='Player', y='Value', color='Metric', barmode='group',
                                         title=f"Batting Metrics Comparison ({comparison_format})",
                                         color_discrete_sequence=px.colors.qualitative.Plotly,
                                         text_auto=True)
                fig_grouped_bar.update_layout(transition_duration=500, showlegend=True)
                st.plotly_chart(fig_grouped_bar, use_container_width=True)
        else:
            st.write("No batting data available for the selected format.")

        # Bowling Bar Chart
        if not bowl1.empty or not bowl2.empty:
            st.markdown("Bowling Bar Chart\n", unsafe_allow_html=True)
            bowl_combined = pd.concat([bowl1, bowl2])
            if not bowl_combined.empty:
                # Colour by format when all formats are shown, otherwise by player.
                fig_bowl_bar = px.bar(bowl_combined, x='player_name', y='Wickets',
                                      color='Format' if comparison_format == "All" else 'player_name',
                                      barmode='group',
                                      title=f"Wickets Comparison ({comparison_format})",
                                      color_discrete_sequence=px.colors.qualitative.Plotly,
                                      text_auto=True)
                fig_bowl_bar.update_layout(transition_duration=500)
                st.plotly_chart(fig_bowl_bar, use_container_width=True)
        else:
            st.write("No bowling data available for the selected format.")

        # Runs Donut Chart
        if not bat1.empty or not bat2.empty:
            st.markdown("Total Runs Donut Chart\n", unsafe_allow_html=True)
            # A player with no batting rows contributes 0 runs.
            total_runs = [bat1['Runs'].sum() if not bat1.empty else 0,
                          bat2['Runs'].sum() if not bat2.empty else 0]
            runs_data = pd.DataFrame({
                'Player': [player1, player2],
                'Total Runs': total_runs
            })
            fig_pie = px.pie(runs_data, names='Player', values='Total Runs',
                             title=f"Proportion of Total Runs ({comparison_format})",
                             hole=0.4, color_discrete_sequence=px.colors.qualitative.G10)
            fig_pie.update_traces(textinfo='percent+label')
            st.plotly_chart(fig_pie, use_container_width=True)

        # Heatmap for Batting Stats
        if not bat1.empty or not bat2.empty:
            st.markdown("Batting Stats Heatmap\n", unsafe_allow_html=True)
            avg_col1 = get_batting_avg_column(bat1)
            avg_col2 = get_batting_avg_column(bat2)
            bat_combined = pd.DataFrame({
                'Player': [player1, player2],
                'Total Runs': [bat1['Runs'].sum() if not bat1.empty else 0,
                               bat2['Runs'].sum() if not bat2.empty else 0],
                'Strike Rate': [bat1['SR'].mean() if not bat1.empty else 0,
                                bat2['SR'].mean() if not bat2.empty else 0]
            })
            # The average column is added only when both players have one.
            if avg_col1 and not bat1.empty and avg_col2 and not bat2.empty:
                bat_combined['Batting Average'] = [bat1[avg_col1].dropna().mean(),
                                                   bat2[avg_col2].dropna().mean()]
            bat_combined = bat_combined.set_index('Player')
            fig_heatmap = px.imshow(bat_combined, text_auto=True, aspect="auto",
                                    color_continuous_scale='RdBu',
                                    title=f"Batting Stats Heatmap ({comparison_format})")
            st.plotly_chart(fig_heatmap, use_container_width=True)
        st.markdown("", unsafe_allow_html=True)

        # NOTE(review): kept inside the "data available" branch since nesting
        # was lost in the source -- confirm.
        if st.button("Show Raw Stats"):
            st.markdown("", unsafe_allow_html=True)
            st.dataframe(pd.concat([bat1, bowl1, bat2, bowl2]), use_container_width=True)
            st.markdown("\n", unsafe_allow_html=True)