CricketDataZone / app.py
NeonSamurai's picture
Upload 3 files
c0b09c5 verified
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
st.set_page_config(page_icon='🏏', layout='wide', page_title='Cricket Analysis')
st.title("🏏 Cricket Stats Dashboard")


@st.cache_data
def _load_stats(csv_path):
    """Read one stats CSV into a DataFrame, skipping malformed rows.

    Wrapped in ``st.cache_data`` so the file is parsed once per path rather
    than on every widget-triggered rerun of this script.
    """
    return pd.read_csv(csv_path, on_bad_lines='skip')


# Load dataset
df_batting = _load_stats("Batting_10_Teams_Final.csv")
df_bowling = _load_stats("new_Bowling_10_Teams_Final.csv")

col1, col2 = st.columns(2)

# Select country
with col1:
    # dropna() keeps sorted() from comparing NaN (float) against str names.
    country = sorted(df_batting["Country"].dropna().unique().tolist())
    selected_country = st.selectbox("Select a Country", country)

# Players offered in the second dropdown are limited to the chosen country.
filtered_players = (
    df_batting.loc[df_batting["Country"] == selected_country, "player_name"]
    .dropna()
    .unique()
)

# Select player ("All" keeps every player of the country)
with col2:
    players = ["All"] + sorted(filtered_players.tolist())
    selected_player = st.selectbox("Select a Player", players)

# Not read anywhere in the visible script; kept because later code may use it.
selected_format = 'All'

# Display selected filters
st.write(f"📌 You selected: **{selected_country}** → **{selected_player}**")

# Filter both datasets down to the selected country
filtered_batting_df = df_batting[df_batting["Country"] == selected_country]
filtered_bowling_df = df_bowling[df_bowling["Country"] == selected_country]

# Apply player filter only if a specific player is selected
if selected_player != "All":
    filtered_batting_df = filtered_batting_df[filtered_batting_df["player_name"] == selected_player]
    filtered_bowling_df = filtered_bowling_df[filtered_bowling_df["player_name"] == selected_player]

# Per-format totals. numeric_only=True stops pandas from concatenating (or
# failing on) text columns such as player_name and Country while summing.
grouped_batting_df = filtered_batting_df.groupby("Format").sum(numeric_only=True).reset_index()
grouped_bowling_df = filtered_bowling_df.groupby("Format").sum(numeric_only=True).reset_index()
# NOTE(review): the original indentation of this section was lost, so the
# nesting below is reconstructed from the statement order. Spots where the
# placement is an assumption are marked inline.

# Markup for the thin vertical rule between the batting and bowling columns.
# The string content is kept flush-left, exactly as in the original literal.
_DIVIDER_HTML = """
<style>
.divider {
height: auto;
width: 3px;
background-color: #555;
margin: auto;
}
</style>
<div class="divider"></div>
"""


def _rows_for_top_players(stats_df, column, count=10):
    """Return every row of ``stats_df`` that belongs to the top ``count`` players.

    Players are ranked by the sum of ``column`` over all of their rows, so a
    player with one row per format is ranked on the combined total rather
    than on whichever row happens to come first.
    """
    totals = stats_df.groupby("player_name")[column].sum()
    leaders = totals.nlargest(count).index
    return stats_df[stats_df["player_name"].isin(leaders)]


def _mean_rates(stats_df, columns):
    """Return the mean of each rate column in ``columns``, rounded to 2 decimals.

    Rates and averages cannot be meaningfully summed across rows; the mean
    matches what the strike-rate and economy charts below already plot.
    """
    return stats_df[list(columns)].mean().round(2)


if not grouped_batting_df.empty and not grouped_bowling_df.empty:
    st.write("### 📊 Batting & Bowling Stats")

    # One "Overall" tab followed by one tab per format found in the batting data.
    format_names = grouped_batting_df["Format"].tolist()
    tab_list = st.tabs(["Overall"] + format_names)

    # Overall stats: counting columns are summed, rate columns are averaged.
    overall_batting = filtered_batting_df.sum(numeric_only=True)
    overall_bowling = filtered_bowling_df.sum(numeric_only=True)
    overall_batting_rates = _mean_rates(filtered_batting_df, ["SR", "Average"])
    overall_bowling_rates = _mean_rates(filtered_bowling_df, ["SR", "Avg", "Eco"])

    with tab_list[0]:  # Overall tab
        st.subheader("Overall Stats")
        col1, col9, col2 = st.columns([1, 0.05, 1])

        # ---------------- Overall batting ----------------
        with col1:
            st.subheader("🏏 Batting Stats")
            c1, c2, c3 = st.columns(3)
            with c1:
                st.metric("Total Matches", int(overall_batting["Matches"]))
                st.metric("Total Innings", int(overall_batting["Innings"]))
                st.metric("Strike rate", float(overall_batting_rates["SR"]))
            with c2:
                st.metric("Total Runs", int(overall_batting["Runs"]))
                st.metric("Average Runs", float(overall_batting_rates["Average"]))
                st.metric("Total Fours", int(overall_batting["Fours"]))
                st.metric("Total Sixes", int(overall_batting["Sixes"]))
            with c3:
                st.metric("Total 100s", int(overall_batting["100s"]))
                st.metric("Total 50s", int(overall_batting["50s"]))

            if not overall_batting.empty:
                # Strike-rate trend: squad average, or the selected player's own values.
                if selected_player == "All":
                    format_sr = filtered_batting_df.groupby("Format")["SR"].mean().reset_index()
                    fig_sr = px.line(
                        format_sr, x="Format", y="SR",
                        title="Average Strike Rate Across Formats",
                        markers=True,
                        template="plotly_dark",
                        line_shape="spline",
                        color_discrete_sequence=["#00CFFF"],
                    )
                    fig_sr.update_traces(marker=dict(size=10, symbol="circle"))
                    fig_sr.update_layout(
                        title_font=dict(size=18, family="Arial"),
                        xaxis_title="Match Format",
                        yaxis_title="Strike Rate",
                        hovermode="x unified",
                    )
                    st.plotly_chart(fig_sr, key="plot_strike_rate_trend")
                else:
                    player_sr = filtered_batting_df[filtered_batting_df["player_name"] == selected_player]
                    # Only plot when the player actually has batting rows.
                    if not player_sr.empty:
                        fig_sr_player = px.line(
                            player_sr, x="Format", y="SR",
                            title=f"{selected_player}: Strike Rate Across Formats",
                            markers=True,
                            template="plotly_dark",
                            line_shape="spline",
                            color_discrete_sequence=["#00CFFF"],
                        )
                        fig_sr_player.update_traces(marker=dict(size=10, symbol="circle"))
                        fig_sr_player.update_layout(
                            title_font=dict(size=18, family="Arial"),
                            xaxis_title="Match Format",
                            yaxis_title="Strike Rate",
                            hovermode="x unified",
                        )
                        st.plotly_chart(fig_sr_player, key="plot_player_sr")

                if selected_player == "All":
                    # Runs per format for the ten highest run-scorers.
                    top_players_runs = _rows_for_top_players(filtered_batting_df, "Runs")
                    fig_runs = px.bar(
                        top_players_runs, x="player_name", y="Runs", color="Format",
                        title="Runs Across Formats for Top Players",
                        barmode="stack",
                        template="plotly_dark",
                        text_auto=True,
                        color_discrete_sequence=px.colors.qualitative.Safe,
                    )
                    fig_runs.update_layout(
                        title_font=dict(size=18, family="Arial"),
                        xaxis_title="Player Name",
                        yaxis_title="Total Runs",
                        hovermode="x unified",
                    )
                    st.plotly_chart(fig_runs, key="plot_stacked_runs")

                    # Sixes & fours for the ten biggest six-hitters. The original
                    # stored this ranking under a misspelled name and then reused
                    # the runs ranking by accident.
                    top_batsmen_stats = _rows_for_top_players(filtered_batting_df, "Sixes")
                    top_batsmen_melted = (
                        top_batsmen_stats.groupby(["player_name"])[["Sixes", "Fours"]]
                        .sum()
                        .reset_index()
                        .melt(
                            id_vars=["player_name"],
                            value_vars=["Sixes", "Fours"],
                            var_name="Boundary Type",
                            value_name="Count",
                        )
                    )
                    fig_boundaries = px.bar(
                        top_batsmen_melted,
                        x="player_name",
                        y="Count",
                        color="Boundary Type",
                        barmode="stack",
                        title="Sixes & Fours Across All Formats for Top Batters",
                        hover_data=["Boundary Type", "Count"],
                        color_discrete_sequence=px.colors.qualitative.Pastel,
                    )
                    fig_boundaries.update_layout(
                        xaxis_title="Player Name",
                        yaxis_title="Total Boundaries",
                        hovermode="x unified",
                        legend=dict(title="Boundary Type"),
                    )
                    st.plotly_chart(fig_boundaries, key="plot_top_boundaries")
                else:
                    # Selected player's sixes & fours, one stacked bar per format.
                    player_stats = filtered_batting_df[filtered_batting_df["player_name"] == selected_player]
                    player_stats_melted = player_stats.melt(
                        id_vars=["Format"],
                        value_vars=["Sixes", "Fours"],
                        var_name="Boundary Type",
                        value_name="Count",
                    )
                    fig_player_boundaries = px.bar(
                        player_stats_melted,
                        x="Format", y="Count",
                        color="Boundary Type",
                        barmode="stack",
                        title=f"Sixes & Fours Across Formats for {selected_player}",
                        text_auto=True,
                        hover_data=["Boundary Type", "Count"],
                        color_discrete_sequence=px.colors.qualitative.Pastel,
                    )
                    fig_player_boundaries.update_layout(
                        xaxis_title="Match Format",
                        yaxis_title="Total Boundaries",
                        hovermode="x unified",
                        legend=dict(title="Boundary Type"),
                    )
                    st.plotly_chart(fig_player_boundaries, key="plot_selected_player_boundaries")

                # Pie chart: share of runs per format.
                # NOTE(review): assumed to be drawn for both "All" and a single
                # player; confirm against the original indentation.
                format_runs = filtered_batting_df.groupby("Format")["Runs"].sum().reset_index()
                fig_runs_share = px.pie(
                    format_runs, names="Format", values="Runs",
                    title="Runs Contribution Across Formats",
                    template="plotly_dark",
                    color_discrete_sequence=px.colors.qualitative.Pastel,
                )
                fig_runs_share.update_traces(
                    title_font=dict(size=18, family="Arial"),
                    textinfo="label+value",
                    pull=[0.01, 0.01, 0.01, 0.01],
                )
                st.plotly_chart(fig_runs_share, key="plot_runs_contribution")

                # NOTE(review): horizontal rule assumed to close the batting column.
                st.markdown("---")

        # Vertical divider between the two stat columns
        with col9:
            st.markdown(_DIVIDER_HTML, unsafe_allow_html=True)

        # ---------------- Overall bowling ----------------
        with col2:
            st.subheader("🎯 Bowling Stats")
            c1, c2, c3, c4 = st.columns(4)
            with c1:
                st.metric("Total Matches", int(overall_bowling["Matches"]))
                st.metric("Total Innings", int(overall_bowling["Innings"]))
                st.metric("Strike Rate", float(overall_bowling_rates["SR"]))
            with c2:
                st.metric("Total Wickets", int(overall_bowling["Wickets"]))
                st.metric("Average Wickets", float(overall_bowling_rates["Avg"]))
                st.metric("Total 4W", int(overall_bowling["4w"]))
                st.metric("Total 5W", int(overall_bowling["5w"]))
            with c3:
                # NOTE(review): these "Best" figures are sums over all rows, as in
                # the original; a best-of rule is presumably intended — confirm.
                st.metric("Best BBI Runs", int(overall_bowling["BBI_Runs"]))
                st.metric("Best BBI Wickets", int(overall_bowling["BBI_Wickets"]))
                st.metric("Best BBM Runs", int(overall_bowling["BBM_Runs"]))
                st.metric("Best BBM Wickets", int(overall_bowling["BBM_Wickets"]))
            with c4:
                st.metric("Maidens", int(overall_bowling["Maidens"]))
                st.metric("Eco", float(overall_bowling_rates["Eco"]))

            if not overall_bowling.empty:
                # Dual-axis chart: mean economy (line) against matches played (bars).
                format_stats = (
                    filtered_bowling_df.groupby("Format")
                    .agg({"Eco": "mean", "Matches": "sum"})
                    .reset_index()
                )
                fig_eco = make_subplots(specs=[[{"secondary_y": True}]])
                fig_eco.add_trace(
                    go.Scatter(
                        x=format_stats["Format"], y=format_stats["Eco"],
                        mode="lines+markers", name="Economy Rate", line_shape="spline",
                        marker=dict(size=10, symbol="circle", color="#FF6F61"),
                    ),
                    secondary_y=False,
                )
                fig_eco.add_trace(
                    go.Bar(
                        x=format_stats["Format"], y=format_stats["Matches"],
                        name="No. of Matches", marker_color="lightblue", opacity=0.6,
                    ),
                    secondary_y=True,
                )
                fig_eco.update_layout(
                    title="Economy Rate & Matches Across Formats",
                    xaxis_title="Match Format",
                    yaxis=dict(title="Average Economy Rate"),
                    yaxis2=dict(title="Total Matches", overlaying="y", side="right"),
                    hovermode="x unified",
                )
                st.plotly_chart(fig_eco, key="plot_eco_trend")

                if selected_player == "All":
                    # Both charts below share the same top-10 wicket-takers,
                    # so the ranking is computed once.
                    top_bowler_stats = _rows_for_top_players(filtered_bowling_df, "Wickets")

                    fig_wickets = px.bar(
                        top_bowler_stats, x="player_name", y="Wickets",
                        color="Format", barmode="stack",
                        title="Wickets Across Formats for Top Bowlers",
                        text_auto=True,
                        hover_data=["Wickets", "Format"],
                        color_discrete_sequence=px.colors.qualitative.Safe,
                    )
                    fig_wickets.update_layout(
                        xaxis_title="Player Name",
                        yaxis_title="Total Wickets",
                        hovermode="x unified",
                        legend=dict(title="Match Format"),
                    )
                    st.plotly_chart(fig_wickets, key="plot_top_bowlers")

                    # Wickets & maidens per top bowler, totalled over formats.
                    top_bowlers_melted = (
                        top_bowler_stats.groupby(["player_name"])[["Wickets", "Maidens"]]
                        .sum()
                        .reset_index()
                        .melt(
                            id_vars=["player_name"],
                            value_vars=["Wickets", "Maidens"],
                            var_name="Bowling Stat",
                            value_name="Count",
                        )
                    )
                    fig_bowling = px.bar(
                        top_bowlers_melted,
                        x="player_name", y="Count",
                        color="Bowling Stat",
                        barmode="stack",
                        title="Wickets & Maidens Across Formats for Top Bowlers",
                        hover_data=["Bowling Stat", "Count"],
                        color_discrete_sequence=px.colors.qualitative.Pastel,
                    )
                    fig_bowling.update_layout(
                        xaxis_title="Player Name",
                        yaxis_title="Total Count",
                        hovermode="x unified",
                        legend=dict(title="Bowling Stat"),
                    )
                    st.plotly_chart(fig_bowling, key="plot_top_bowling_stats")
                else:
                    # Selected player's wickets & maidens, one stacked bar per format.
                    player_bowling_stats = filtered_bowling_df[filtered_bowling_df["player_name"] == selected_player]
                    player_bowling_melted = player_bowling_stats.melt(
                        id_vars=["Format"],
                        value_vars=["Wickets", "Maidens"],
                        var_name="Bowling Stat",
                        value_name="Count",
                    )
                    fig_player_bowling = px.bar(
                        player_bowling_melted,
                        x="Format", y="Count",
                        color="Bowling Stat",
                        barmode="stack",
                        title=f"Wickets & Maidens Across Formats for {selected_player}",
                        text_auto=True,
                        hover_data=["Bowling Stat", "Count"],
                        color_discrete_sequence=px.colors.qualitative.Pastel,
                    )
                    fig_player_bowling.update_layout(
                        xaxis_title="Match Format",
                        yaxis_title="Total Count",
                        hovermode="x unified",
                        legend=dict(title="Bowling Stat"),
                    )
                    st.plotly_chart(fig_player_bowling, key="plot_selected_player_bowling_stats")

                # Sunburst: wickets per format, coloured by matches played.
                # NOTE(review): assumed to be drawn for both "All" and a single
                # player; confirm against the original indentation.
                format_totals = (
                    filtered_bowling_df.groupby("Format")
                    .agg({"Wickets": "sum", "Matches": "sum"})
                    .reset_index()
                )
                fig_wicket_share = px.sunburst(
                    format_totals, path=["Format"], values="Wickets",
                    color="Matches",
                    title="Wickets & Matches Contribution Across Formats",
                    color_continuous_scale=px.colors.qualitative.Pastel,
                )
                # Hover shows both wickets and matches for each format.
                fig_wicket_share.update_traces(
                    hovertemplate="<b>%{label}</b><br>Wickets: %{value}<br>Matches: %{customdata}<extra></extra>",
                    customdata=format_totals["Matches"],
                    textinfo="label+value",
                )
                fig_wicket_share.update_layout(
                    title_font=dict(size=18, family="Arial"),
                    hovermode="x unified",
                    legend=dict(title="Match Format"),
                    coloraxis_showscale=False,
                )
                st.plotly_chart(fig_wicket_share, key="plot_wickets_contribution")

    # ---------------- Format-wise stats ----------------
    for format_tab, format_name in zip(tab_list[1:], format_names):
        with format_tab:
            st.subheader(f"{format_name} Stats")
            col1, col3, col2 = st.columns([1, 0.05, 1])

            # Batting stats for this format
            format_batting = grouped_batting_df[grouped_batting_df["Format"] == format_name]
            if not format_batting.empty:
                format_batting_row = format_batting.iloc[0]
                format_filtered_df = filtered_batting_df[filtered_batting_df["Format"] == format_name]
                format_batting_rates = _mean_rates(format_filtered_df, ["SR", "Average"])

                with col1:
                    st.subheader("🏏 Batting Stats")
                    c1, c2, c3 = st.columns(3)
                    with c1:
                        st.metric("Total Matches", int(format_batting_row["Matches"]))
                        st.metric("Total Innings", int(format_batting_row["Innings"]))
                        st.metric("Strike Rate", float(format_batting_rates["SR"]))
                    with c2:
                        st.metric("Total Runs", int(format_batting_row["Runs"]))
                        st.metric("Average Runs", float(format_batting_rates["Average"]))
                        st.metric("Total Fours", int(format_batting_row["Fours"]))
                        st.metric("Total Sixes", int(format_batting_row["Sixes"]))
                    with c3:
                        st.metric("Total 100s", int(format_batting_row["100s"]))
                        st.metric("Total 50s", int(format_batting_row["50s"]))

                    if not format_filtered_df.empty:
                        # Either the selected player alone or the ten top run-scorers.
                        if selected_player != "All":
                            top_batters = format_filtered_df[format_filtered_df["player_name"] == selected_player]
                        else:
                            top_batters = format_filtered_df.nlargest(10, "Runs")

                        fig_format_sr = px.line(
                            top_batters,
                            x="player_name", y="SR",
                            markers=True,
                            line_shape="spline",
                            title=f"{format_name}: Strike Rate of Top 10 Players",
                        )
                        st.plotly_chart(fig_format_sr, key=f"plot_{format_name}_batting_sr")

                        if selected_player == "All":
                            # Sixes & fours for this format's ten biggest six-hitters.
                            top_batsmen_stats_format = _rows_for_top_players(format_filtered_df, "Sixes")
                            top_batsmen_melted_format = (
                                top_batsmen_stats_format.groupby(["player_name"])[["Sixes", "Fours"]]
                                .sum()
                                .reset_index()
                                .melt(
                                    id_vars=["player_name"],
                                    value_vars=["Sixes", "Fours"],
                                    var_name="Boundary Type",
                                    value_name="Count",
                                )
                            )
                            fig_format = px.bar(
                                top_batsmen_melted_format,
                                x="player_name",
                                y="Count",
                                color="Boundary Type",
                                barmode="stack",
                                title=f"Sixes & Fours in {format_name} Matches (Top 10 Players)",
                                hover_data=["Boundary Type", "Count"],
                                color_discrete_sequence=px.colors.qualitative.Pastel,
                            )
                            fig_format.update_layout(
                                xaxis_title="Player Name",
                                yaxis_title="Total Boundaries",
                                hovermode="x unified",
                                legend=dict(title="Boundary Type"),
                            )
                            st.plotly_chart(fig_format, key=f"plot_top_boundaries_{format_name}")
                        else:
                            # Selected player's sixes & fours in this format, side by side.
                            player_stats_format = format_filtered_df[format_filtered_df["player_name"] == selected_player]
                            player_stats_melted_format = player_stats_format.melt(
                                id_vars=["Format"],
                                value_vars=["Sixes", "Fours"],
                                var_name="Boundary Type",
                                value_name="Count",
                            )
                            fig_selected_format = px.bar(
                                player_stats_melted_format,
                                x="Format", y="Count",
                                color="Boundary Type",
                                barmode="group",
                                title=f"Sixes & Fours in {format_name} Matches for {selected_player}",
                                text_auto=True,
                                hover_data=["Boundary Type", "Count"],
                                color_discrete_sequence=px.colors.qualitative.Pastel,
                            )
                            fig_selected_format.update_layout(
                                xaxis_title="Match Format",
                                yaxis_title="Total Boundaries",
                                hovermode="x unified",
                                legend=dict(title="Boundary Type"),
                            )
                            st.plotly_chart(
                                fig_selected_format,
                                key=f"plot_selected_player_boundaries_{format_name}",
                            )

            # Vertical divider between the two stat columns
            with col3:
                st.markdown(_DIVIDER_HTML, unsafe_allow_html=True)

            # Bowling stats for this format (may be absent if nobody bowled in it)
            format_bowling = grouped_bowling_df[grouped_bowling_df["Format"] == format_name]
            if not format_bowling.empty:
                format_bowling_row = format_bowling.iloc[0]
                format_filtered_df2 = filtered_bowling_df[filtered_bowling_df["Format"] == format_name]
                format_bowling_rates = _mean_rates(format_filtered_df2, ["SR", "Avg", "Eco"])

                with col2:
                    st.subheader("🎯 Bowling Stats")
                    c1, c2, c3, c4 = st.columns(4)
                    with c1:
                        st.metric("Total Matches", int(format_bowling_row["Matches"]))
                        st.metric("Total Innings", int(format_bowling_row["Innings"]))
                        st.metric("Strike Rate", float(format_bowling_rates["SR"]))
                    with c2:
                        st.metric("Total Wickets", int(format_bowling_row["Wickets"]))
                        st.metric("Average Wickets", float(format_bowling_rates["Avg"]))
                        st.metric("Total 4W", int(format_bowling_row["4w"]))
                        st.metric("Total 5W", int(format_bowling_row["5w"]))
                    with c3:
                        # NOTE(review): summed per format, as in the original — confirm
                        # what "Best" should mean here.
                        st.metric("Best BBI Runs", format_bowling_row["BBI_Runs"])
                        st.metric("Best BBI Wickets", format_bowling_row["BBI_Wickets"])
                        st.metric("Best BBM Runs", format_bowling_row["BBM_Runs"])
                        st.metric("Best BBM Wickets", format_bowling_row["BBM_Wickets"])
                    with c4:
                        st.metric("Maidens", int(format_bowling_row['Maidens']))
                        st.metric("Economy Rate", float(format_bowling_rates['Eco']))

                    if not format_filtered_df2.empty:
                        # Either the selected player alone or the ten top wicket-takers.
                        if selected_player != "All":
                            top_bowlers = format_filtered_df2[format_filtered_df2["player_name"] == selected_player]
                        else:
                            top_bowlers = format_filtered_df2.nlargest(10, "Wickets")

                        fig_bowling_1 = px.bar(
                            top_bowlers,
                            x="player_name", y="Wickets",
                            color="Wickets",
                            title=f"{format_name}: {'Player' if selected_player != 'All' else 'Top 10'} Wicket-Takers",
                            text_auto=True,
                        )
                        st.plotly_chart(fig_bowling_1, key=f"plot_{format_name}_bowling_wickets")

                        # Economy rate of the same bowlers
                        fig_bowling_2 = px.line(
                            top_bowlers,
                            x="player_name", y="Eco",
                            markers=True,
                            line_shape='spline',
                            title=f"💰 {format_name}: {'Player' if selected_player != 'All' else 'Top 10'} Economy Rate",
                        )
                        st.plotly_chart(fig_bowling_2, key=f"plot_{format_name}_bowling_economy")
else:
    # The dashboard needs rows in both datasets; say so instead of showing nothing.
    st.info("Stats are shown only when both batting and bowling records exist for this selection.")