Spaces:

NeonSamurai
/

CricketDataZone

Sleeping

App Files Files Community

CricketDataZone / app.py

NeonSamurai

Upload 3 files

c0b09c5 verified 7 months ago

raw

history blame contribute delete

29.7 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots

	st.set_page_config(page_icon='🏏', layout='wide', page_title='Cricket Analysis')

	st.title("🏏 Cricket Stats Dashboard")


	@st.cache_data
	def load_stats(csv_path):
	    """Read one statistics CSV into a DataFrame, skipping malformed rows.

	    The result is cached by Streamlit so the file is not parsed again on
	    every script rerun (each widget interaction triggers a rerun).
	    """
	    return pd.read_csv(csv_path, on_bad_lines='skip')


	# Load dataset
	df_batting = load_stats("Batting_10_Teams_Final.csv")
	df_bowling = load_stats("new_Bowling_10_Teams_Final.csv")

	country_col, player_col = st.columns(2)

	# Select country (missing values are dropped so sorting never mixes str and NaN)
	with country_col:
	    countries = sorted(df_batting["Country"].dropna().unique().tolist())
	    selected_country = st.selectbox("Select a Country", countries)

	# Players that have batting rows for the selected country
	filtered_players = (
	    df_batting.loc[df_batting["Country"] == selected_country, "player_name"]
	    .dropna()
	    .unique()
	)

	# Select player ("All" keeps every player of the country)
	with player_col:
	    players = ["All"] + sorted(filtered_players.tolist())
	    selected_player = st.selectbox("Select a Player", players)

	# Not referenced anywhere else in the visible script; kept for compatibility.
	selected_format = 'All'

	# 📌 Display selected filters
	st.write(f"📌 You selected: {selected_country} → {selected_player}")

	# ✅ Filtering Data
	filtered_batting_df = df_batting[df_batting["Country"] == selected_country]
	filtered_bowling_df = df_bowling[df_bowling["Country"] == selected_country]

	# Apply player filter only if a specific player is selected
	if selected_player != "All":
	    filtered_batting_df = filtered_batting_df[
	        filtered_batting_df["player_name"] == selected_player
	    ]
	    filtered_bowling_df = filtered_bowling_df[
	        filtered_bowling_df["player_name"] == selected_player
	    ]

	# Per-format totals. numeric_only=True keeps text columns (player names,
	# country) out of the sum instead of string-concatenating them.
	grouped_batting_df = (
	    filtered_batting_df.groupby("Format").sum(numeric_only=True).reset_index()
	)
	grouped_bowling_df = (
	    filtered_bowling_df.groupby("Format").sum(numeric_only=True).reset_index()
	)


	# Rate-like columns are combined with a mean: adding several rates together
	# (which a plain column sum does) does not produce a meaningful rate.
	# NOTE(review): this is an unweighted mean over rows; an exact figure would
	# need balls faced/bowled, which this script never reads.
	_BATTING_RATE_COLS = ("SR", "Average")
	_BOWLING_RATE_COLS = ("SR", "Avg", "Eco")

	# Markup for the thin rule placed in the narrow middle column. It is built
	# line by line so the source file's indentation never leaks into the Markdown.
	_DIVIDER_HTML = "\n".join([
	    "<style>",
	    ".divider {",
	    "height: auto;",
	    "width: 3px;",
	    "background-color: #555;",
	    "margin: auto;",
	    "}",
	    "</style>",
	    '<div class="divider"></div>',
	])


	def _as_int(value):
	    """Return ``value`` as an int, mapping a missing value to 0."""
	    return 0 if pd.isna(value) else int(value)


	def _as_rate(value):
	    """Return ``value`` rounded to two decimals, mapping a missing value to 0.0."""
	    return 0.0 if pd.isna(value) else round(float(value), 2)


	def _summarize(frame, rate_cols):
	    """Collapse ``frame`` into one Series: numeric columns summed, ``rate_cols`` averaged."""
	    summary = frame.sum(numeric_only=True).astype(float)
	    for col in rate_cols:
	        if col in frame.columns:
	            summary[col] = pd.to_numeric(frame[col], errors="coerce").mean()
	    return summary


	def _best_figures(frame, wickets_col, runs_col):
	    """Return ``(wickets, runs)`` of the best figures: most wickets, then fewest runs.

	    Assumes each row stores its own best figures in the two columns
	    (TODO confirm against the CSV). Returns ``(0, 0)`` when no row has both.
	    """
	    figures = (
	        frame[[wickets_col, runs_col]]
	        .apply(pd.to_numeric, errors="coerce")
	        .dropna()
	    )
	    if figures.empty:
	        return 0, 0
	    best = figures.sort_values(
	        [wickets_col, runs_col], ascending=[False, True]
	    ).iloc[0]
	    return int(best[wickets_col]), int(best[runs_col])


	def _top_player_rows(frame, by, n=10):
	    """Return the rows of the ``n`` players with the largest total of column ``by``.

	    Totals are summed over all of a player's rows, so the ranking does not
	    depend on which row happens to come first in the data.
	    """
	    totals = frame.groupby("player_name")[by].sum()
	    leaders = totals.nlargest(n).index
	    return frame[frame["player_name"].isin(leaders)]


	def _show_metrics(groups):
	    """Render metric tiles side by side; ``groups`` is one list of (label, value) per column."""
	    for column, entries in zip(st.columns(len(groups)), groups):
	        with column:
	            for label, value in entries:
	                st.metric(label, value)


	def _stacked_bar(frame, id_col, value_cols, var_name, title, x_title, y_title,
	                 barmode="stack", text_auto=False):
	    """Build a bar chart comparing ``value_cols`` for every ``id_col`` entry of ``frame``."""
	    long_form = frame.melt(
	        id_vars=[id_col],
	        value_vars=list(value_cols),
	        var_name=var_name,
	        value_name="Count",
	    )
	    fig = px.bar(
	        long_form,
	        x=id_col, y="Count",
	        color=var_name,
	        barmode=barmode,
	        title=title,
	        text_auto=text_auto,
	        hover_data=[var_name, "Count"],
	        color_discrete_sequence=px.colors.qualitative.Pastel,
	    )
	    fig.update_layout(
	        xaxis_title=x_title,
	        yaxis_title=y_title,
	        hovermode="x unified",
	        legend=dict(title=var_name),
	    )
	    return fig


	def _strike_rate_line(frame, title):
	    """Build the strike-rate-per-format line chart from ``Format`` and ``SR`` columns."""
	    fig = px.line(
	        frame, x="Format", y="SR",
	        title=title,
	        markers=True,
	        template="plotly_dark",
	        line_shape="spline",
	        color_discrete_sequence=["#00CFFF"],
	    )
	    fig.update_traces(marker=dict(size=10, symbol="circle"))
	    fig.update_layout(
	        title_font=dict(size=18, family="Arial"),
	        xaxis_title="Match Format",
	        yaxis_title="Strike Rate",
	        hovermode="x unified",
	    )
	    return fig


	def _render_batting_metrics(rows):
	    """Show the batting metric tiles for ``rows``."""
	    totals = _summarize(rows, _BATTING_RATE_COLS)
	    _show_metrics([
	        [
	            ("Total Matches", _as_int(totals["Matches"])),
	            ("Total Innings", _as_int(totals["Innings"])),
	            ("Strike Rate", _as_rate(totals["SR"])),
	        ],
	        [
	            ("Total Runs", _as_int(totals["Runs"])),
	            ("Average Runs", _as_rate(totals["Average"])),
	            ("Total Fours", _as_int(totals["Fours"])),
	            ("Total Sixes", _as_int(totals["Sixes"])),
	        ],
	        [
	            ("Total 100s", _as_int(totals["100s"])),
	            ("Total 50s", _as_int(totals["50s"])),
	        ],
	    ])


	def _render_bowling_metrics(rows):
	    """Show the bowling metric tiles for ``rows``."""
	    totals = _summarize(rows, _BOWLING_RATE_COLS)
	    bbi_wickets, bbi_runs = _best_figures(rows, "BBI_Wickets", "BBI_Runs")
	    bbm_wickets, bbm_runs = _best_figures(rows, "BBM_Wickets", "BBM_Runs")
	    _show_metrics([
	        [
	            ("Total Matches", _as_int(totals["Matches"])),
	            ("Total Innings", _as_int(totals["Innings"])),
	            ("Strike Rate", _as_rate(totals["SR"])),
	        ],
	        [
	            ("Total Wickets", _as_int(totals["Wickets"])),
	            ("Average Wickets", _as_rate(totals["Avg"])),
	            ("Total 4W", _as_int(totals["4w"])),
	            ("Total 5W", _as_int(totals["5w"])),
	        ],
	        [
	            ("Best BBI Runs", bbi_runs),
	            ("Best BBI Wickets", bbi_wickets),
	            ("Best BBM Runs", bbm_runs),
	            ("Best BBM Wickets", bbm_wickets),
	        ],
	        [
	            ("Maidens", _as_int(totals["Maidens"])),
	            ("Economy Rate", _as_rate(totals["Eco"])),
	        ],
	    ])


	def _render_overall_batting(batting, player):
	    """Fill the batting column of the "Overall" tab (metrics plus charts)."""
	    st.subheader("🏏 Batting Stats")
	    _render_batting_metrics(batting)

	    if player == "All":
	        # Average Strike Rate Across Formats (for all players)
	        sr_by_format = batting.groupby("Format")["SR"].mean().reset_index()
	        st.plotly_chart(
	            _strike_rate_line(sr_by_format, "Average Strike Rate Across Formats"),
	            key="plot_strike_rate_trend",
	        )

	        # Top 10 run scorers, split by format
	        fig_runs = px.bar(
	            _top_player_rows(batting, "Runs"),
	            x="player_name", y="Runs", color="Format",
	            title="Runs Across Formats for Top Players",
	            barmode="stack",
	            template="plotly_dark",
	            text_auto=True,
	            color_discrete_sequence=px.colors.qualitative.Safe,
	        )
	        fig_runs.update_layout(
	            title_font=dict(size=18, family="Arial"),
	            xaxis_title="Player Name",
	            yaxis_title="Total Runs",
	            hovermode="x unified",
	        )
	        st.plotly_chart(fig_runs, key="plot_stacked_runs")

	        # Top 10 six-hitters: total sixes and fours per player
	        boundary_totals = (
	            _top_player_rows(batting, "Sixes")
	            .groupby("player_name")[["Sixes", "Fours"]]
	            .sum()
	            .reset_index()
	        )
	        st.plotly_chart(
	            _stacked_bar(
	                boundary_totals, "player_name", ["Sixes", "Fours"], "Boundary Type",
	                title="Sixes & Fours Across All Formats for Top Batters",
	                x_title="Player Name", y_title="Total Boundaries",
	            ),
	            key="plot_top_boundaries",
	        )
	    else:
	        # `batting` already holds only the selected player's rows.
	        st.plotly_chart(
	            _strike_rate_line(batting, f"{player}: Strike Rate Across Formats"),
	            key="plot_player_sr",
	        )
	        st.plotly_chart(
	            _stacked_bar(
	                batting, "Format", ["Sixes", "Fours"], "Boundary Type",
	                title=f"Sixes & Fours Across Formats for {player}",
	                x_title="Match Format", y_title="Total Boundaries",
	                text_auto=True,
	            ),
	            key="plot_selected_player_boundaries",
	        )

	    # Pie Chart: Runs Contribution Across Formats
	    runs_by_format = batting.groupby("Format")["Runs"].sum().reset_index()
	    fig_pie = px.pie(
	        runs_by_format, names="Format", values="Runs",
	        title="Runs Contribution Across Formats",
	        template="plotly_dark",
	        color_discrete_sequence=px.colors.qualitative.Pastel,
	    )
	    # A scalar pull separates every slice, however many formats there are.
	    fig_pie.update_traces(textinfo="label+value", pull=0.01)
	    # The figure title font belongs to the layout, not to the pie trace.
	    fig_pie.update_layout(title_font=dict(size=18, family="Arial"))
	    st.plotly_chart(fig_pie, key="plot_runs_contribution")

	    st.markdown("---")  # Divider


	def _render_overall_bowling(bowling, player):
	    """Fill the bowling column of the "Overall" tab (metrics plus charts)."""
	    st.subheader("🎯 Bowling Stats")
	    _render_bowling_metrics(bowling)

	    # Economy rate (line) and matches played (bars) per format, on two y axes
	    format_stats = (
	        bowling.groupby("Format")
	        .agg({"Eco": "mean", "Matches": "sum"})
	        .reset_index()
	    )
	    fig_eco = make_subplots(specs=[[{"secondary_y": True}]])
	    fig_eco.add_trace(
	        go.Scatter(
	            x=format_stats["Format"], y=format_stats["Eco"],
	            mode="lines+markers", name="Economy Rate", line_shape="spline",
	            marker=dict(size=10, symbol="circle", color="#FF6F61"),
	        ),
	        secondary_y=False,
	    )
	    fig_eco.add_trace(
	        go.Bar(
	            x=format_stats["Format"], y=format_stats["Matches"],
	            name="No. of Matches", marker_color="lightblue", opacity=0.6,
	        ),
	        secondary_y=True,
	    )
	    fig_eco.update_layout(
	        title="Economy Rate & Matches Across Formats",
	        xaxis_title="Match Format",
	        yaxis=dict(title="Average Economy Rate"),
	        yaxis2=dict(title="Total Matches", overlaying="y", side="right"),
	        hovermode="x unified",
	    )
	    st.plotly_chart(fig_eco, key="plot_eco_trend")

	    if player == "All":
	        # The same top-10 wicket takers feed both charts below.
	        top_bowler_rows = _top_player_rows(bowling, "Wickets")

	        fig_wickets = px.bar(
	            top_bowler_rows, x="player_name", y="Wickets",
	            color="Format", barmode="stack",
	            title="Wickets Across Formats for Top Bowlers",
	            text_auto=True,
	            hover_data=["Wickets", "Format"],
	            color_discrete_sequence=px.colors.qualitative.Safe,
	        )
	        fig_wickets.update_layout(
	            xaxis_title="Player Name",
	            yaxis_title="Total Wickets",
	            hovermode="x unified",
	            legend=dict(title="Match Format"),
	        )
	        st.plotly_chart(fig_wickets, key="plot_top_bowlers")

	        bowling_totals = (
	            top_bowler_rows.groupby("player_name")[["Wickets", "Maidens"]]
	            .sum()
	            .reset_index()
	        )
	        st.plotly_chart(
	            _stacked_bar(
	                bowling_totals, "player_name", ["Wickets", "Maidens"], "Bowling Stat",
	                title="Wickets & Maidens Across Formats for Top Bowlers",
	                x_title="Player Name", y_title="Total Count",
	            ),
	            key="plot_top_bowling_stats",
	        )
	    else:
	        st.plotly_chart(
	            _stacked_bar(
	                bowling, "Format", ["Wickets", "Maidens"], "Bowling Stat",
	                title=f"Wickets & Maidens Across Formats for {player}",
	                x_title="Match Format", y_title="Total Count",
	                text_auto=True,
	            ),
	            key="plot_selected_player_bowling_stats",
	        )

	    # Sunburst: wickets per format, coloured by matches played
	    format_totals = (
	        bowling.groupby("Format")
	        .agg({"Wickets": "sum", "Matches": "sum"})
	        .reset_index()
	    )
	    fig_sunburst = px.sunburst(
	        format_totals, path=["Format"], values="Wickets",
	        color="Matches",
	        title="Wickets & Matches Contribution Across Formats",
	        color_continuous_scale=px.colors.qualitative.Pastel,
	    )
	    # Customize hover info to display both Wickets & Matches
	    fig_sunburst.update_traces(
	        hovertemplate="<b>%{label}</b><br>Wickets: %{value}<br>Matches: %{customdata}<extra></extra>",
	        customdata=format_totals["Matches"],
	        textinfo="label+value",
	    )
	    fig_sunburst.update_layout(
	        title_font=dict(size=18, family="Arial"),
	        hovermode="x unified",
	        legend=dict(title="Match Format"),
	        coloraxis_showscale=False,
	    )
	    st.plotly_chart(fig_sunburst, key="plot_wickets_contribution")


	def _render_format_batting(rows, format_name, player):
	    """Fill the batting column of one format tab; ``rows`` holds that format only."""
	    st.subheader("🏏 Batting Stats")
	    _render_batting_metrics(rows)

	    if player == "All":
	        # Show only the top 10 run-scorers
	        charted = rows.nlargest(10, "Runs")
	        sr_title = f"{format_name}: Strike Rate of Top 10 Players"
	    else:
	        charted = rows
	        sr_title = f"{format_name}: Strike Rate of {player}"

	    fig_sr = px.line(
	        charted,
	        x="player_name", y="SR",
	        markers=True,
	        line_shape="spline",
	        title=sr_title,
	    )
	    st.plotly_chart(fig_sr, key=f"plot_{format_name}_batting_sr")

	    if player == "All":
	        boundary_totals = (
	            _top_player_rows(rows, "Sixes")
	            .groupby("player_name")[["Sixes", "Fours"]]
	            .sum()
	            .reset_index()
	        )
	        st.plotly_chart(
	            _stacked_bar(
	                boundary_totals, "player_name", ["Sixes", "Fours"], "Boundary Type",
	                title=f"Sixes & Fours in {format_name} Matches (Top 10 Players)",
	                x_title="Player Name", y_title="Total Boundaries",
	            ),
	            key=f"plot_top_boundaries_{format_name}",
	        )
	    else:
	        st.plotly_chart(
	            _stacked_bar(
	                rows, "Format", ["Sixes", "Fours"], "Boundary Type",
	                title=f"Sixes & Fours in {format_name} Matches for {player}",
	                x_title="Match Format", y_title="Total Boundaries",
	                barmode="group", text_auto=True,
	            ),
	            key=f"plot_selected_player_boundaries_{format_name}",
	        )


	def _render_format_bowling(rows, format_name, player):
	    """Fill the bowling column of one format tab; ``rows`` holds that format only."""
	    st.subheader("🎯 Bowling Stats")
	    _render_bowling_metrics(rows)

	    charted = rows.nlargest(10, "Wickets") if player == "All" else rows
	    scope = 'Player' if player != 'All' else 'Top 10'

	    # Bar Chart: Top 10 Wicket-Takers
	    fig_wickets = px.bar(
	        charted,
	        x="player_name", y="Wickets",
	        color="Wickets",
	        title=f"{format_name}: {scope} Wicket-Takers",
	        text_auto=True,
	    )
	    st.plotly_chart(fig_wickets, key=f"plot_{format_name}_bowling_wickets")

	    # 📊 Line Plot: Economy Rate of Top 10 Bowlers
	    fig_economy = px.line(
	        charted,
	        x="player_name", y="Eco",
	        markers=True,
	        line_shape='spline',
	        title=f"💰 {format_name}: {scope} Economy Rate",
	    )
	    st.plotly_chart(fig_economy, key=f"plot_{format_name}_bowling_economy")


	if grouped_batting_df.empty or grouped_bowling_df.empty:
	    # The dashboard needs both data sets; say so instead of showing nothing.
	    st.info("No batting and bowling data available for this selection.")
	else:
	    st.write("### 📊 Batting & Bowling Stats")

	    # One tab for the overall view plus one per format present in the batting data
	    format_names = grouped_batting_df["Format"].tolist()
	    tab_list = st.tabs(["Overall"] + format_names)

	    # 🏏 Overall Stats
	    with tab_list[0]:
	        st.subheader("Overall Stats")
	        batting_col, divider_col, bowling_col = st.columns([1, 0.05, 1])

	        with batting_col:
	            _render_overall_batting(filtered_batting_df, selected_player)

	        # Vertical Line (Divider)
	        with divider_col:
	            st.markdown(_DIVIDER_HTML, unsafe_allow_html=True)

	        with bowling_col:
	            _render_overall_bowling(filtered_bowling_df, selected_player)

	    # 📌 Format-wise Stats
	    for tab, format_name in zip(tab_list[1:], format_names):
	        with tab:
	            st.subheader(f"{format_name} Stats")
	            batting_col, divider_col, bowling_col = st.columns([1, 0.05, 1])

	            format_batting = filtered_batting_df[
	                filtered_batting_df["Format"] == format_name
	            ]
	            if not format_batting.empty:
	                with batting_col:
	                    _render_format_batting(format_batting, format_name, selected_player)

	            # Vertical Line (Divider)
	            with divider_col:
	                st.markdown(_DIVIDER_HTML, unsafe_allow_html=True)

	            format_bowling = filtered_bowling_df[
	                filtered_bowling_df["Format"] == format_name
	            ]
	            if not format_bowling.empty:
	                with bowling_col:
	                    _render_format_bowling(format_bowling, format_name, selected_player)