Spaces:

mbecchis
/

streaming-visualization

Sleeping

App Files Files Community

streaming-visualization / app.py

mbecchis

Upload 5 files

8848efa verified 3 months ago

raw

history blame

13.1 kB

	import streamlit as st
	from gsheet_loader import get_data
	import pandas as pd
	import plotly.express as px
	import plotly.figure_factory as ff
	import plotly.graph_objects as go
	import datetime as dt

	# Page chrome: browser-tab title, wide layout and icon, then the visible heading.
	st.set_page_config(page_title="Catalog Data Dashboard", layout="wide", page_icon="📊")
	st.title("📊 Catalog Data Dashboard")

	# Introductory blurb linking to the backing spreadsheet.
	intro_markdown = """
	This dashboard combines live [Google Sheets data](https://docs.google.com/spreadsheets/d/10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA) for:
	- catalog onboarding
	- metadata completeness
	- mapping/scraping status
	"""
	st.markdown(intro_markdown)

	# get_data() yields three objects, unpacked in this order: onboarding, metadata, status.
	# NOTE(review): presumably pandas DataFrames (they are used with .head()/.groupby() below) — confirm in gsheet_loader.
	cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()

	# One Streamlit tab per dashboard section, in display order.
	tab_labels = [
	    "Overview",
	    "Static Data",
	    "Onboarding Status",
	    "Metadata Completeness",
	    "Mapping Status",
	]
	tab0, tab1, tab2, tab3, tab4 = st.tabs(tab_labels)

	# =========================================================================================================================
	# Tab 0 - Overview
	# =========================================================================================================================

	# Overview tab: a manual refresh button plus a five-row preview of each loaded sheet.
	with tab0:
	    st.header("Overview")

	    # Pressing the button empties Streamlit's data cache and reruns the script.
	    # NOTE(review): this only re-fetches the sheets if get_data() is cached with
	    # st.cache_data — confirm in gsheet_loader.
	    if st.button("🔄 Refresh Data"):
	        st.cache_data.clear()
	        st.toast("Refreshing data...", icon="🔄")
	        st.rerun()

	    st.markdown("---")
	    st.subheader("Quick Data Preview")

	    # One column per sheet, in the same order the sheets were unpacked from get_data().
	    col1, col2, col3 = st.columns(3)
	    previews = (
	        (col1, cat_onboarding_df),
	        (col2, cat_metadata_df),
	        (col3, cat_status_df),
	    )
	    for column, frame in previews:
	        with column:
	            st.dataframe(frame.head(5))

	# =========================================================================================================================
	# Tab 1 - Static Data
	# =========================================================================================================================

	def _country_choropleth(frame, title):
	    """Return a world choropleth of *frame* coloured by its ``log_count`` column.

	    *frame* must provide ``country_name``, ``count`` and ``log_count`` columns.
	    Countries are matched by name; the hover box shows the raw ``count`` and
	    hides ``log_count``. *title* is used verbatim as the figure title.
	    """
	    figure = px.choropleth(
	        frame,
	        locations="country_name",
	        locationmode="country names",
	        color="log_count",
	        color_continuous_scale="Purples",
	        hover_name="country_name",
	        hover_data={"count": True, "log_count": False},
	        projection="natural earth",
	        title=title,
	    )
	    figure.update_geos(
	        showcountries=True,
	        showcoastlines=True,
	        showland=True,
	        landcolor="white",
	        projection_type="natural earth",
	    )
	    figure.update_layout(
	        width=1400,
	        height=700,
	        margin=dict(l=0, r=0, t=100, b=0),
	        title_y=0.95,
	    )
	    return figure


	# Static Data tab: charts built from CSV files read from the working directory.
	with tab1:
	    st.header("Static Data Preview")

	    full_countries_df = pd.read_csv('countries.csv')
	    full_languages_df = pd.read_csv('languages.csv')

	    # Countries map
	    fig = _country_choropleth(full_countries_df, "Programs' availabilities by Country (Log Scale)")
	    st.plotly_chart(fig, use_container_width=True)

	    # Languages map (same country-keyed layout, different source file)
	    fig1 = _country_choropleth(full_languages_df, "Programs by Languages (Log Scale)")
	    st.plotly_chart(fig1, use_container_width=True)

	    # Completeness evaluation: one horizontal bar per catalog, coloured by its total score.
	    catalog_scores = pd.read_csv("catalog_scores.csv")
	    colorscale = [
	        [0.0, "#ffffff"],
	        [0.1, "#dcd6f7"],
	        [0.3, "#a29bfe"],
	        [0.6, "#6c5ce7"],
	        [1.0, "#341f97"],
	    ]

	    fig_completeness = px.bar(
	        catalog_scores,
	        x="Total",
	        y="Catalog",
	        orientation="h",
	        color="Total",
	        color_continuous_scale=colorscale,
	        title="Catalog Metadata Completeness Score",
	    )
	    fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'}, template="plotly_dark", height=1000)
	    st.plotly_chart(fig_completeness, use_container_width=True)

	    # Completeness score broken down by content type.
	    subcols = ["movie", "show", "season", "episode", "sport"]

	    # Sum of the raw subscores; each segment below is that subscore's share of "Total".
	    catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)

	    # Mask zero sums so the share computation never divides by zero; the
	    # resulting NaN shares are drawn as zero-length segments instead of NaN/inf.
	    share_denominator = catalog_scores["raw_sum"].where(catalog_scores["raw_sum"] != 0)

	    fig_completeness2 = go.Figure()

	    for col in subcols:
	        # Segment length = (subscore / sum of subscores) * Total, so a catalog's
	        # stacked segments add up to its Total score.
	        norm_vals = (catalog_scores[col] / share_denominator * catalog_scores["Total"]).fillna(0)

	        fig_completeness2.add_trace(
	            go.Bar(
	                y=catalog_scores["Catalog"],
	                x=norm_vals,  # bar size = normalized values
	                name=col.capitalize(),
	                orientation="h",
	                customdata=catalog_scores[col],  # raw values for hover
	                hovertemplate=(
	                    "<b>%{y}</b><br>" +
	                    f"{col.capitalize()}: <b>%{{customdata}}</b><br>" +  # raw value
	                    "Normalized: %{x:.2f}<extra></extra>"
	                ),
	            )
	        )

	    fig_completeness2.update_layout(
	        barmode="stack",
	        title="Subscore Contribution per Catalog (Scaled to Total Score)",
	        xaxis_title="Total Score",
	        template="plotly_dark",
	        height=1200,
	        yaxis={'categoryorder':'total ascending'},
	    )

	    st.plotly_chart(fig_completeness2, use_container_width=True)

	    # Scatter plot: total score against catalog size, with marker size also encoding size.
	    fig_scatter = px.scatter(
	        catalog_scores,
	        x="Total",
	        y="Number of programs",
	        size="Number of programs",
	        color="Total",
	        hover_name="Catalog",
	        color_continuous_scale="Viridis",
	        size_max=50,
	    )

	    st.plotly_chart(fig_scatter, use_container_width=True)



	# =========================================================================================================================
	# Tab 2 - Onboarding sheet
	# =========================================================================================================================

	# The sheet stores onboarding dates as day/month only; this is the year attached to them.
	ONBOARDING_YEAR = 2025


	def _stacked_count_bar(frame, x, color, title):
	    """Return a stacked bar chart of row counts in *frame* per (*x*, *color*) pair.

	    *x* and *color* are column names of *frame*; *x* also labels the x-axis.
	    """
	    counts = frame.groupby([x, color]).size().reset_index(name="Count")
	    figure = px.bar(
	        counts,
	        x=x,
	        y="Count",
	        color=color,
	        text_auto=True,
	        title=title,
	    )
	    figure.update_layout(barmode="stack", xaxis_title=x, yaxis_title="Catalog Count")
	    return figure


	# Onboarding tab: a start -> go-live timeline followed by three count breakdowns.
	with tab2:
	    st.header("Catalog Onboarding Status")

	    # "21/11" parses with strptime's default year (1900), so move every parsed
	    # date to ONBOARDING_YEAR; unparseable cells become NaT and are left alone.
	    onboarding_dates = pd.to_datetime(
	        cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
	    )
	    cat_onboarding_df["Onboarding date"] = onboarding_dates.apply(
	        lambda d: d.replace(year=ONBOARDING_YEAR) if pd.notna(d) else d
	    )

	    # Go-live is given as "<Month name> <year>" (e.g. "November 2025"). Parse it
	    # as the first of that month and roll forward to the month's last day.
	    # Anything that does not match (e.g. "TBD") becomes NaT.
	    go_live_month_start = pd.to_datetime(
	        cat_onboarding_df["Go live (customer)"], format="%B %Y", errors="coerce"
	    )
	    cat_onboarding_df["Go live parsed"] = go_live_month_start + pd.offsets.MonthEnd(0)

	    # The timeline needs both ends, so drop rows missing either date.
	    timeline_df = cat_onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])

	    fig_timeline = px.timeline(
	        timeline_df,
	        x_start="Onboarding date",
	        x_end="Go live parsed",
	        y="NAME",
	        color="Onboarding Status",
	        hover_data=["Client", "Priority"],
	        title="Onboarding → Go-Live Timeline",
	    )
	    fig_timeline.update_yaxes(autorange="reversed")

	    st.plotly_chart(fig_timeline, use_container_width=True)

	    # Bar chart 1: catalogs per client, split by onboarding status.
	    fig_client = _stacked_count_bar(
	        cat_onboarding_df,
	        x="Client",
	        color="Onboarding Status",
	        title="Catalogs per Client (by Onboarding Status)",
	    )
	    st.plotly_chart(fig_client, use_container_width=True)

	    # Bar chart 2: catalogs per client, split by priority.
	    fig_client1 = _stacked_count_bar(
	        cat_onboarding_df,
	        x="Client",
	        color="Priority",
	        title="Catalogs per Client (by Priority)",
	    )
	    st.plotly_chart(fig_client1, use_container_width=True)

	    # Bar chart 3: catalogs per onboarding status, split by priority.
	    fig_client2 = _stacked_count_bar(
	        cat_onboarding_df,
	        x="Onboarding Status",
	        color="Priority",
	        title="Catalogs per Onboarding Status (by Priority)",
	    )
	    st.plotly_chart(fig_client2, use_container_width=True)

	# =========================================================================================================================
	# Tab 3 - Metadata completeness
	# =========================================================================================================================

	# Metadata Completeness tab: scores each Yes/Some/No cell and charts the result
	# per catalog, per field, and as two catalog-by-field heatmaps.
	with tab3:
	    st.header("Catalog Metadata Completeness")

	    cat_df = cat_metadata_df.copy()
	    # Every column except the catalog name is treated as a metadata field.
	    meta_cols = [col for col in cat_df.columns if col != "Catalog name"]

	    # Textual answers -> numeric score.
	    score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}

	    cat_df_numeric = cat_df.copy()
	    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)

	    # Force conversion to numeric; anything not covered by score_map and not
	    # already numeric becomes NaN and is skipped by the means below.
	    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce")

	    # Per-catalog score = mean over its metadata fields.
	    cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
	    cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False)

	    # Graph 1: completeness score per catalog.
	    fig_completeness = px.bar(
	        cat_df_numeric_sorted,
	        x="Completeness Score",
	        y="Catalog name",
	        orientation="h",
	        color="Completeness Score",
	        color_continuous_scale="Greens",
	        title="Catalog Metadata Completeness Score",
	    )
	    fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'})

	    st.plotly_chart(fig_completeness, use_container_width=True)

	    # Graph 2: average score per metadata field across all catalogs.
	    coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
	    coverage.columns = ["Metadata Field", "Average Score"]

	    fig_field_coverage = px.bar(
	        coverage,
	        x="Average Score",
	        y="Metadata Field",
	        orientation="h",
	        color="Average Score",
	        color_continuous_scale="Blues",
	        title="Metadata Field Coverage Across All Catalogs",
	    )
	    fig_field_coverage.update_layout(yaxis={'categoryorder':'total ascending'})

	    st.plotly_chart(fig_field_coverage, use_container_width=True)

	    # Heatmap 1 (figure_factory): rows are catalogs, columns are metadata fields.
	    z = cat_df_numeric[meta_cols].astype(float).to_numpy()
	    x = list(meta_cols)
	    y = list(cat_df_numeric["Catalog name"].astype(str))

	    # create_annotated_heatmap writes the z value into every cell when
	    # annotation_text is None, so pass a same-shaped grid of empty strings to
	    # actually hide the numbers.
	    blank_annotations = [[""] * len(x) for _ in y]

	    fig_heatmap = ff.create_annotated_heatmap(
	        z=z,
	        x=x,
	        y=y,
	        showscale=True,
	        colorscale=[
	            [0.0, "rgb(255,77,77)"],  # red for 0 (No)
	            [0.5, "rgb(255,204,0)"],  # yellow for 0.5 (Some)
	            [1.0, "rgb(0,204,102)"],  # green for 1 (Yes)
	        ],
	        annotation_text=blank_annotations,
	    )

	    # Layout adjustments
	    fig_heatmap.update_layout(
	        title="Metadata Completeness Heatmap (Catalog vs Field)",
	        xaxis_title="Metadata Field",
	        yaxis_title="Catalog Name",
	        width=1600,  # make it wide
	        height=1000,  # make it tall so names fit
	        margin=dict(l=200, r=50, t=80, b=150),  # spacing for labels
	    )

	    # Tweak label angles for readability
	    fig_heatmap.update_xaxes(tickangle=-45)
	    fig_heatmap.update_yaxes(automargin=True)

	    st.plotly_chart(fig_heatmap, use_container_width=True)

	    # Heatmap 2 (plotly express): same data and colours rendered with px.imshow.
	    fig_heatmap1 = px.imshow(
	        cat_df_numeric[meta_cols],
	        labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
	        x=meta_cols,
	        y=cat_df_numeric["Catalog name"],
	        color_continuous_scale=[
	            [0.0, "rgb(255,77,77)"],
	            [0.5, "rgb(255,204,0)"],
	            [1.0, "rgb(0,204,102)"],
	        ],
	    )

	    fig_heatmap1.update_layout(
	        title="Metadata Completeness Heatmap (Catalog vs Field)",
	        width=1600,
	        height=1000,
	        margin=dict(l=200, r=50, t=80, b=150),
	    )
	    fig_heatmap1.update_xaxes(tickangle=-45)

	    st.plotly_chart(fig_heatmap1, use_container_width=True)



	with tab4:
	st.header("Catalog Mapping status")