import streamlit as st
from gsheet_loader import get_data
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import datetime as dt
# Page chrome: browser-tab title and icon, wide layout.
st.set_page_config(
    page_title="Catalog Data Dashboard",
    page_icon="📊",
    layout="wide",
)

st.title("📊 Catalog Data Dashboard")

# Short description shown under the title.
intro_markdown = """
This dashboard combines Static Data with live Google Sheets data for:
- catalog onboarding
- metadata completeness
"""
st.markdown(intro_markdown)

# Manual refresh: clear everything held by st.cache_data, notify the user,
# then restart the script run.
refresh_clicked = st.button("🔄 Refresh Data")
if refresh_clicked:
    st.cache_data.clear()
    st.toast("Refreshing data...", icon="🔄")
    st.rerun()
# get_data() (from gsheet_loader) yields three objects, unpacked here as the
# onboarding, metadata and status frames used by the tabs below.
(
    cat_onboarding_df,
    cat_metadata_df,
    cat_status_df,
) = get_data()

# Tab labels in display order; the commented entries are disabled tabs.
tab_labels = [
    # "Overview",
    "Static Data",
    "Onboarding Status",
    "Metadata Completeness",
    # "Mapping Status"
]
tab1, tab2, tab3 = st.tabs(tab_labels)
# =========================================================================================================================
# Tab 0 - Overview
# =========================================================================================================================
# with tab0:
# st.header("Overview")
# if st.button("🔄 Refresh Data"):
# st.cache_data.clear()
# st.toast("Refreshing data...", icon="🔄")
# st.rerun()
# st.markdown("---")
# st.subheader("Quick Data Preview")
# col1, col2, col3 = st.columns(3)
# with col1:
# st.dataframe(cat_onboarding_df.head(5))
# with col2:
# st.dataframe(cat_metadata_df.head(5))
# with col3:
# st.dataframe(cat_status_df.head(5))
# =========================================================================================================================
# Tab 1 - Static data
# =========================================================================================================================
def _country_choropleth(data, title):
    """Build a world choropleth coloured by ``log_count`` per country name.

    Args:
        data: DataFrame with ``country_name``, ``count`` and ``log_count``
            columns. ``log_count`` drives the colour; the hover shows
            ``count`` and hides ``log_count``.
        title: Chart title.

    Returns:
        The configured Plotly figure.
    """
    figure = px.choropleth(
        data,
        locations="country_name",
        locationmode="country names",
        color="log_count",
        color_continuous_scale="Purples",
        hover_name="country_name",
        hover_data={"count": True, "log_count": False},
        projection="natural earth",
        title=title,
    )
    figure.update_geos(
        showcountries=True,
        showcoastlines=True,
        showland=True,
        landcolor="white",
        projection_type="natural earth",
    )
    figure.update_layout(
        width=1400,
        height=700,
        margin=dict(l=0, r=0, t=100, b=0),
        title_y=0.95,
    )
    return figure


with tab1:
    st.header("Static Data Preview")

    # Static inputs are read from CSV files next to the app.
    full_countries_df = pd.read_csv('countries.csv')
    full_languages_df = pd.read_csv('languages.csv')

    # countries map
    fig = _country_choropleth(
        full_countries_df,
        "Programs' availabilities by Country (Log Scale)",
    )
    st.plotly_chart(fig, use_container_width=True)

    # languages map (also keyed by the ``country_name`` column)
    fig1 = _country_choropleth(
        full_languages_df,
        "Programs by Languages (Log Scale)",
    )
    st.plotly_chart(fig1, use_container_width=True)

    # Completeness evaluation
    catalog_scores = pd.read_csv("catalog_scores.csv")
    colorscale = [
        [0.0, "#ffffff"],
        [0.1, "#dcd6f7"],
        [0.3, "#a29bfe"],
        [0.6, "#6c5ce7"],
        [1.0, "#341f97"],
    ]
    fig_completeness = px.bar(
        catalog_scores,
        x="Total",
        y="Catalog",
        orientation="h",
        color="Total",
        color_continuous_scale=colorscale,
        title="Catalog Metadata Completeness Score",
    )
    fig_completeness.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        template="plotly_dark",
        height=1000,
    )
    st.plotly_chart(fig_completeness, use_container_width=True)

    # Completeness score broken down by program type.
    subcols = ["movie", "show", "season", "episode", "sport"]
    # Sum of the raw subscores per catalog, used as the normalisation base.
    catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)

    fig_completeness2 = go.Figure()
    for col in subcols:
        # Each segment is the subscore's share of raw_sum scaled by Total,
        # so the stacked segments of one catalog add up to its Total.
        norm_vals = (catalog_scores[col] / catalog_scores["raw_sum"]) * catalog_scores["Total"]
        label = col.capitalize()
        fig_completeness2.add_trace(
            go.Bar(
                y=catalog_scores["Catalog"],
                x=norm_vals,  # bar size = normalized value
                name=label,
                orientation="h",
                customdata=catalog_scores[col],  # raw value, shown on hover
                # Hover lines are separated with <br>; a raw newline inside
                # the quoted literal is not valid Python.
                hovertemplate=(
                    "%{y}<br>"
                    f"{label}: %{{customdata}}<br>"
                    "Normalized: %{x:.2f}"
                ),
            )
        )
    fig_completeness2.update_layout(
        barmode="stack",
        title="Subscore Contribution per Catalog (Scaled to Total Score)",
        xaxis_title="Total Score",
        template="plotly_dark",
        height=1200,
        yaxis={'categoryorder': 'total ascending'},
    )
    st.plotly_chart(fig_completeness2, use_container_width=True)

    # Scatter plot: total score against catalog size (marker size = size).
    fig_scatter = px.scatter(
        catalog_scores,
        x="Total",
        y="Number of programs",
        size="Number of programs",
        color="Total",
        hover_name="Catalog",
        color_continuous_scale="Viridis",
        size_max=50,
    )
    st.plotly_chart(fig_scatter, use_container_width=True)
# =========================================================================================================================
# Tab 2 - Onboarding sheet
# =========================================================================================================================
def _stacked_count_bar(frame, x_col, color_col, title):
    """Build a stacked bar chart of row counts per (x_col, color_col) pair.

    Args:
        frame: DataFrame containing both grouping columns.
        x_col: Column placed on the x axis (also used as the x-axis title).
        color_col: Column that splits each bar into stacked segments.
        title: Chart title.

    Returns:
        The configured Plotly figure.
    """
    counts = (
        frame.groupby([x_col, color_col])
        .size()
        .reset_index(name="Count")
    )
    figure = px.bar(
        counts,
        x=x_col,
        y="Count",
        color=color_col,
        text_auto=True,
        title=title,
    )
    figure.update_layout(
        barmode="stack",
        xaxis_title=x_col,
        yaxis_title="Catalog Count",
    )
    return figure


with tab2:
    st.header("Catalog Onboarding Status")

    # Onboarding dates are parsed as day/month (e.g. 21/11); values that do
    # not match become NaT. The year is then pinned explicitly.
    # NOTE(review): the year is hard-coded, so an onboarding date that falls
    # in 2026 would still be placed in 2025 - confirm against the sheet.
    onboarding_year = 2025
    parsed_onboarding = pd.to_datetime(
        cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
    )
    cat_onboarding_df["Onboarding date"] = parsed_onboarding.apply(
        lambda d: d.replace(year=onboarding_year) if pd.notna(d) else d
    )

    # Go-live is given as a textual month ("November 2025"). Parse it to the
    # first of that month and roll forward to the month's last day. Anything
    # that is not "<Month name> <year>" (e.g. "TBD") becomes NaT. Parsing
    # instead of a fixed lookup table keeps later months from silently
    # dropping out of the timeline.
    go_live_month_start = pd.to_datetime(
        cat_onboarding_df["Go live (customer)"], format="%B %Y", errors="coerce"
    )
    cat_onboarding_df["Go live parsed"] = go_live_month_start + pd.offsets.MonthEnd(0)

    # The timeline needs both ends of the bar, so drop rows missing either.
    timeline_df = cat_onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])
    fig_timeline = px.timeline(
        timeline_df,
        x_start="Onboarding date",
        x_end="Go live parsed",
        y="NAME",
        color="Onboarding Status",
        hover_data=["Client", "Priority"],
        title="Onboarding → Go-Live Timeline",
    )
    fig_timeline.update_yaxes(autorange="reversed")
    st.plotly_chart(fig_timeline, use_container_width=True)

    # Three stacked count charts over the full (unfiltered) onboarding sheet.
    count_charts = [
        ("Client", "Onboarding Status", "Catalogs per Client (by Onboarding Status)"),
        ("Client", "Priority", "Catalogs per Client (by Priority)"),
        ("Onboarding Status", "Priority", "Catalogs per Onboarding Status (by Priority)"),
    ]
    for x_col, color_col, chart_title in count_charts:
        st.plotly_chart(
            _stacked_count_bar(cat_onboarding_df, x_col, color_col, chart_title),
            use_container_width=True,
        )
# =========================================================================================================================
# Tab 3 - Metadata completeness
# =========================================================================================================================
def _horizontal_score_bar(frame, value_col, label_col, scale, title):
    """Horizontal bar chart of ``value_col`` per ``label_col``.

    Bars are coloured by the same value using the continuous colour scale
    ``scale`` and ordered on the y axis by ascending total.
    """
    figure = px.bar(
        frame,
        x=value_col,
        y=label_col,
        orientation="h",
        color=value_col,
        color_continuous_scale=scale,
        title=title,
    )
    figure.update_layout(yaxis={'categoryorder': 'total ascending'})
    return figure


with tab3:
    st.header("Catalog Metadata Completeness")

    cat_df = cat_metadata_df.copy()
    # Every column except the catalog name is treated as a metadata field.
    meta_cols = [name for name in cat_df.columns if name != "Catalog name"]
    # Textual answers -> numeric score.
    score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}

    cat_df_numeric = cat_df.copy()
    # Replace the known answers, then coerce to numbers; anything that is
    # still non-numeric becomes NaN (and is skipped by the means below).
    cat_df_numeric[meta_cols] = (
        cat_df_numeric[meta_cols]
        .replace(score_map)
        .apply(pd.to_numeric, errors="coerce")
    )
    cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
    cat_df_numeric_sorted = cat_df_numeric.sort_values(
        "Completeness Score", ascending=False
    )

    # Graph 1: per-catalog score (row-wise mean over the metadata fields).
    fig_completeness = _horizontal_score_bar(
        cat_df_numeric_sorted,
        "Completeness Score",
        "Catalog name",
        "Greens",
        "Catalog Metadata Completeness Score",
    )
    st.plotly_chart(fig_completeness, use_container_width=True)

    # Graph 2: per-field average score across all catalogs.
    coverage = (
        cat_df_numeric[meta_cols]
        .mean()
        .sort_values(ascending=False)
        .rename_axis("Metadata Field")
        .reset_index(name="Average Score")
    )
    fig_field_coverage = _horizontal_score_bar(
        coverage,
        "Average Score",
        "Metadata Field",
        "Blues",
        "Metadata Field Coverage Across All Catalogs",
    )
    st.plotly_chart(fig_field_coverage, use_container_width=True)
# # heatmap 1
# # Prepare data
# z = cat_df_numeric[meta_cols].astype(float).to_numpy()
# x = list(meta_cols)
# y = list(cat_df_numeric["Catalog name"].astype(str))
# # Build the heatmap (no annotation_text)
# fig_heatmap = ff.create_annotated_heatmap(
# z=z,
# x=x,
# y=y,
# showscale=True,
# colorscale=[
# [0.0, "rgb(255,77,77)"], # red for 0 (No)
# [0.5, "rgb(255,204,0)"], # yellow for 0.5 (Some)
# [1.0, "rgb(0,204,102)"] # green for 1 (Yes)
# ],
# annotation_text=None # removes numbers
# )
# # Layout adjustments
# fig_heatmap.update_layout(
# title="Metadata Completeness Heatmap (Catalog vs Field)",
# xaxis_title="Metadata Field",
# yaxis_title="Catalog Name",
# width=1600, # make it wide
# height=1000, # make it tall so names fit
# margin=dict(l=200, r=50, t=80, b=150), # spacing for labels
# )
# # Tweak label angles for readability
# fig_heatmap.update_xaxes(tickangle=-45)
# fig_heatmap.update_yaxes(automargin=True)
# st.plotly_chart(fig_heatmap, use_container_width=True)
# # heatmap 2
# fig_heatmap1 = px.imshow(
# cat_df_numeric[meta_cols],
# labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
# x=meta_cols,
# y=cat_df_numeric["Catalog name"],
# color_continuous_scale=[
# [0.0, "rgb(255,77,77)"],
# [0.5, "rgb(255,204,0)"],
# [1.0, "rgb(0,204,102)"]
# ],
# )
# fig_heatmap1.update_layout(
# title="Metadata Completeness Heatmap (Catalog vs Field)",
# width=1600,
# height=1000,
# margin=dict(l=200, r=50, t=80, b=150),
# )
# fig_heatmap1.update_xaxes(tickangle=-45)
# st.plotly_chart(fig_heatmap1, use_container_width=True)
# with tab4:
# st.header("Catalog Mapping status")