"""Streamlit dashboard for catalog onboarding, metadata completeness and mapping status.

The script loads three data frames through ``gsheet_loader.get_data`` and a few
CSV snapshots from the working directory (``countries.csv``, ``languages.csv``,
``catalog_scores.csv``), then renders one Streamlit tab per topic.
"""

import datetime as dt

import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import streamlit as st

from gsheet_loader import get_data

# The onboarding sheet stores dates as "day/month" without a year; this is the
# year stamped onto every parsed onboarding date.
ONBOARDING_YEAR = 2025

# Red -> yellow -> green scale shared by both completeness heatmaps
# (0 = "No", 0.5 = "Some", 1 = "Yes").
TRAFFIC_LIGHT_SCALE = [
    [0.0, "rgb(255,77,77)"],
    [0.5, "rgb(255,204,0)"],
    [1.0, "rgb(0,204,102)"],
]


def _country_choropleth(frame: pd.DataFrame, title: str) -> go.Figure:
    """Build a world choropleth coloured by ``log_count`` with raw ``count`` on hover.

    ``frame`` must provide the columns ``country_name``, ``count`` and
    ``log_count``; ``title`` is used as the figure title.
    """
    figure = px.choropleth(
        frame,
        locations="country_name",
        locationmode="country names",
        color="log_count",
        color_continuous_scale="Purples",
        hover_name="country_name",
        hover_data={"count": True, "log_count": False},
        projection="natural earth",
        title=title,
    )
    figure.update_geos(
        showcountries=True,
        showcoastlines=True,
        showland=True,
        landcolor="white",
        projection_type="natural earth",
    )
    figure.update_layout(
        width=1400,
        height=700,
        margin=dict(l=0, r=0, t=100, b=0),
        title_y=0.95,
    )
    return figure


st.set_page_config(
    page_title="Catalog Data Dashboard",
    layout="wide",
    page_icon="📊",
)

st.title("📊 Catalog Data Dashboard")
st.markdown(
    """
This dashboard combines live [Google Sheets data](https://docs.google.com/spreadsheets/d/10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA) for:

- catalog onboarding
- metadata completeness
- mapping/scraping status
"""
)

cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()

tab0, tab1, tab2, tab3, tab4 = st.tabs(
    [
        "Overview",
        "Static Data",
        "Onboarding Status",
        "Metadata Completeness",
        "Mapping Status",
    ]
)

# =========================================================================================================================
# Tab 0 - Overview
# =========================================================================================================================
with tab0:
    st.header("Overview")

    # Clearing the data cache and rerunning makes the next get_data() call
    # fetch fresh data instead of reusing cached results.
    if st.button("🔄 Refresh Data"):
        st.cache_data.clear()
        st.toast("Refreshing data...", icon="🔄")
        st.rerun()

    st.markdown("---")
    st.subheader("Quick Data Preview")

    col1, col2, col3 = st.columns(3)
    with col1:
        st.dataframe(cat_onboarding_df.head(5))
    with col2:
        st.dataframe(cat_metadata_df.head(5))
    with col3:
        st.dataframe(cat_status_df.head(5))

# =========================================================================================================================
# Tab 1 - Static data
# =========================================================================================================================
with tab1:
    st.header("Static Data Preview")

    full_countries_df = pd.read_csv("countries.csv")
    full_languages_df = pd.read_csv("languages.csv")

    # Countries map
    fig = _country_choropleth(
        full_countries_df,
        "Programs' availabilities by Country (Log Scale)",
    )
    st.plotly_chart(fig, use_container_width=True)

    # Languages map
    fig1 = _country_choropleth(
        full_languages_df,
        "Programs by Languages (Log Scale)",
    )
    st.plotly_chart(fig1, use_container_width=True)

    # Completeness evaluation
    catalog_scores = pd.read_csv("catalog_scores.csv")

    colorscale = [
        [0.0, "#ffffff"],
        [0.1, "#dcd6f7"],
        [0.3, "#a29bfe"],
        [0.6, "#6c5ce7"],
        [1.0, "#341f97"],
    ]

    fig_completeness = px.bar(
        catalog_scores,
        x="Total",
        y="Catalog",
        orientation="h",
        color="Total",
        color_continuous_scale=colorscale,
        title="Catalog Metadata Completeness Score",
    )
    fig_completeness.update_layout(
        yaxis={"categoryorder": "total ascending"},
        template="plotly_dark",
        height=1000,
    )
    st.plotly_chart(fig_completeness, use_container_width=True)

    # Completeness score broken down per program type
    subcols = ["movie", "show", "season", "episode", "sport"]

    # Sum of the raw subscores, used to scale each segment to the total score
    catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)

    fig_completeness2 = go.Figure()
    for subscore in subcols:
        label = subscore.capitalize()
        # Segment length = this subscore's share of the raw sum, scaled so the
        # stacked bar adds up to the catalog's "Total" score.
        norm_vals = (
            catalog_scores[subscore] / catalog_scores["raw_sum"]
        ) * catalog_scores["Total"]

        fig_completeness2.add_trace(
            go.Bar(
                y=catalog_scores["Catalog"],
                x=norm_vals,  # bar size = normalized values
                name=label,
                orientation="h",
                customdata=catalog_scores[subscore],  # raw values for hover
                # Plotly hover text is HTML-like: line breaks must be <br> tags.
                hovertemplate=(
                    "%{y}<br>"
                    + f"{label}: %{{customdata}}<br>"  # raw value
                    + "Normalized: %{x:.2f}"
                ),
            )
        )

    fig_completeness2.update_layout(
        barmode="stack",
        title="Subscore Contribution per Catalog (Scaled to Total Score)",
        xaxis_title="Total Score",
        template="plotly_dark",
        height=1200,
        yaxis={"categoryorder": "total ascending"},
    )
    st.plotly_chart(fig_completeness2, use_container_width=True)

    # Scatter plot: total score vs. number of programs
    fig_scatter = px.scatter(
        catalog_scores,
        x="Total",
        y="Number of programs",
        size="Number of programs",
        color="Total",
        hover_name="Catalog",
        color_continuous_scale="Viridis",
        size_max=50,
    )
    st.plotly_chart(fig_scatter, use_container_width=True)

# =========================================================================================================================
# Tab 2 - Onboarding sheet
# =========================================================================================================================
with tab2:
    st.header("Catalog Onboarding Status")

    # Work on a copy so the frame returned by get_data() is left untouched.
    onboarding_df = cat_onboarding_df.copy()

    # Convert onboarding date to datetime (e.g., 21/11 -> 2025-11-21).
    # Values that do not match "day/month" become NaT.
    onboarding_df["Onboarding date"] = pd.to_datetime(
        onboarding_df["Onboarding date"],
        format="%d/%m",
        errors="coerce",
    )
    onboarding_df["Onboarding date"] = onboarding_df["Onboarding date"].apply(
        lambda d: d.replace(year=ONBOARDING_YEAR) if pd.notna(d) else d
    )

    # Map textual months to end-of-month dates. Any value that is not a key of
    # this map (including "TBD") ends up missing and is dropped from the timeline.
    month_map = {
        "November 2025": dt.datetime(2025, 11, 30),
        "December 2025": dt.datetime(2025, 12, 31),
        "January 2026": dt.datetime(2026, 1, 31),
        "February 2026": dt.datetime(2026, 2, 28),
        "March 2026": dt.datetime(2026, 3, 31),
        "April 2026": dt.datetime(2026, 4, 30),
        "TBD": None,
    }
    onboarding_df["Go live parsed"] = onboarding_df["Go live (customer)"].map(month_map)

    # Keep only rows that have both ends of the timeline bar
    timeline_df = onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])

    if timeline_df.empty:
        st.info("No catalogs have both an onboarding date and a go-live month yet.")
    else:
        fig_timeline = px.timeline(
            timeline_df,
            x_start="Onboarding date",
            x_end="Go live parsed",
            y="NAME",
            color="Onboarding Status",
            hover_data=["Client", "Priority"],
            title="Onboarding → Go-Live Timeline",
        )
        # Reverse the y axis so the first row of the frame is drawn at the top
        fig_timeline.update_yaxes(autorange="reversed")
        st.plotly_chart(fig_timeline, use_container_width=True)

    # Bar chart 1: catalogs per client, split by onboarding status
    summary = (
        onboarding_df.groupby(["Client", "Onboarding Status"])
        .size()
        .reset_index(name="Count")
    )
    fig_client = px.bar(
        summary,
        x="Client",
        y="Count",
        color="Onboarding Status",
        text_auto=True,
        title="Catalogs per Client (by Onboarding Status)",
    )
    fig_client.update_layout(
        barmode="stack",
        xaxis_title="Client",
        yaxis_title="Catalog Count",
    )
    st.plotly_chart(fig_client, use_container_width=True)

    # Bar chart 2: catalogs per client, split by priority
    summary = (
        onboarding_df.groupby(["Client", "Priority"])
        .size()
        .reset_index(name="Count")
    )
    fig_client1 = px.bar(
        summary,
        x="Client",
        y="Count",
        color="Priority",
        text_auto=True,
        title="Catalogs per Client (by Priority)",
    )
    fig_client1.update_layout(
        barmode="stack",
        xaxis_title="Client",
        yaxis_title="Catalog Count",
    )
    st.plotly_chart(fig_client1, use_container_width=True)

    # Bar chart 3: catalogs per onboarding status, split by priority
    summary = (
        onboarding_df.groupby(["Onboarding Status", "Priority"])
        .size()
        .reset_index(name="Count")
    )
    fig_client2 = px.bar(
        summary,
        x="Onboarding Status",
        y="Count",
        color="Priority",
        text_auto=True,
        title="Catalogs per Onboarding Status (by Priority)",
    )
    fig_client2.update_layout(
        barmode="stack",
        xaxis_title="Onboarding Status",
        yaxis_title="Catalog Count",
    )
    st.plotly_chart(fig_client2, use_container_width=True)

# =========================================================================================================================
# Tab 3 - Metadata completeness
# =========================================================================================================================
with tab3:
    st.header("Catalog Metadata Completeness")

    # Every column except the catalog name is treated as a metadata field
    meta_cols = [col for col in cat_metadata_df.columns if col != "Catalog name"]

    score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}

    # Copy so the frame returned by get_data() is left untouched
    cat_df_numeric = cat_metadata_df.copy()
    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)

    # Force conversion to numeric (anything else becomes NaN)
    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(
        pd.to_numeric, errors="coerce"
    )

    cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
    cat_df_numeric_sorted = cat_df_numeric.sort_values(
        "Completeness Score", ascending=False
    )

    # Graph 1: completeness score per catalog
    fig_completeness = px.bar(
        cat_df_numeric_sorted,
        x="Completeness Score",
        y="Catalog name",
        orientation="h",
        color="Completeness Score",
        color_continuous_scale="Greens",
        title="Catalog Metadata Completeness Score",
    )
    fig_completeness.update_layout(yaxis={"categoryorder": "total ascending"})
    st.plotly_chart(fig_completeness, use_container_width=True)

    # Graph 2: average score per metadata field
    coverage = (
        cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
    )
    coverage.columns = ["Metadata Field", "Average Score"]

    fig_field_coverage = px.bar(
        coverage,
        x="Average Score",
        y="Metadata Field",
        orientation="h",
        color="Average Score",
        color_continuous_scale="Blues",
        title="Metadata Field Coverage Across All Catalogs",
    )
    fig_field_coverage.update_layout(yaxis={"categoryorder": "total ascending"})
    st.plotly_chart(fig_field_coverage, use_container_width=True)

    # Heatmap 1
    # Prepare data
    z = cat_df_numeric[meta_cols].astype(float).to_numpy()
    x = list(meta_cols)
    y = list(cat_df_numeric["Catalog name"].astype(str))

    # create_annotated_heatmap falls back to printing the z values when
    # annotation_text is None, so pass an explicit matrix of empty strings
    # to get a heatmap without numbers in the cells.
    blank_annotations = [[""] * z.shape[1] for _ in range(z.shape[0])]

    fig_heatmap = ff.create_annotated_heatmap(
        z=z,
        x=x,
        y=y,
        showscale=True,
        colorscale=TRAFFIC_LIGHT_SCALE,
        annotation_text=blank_annotations,
        # Pin the colour range so 0 is always red and 1 always green,
        # regardless of which scores happen to be present in the data.
        zmin=0,
        zmax=1,
    )

    # Layout adjustments
    fig_heatmap.update_layout(
        title="Metadata Completeness Heatmap (Catalog vs Field)",
        xaxis_title="Metadata Field",
        yaxis_title="Catalog Name",
        width=1600,  # make it wide
        height=1000,  # make it tall so names fit
        margin=dict(l=200, r=50, t=80, b=150),  # spacing for labels
    )

    # Tweak label angles for readability
    fig_heatmap.update_xaxes(tickangle=-45)
    fig_heatmap.update_yaxes(automargin=True)
    st.plotly_chart(fig_heatmap, use_container_width=True)

    # Heatmap 2: same data rendered through plotly express
    fig_heatmap1 = px.imshow(
        cat_df_numeric[meta_cols],
        labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
        x=meta_cols,
        y=cat_df_numeric["Catalog name"],
        color_continuous_scale=TRAFFIC_LIGHT_SCALE,
        zmin=0,
        zmax=1,
    )
    fig_heatmap1.update_layout(
        title="Metadata Completeness Heatmap (Catalog vs Field)",
        width=1600,
        height=1000,
        margin=dict(l=200, r=50, t=80, b=150),
    )
    fig_heatmap1.update_xaxes(tickangle=-45)
    st.plotly_chart(fig_heatmap1, use_container_width=True)

# =========================================================================================================================
# Tab 4 - Mapping status
# =========================================================================================================================
with tab4:
    st.header("Catalog Mapping status")