"""Streamlit dashboard combining static CSV extracts with live Google Sheets data.

The page has three tabs:

* Static Data - choropleth maps and completeness scores read from local CSVs.
* Onboarding Status - timeline and count charts built from the onboarding sheet.
* Metadata Completeness - per-catalog and per-field scores from the metadata sheet.
"""

import datetime as dt

import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import streamlit as st

from gsheet_loader import get_data

# The onboarding sheet stores dates as "dd/mm" without a year; this year is
# stamped onto every parsed date.
# NOTE(review): go-live months run into 2026, so a single fixed year may be
# wrong for onboarding dates that fall after December - confirm with the
# sheet owners.
ONBOARDING_YEAR = 2025


def _log_count_choropleth(df: pd.DataFrame, title: str) -> go.Figure:
    """Build a world choropleth coloured by ``log_count``.

    The frame must provide ``country_name``, ``count`` and ``log_count``
    columns; the hover shows ``count`` and hides ``log_count``.
    """
    fig = px.choropleth(
        df,
        locations="country_name",
        locationmode="country names",
        color="log_count",
        color_continuous_scale="Purples",
        hover_name="country_name",
        hover_data={"count": True, "log_count": False},
        projection="natural earth",
        title=title,
    )
    fig.update_geos(
        showcountries=True,
        showcoastlines=True,
        showland=True,
        landcolor="white",
        projection_type="natural earth",
    )
    fig.update_layout(
        width=1400,
        height=700,
        margin=dict(l=0, r=0, t=100, b=0),
        title_y=0.95,
    )
    return fig


def _stacked_count_bar(df: pd.DataFrame, x: str, color: str, title: str) -> go.Figure:
    """Build a stacked bar chart counting rows of *df* per (*x*, *color*) pair."""
    summary = df.groupby([x, color]).size().reset_index(name="Count")
    fig = px.bar(
        summary,
        x=x,
        y="Count",
        color=color,
        text_auto=True,
        title=title,
    )
    fig.update_layout(barmode="stack", xaxis_title=x, yaxis_title="Catalog Count")
    return fig


st.set_page_config(
    page_title="Catalog Data Dashboard",
    layout="wide",
    page_icon="📊",
)

st.title("📊 Catalog Data Dashboard")
st.markdown(
    """
This dashboard combines Static Data with live Google Sheets data for:
- catalog onboarding
- metadata completeness
"""
)

# Clearing the st.cache_data cache and rerunning makes the next get_data()
# call fetch again (presumably get_data is cached with st.cache_data - confirm
# in gsheet_loader).
if st.button("🔄 Refresh Data"):
    st.cache_data.clear()
    st.toast("Refreshing data...", icon="🔄")
    st.rerun()

cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()

tab1, tab2, tab3 = st.tabs([
    # "Overview",
    "Static Data",
    "Onboarding Status",
    "Metadata Completeness",
    # "Mapping Status"
])

# =========================================================================================================================
# Tab 0 - Overview (disabled)
# =========================================================================================================================
# with tab0:
#     st.header("Overview")

#     if st.button("🔄 Refresh Data"):
#         st.cache_data.clear()
#         st.toast("Refreshing data...", icon="🔄")
#         st.rerun()

#     st.markdown("---")
#     st.subheader("Quick Data Preview")

#     col1, col2, col3 = st.columns(3)
#     with col1:
#         st.dataframe(cat_onboarding_df.head(5))
#     with col2:
#         st.dataframe(cat_metadata_df.head(5))
#     with col3:
#         st.dataframe(cat_status_df.head(5))

# =========================================================================================================================
# Tab 1 - Static data
# =========================================================================================================================
with tab1:
    st.header("Static Data Preview")

    full_countries_df = pd.read_csv('countries.csv')
    full_languages_df = pd.read_csv('languages.csv')

    # countries map
    fig = _log_count_choropleth(
        full_countries_df,
        "Programs' availabilities by Country (Log Scale)",
    )
    st.plotly_chart(fig, use_container_width=True)

    # languages map
    fig1 = _log_count_choropleth(
        full_languages_df,
        "Programs by Languages (Log Scale)",
    )
    st.plotly_chart(fig1, use_container_width=True)

    # Completeness evaluation
    catalog_scores = pd.read_csv("catalog_scores.csv")

    colorscale = [
        [0.0, "#ffffff"],
        [0.1, "#dcd6f7"],
        [0.3, "#a29bfe"],
        [0.6, "#6c5ce7"],
        [1.0, "#341f97"],
    ]

    fig_completeness = px.bar(
        catalog_scores,
        x="Total",
        y="Catalog",
        orientation="h",
        color="Total",
        color_continuous_scale=colorscale,
        title="Catalog Metadata Completeness Score",
    )
    fig_completeness.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        template="plotly_dark",
        height=1000,
    )
    st.plotly_chart(fig_completeness, use_container_width=True)

    # Completeness score broken down by program type
    subcols = ["movie", "show", "season", "episode", "sport"]

    # Sum of the raw subscores per catalog. A zero sum is masked to NaN so the
    # division below cannot produce inf/NaN bar lengths; those rows end up
    # with zero-length segments instead.
    raw_sum = catalog_scores[subcols].sum(axis=1)
    safe_sum = raw_sum.where(raw_sum != 0)

    fig_completeness2 = go.Figure()
    for col in subcols:
        # Share of this subscore within the raw sum, rescaled so that the
        # stacked segments of one catalog add up to its "Total" score.
        norm_vals = ((catalog_scores[col] / safe_sum) * catalog_scores["Total"]).fillna(0.0)

        fig_completeness2.add_trace(
            go.Bar(
                y=catalog_scores["Catalog"],
                x=norm_vals,  # bar size = normalized values
                name=col.capitalize(),
                orientation="h",
                customdata=catalog_scores[col],  # raw values for hover
                # Plotly hover text needs an explicit <br> for a line break.
                hovertemplate=(
                    "%{y}<br>"
                    + f"{col.capitalize()}: %{{customdata}}<br>"  # raw value
                    + "Normalized: %{x:.2f}"
                ),
            )
        )

    fig_completeness2.update_layout(
        barmode="stack",
        title="Subscore Contribution per Catalog (Scaled to Total Score)",
        xaxis_title="Total Score",
        template="plotly_dark",
        height=1200,
        yaxis={'categoryorder': 'total ascending'},
    )
    st.plotly_chart(fig_completeness2, use_container_width=True)

    # scatter plot: total score against catalog size
    fig_scatter = px.scatter(
        catalog_scores,
        x="Total",
        y="Number of programs",
        size="Number of programs",
        color="Total",
        hover_name="Catalog",
        color_continuous_scale="Viridis",
        size_max=50,
    )
    st.plotly_chart(fig_scatter, use_container_width=True)

# =========================================================================================================================
# Tab 2 - Onboarding sheet
# =========================================================================================================================
with tab2:
    st.header("Catalog Onboarding Status")

    # Work on a copy (as tab 3 does) so the frame returned by get_data() is
    # never modified in place.
    onboarding_df = cat_onboarding_df.copy()

    # Convert onboarding date to datetime (e.g., 21/11 -> 2025-11-21).
    # Values that do not match "dd/mm" become NaT.
    onboarding_df["Onboarding date"] = pd.to_datetime(
        onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
    )
    onboarding_df["Onboarding date"] = onboarding_df["Onboarding date"].apply(
        lambda d: d.replace(year=ONBOARDING_YEAR) if pd.notna(d) else d
    )

    # Map textual months to end-of-month dates; labels missing from the map
    # (and "TBD") yield a missing value and are dropped from the timeline.
    month_map = {
        "November 2025": dt.datetime(2025, 11, 30),
        "December 2025": dt.datetime(2025, 12, 31),
        "January 2026": dt.datetime(2026, 1, 31),
        "February 2026": dt.datetime(2026, 2, 28),
        "March 2026": dt.datetime(2026, 3, 31),
        "April 2026": dt.datetime(2026, 4, 30),
        "TBD": None,
    }
    onboarding_df["Go live parsed"] = onboarding_df["Go live (customer)"].map(month_map)

    # Keep only rows that have both ends of the timeline bar
    timeline_df = onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])

    if timeline_df.empty:
        st.info("No catalogs with both an onboarding date and a go-live month to display.")
    else:
        fig_timeline = px.timeline(
            timeline_df,
            x_start="Onboarding date",
            x_end="Go live parsed",
            y="NAME",
            color="Onboarding Status",
            hover_data=["Client", "Priority"],
            title="Onboarding → Go-Live Timeline",
        )
        # Reverse the y axis so the first row is drawn at the top
        fig_timeline.update_yaxes(autorange="reversed")
        st.plotly_chart(fig_timeline, use_container_width=True)

    # bar chart 1
    fig_client = _stacked_count_bar(
        onboarding_df,
        x="Client",
        color="Onboarding Status",
        title="Catalogs per Client (by Onboarding Status)",
    )
    st.plotly_chart(fig_client, use_container_width=True)

    # bar chart 2
    fig_client1 = _stacked_count_bar(
        onboarding_df,
        x="Client",
        color="Priority",
        title="Catalogs per Client (by Priority)",
    )
    st.plotly_chart(fig_client1, use_container_width=True)

    # bar chart 3
    fig_client2 = _stacked_count_bar(
        onboarding_df,
        x="Onboarding Status",
        color="Priority",
        title="Catalogs per Onboarding Status (by Priority)",
    )
    st.plotly_chart(fig_client2, use_container_width=True)

# =========================================================================================================================
# Tab 3 - Metadata completeness
# =========================================================================================================================
with tab3:
    st.header("Catalog Metadata Completeness")

    # Every column except the catalog name is treated as a metadata field
    meta_cols = [col for col in cat_metadata_df.columns if col not in ["Catalog name"]]

    score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}

    cat_df_numeric = cat_metadata_df.copy()
    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)

    # force conversion to numeric (anything else becomes NaN)
    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce")

    # Row-wise mean over the metadata fields (NaN cells are skipped by mean)
    cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
    cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False)

    # graph 1: score per catalog
    fig_completeness = px.bar(
        cat_df_numeric_sorted,
        x="Completeness Score",
        y="Catalog name",
        orientation="h",
        color="Completeness Score",
        color_continuous_scale="Greens",
        title="Catalog Metadata Completeness Score",
    )
    fig_completeness.update_layout(yaxis={'categoryorder': 'total ascending'})
    st.plotly_chart(fig_completeness, use_container_width=True)

    # graph 2: average score per metadata field
    coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
    coverage.columns = ["Metadata Field", "Average Score"]

    fig_field_coverage = px.bar(
        coverage,
        x="Average Score",
        y="Metadata Field",
        orientation="h",
        color="Average Score",
        color_continuous_scale="Blues",
        title="Metadata Field Coverage Across All Catalogs",
    )
    fig_field_coverage.update_layout(yaxis={'categoryorder': 'total ascending'})
    st.plotly_chart(fig_field_coverage, use_container_width=True)

    # # heatmap 1 (disabled)
    # # Prepare data
    # z = cat_df_numeric[meta_cols].astype(float).to_numpy()
    # x = list(meta_cols)
    # y = list(cat_df_numeric["Catalog name"].astype(str))

    # # Build the heatmap (no annotation_text)
    # fig_heatmap = ff.create_annotated_heatmap(
    #     z=z,
    #     x=x,
    #     y=y,
    #     showscale=True,
    #     colorscale=[
    #         [0.0, "rgb(255,77,77)"],   # red for 0 (No)
    #         [0.5, "rgb(255,204,0)"],   # yellow for 0.5 (Some)
    #         [1.0, "rgb(0,204,102)"]    # green for 1 (Yes)
    #     ],
    #     annotation_text=None  # removes numbers
    # )

    # # Layout adjustments
    # fig_heatmap.update_layout(
    #     title="Metadata Completeness Heatmap (Catalog vs Field)",
    #     xaxis_title="Metadata Field",
    #     yaxis_title="Catalog Name",
    #     width=1600,   # make it wide
    #     height=1000,  # make it tall so names fit
    #     margin=dict(l=200, r=50, t=80, b=150),  # spacing for labels
    # )

    # # Tweak label angles for readability
    # fig_heatmap.update_xaxes(tickangle=-45)
    # fig_heatmap.update_yaxes(automargin=True)

    # st.plotly_chart(fig_heatmap, use_container_width=True)

    # # heatmap 2 (disabled)
    # fig_heatmap1 = px.imshow(
    #     cat_df_numeric[meta_cols],
    #     labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
    #     x=meta_cols,
    #     y=cat_df_numeric["Catalog name"],
    #     color_continuous_scale=[
    #         [0.0, "rgb(255,77,77)"],
    #         [0.5, "rgb(255,204,0)"],
    #         [1.0, "rgb(0,204,102)"]
    #     ],
    # )
    # fig_heatmap1.update_layout(
    #     title="Metadata Completeness Heatmap (Catalog vs Field)",
    #     width=1600,
    #     height=1000,
    #     margin=dict(l=200, r=50, t=80, b=150),
    # )
    # fig_heatmap1.update_xaxes(tickangle=-45)

    # st.plotly_chart(fig_heatmap1, use_container_width=True)

# with tab4:
#     st.header("Catalog Mapping status")