Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from gsheet_loader import get_data | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.figure_factory as ff | |
| import plotly.graph_objects as go | |
| import datetime as dt | |
| st.set_page_config( | |
| page_title="Catalog Data Dashboard", | |
| layout="wide", | |
| page_icon="π", | |
| ) | |
| st.title("π Catalog Data Dashboard") | |
| st.markdown( | |
| """ | |
| This dashboard combines live [Google Sheets data](https://docs.google.com/spreadsheets/d/10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA) for: | |
| - catalog onboarding | |
| - metadata completeness | |
| - mapping/scraping status | |
| """ | |
| ) | |
| cat_onboarding_df, cat_metadata_df, cat_status_df = get_data() | |
| tab0, tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Static Data", "Onboarding Status", "Metadata Completeness", "Mapping Status"]) | |
| # ========================================================================================================================= | |
| # Tab 0 - Overview | |
| # ========================================================================================================================= | |
| with tab0: | |
| st.header("Overiew") | |
| if st.button("π Refresh Data"): | |
| st.cache_data.clear() | |
| st.toast("Refreshing data...", icon="π") | |
| st.rerun() | |
| st.markdown("---") | |
| st.subheader("Quick Data Preview") | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| st.dataframe(cat_onboarding_df.head(5)) | |
| with col2: | |
| st.dataframe(cat_metadata_df.head(5)) | |
| with col3: | |
| st.dataframe(cat_status_df.head(5)) | |
| # ========================================================================================================================= | |
| # Tab 0 - Static stuff | |
| # ========================================================================================================================= | |
| with tab1: | |
| st.header("Static Data Preview") | |
| full_countries_df = pd.read_csv('countries.csv') | |
| full_languages_df = pd.read_csv('languages.csv') | |
| # countries map | |
| fig = px.choropleth( | |
| full_countries_df, | |
| locations="country_name", | |
| locationmode="country names", | |
| color="log_count", | |
| color_continuous_scale="Purples", | |
| hover_name="country_name", | |
| hover_data={"count": True, "log_count": False}, | |
| projection="natural earth", | |
| title="Programs' availabilities by Country (Log Scale)" | |
| ) | |
| fig.update_geos(showcountries=True, showcoastlines=True, showland=True, landcolor="white", projection_type="natural earth") | |
| fig.update_layout( | |
| width=1400, | |
| height=700, | |
| margin=dict(l=0, r=0, t=100, b=0), | |
| title_y=0.95 | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # languages map | |
| fig1 = px.choropleth( | |
| full_languages_df, | |
| locations="country_name", | |
| locationmode="country names", | |
| color="log_count", | |
| color_continuous_scale="Purples", | |
| hover_name="country_name", | |
| hover_data={"count": True, "log_count": False}, | |
| projection="natural earth", | |
| title="Programs by Languages (Log Scale)" | |
| ) | |
| fig1.update_geos(showcountries=True, showcoastlines=True, showland=True, landcolor="white", projection_type="natural earth") | |
| fig1.update_layout( | |
| width=1400, | |
| height=700, | |
| margin=dict(l=0, r=0, t=100, b=0), | |
| title_y=0.95 | |
| ) | |
| st.plotly_chart(fig1, use_container_width=True) | |
| # Completeness evaluation | |
| catalog_scores = pd.read_csv("catalog_scores.csv") | |
| colorscale = [ | |
| [0.0, "#ffffff"], | |
| [0.1, "#dcd6f7"], | |
| [0.3, "#a29bfe"], | |
| [0.6, "#6c5ce7"], | |
| [1.0, "#341f97"] | |
| ] | |
| fig_completeness = px.bar( | |
| catalog_scores, | |
| x="Total", | |
| y="Catalog", | |
| orientation="h", | |
| color="Total", | |
| color_continuous_scale=colorscale, | |
| title="Catalog Metadata Completeness Score", | |
| ) | |
| fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'}, template="plotly_dark", height=1000) | |
| st.plotly_chart(fig_completeness, use_container_width=True) | |
| # ### completeness score broken down | |
| subcols = ["movie", "show", "season", "episode", "sport"] | |
| # Compute sum of raw subscores | |
| catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1) | |
| # Build the figure | |
| fig_completeness2 = go.Figure() | |
| for col in subcols: | |
| # normalized height of this bar segment | |
| norm_vals = (catalog_scores[col] / catalog_scores["raw_sum"]) * catalog_scores["Total"] | |
| fig_completeness2.add_trace( | |
| go.Bar( | |
| y=catalog_scores["Catalog"], | |
| x=norm_vals, # BAR SIZE = normalized values | |
| name=col.capitalize(), | |
| orientation="h", | |
| customdata=catalog_scores[col], # RAW values for hover | |
| hovertemplate=( | |
| "<b>%{y}</b><br>" + | |
| f"{col.capitalize()}: <b>%{{customdata}}</b><br>" + # RAW value | |
| "Normalized: %{x:.2f}<extra></extra>" | |
| ) | |
| ) | |
| ) | |
| fig_completeness2.update_layout( | |
| barmode="stack", | |
| title="Subscore Contribution per Catalog (Scaled to Total Score)", | |
| xaxis_title="Total Score", | |
| template="plotly_dark", | |
| height=1200, | |
| yaxis={'categoryorder':'total ascending'} | |
| ) | |
| st.plotly_chart(fig_completeness2, use_container_width=True) | |
| #scatter plot | |
| fig_scatter = px.scatter( | |
| catalog_scores, | |
| x="Total", | |
| y="Number of programs", | |
| size="Number of programs", | |
| color="Total", | |
| hover_name="Catalog", | |
| color_continuous_scale="Viridis", | |
| size_max=50 | |
| ) | |
| st.plotly_chart(fig_scatter, use_container_width=True) | |
| # ========================================================================================================================= | |
| # Tab 2 - Onboarding sheet | |
| # ========================================================================================================================= | |
| with tab2: | |
| st.header("Catalog Onboarding Status") | |
| # Convert onboarding date to datetime (e.g., 21/11 β 2025-11-21) | |
| cat_onboarding_df["Onboarding date"] = pd.to_datetime( | |
| cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce" | |
| ) | |
| cat_onboarding_df["Onboarding date"] = cat_onboarding_df["Onboarding date"].apply( | |
| lambda d: d.replace(year=2025) if pd.notna(d) else d | |
| ) | |
| # Map textual months to end-of-month dates | |
| month_map = { | |
| "November 2025": dt.datetime(2025, 11, 30), | |
| "December 2025": dt.datetime(2025, 12, 31), | |
| "January 2026": dt.datetime(2026, 1, 31), | |
| "February 2026": dt.datetime(2026, 2, 28), | |
| "March 2026": dt.datetime(2026, 3, 31), | |
| "April 2026": dt.datetime(2026, 4, 30), | |
| "TBD": None, | |
| } | |
| cat_onboarding_df["Go live parsed"] = cat_onboarding_df["Go live (customer)"].map(month_map) | |
| # Drop missing | |
| timeline_df = cat_onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"]) | |
| fig_timeline = px.timeline( | |
| timeline_df, | |
| x_start="Onboarding date", | |
| x_end="Go live parsed", | |
| y="NAME", | |
| color="Onboarding Status", | |
| hover_data=["Client", "Priority"], | |
| title="Onboarding β Go-Live Timeline", | |
| ) | |
| fig_timeline.update_yaxes(autorange="reversed") | |
| st.plotly_chart(fig_timeline, use_container_width=True) | |
| # bar chart 1 | |
| summary = ( | |
| cat_onboarding_df.groupby(["Client", "Onboarding Status"]) | |
| .size() | |
| .reset_index(name="Count") | |
| ) | |
| fig_client = px.bar( | |
| summary, | |
| x="Client", | |
| y="Count", | |
| color="Onboarding Status", | |
| text_auto=True, | |
| title="Catalogs per Client (by Onboarding Status)", | |
| ) | |
| fig_client.update_layout(barmode="stack", xaxis_title="Client", yaxis_title="Catalog Count") | |
| st.plotly_chart(fig_client, use_container_width=True) | |
| # bar chart 2 | |
| summary = ( | |
| cat_onboarding_df.groupby(["Client", "Priority"]) | |
| .size() | |
| .reset_index(name="Count") | |
| ) | |
| fig_client1 = px.bar( | |
| summary, | |
| x="Client", | |
| y="Count", | |
| color="Priority", | |
| text_auto=True, | |
| title="Catalogs per Client (by Priority)", | |
| ) | |
| fig_client1.update_layout(barmode="stack", xaxis_title="Client", yaxis_title="Catalog Count") | |
| st.plotly_chart(fig_client1, use_container_width=True) | |
| # bar chart 3 | |
| summary = ( | |
| cat_onboarding_df.groupby(["Onboarding Status", "Priority"]) | |
| .size() | |
| .reset_index(name="Count") | |
| ) | |
| fig_client2 = px.bar( | |
| summary, | |
| x="Onboarding Status", | |
| y="Count", | |
| color="Priority", | |
| text_auto=True, | |
| title="Catalogs per Onboarding Status (by Priority)", | |
| ) | |
| fig_client2.update_layout(barmode="stack", xaxis_title="Onboarding Status", yaxis_title="Catalog Count") | |
| st.plotly_chart(fig_client2, use_container_width=True) | |
| # ========================================================================================================================= | |
| # Tab 3 - Metadata completeness | |
| # ========================================================================================================================= | |
| with tab3: | |
| st.header("Catalog Metadata Completeness") | |
| cat_df = cat_metadata_df.copy() | |
| meta_cols = [col for col in cat_df.columns if col not in ["Catalog name"]] | |
| score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0} | |
| cat_df_numeric = cat_df.copy() | |
| cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map) | |
| # force conversion to numeric (anything else becomes NaN) | |
| cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce") | |
| cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1) | |
| cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False) | |
| #graph 1 | |
| fig_completeness = px.bar( | |
| cat_df_numeric_sorted, | |
| x="Completeness Score", | |
| y="Catalog name", | |
| orientation="h", | |
| color="Completeness Score", | |
| color_continuous_scale="Greens", | |
| title="Catalog Metadata Completeness Score", | |
| ) | |
| fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'}) | |
| st.plotly_chart(fig_completeness, use_container_width=True) | |
| # graph 2 | |
| coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index() | |
| coverage.columns = ["Metadata Field", "Average Score"] | |
| fig_field_coverage = px.bar( | |
| coverage, | |
| x="Average Score", | |
| y="Metadata Field", | |
| orientation="h", | |
| color="Average Score", | |
| color_continuous_scale="Blues", | |
| title="Metadata Field Coverage Across All Catalogs", | |
| ) | |
| fig_field_coverage.update_layout(yaxis={'categoryorder':'total ascending'}) | |
| st.plotly_chart(fig_field_coverage, use_container_width=True) | |
| # heatmap 1 | |
| # Prepare data | |
| z = cat_df_numeric[meta_cols].astype(float).to_numpy() | |
| x = list(meta_cols) | |
| y = list(cat_df_numeric["Catalog name"].astype(str)) | |
| # Build the heatmap (no annotation_text) | |
| fig_heatmap = ff.create_annotated_heatmap( | |
| z=z, | |
| x=x, | |
| y=y, | |
| showscale=True, | |
| colorscale=[ | |
| [0.0, "rgb(255,77,77)"], # red for 0 (No) | |
| [0.5, "rgb(255,204,0)"], # yellow for 0.5 (Some) | |
| [1.0, "rgb(0,204,102)"] # green for 1 (Yes) | |
| ], | |
| annotation_text=None # removes numbers | |
| ) | |
| # Layout adjustments | |
| fig_heatmap.update_layout( | |
| title="Metadata Completeness Heatmap (Catalog vs Field)", | |
| xaxis_title="Metadata Field", | |
| yaxis_title="Catalog Name", | |
| width=1600, # make it wide | |
| height=1000, # make it tall so names fit | |
| margin=dict(l=200, r=50, t=80, b=150), # spacing for labels | |
| ) | |
| # Tweak label angles for readability | |
| fig_heatmap.update_xaxes(tickangle=-45) | |
| fig_heatmap.update_yaxes(automargin=True) | |
| st.plotly_chart(fig_heatmap, use_container_width=True) | |
| # heatmap 2 | |
| fig_heatmap1 = px.imshow( | |
| cat_df_numeric[meta_cols], | |
| labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"), | |
| x=meta_cols, | |
| y=cat_df_numeric["Catalog name"], | |
| color_continuous_scale=[ | |
| [0.0, "rgb(255,77,77)"], | |
| [0.5, "rgb(255,204,0)"], | |
| [1.0, "rgb(0,204,102)"] | |
| ], | |
| ) | |
| fig_heatmap1.update_layout( | |
| title="Metadata Completeness Heatmap (Catalog vs Field)", | |
| width=1600, | |
| height=1000, | |
| margin=dict(l=200, r=50, t=80, b=150), | |
| ) | |
| fig_heatmap1.update_xaxes(tickangle=-45) | |
| st.plotly_chart(fig_heatmap1, use_container_width=True) | |
| with tab4: | |
| st.header("Catalog Mapping status") | |