Spaces:

mbecchis
/

streaming-visualization

Sleeping

File size: 13,094 Bytes

8848efa

import streamlit as st
from gsheet_loader import get_data
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import datetime as dt

# Page chrome: must be configured before any other Streamlit element is drawn.
st.set_page_config(page_title="Catalog Data Dashboard", page_icon="📊", layout="wide")
st.title("📊 Catalog Data Dashboard")

# Introductory blurb; links to the Google Sheet and lists the three data areas.
_INTRO_MARKDOWN = """

    This dashboard combines live [Google Sheets data](https://docs.google.com/spreadsheets/d/10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA) for:

    - catalog onboarding  

    - metadata completeness  

    - mapping/scraping status  

    """
st.markdown(_INTRO_MARKDOWN)

# get_data() comes from the local gsheet_loader module and yields three objects,
# used below as the onboarding, metadata and status tables.
# NOTE(review): presumably pandas DataFrames, one per sheet — confirm in gsheet_loader.
cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()

# One tab per dashboard section, in display order.
_TAB_LABELS = [
    "Overview",
    "Static Data",
    "Onboarding Status",
    "Metadata Completeness",
    "Mapping Status",
]
tab0, tab1, tab2, tab3, tab4 = st.tabs(_TAB_LABELS)

# =========================================================================================================================
# Tab 0 - Overview
# =========================================================================================================================

with tab0:
    # Header text corrected (it previously read "Overiew").
    st.header("Overview")

    # Manual refresh: drop everything held by st.cache_data and rerun the
    # script from the top so the data is loaded again.
    # NOTE(review): this only refreshes the sheets if get_data() is cached
    # through st.cache_data — confirm in gsheet_loader.
    if st.button("🔄 Refresh Data"):
        st.cache_data.clear()
        st.toast("Refreshing data...", icon="🔄")
        st.rerun()

    st.markdown("---")
    st.subheader("Quick Data Preview")

    # Three side-by-side columns, each showing the first five rows of one table.
    preview_frames = (cat_onboarding_df, cat_metadata_df, cat_status_df)
    for preview_column, preview_frame in zip(st.columns(3), preview_frames):
        with preview_column:
            st.dataframe(preview_frame.head(5))

# =========================================================================================================================
# Tab 1 - Static data
# =========================================================================================================================

def _log_scale_choropleth(frame, title):
    """Return a world choropleth of ``frame`` shaded by its ``log_count`` column.

    The two static maps on this tab differ only in their data frame and title,
    so the shared configuration lives here.

    Args:
        frame: Table providing the ``country_name``, ``count`` and
            ``log_count`` columns referenced below.
        title: Title displayed above the map.

    Returns:
        The configured Plotly figure.
    """
    figure = px.choropleth(
        frame,
        locations="country_name",
        locationmode="country names",
        color="log_count",
        color_continuous_scale="Purples",
        hover_name="country_name",
        # Hover shows the raw count and hides the log value used for colouring.
        hover_data={"count": True, "log_count": False},
        projection="natural earth",
        title=title,
    )
    figure.update_geos(
        showcountries=True,
        showcoastlines=True,
        showland=True,
        landcolor="white",
    )
    figure.update_layout(
        width=1400,
        height=700,
        margin=dict(l=0, r=0, t=100, b=0),
        title_y=0.95,
    )
    return figure


with tab1:
    st.header("Static Data Preview")

    # Static inputs are read from CSV files resolved against the working directory.
    full_countries_df = pd.read_csv('countries.csv')
    full_languages_df = pd.read_csv('languages.csv')

    # countries map
    fig = _log_scale_choropleth(
        full_countries_df,
        "Programs' availabilities by Country (Log Scale)",
    )
    st.plotly_chart(fig, use_container_width=True)

    # languages map
    fig1 = _log_scale_choropleth(
        full_languages_df,
        "Programs by Languages (Log Scale)",
    )
    st.plotly_chart(fig1, use_container_width=True)

    # Completeness evaluation: one horizontal bar per catalog, coloured by "Total".
    catalog_scores = pd.read_csv("catalog_scores.csv")
    colorscale = [
        [0.0, "#ffffff"],
        [0.1, "#dcd6f7"],
        [0.3, "#a29bfe"],
        [0.6, "#6c5ce7"],
        [1.0, "#341f97"],
    ]

    fig_completeness = px.bar(
        catalog_scores,
        x="Total",
        y="Catalog",
        orientation="h",
        color="Total",
        color_continuous_scale=colorscale,
        title="Catalog Metadata Completeness Score",
    )
    fig_completeness.update_layout(
        yaxis={'categoryorder': 'total ascending'},
        template="plotly_dark",
        height=1000,
    )
    st.plotly_chart(fig_completeness, use_container_width=True)

    # Completeness score broken down by subscore.
    subcols = ["movie", "show", "season", "episode", "sport"]

    # Sum of the raw subscores per catalog; kept as a column like before.
    catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)

    # A catalog whose subscores sum to zero would divide by zero below and
    # produce NaN/inf bar lengths; mask those denominators and give such
    # catalogs zero-length segments instead.
    nonzero_sum = catalog_scores["raw_sum"].where(catalog_scores["raw_sum"] != 0)

    fig_completeness2 = go.Figure()

    for col in subcols:
        # Each segment is this subscore's share of the raw sum, rescaled so
        # the stacked segments of a catalog add up to its "Total".
        share = (catalog_scores[col] / nonzero_sum).fillna(0)
        norm_vals = share * catalog_scores["Total"]

        fig_completeness2.add_trace(
            go.Bar(
                y=catalog_scores["Catalog"],
                x=norm_vals,                       # bar size = normalized value
                name=col.capitalize(),
                orientation="h",
                customdata=catalog_scores[col],    # raw value, shown on hover
                hovertemplate=(
                    "<b>%{y}</b><br>" +
                    f"{col.capitalize()}: <b>%{{customdata}}</b><br>" +
                    "Normalized: %{x:.2f}<extra></extra>"
                ),
            )
        )

    fig_completeness2.update_layout(
        barmode="stack",
        title="Subscore Contribution per Catalog (Scaled to Total Score)",
        xaxis_title="Total Score",
        template="plotly_dark",
        height=1200,
        yaxis={'categoryorder': 'total ascending'},
    )

    st.plotly_chart(fig_completeness2, use_container_width=True)

    # Scatter plot: total score against number of programs; marker size also
    # follows the number of programs.
    fig_scatter = px.scatter(
        catalog_scores,
        x="Total",
        y="Number of programs",
        size="Number of programs",
        color="Total",
        hover_name="Catalog",
        color_continuous_scale="Viridis",
        size_max=50,
    )

    st.plotly_chart(fig_scatter, use_container_width=True)



# =========================================================================================================================
# Tab 2 - Onboarding sheet
# =========================================================================================================================

# Year stamped onto the sheet's year-less "DD/MM" onboarding dates.
# NOTE(review): an onboarding date that really falls in another year
# (e.g. January 2026) would be misplaced — confirm against the sheet.
_ONBOARDING_YEAR = 2025


def _month_label_to_month_end(labels):
    """Convert "Month YYYY" labels to the last day of that month.

    Args:
        labels: Series of go-live labels such as ``"November 2025"``.

    Returns:
        A datetime Series; any label that is not a full English month name
        followed by a four-digit year (for example ``"TBD"``) becomes NaT.
    """
    month_start = pd.to_datetime(
        labels.astype(str).str.strip(), format="%B %Y", errors="coerce"
    )
    # MonthEnd(0) rolls a first-of-month date forward to the end of the same
    # month; NaT stays NaT.
    return month_start + pd.offsets.MonthEnd(0)


def _stacked_count_bar(frame, x, color, title):
    """Return a stacked bar chart counting rows of ``frame`` per (x, color) pair.

    Args:
        frame: Table to aggregate.
        x: Column placed on the x axis; also used as the x-axis title.
        color: Column that splits each bar into stacked segments.
        title: Chart title.

    Returns:
        The configured Plotly figure.
    """
    counts = frame.groupby([x, color]).size().reset_index(name="Count")
    figure = px.bar(
        counts,
        x=x,
        y="Count",
        color=color,
        text_auto=True,
        title=title,
    )
    figure.update_layout(barmode="stack", xaxis_title=x, yaxis_title="Catalog Count")
    return figure


with tab2:
    st.header("Catalog Onboarding Status")

    # Convert onboarding date to datetime (e.g., 21/11 → 2025-11-21).
    # Parsing "%d/%m" leaves a placeholder year, so the year is set explicitly
    # afterwards; unparseable values become NaT and are left untouched.
    cat_onboarding_df["Onboarding date"] = pd.to_datetime(
        cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
    ).apply(lambda d: d.replace(year=_ONBOARDING_YEAR) if pd.notna(d) else d)

    # Map textual months to end-of-month dates. Parsing the label, instead of
    # looking it up in a fixed November 2025 – April 2026 table, keeps rows
    # with later go-live months on the timeline instead of dropping them.
    cat_onboarding_df["Go live parsed"] = _month_label_to_month_end(
        cat_onboarding_df["Go live (customer)"]
    )

    # The timeline needs both endpoints, so rows missing either date are left out.
    timeline_df = cat_onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])

    fig_timeline = px.timeline(
        timeline_df,
        x_start="Onboarding date",
        x_end="Go live parsed",
        y="NAME",
        color="Onboarding Status",
        hover_data=["Client", "Priority"],
        title="Onboarding → Go-Live Timeline",
    )
    # Reverse the y axis so the first row of the table is drawn at the top.
    fig_timeline.update_yaxes(autorange="reversed")

    st.plotly_chart(fig_timeline, use_container_width=True)

    # bar chart 1
    fig_client = _stacked_count_bar(
        cat_onboarding_df,
        x="Client",
        color="Onboarding Status",
        title="Catalogs per Client (by Onboarding Status)",
    )
    st.plotly_chart(fig_client, use_container_width=True)

    # bar chart 2
    fig_client1 = _stacked_count_bar(
        cat_onboarding_df,
        x="Client",
        color="Priority",
        title="Catalogs per Client (by Priority)",
    )
    st.plotly_chart(fig_client1, use_container_width=True)

    # bar chart 3
    fig_client2 = _stacked_count_bar(
        cat_onboarding_df,
        x="Onboarding Status",
        color="Priority",
        title="Catalogs per Onboarding Status (by Priority)",
    )
    st.plotly_chart(fig_client2, use_container_width=True)

# =========================================================================================================================
# Tab 3 - Metadata completeness
# =========================================================================================================================

with tab3:
    st.header("Catalog Metadata Completeness")

    # Every column except the catalog name is treated as a metadata field.
    meta_cols = [col for col in cat_metadata_df.columns if col != "Catalog name"]

    # Textual answers are scored: full credit, half credit, or none.
    score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}

    # Work on a copy so the table returned by get_data() is left untouched.
    cat_df_numeric = cat_metadata_df.copy()
    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)

    # Force conversion to numeric (anything not covered by score_map becomes NaN).
    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce")

    # Per-catalog score = mean of its field scores (NaN cells are skipped by mean()).
    cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
    cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False)

    # graph 1: completeness score per catalog
    fig_completeness = px.bar(
        cat_df_numeric_sorted,
        x="Completeness Score",
        y="Catalog name",
        orientation="h",
        color="Completeness Score",
        color_continuous_scale="Greens",
        title="Catalog Metadata Completeness Score",
    )
    fig_completeness.update_layout(yaxis={'categoryorder': 'total ascending'})

    st.plotly_chart(fig_completeness, use_container_width=True)

    # graph 2: average score per metadata field across all catalogs
    coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
    coverage.columns = ["Metadata Field", "Average Score"]

    fig_field_coverage = px.bar(
        coverage,
        x="Average Score",
        y="Metadata Field",
        orientation="h",
        color="Average Score",
        color_continuous_scale="Blues",
        title="Metadata Field Coverage Across All Catalogs",
    )
    fig_field_coverage.update_layout(yaxis={'categoryorder': 'total ascending'})

    st.plotly_chart(fig_field_coverage, use_container_width=True)

    # heatmap 1
    # Prepare data: rows are catalogs, columns are metadata fields.
    z = cat_df_numeric[meta_cols].astype(float).to_numpy()
    x = list(meta_cols)
    y = list(cat_df_numeric["Catalog name"].astype(str))

    # create_annotated_heatmap labels every cell with its z value when
    # annotation_text is None, so passing None does not hide the numbers.
    # A matrix of empty strings with the same shape as z is what actually
    # produces an unlabelled heatmap.
    blank_annotations = [[""] * len(x) for _ in y]

    fig_heatmap = ff.create_annotated_heatmap(
        z=z,
        x=x,
        y=y,
        showscale=True,
        colorscale=[
            [0.0, "rgb(255,77,77)"],     # red for 0 (No)
            [0.5, "rgb(255,204,0)"],     # yellow for 0.5 (Some)
            [1.0, "rgb(0,204,102)"]      # green for 1 (Yes)
        ],
        annotation_text=blank_annotations,
    )

    # Layout adjustments
    fig_heatmap.update_layout(
        title="Metadata Completeness Heatmap (Catalog vs Field)",
        xaxis_title="Metadata Field",
        yaxis_title="Catalog Name",
        width=1600,   # make it wide
        height=1000,  # make it tall so names fit
        margin=dict(l=200, r=50, t=80, b=150),  # spacing for labels
    )

    # Tweak label angles for readability
    fig_heatmap.update_xaxes(tickangle=-45)
    fig_heatmap.update_yaxes(automargin=True)

    st.plotly_chart(fig_heatmap, use_container_width=True)

    # heatmap 2: the same matrix drawn with plotly express
    fig_heatmap1 = px.imshow(
        cat_df_numeric[meta_cols],
        labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
        x=meta_cols,
        y=cat_df_numeric["Catalog name"],
        color_continuous_scale=[
            [0.0, "rgb(255,77,77)"],
            [0.5, "rgb(255,204,0)"],
            [1.0, "rgb(0,204,102)"]
        ],
    )

    fig_heatmap1.update_layout(
        title="Metadata Completeness Heatmap (Catalog vs Field)",
        width=1600,
        height=1000,
        margin=dict(l=200, r=50, t=80, b=150),
    )
    fig_heatmap1.update_xaxes(tickangle=-45)

    st.plotly_chart(fig_heatmap1, use_container_width=True)



# =========================================================================================================================
# Tab 4 - Mapping status
# =========================================================================================================================

with tab4:
    # Only the section header is rendered in the visible part of this tab.
    st.header("Catalog Mapping status")