Spaces:

mbecchis
/

streaming-visualization

Sleeping

App Files Files Community

mbecchis committed on Dec 1, 2025

Commit

93fc514

verified ·

1 Parent(s): 63db663

Update app.py

Browse files

Files changed (1) hide show

app.py +406 -406

app.py CHANGED Viewed

@@ -1,406 +1,406 @@
-import streamlit as st
-from gsheet_loader import get_data
-import pandas as pd
-import plotly.express as px
-import plotly.figure_factory as ff
-import plotly.graph_objects as go
-import datetime as dt
-st.set_page_config(
-    page_title="Catalog Data Dashboard",
-    layout="wide",
-    page_icon="📊",
-)
-st.title("📊 Catalog Data Dashboard")
-st.markdown(
-    """
-    This dashboard combines live [Google Sheets data](https://docs.google.com/spreadsheets/d/10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA) for:
-    - catalog onboarding
-    - metadata completeness
-    - mapping/scraping status
-    """
-)
-cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()
-tab0, tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Static Data", "Onboarding Status", "Metadata Completeness", "Mapping Status"])
-# =========================================================================================================================
-# Tab 0 - Overview
-# =========================================================================================================================
-with tab0:
-    st.header("Overiew")
-    if st.button("🔄 Refresh Data"):
-        st.cache_data.clear()
-        st.toast("Refreshing data...", icon="🔄")
-        st.rerun()
-    st.markdown("---")
-    st.subheader("Quick Data Preview")
-    col1, col2, col3 = st.columns(3)
-    with col1:
-        st.dataframe(cat_onboarding_df.head(5))
-    with col2:
-        st.dataframe(cat_metadata_df.head(5))
-    with col3:
-        st.dataframe(cat_status_df.head(5))
-# =========================================================================================================================
-# Tab 0 - Static stuff
-# =========================================================================================================================
-with tab1:
-    st.header("Static Data Preview")
-    full_countries_df = pd.read_csv('countries.csv')
-    full_languages_df = pd.read_csv('languages.csv')
-    # countries map
-    fig = px.choropleth(
-    full_countries_df,
-    locations="country_name",
-    locationmode="country names",
-    color="log_count",
-    color_continuous_scale="Purples",
-    hover_name="country_name",
-    hover_data={"count": True, "log_count": False},
-    projection="natural earth",
-    title="Programs' availabilities by Country (Log Scale)"
-    )
-    fig.update_geos(showcountries=True, showcoastlines=True, showland=True, landcolor="white", projection_type="natural earth")
-    fig.update_layout(
-        width=1400,
-        height=700,
-        margin=dict(l=0, r=0, t=100, b=0),
-        title_y=0.95
-    )
-    st.plotly_chart(fig, use_container_width=True)
-    # languages map
-    fig1 = px.choropleth(
-    full_languages_df,
-    locations="country_name",
-    locationmode="country names",
-    color="log_count",
-    color_continuous_scale="Purples",
-    hover_name="country_name",
-    hover_data={"count": True, "log_count": False},
-    projection="natural earth",
-    title="Programs by Languages (Log Scale)"
-    )
-    fig1.update_geos(showcountries=True, showcoastlines=True, showland=True, landcolor="white", projection_type="natural earth")
-    fig1.update_layout(
-        width=1400,
-        height=700,
-        margin=dict(l=0, r=0, t=100, b=0),
-        title_y=0.95
-    )
-    st.plotly_chart(fig1, use_container_width=True)
-    # Completeness evaluation
-    catalog_scores = pd.read_csv("catalog_scores.csv")
-    colorscale = [
-    [0.0,  "#ffffff"],
-    [0.1,  "#dcd6f7"],
-    [0.3,  "#a29bfe"],
-    [0.6,  "#6c5ce7"],
-    [1.0,  "#341f97"]
-    ]
-    fig_completeness = px.bar(
-        catalog_scores,
-        x="Total",
-        y="Catalog",
-        orientation="h",
-        color="Total",
-        color_continuous_scale=colorscale,
-        title="Catalog Metadata Completeness Score",
-    )
-    fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'}, template="plotly_dark", height=1000)
-    st.plotly_chart(fig_completeness, use_container_width=True)
-    # ### completeness score broken down
-    subcols = ["movie", "show", "season", "episode", "sport"]
-    # Compute sum of raw subscores
-    catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)
-    # Build the figure
-    fig_completeness2 = go.Figure()
-    for col in subcols:
-        # normalized height of this bar segment
-        norm_vals = (catalog_scores[col] / catalog_scores["raw_sum"]) * catalog_scores["Total"]
-        fig_completeness2.add_trace(
-            go.Bar(
-                y=catalog_scores["Catalog"],
-                x=norm_vals,                  # BAR SIZE = normalized values
-                name=col.capitalize(),
-                orientation="h",
-                customdata=catalog_scores[col],    # RAW values for hover
-                hovertemplate=(
-                    "<b>%{y}</b><br>" +
-                    f"{col.capitalize()}: <b>%{{customdata}}</b><br>" +   # RAW value
-                    "Normalized: %{x:.2f}<extra></extra>"
-                )
-            )
-        )
-    fig_completeness2.update_layout(
-        barmode="stack",
-        title="Subscore Contribution per Catalog (Scaled to Total Score)",
-        xaxis_title="Total Score",
-        template="plotly_dark",
-        height=1200,
-        yaxis={'categoryorder':'total ascending'}
-    )
-    st.plotly_chart(fig_completeness2, use_container_width=True)
-    #scatter plot
-    fig_scatter = px.scatter(
-    catalog_scores,
-    x="Total",
-    y="Number of programs",
-    size="Number of programs",
-    color="Total",
-    hover_name="Catalog",
-    color_continuous_scale="Viridis",
-    size_max=50
-    )
-    st.plotly_chart(fig_scatter, use_container_width=True)
-# =========================================================================================================================
-# Tab 2 - Onboarding sheet
-# =========================================================================================================================
-with tab2:
-    st.header("Catalog Onboarding Status")
-        # Convert onboarding date to datetime (e.g., 21/11 → 2025-11-21)
-    cat_onboarding_df["Onboarding date"] = pd.to_datetime(
-        cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
-    )
-    cat_onboarding_df["Onboarding date"] = cat_onboarding_df["Onboarding date"].apply(
-        lambda d: d.replace(year=2025) if pd.notna(d) else d
-    )
-    # Map textual months to end-of-month dates
-    month_map = {
-        "November 2025": dt.datetime(2025, 11, 30),
-        "December 2025": dt.datetime(2025, 12, 31),
-        "January 2026": dt.datetime(2026, 1, 31),
-        "February 2026": dt.datetime(2026, 2, 28),
-        "March 2026": dt.datetime(2026, 3, 31),
-        "April 2026": dt.datetime(2026, 4, 30),
-        "TBD": None,
-    }
-    cat_onboarding_df["Go live parsed"] = cat_onboarding_df["Go live (customer)"].map(month_map)
-    # Drop missing
-    timeline_df = cat_onboarding_df.dropna(subset=["Onboarding date", "Go live parsed"])
-    fig_timeline = px.timeline(
-    timeline_df,
-    x_start="Onboarding date",
-    x_end="Go live parsed",
-    y="NAME",
-    color="Onboarding Status",
-    hover_data=["Client", "Priority"],
-    title="Onboarding → Go-Live Timeline",
-    )
-    fig_timeline.update_yaxes(autorange="reversed")
-    st.plotly_chart(fig_timeline, use_container_width=True)
-    # bar chart 1
-    summary = (
-    cat_onboarding_df.groupby(["Client", "Onboarding Status"])
-    .size()
-    .reset_index(name="Count")
-    )
-    fig_client = px.bar(
-        summary,
-        x="Client",
-        y="Count",
-        color="Onboarding Status",
-        text_auto=True,
-        title="Catalogs per Client (by Onboarding Status)",
-    )
-    fig_client.update_layout(barmode="stack", xaxis_title="Client", yaxis_title="Catalog Count")
-    st.plotly_chart(fig_client, use_container_width=True)
-    # bar chart 2
-    summary = (
-    cat_onboarding_df.groupby(["Client", "Priority"])
-    .size()
-    .reset_index(name="Count")
-    )
-    fig_client1 = px.bar(
-        summary,
-        x="Client",
-        y="Count",
-        color="Priority",
-        text_auto=True,
-        title="Catalogs per Client (by Priority)",
-    )
-    fig_client1.update_layout(barmode="stack", xaxis_title="Client", yaxis_title="Catalog Count")
-    st.plotly_chart(fig_client1, use_container_width=True)
-    # bar chart 3
-    summary = (
-    cat_onboarding_df.groupby(["Onboarding Status", "Priority"])
-    .size()
-    .reset_index(name="Count")
-    )
-    fig_client2 = px.bar(
-        summary,
-        x="Onboarding Status",
-        y="Count",
-        color="Priority",
-        text_auto=True,
-        title="Catalogs per Onboarding Status (by Priority)",
-    )
-    fig_client2.update_layout(barmode="stack", xaxis_title="Onboarding Status", yaxis_title="Catalog Count")
-    st.plotly_chart(fig_client2, use_container_width=True)
-# =========================================================================================================================
-# Tab 3 - Metadata completeness
-# =========================================================================================================================
-with tab3:
-    st.header("Catalog Metadata Completeness")
-    cat_df = cat_metadata_df.copy()
-    meta_cols = [col for col in cat_df.columns if col not in ["Catalog name"]]
-    score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}
-    cat_df_numeric = cat_df.copy()
-    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)
-    # force conversion to numeric (anything else becomes NaN)
-    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce")
-    cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
-    cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False)
-    #graph 1
-    fig_completeness = px.bar(
-    cat_df_numeric_sorted,
-    x="Completeness Score",
-    y="Catalog name",
-    orientation="h",
-    color="Completeness Score",
-    color_continuous_scale="Greens",
-    title="Catalog Metadata Completeness Score",
-    )
-    fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'})
-    st.plotly_chart(fig_completeness, use_container_width=True)
-    # graph 2
-    coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
-    coverage.columns = ["Metadata Field", "Average Score"]
-    fig_field_coverage = px.bar(
-        coverage,
-        x="Average Score",
-        y="Metadata Field",
-        orientation="h",
-        color="Average Score",
-        color_continuous_scale="Blues",
-        title="Metadata Field Coverage Across All Catalogs",
-    )
-    fig_field_coverage.update_layout(yaxis={'categoryorder':'total ascending'})
-    st.plotly_chart(fig_field_coverage, use_container_width=True)
-    # heatmap 1
-        # Prepare data
-    z = cat_df_numeric[meta_cols].astype(float).to_numpy()
-    x = list(meta_cols)
-    y = list(cat_df_numeric["Catalog name"].astype(str))
-    # Build the heatmap (no annotation_text)
-    fig_heatmap = ff.create_annotated_heatmap(
-        z=z,
-        x=x,
-        y=y,
-        showscale=True,
-        colorscale=[
-            [0.0, "rgb(255,77,77)"],     # red for 0 (No)
-            [0.5, "rgb(255,204,0)"],     # yellow for 0.5 (Some)
-            [1.0, "rgb(0,204,102)"]      # green for 1 (Yes)
-        ],
-        annotation_text=None   # removes numbers
-    )
-    # Layout adjustments
-    fig_heatmap.update_layout(
-        title="Metadata Completeness Heatmap (Catalog vs Field)",
-        xaxis_title="Metadata Field",
-        yaxis_title="Catalog Name",
-        width=1600,   # make it wide
-        height=1000,  # make it tall so names fit
-        margin=dict(l=200, r=50, t=80, b=150),  # spacing for labels
-    )
-    # Tweak label angles for readability
-    fig_heatmap.update_xaxes(tickangle=-45)
-    fig_heatmap.update_yaxes(automargin=True)
-    st.plotly_chart(fig_heatmap, use_container_width=True)
-    # heatmap 2
-    fig_heatmap1 = px.imshow(
-    cat_df_numeric[meta_cols],
-    labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
-    x=meta_cols,
-    y=cat_df_numeric["Catalog name"],
-    color_continuous_scale=[
-        [0.0, "rgb(255,77,77)"],
-        [0.5, "rgb(255,204,0)"],
-        [1.0, "rgb(0,204,102)"]
-    ],
-    )
-    fig_heatmap1.update_layout(
-        title="Metadata Completeness Heatmap (Catalog vs Field)",
-        width=1600,
-        height=1000,
-        margin=dict(l=200, r=50, t=80, b=150),
-    )
-    fig_heatmap1.update_xaxes(tickangle=-45)
-    st.plotly_chart(fig_heatmap1, use_container_width=True)
-with tab4:
-    st.header("Catalog Mapping status")

+import streamlit as st
+from gsheet_loader import get_data
+import pandas as pd
+import plotly.express as px
+import plotly.figure_factory as ff
+import plotly.graph_objects as go
+import datetime as dt
+st.set_page_config(  # page-wide settings: browser title, wide layout, page icon
+    page_title="Catalog Data Dashboard",
+    layout="wide",
+    page_icon="📊",
+)
+st.title("📊 Catalog Data Dashboard")  # main heading, emitted before the tabs are created
+st.markdown(  # short intro listing the three data areas the dashboard covers
+    """
+    This dashboard combines live Google Sheets data for:
+    - catalog onboarding
+    - metadata completeness
+    - mapping/scraping status
+    """
+)
+cat_onboarding_df, cat_metadata_df, cat_status_df = get_data()  # three tables from gsheet_loader (presumably pandas DataFrames - confirm in gsheet_loader)
+tab0, tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Static Data", "Onboarding Status", "Metadata Completeness", "Mapping Status"])  # one container per tab, in the order of the labels
+# =========================================================================================================================
+# Tab 0 - Overview
+# =========================================================================================================================
+with tab0:
+    # Overview tab: a manual refresh control followed by a five-row preview
+    # of each of the three tables loaded by get_data().
+    st.header("Overview")
+    # On click: clear Streamlit's data cache, show a toast and rerun the
+    # script. NOTE(review): this only forces a fresh load if get_data() is
+    # cached with st.cache_data - confirm in gsheet_loader.
+    if st.button("🔄 Refresh Data"):
+        st.cache_data.clear()
+        st.toast("Refreshing data...", icon="🔄")
+        st.rerun()
+    st.markdown("---")
+    st.subheader("Quick Data Preview")
+    # Three side-by-side columns, one table preview per column.
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.dataframe(cat_onboarding_df.head(5))
+    with col2:
+        st.dataframe(cat_metadata_df.head(5))
+    with col3:
+        st.dataframe(cat_status_df.head(5))
+# =========================================================================================================================
+# Tab 1 - Static data
+# =========================================================================================================================
+with tab1:
+    # Static Data tab: two world maps, two completeness bar charts and a
+    # scatter plot, all built from CSV files read on every script run.
+    st.header("Static Data Preview")
+    full_countries_df = pd.read_csv('countries.csv')
+    full_languages_df = pd.read_csv('languages.csv')
+
+    # Both maps share every setting except the data frame and the title.
+    choropleth_specs = (
+        (full_countries_df, "Programs' availabilities by Country (Log Scale)"),
+        (full_languages_df, "Programs by Languages (Log Scale)"),
+    )
+    for map_df, map_title in choropleth_specs:
+        world_map = px.choropleth(
+            map_df,
+            locations="country_name",
+            locationmode="country names",
+            color="log_count",
+            color_continuous_scale="Purples",
+            hover_name="country_name",
+            # Hover shows the raw count and hides the log value used for colour.
+            hover_data={"count": True, "log_count": False},
+            projection="natural earth",
+            title=map_title,
+        )
+        world_map.update_geos(
+            showcountries=True,
+            showcoastlines=True,
+            showland=True,
+            landcolor="white",
+            projection_type="natural earth",
+        )
+        world_map.update_layout(
+            width=1400,
+            height=700,
+            margin=dict(l=0, r=0, t=100, b=0),
+            title_y=0.95,
+        )
+        st.plotly_chart(world_map, use_container_width=True)
+
+    # Overall completeness score per catalog, white-to-purple colour ramp.
+    catalog_scores = pd.read_csv("catalog_scores.csv")
+    purple_ramp = [
+        [0.0, "#ffffff"],
+        [0.1, "#dcd6f7"],
+        [0.3, "#a29bfe"],
+        [0.6, "#6c5ce7"],
+        [1.0, "#341f97"],
+    ]
+    fig_completeness = px.bar(
+        catalog_scores,
+        x="Total",
+        y="Catalog",
+        orientation="h",
+        color="Total",
+        color_continuous_scale=purple_ramp,
+        title="Catalog Metadata Completeness Score",
+    )
+    fig_completeness.update_layout(
+        yaxis=dict(categoryorder="total ascending"),
+        template="plotly_dark",
+        height=1000,
+    )
+    st.plotly_chart(fig_completeness, use_container_width=True)
+
+    # Same score split by program type: each segment is that subscore's share
+    # of the row's subscore sum, rescaled so a full bar equals "Total".
+    subcols = ["movie", "show", "season", "episode", "sport"]
+    catalog_scores["raw_sum"] = catalog_scores[subcols].sum(axis=1)
+    fig_completeness2 = go.Figure()
+    for col in subcols:
+        segment_label = col.capitalize()
+        share_of_sum = catalog_scores[col] / catalog_scores["raw_sum"]
+        segment_width = share_of_sum * catalog_scores["Total"]
+        # Hover text reports the raw subscore (customdata) and the scaled width.
+        hover_text = (
+            "<b>%{y}</b><br>"
+            f"{segment_label}: <b>%{{customdata}}</b><br>"
+            "Normalized: %{x:.2f}<extra></extra>"
+        )
+        segment_trace = go.Bar(
+            y=catalog_scores["Catalog"],
+            x=segment_width,
+            name=segment_label,
+            orientation="h",
+            customdata=catalog_scores[col],
+            hovertemplate=hover_text,
+        )
+        fig_completeness2.add_trace(segment_trace)
+    fig_completeness2.update_layout(
+        barmode="stack",
+        title="Subscore Contribution per Catalog (Scaled to Total Score)",
+        xaxis_title="Total Score",
+        template="plotly_dark",
+        height=1200,
+        yaxis=dict(categoryorder="total ascending"),
+    )
+    st.plotly_chart(fig_completeness2, use_container_width=True)
+
+    # Score against catalog size; marker size also encodes the program count.
+    fig_scatter = px.scatter(
+        catalog_scores,
+        x="Total",
+        y="Number of programs",
+        size="Number of programs",
+        color="Total",
+        hover_name="Catalog",
+        color_continuous_scale="Viridis",
+        size_max=50,
+    )
+    st.plotly_chart(fig_scatter, use_container_width=True)
+# =========================================================================================================================
+# Tab 2 - Onboarding sheet
+# =========================================================================================================================
+with tab2:
+    # Onboarding Status tab: a timeline from onboarding date to go-live month
+    # plus three stacked bar charts counting catalogs by pairs of columns.
+    st.header("Catalog Onboarding Status")
+
+    # Dates are parsed as day/month without a year (e.g. 21/11); cells that
+    # fail to parse become NaT and every parsed date is pinned to 2025.
+    parsed_onboarding = pd.to_datetime(
+        cat_onboarding_df["Onboarding date"], format="%d/%m", errors="coerce"
+    )
+    cat_onboarding_df["Onboarding date"] = parsed_onboarding.apply(
+        lambda d: d if pd.isna(d) else d.replace(year=2025)
+    )
+
+    # Go-live is given as a month label; map it to that month's last day.
+    # Labels missing from the table (and "TBD") end up as missing values.
+    month_map = {
+        "November 2025": dt.datetime(2025, 11, 30),
+        "December 2025": dt.datetime(2025, 12, 31),
+        "January 2026": dt.datetime(2026, 1, 31),
+        "February 2026": dt.datetime(2026, 2, 28),
+        "March 2026": dt.datetime(2026, 3, 31),
+        "April 2026": dt.datetime(2026, 4, 30),
+        "TBD": None,
+    }
+    go_live_labels = cat_onboarding_df["Go live (customer)"]
+    cat_onboarding_df["Go live parsed"] = go_live_labels.map(month_map)
+
+    # Only rows with both endpoints can be drawn on the timeline.
+    timeline_df = cat_onboarding_df.dropna(
+        subset=["Onboarding date", "Go live parsed"]
+    )
+    fig_timeline = px.timeline(
+        timeline_df,
+        x_start="Onboarding date",
+        x_end="Go live parsed",
+        y="NAME",
+        color="Onboarding Status",
+        hover_data=["Client", "Priority"],
+        title="Onboarding → Go-Live Timeline",
+    )
+    fig_timeline.update_yaxes(autorange="reversed")
+    st.plotly_chart(fig_timeline, use_container_width=True)
+
+    # Each spec is (x-axis column, colour column, chart title); rows are
+    # counted per (x, colour) pair and drawn as stacked bars.
+    stacked_bar_specs = (
+        ("Client", "Onboarding Status", "Catalogs per Client (by Onboarding Status)"),
+        ("Client", "Priority", "Catalogs per Client (by Priority)"),
+        ("Onboarding Status", "Priority", "Catalogs per Onboarding Status (by Priority)"),
+    )
+    for x_col, color_col, chart_title in stacked_bar_specs:
+        pair_counts = cat_onboarding_df.groupby([x_col, color_col]).size()
+        summary = pair_counts.reset_index(name="Count")
+        bar_fig = px.bar(
+            summary,
+            x=x_col,
+            y="Count",
+            color=color_col,
+            text_auto=True,
+            title=chart_title,
+        )
+        bar_fig.update_layout(
+            barmode="stack",
+            xaxis_title=x_col,
+            yaxis_title="Catalog Count",
+        )
+        st.plotly_chart(bar_fig, use_container_width=True)
+# =========================================================================================================================
+# Tab 3 - Metadata completeness
+# =========================================================================================================================
+with tab3:
+    # Metadata Completeness tab: turns the Yes/Some/No answers of the
+    # metadata sheet into numeric scores and charts them per catalog, per
+    # field, and as catalog-by-field heatmaps.
+    st.header("Catalog Metadata Completeness")
+    cat_df = cat_metadata_df.copy()
+    # Every column except the catalog name is treated as a metadata field.
+    meta_cols = [col for col in cat_df.columns if col not in ["Catalog name"]]
+    score_map = {"Yes": 1.0, "Some": 0.5, "No": 0.0, "None": 0.0, "": 0.0}
+    cat_df_numeric = cat_df.copy()
+    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].replace(score_map)
+    # force conversion to numeric (anything else becomes NaN)
+    cat_df_numeric[meta_cols] = cat_df_numeric[meta_cols].apply(pd.to_numeric, errors="coerce")
+    # Row-wise mean over the metadata fields gives one score per catalog.
+    cat_df_numeric["Completeness Score"] = cat_df_numeric[meta_cols].mean(axis=1)
+    cat_df_numeric_sorted = cat_df_numeric.sort_values("Completeness Score", ascending=False)
+    # graph 1: completeness score per catalog
+    fig_completeness = px.bar(
+        cat_df_numeric_sorted,
+        x="Completeness Score",
+        y="Catalog name",
+        orientation="h",
+        color="Completeness Score",
+        color_continuous_scale="Greens",
+        title="Catalog Metadata Completeness Score",
+    )
+    fig_completeness.update_layout(yaxis={'categoryorder':'total ascending'})
+    st.plotly_chart(fig_completeness, use_container_width=True)
+    # graph 2: column-wise mean gives the average score of each field
+    coverage = cat_df_numeric[meta_cols].mean().sort_values(ascending=False).reset_index()
+    coverage.columns = ["Metadata Field", "Average Score"]
+    fig_field_coverage = px.bar(
+        coverage,
+        x="Average Score",
+        y="Metadata Field",
+        orientation="h",
+        color="Average Score",
+        color_continuous_scale="Blues",
+        title="Metadata Field Coverage Across All Catalogs",
+    )
+    fig_field_coverage.update_layout(yaxis={'categoryorder':'total ascending'})
+    st.plotly_chart(fig_field_coverage, use_container_width=True)
+    # heatmap 1
+    # Prepare data: rows are catalogs, columns are metadata fields.
+    z = cat_df_numeric[meta_cols].astype(float).to_numpy()
+    x = list(meta_cols)
+    y = list(cat_df_numeric["Catalog name"].astype(str))
+    # create_annotated_heatmap prints the z values in every cell when
+    # annotation_text is None, so passing None does NOT hide the numbers.
+    # An all-empty text matrix with the same shape as z keeps the cells blank.
+    blank_annotations = [[""] * len(x) for _ in y]
+    fig_heatmap = ff.create_annotated_heatmap(
+        z=z,
+        x=x,
+        y=y,
+        showscale=True,
+        colorscale=[
+            [0.0, "rgb(255,77,77)"],     # red for 0 (No)
+            [0.5, "rgb(255,204,0)"],     # yellow for 0.5 (Some)
+            [1.0, "rgb(0,204,102)"]      # green for 1 (Yes)
+        ],
+        annotation_text=blank_annotations,
+    )
+    # Layout adjustments
+    fig_heatmap.update_layout(
+        title="Metadata Completeness Heatmap (Catalog vs Field)",
+        xaxis_title="Metadata Field",
+        yaxis_title="Catalog Name",
+        width=1600,   # make it wide
+        height=1000,  # make it tall so names fit
+        margin=dict(l=200, r=50, t=80, b=150),  # spacing for labels
+    )
+    # Tweak label angles for readability
+    fig_heatmap.update_xaxes(tickangle=-45)
+    fig_heatmap.update_yaxes(automargin=True)
+    st.plotly_chart(fig_heatmap, use_container_width=True)
+    # heatmap 2: the same matrix and colour scale rendered with px.imshow
+    fig_heatmap1 = px.imshow(
+        cat_df_numeric[meta_cols],
+        labels=dict(x="Metadata Field", y="Catalog Name", color="Completeness"),
+        x=meta_cols,
+        y=cat_df_numeric["Catalog name"],
+        color_continuous_scale=[
+            [0.0, "rgb(255,77,77)"],
+            [0.5, "rgb(255,204,0)"],
+            [1.0, "rgb(0,204,102)"]
+        ],
+    )
+    fig_heatmap1.update_layout(
+        title="Metadata Completeness Heatmap (Catalog vs Field)",
+        width=1600,
+        height=1000,
+        margin=dict(l=200, r=50, t=80, b=150),
+    )
+    fig_heatmap1.update_xaxes(tickangle=-45)
+    st.plotly_chart(fig_heatmap1, use_container_width=True)
+with tab4:  # Mapping Status tab: placeholder, only a header is rendered so far
+    st.header("Catalog Mapping status")