Spaces:

taylerErbe
/

Legislation_Explorer

Sleeping

File size: 30,799 Bytes

bb9abee

import os
import json
import numpy as np
import pandas as pd
import faiss
import streamlit as st
import altair as alt
from sentence_transformers import SentenceTransformer
import csv
from datetime import datetime

#Config
DB_DIR = "."
FEEDBACK_CSV = os.path.join(DB_DIR, "impact_feedback.csv")
DEFAULT_TOP_K = 10

IMPACT_ORDER = [
    "Not Impactful",
    "Slightly Impactful",
    "Moderately Impactful",
    "Very Impactful"
]

st.set_page_config(
    page_title="IGPA Legislation Explorer",
    layout="wide",
    initial_sidebar_state="expanded"
)

#Loading vector database
@st.cache_resource
def load_vector_db(db_dir: str = DB_DIR):
    with open(os.path.join(db_dir, "config.json"), "r") as f:
        cfg = json.load(f)

    index = faiss.read_index(os.path.join(db_dir, "faiss_index.bin"))
    meta = pd.read_parquet(os.path.join(db_dir, "metadata.parquet"))

    if "vec_id" not in meta.columns:
        meta = meta.reset_index().rename(columns={"index": "vec_id"})

    model = SentenceTransformer(cfg["embedding_model_name"])
    return index, meta, model, cfg

index, meta_df, embed_model, cfg = load_vector_db()

DATE_COL = "status_date_y"
meta_df[DATE_COL] = pd.to_datetime(
    meta_df[DATE_COL],
    errors="coerce"
)

DEFAULT_FILTERS = {
    "intended_beneficiary": "All",
    "policy_domain": "All",
    "impact_selected": "All",
    "category_main": "All",
    "category_sub": "All",
    "status_desc": "All",
    "date_range": (
        meta_df[DATE_COL].min().date(),
        meta_df[DATE_COL].max().date()
    )
}

for key, value in DEFAULT_FILTERS.items():
    if key not in st.session_state:
        st.session_state[key] = value

if "search_results" not in st.session_state:
    st.session_state.search_results = None
if "current_query" not in st.session_state:
    st.session_state.current_query = ""

def embed_query(query: str):
    return embed_model.encode(
        [query],
        normalize_embeddings=True,
        convert_to_numpy=True
    ).astype("float32")

def impact_threshold(level):
    if level not in IMPACT_ORDER:
        return []
    return IMPACT_ORDER[IMPACT_ORDER.index(level):]

def append_feedback_row(
    bill_id,
    predicted_impact,
    user_response,
    corrected_impact=None,
    path=FEEDBACK_CSV,
):
    try:
        file_exists = os.path.isfile(path)
        with open(path, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            if not file_exists:
                writer.writerow(
                    [
                        "timestamp",
                        "bill_id",
                        "predicted_impact",
                        "user_response",
                        "corrected_impact",
                    ]
                )
            writer.writerow(
                [
                    datetime.utcnow().isoformat(),
                    bill_id,
                    predicted_impact,
                    user_response,
                    corrected_impact if corrected_impact else "",
                ]
            )

        st.sidebar.success(f"Feedback saved to: `{path}`")
    except Exception as e:
        st.error(f"Failed to save feedback: {str(e)}")

def build_filter_mask(df, intended_beneficiary, policy_domain, impact_selected):
    mask = pd.Series(True, index=df.index)

    if intended_beneficiary != "All":
        mask &= df["intended_beneficiaries_standardized"] == intended_beneficiary
    if policy_domain != "All":
        mask &= df["policy_domain_standardized"] == policy_domain
    if impact_selected != "All":
        allowed = impact_threshold(impact_selected)
        mask &= df["impact_rating_standardized"].isin(allowed)
    if st.session_state.category_main != "All":
        mask &= df["category_main_label"] == st.session_state.category_main
    if st.session_state.category_sub != "All":
        mask &= df["category_sub_label"] == st.session_state.category_sub
    if "status_desc" in st.session_state and st.session_state.status_desc != "All":
        mask &= df["status_desc"] == st.session_state.status_desc
    if "date_range" in st.session_state and st.session_state.date_range:
        dr = st.session_state.date_range

        if isinstance(dr, (tuple, list)) and len(dr) == 2:
            start, end = dr
        else:
            start = end = dr
        if end == start:
            end = df[DATE_COL].max().date()

        start = pd.to_datetime(start)
        end = pd.to_datetime(end)

        mask &= df[DATE_COL].between(start, end)
    return mask

def get_sorted_filter_options(df, col_name):
    counts = df[col_name].dropna().value_counts()
    sorted_vals = counts.index.tolist()
    return ["All"] + sorted_vals

def reset_filters():
    for key, value in DEFAULT_FILTERS.items():
        st.session_state[key] = value
    st.rerun()

#Filters
with st.sidebar:
    st.header("Filters")
    if "history" not in st.session_state:
        st.session_state.history = []
    if st.button("Reset Filters"):
        reset_filters()

    intended_beneficiary = st.selectbox(
        "Intended Beneficiary",
        get_sorted_filter_options(meta_df, "intended_beneficiaries_standardized"),
        key="intended_beneficiary"
    )

    policy_domain = st.selectbox(
        "Policy Area",
        get_sorted_filter_options(meta_df, "policy_domain_standardized"),
        key="policy_domain"
    )

    impact_selected = st.selectbox(
        "Impact Rating (≥ Selected Level)",
        ["All"] + IMPACT_ORDER,
        key="impact_selected"
    )

    category_main = st.selectbox(
        "Category",
        get_sorted_filter_options(meta_df, "category_main_label"),
        key="category_main"
    )

    category_sub = st.selectbox(
        "Sub Category",
        get_sorted_filter_options(meta_df, "category_sub_label"),
        key="category_sub"
    )

    top_k = st.slider("Number of results", 5, 50, DEFAULT_TOP_K, 5)

    status_desc = st.selectbox(
        "Bill Status",
        ["All"] + sorted(meta_df["status_desc"].dropna().unique().tolist()),
        key="status_desc"
    )

    st.subheader("Time Filter")

    min_date = meta_df[DATE_COL].min().date()
    max_date = meta_df[DATE_COL].max().date()
    
    default_value = st.session_state.get("date_range", (min_date, max_date))

    if isinstance(default_value, (tuple, list)):
        if len(default_value) == 2:
            start, end = default_value
        else:  
            start = end = default_value[0]
    else: 
        start = end = default_value

    st.date_input(
        "Status Date Range",
        value=(start, end),
        min_value=min_date,
        max_value=max_date,
        key="date_range"
    )

    if os.path.exists(FEEDBACK_CSV):
        try:
            df_feedback = pd.read_csv(FEEDBACK_CSV)
            st.info(f" Feedback records: {len(df_feedback)}")
            if st.button(" Download Feedback CSV"):
                st.download_button(
                    label="Download impact_feedback.csv",
                    data=open(FEEDBACK_CSV, 'rb').read(),
                    file_name="impact_feedback.csv",
                    mime="text/csv"
                )
        except:
            st.info("Feedback CSV ready (empty)")

filtered_df = meta_df[
    build_filter_mask(
        meta_df,
        st.session_state.intended_beneficiary,
        st.session_state.policy_domain,
        st.session_state.impact_selected
    )
]

tab_search, tab_trends = st.tabs(["Search & Results", "Trends & Insights"])

#Search Tab
with tab_search:
    st.title("IGPA Legislation Explorer")

    #Overview
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Total Bills", len(filtered_df))

    with col2:
        st.metric(
            "Policy Domains",
            filtered_df["policy_domain_standardized"].nunique()
        )

    with col3:
        st.metric(
            "Beneficiary Groups",
            filtered_df["intended_beneficiaries_standardized"].nunique()
        )

    with col4:
        impact_counts = (
            filtered_df["impact_rating_standardized"]
            .dropna()
            .value_counts()
            .reindex(IMPACT_ORDER, fill_value=0)
        )
        st.metric("Impact Breakdown", len(filtered_df))
        st.markdown(
            f"<div style='font-size:12px; color:#6b7280;'>"
            f"Very Impactful: <b>{impact_counts['Very Impactful']}</b> | "
            f"Moderately: <b>{impact_counts['Moderately Impactful']}</b> | "
            f"Slightly: <b>{impact_counts['Slightly Impactful']}</b> | "
            f"Not: <b>{impact_counts['Not Impactful']}</b>"
            f"</div>",
            unsafe_allow_html=True
        )

    #Most Impacted Beneficiary Categories
    st.subheader("Most Impacted Beneficiary Categories")
    
    impact_df = (
        filtered_df.dropna(subset=["beneficiary_category", "impact_rating_score"])
        .groupby("beneficiary_category")
        .agg(
            avg_impact=("impact_rating_score", "mean"),
            bills=("bill_id","count"),
            top_bills=("title", lambda x: "; ".join(x.head(5))),
            top_beneficiaries=("intended_beneficiaries_standardized", lambda x: ", ".join(x.value_counts().head(3).index))
        )
        .reset_index()
        .sort_values("avg_impact", ascending=False)
        .head(10)
    )
    
    if not impact_df.empty:
        st.altair_chart(
            alt.Chart(impact_df)
            .mark_bar()
            .encode(
                x=alt.X("beneficiary_category:N", sort="-y", title="Beneficiary Category"),
                y=alt.Y("avg_impact:Q", title="Average Impact Score"),
                color=alt.Color(
                    "avg_impact:Q",
                    scale=alt.Scale(domain=[0,4], range=["#FFF176","#E53935"]),
                    legend=alt.Legend(title="Impact Severity")
                ),
                tooltip=[
                    alt.Tooltip("beneficiary_category:N", title="Beneficiary"),
                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
                    alt.Tooltip("bills:Q", title="Number of Bills"),
                    alt.Tooltip("top_bills:N", title="Top Bills"),
                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries")
                ]
            )
            .properties(height=350),
            use_container_width=True
        )

    # Bills from Filters
    st.subheader("Bills Matching Selected Filters")

    display_cols = {
        "bill_number": "Bill Number",
        "title": "Title",
        "description": "Description",
        "policy_domain_standardized": "Policy Domain",
        "category_main_label": "Category",
        "intent_standardized": "Intent",
        "legislative_goal_standardized": "Legislative Goal",
        "beneficiary_category": "Beneficiary Group",
        "intended_beneficiaries_standardized": "Intended Beneficiaries",
        "potential_impact_raw": "Potential Impact",
        "impact_rating_standardized": "Impact Rating",
        "status_desc": "Status",
        "full_text_url": "Bill Link"
    }
    
    available_cols = {k: v for k, v in display_cols.items() if k in filtered_df.columns}
    
    filter_bill_df = (
        filtered_df[list(available_cols.keys())]
        .rename(columns=available_cols)
        .copy()
    )
    
    st.dataframe(
        filter_bill_df,
        use_container_width=True,
        column_config={
            "Bill Link": st.column_config.LinkColumn(
                label="Bill Link",
                display_text="Open Bill"
            )
        }
    )
    
    st.markdown("---")

    #Search Bills
    st.subheader("Search Bills")
    query = st.text_area(
        "Ask a question about legislation",
        value=st.session_state.current_query,
        height=80,
        placeholder="Example: bills related to funding",
        key="search_query_input"
    )

    search_clicked = st.button("Search", key="search_button")

    if search_clicked and query.strip():
        st.session_state.current_query = query
        st.session_state.history.append({"query": query})

        q_vec = embed_query(query)
        n_search = min(len(meta_df), top_k*5)
        scores, ids = index.search(q_vec, n_search)
        ids, scores = ids[0], scores[0]

        allowed = set(filtered_df.index)
        kept = [(i,s) for i,s in zip(ids,scores) if i in allowed][:top_k]

        if not kept:
            st.warning("No results found.")
            st.session_state.search_results = None
        else:
            results = meta_df.loc[[i for i,_ in kept]].copy()
            results["similarity"] = [s for _,s in kept]
            st.session_state.search_results = results

    if st.session_state.search_results is not None:
        results = st.session_state.search_results

        #Filtered Results Table
        st.subheader("Filtered Results Table")
        review_cols = [
            "bill_number",
            "title",
            "description",
            "potential_impact_raw",
            "increasing_aspects_standardized",
            "decreasing_aspects_standardized",
            "similarity",
            "full_text_url"
        ]

        review_df = results[[c for c in review_cols if c in results.columns]].copy()

        review_df.rename(
            columns={
                "bill_number": "Bill Number",
                "title": "Title",
                "description": "Description",
                "potential_impact_raw": "Potential Impact",
                "increasing_aspects_standardized": "Increasing Aspects",
                "decreasing_aspects_standardized": "Decreasing Aspects",
                "similarity": "Score",
                "full_text_url": "Bill URL"
            },
            inplace=True
        )

        st.dataframe(
            review_df,
            use_container_width=True,
            column_config={
                "Bill URL": st.column_config.LinkColumn(
                    "ILGA URL",
                    display_text="Open bill"
                )
            }
        )

        st.markdown("---")

        st.subheader("Filtered Results")
        for idx, row in results.iterrows():
            with st.container():
                st.markdown(f"### Bill Number: {row['bill_number']}")
                st.markdown(f"**Title:** {row['title']}")
                st.write(row["description"])

                if pd.notna(row.get("category_main_label")):
                    st.write(f"**Main Category**: {row['category_main_label']}")

                if pd.notna(row.get("category_sub_label")):
                    st.write(f"**Sub Category**: {row['category_sub_label']}")

                if pd.notna(row.get("llama_summary_raw")):
                    st.markdown(f"**LLaMA Summary:** {row['llama_summary_raw']}")

                info_text = (
                    f"Session: {row.get('session','')} • "
                    f"Chamber: {row.get('chamber','')} • "
                    f"Impact: {row.get('impact_rating_standardized','')} • "
                    f"Beneficiaries: {row.get('intended_beneficiaries_standardized','')} • "
                    f"Domain: {row.get('policy_domain_standardized','')} • "
                    f"Similarity: {row.get('similarity'):.3f}"
                )
                st.caption(info_text)

                if pd.notna(row.get("full_text_url")):
                    st.markdown(f"[🔗 View Full Bill]({row['full_text_url']})", unsafe_allow_html=True)

                std_cols = [
                    c for c in results.columns
                    if c.endswith("_standardized") and c not in [
                        "impact_rating_standardized",
                        "increasing_aspects_standardized",
                        "decreasing_aspects_standardized",
                        "original_law_standardized"
                    ]
                ]

                with st.expander("More Details"):
                    for c in std_cols:
                        val = row.get(c)
                        if pd.notna(val) and str(val).strip():
                            label = c.replace("_standardized","").replace("_"," ").title()
                            st.write(f"**{label}**: {val}")

                with st.expander("Similar Bills"):
                    sim_df = results.iloc[:5][
                        ["bill_number","title","description","full_text_url"]
                    ].copy()
                    st.dataframe(
                        sim_df,
                        use_container_width=True,
                        column_config={
                            "full_text_url": st.column_config.LinkColumn(
                                "Bill Link",
                                display_text="Open"
                            )
                        }
                    )

                #Impact rating feedbacK
                with st.expander("👍👎 Rate Impact Accuracy", expanded=False):
                    st.markdown("**Is this impact rating accurate?**")
                    predicted_impact = row.get("impact_rating_standardized", "")
                    bill_id_safe = str(row.get('bill_id', idx))
                    
                    # Check if feedback was already submitted for this bill
                    feedback_submitted = st.session_state.get(f"feedback_done_{bill_id_safe}", False)
                    
                    if feedback_submitted:
                        st.success("Thank you for your feedback!")
                        st.caption(f"Bill: {row.get('bill_number', 'N/A')} | Saved to impact_feedback.csv")
                    else:
                        col1, col2 = st.columns(2)
                        with col1:
                            if st.button("👍 **Yes - Accurate**", key=f"yes_{bill_id_safe}", use_container_width=True):
                                append_feedback_row(
                                    bill_id=bill_id_safe,
                                    predicted_impact=predicted_impact,
                                    user_response="Yes",
                                    corrected_impact=None,
                                )
                                st.session_state[f"feedback_done_{bill_id_safe}"] = True
                                st.sidebar.success(f"Feedback saved for {row.get('bill_number', bill_id_safe)}")
                                st.rerun()
                
                        with col2:
                            if st.button("👎 **No - Incorrect**", key=f"no_{bill_id_safe}", use_container_width=True):
                                st.session_state[f"show_corrected_{bill_id_safe}"] = True
                                st.rerun()
                
                        if st.session_state.get(f"show_corrected_{bill_id_safe}", False):
                            st.info(f"**What should the impact rating be instead?**")
                            corrected_value = st.selectbox(
                                "**Correct impact rating**",
                                IMPACT_ORDER,
                                key=f"corrected_{bill_id_safe}",
                            )
                            
                            col_submit, col_cancel = st.columns([3, 1])
                            with col_submit:
                                if st.button("**Submit Feedback**", key=f"submit_{bill_id_safe}", type="primary"):
                                    append_feedback_row(
                                        bill_id=bill_id_safe,
                                        predicted_impact=predicted_impact,
                                        user_response="No",
                                        corrected_impact=corrected_value,
                                    )
                                    st.session_state[f"feedback_done_{bill_id_safe}"] = True
                                    st.session_state[f"show_corrected_{bill_id_safe}"] = False
                                    st.sidebar.success(f"Feedback saved for {row.get('bill_number', bill_id_safe)}")
                                    st.rerun()
                            with col_cancel:
                                if st.button("Cancel", key=f"cancel_{bill_id_safe}"):
                                    st.session_state[f"show_corrected_{bill_id_safe}"] = False
                                    st.rerun()

    #Search History
    with st.sidebar.expander("Search History"):
        for i,item in enumerate(reversed(st.session_state.history[-5:]),1):
            st.write(f"{i}. {item.get('query','')}")


# TRENDS TAB
with tab_trends:
    st.subheader("Trends & Insights")

    # Key Insights
    top_policy = filtered_df["policy_domain_standardized"].value_counts().head(1)
    top_beneficiaries = filtered_df["beneficiary_category"].value_counts().head(1)
    strategy_impact = (
        filtered_df[filtered_df["impact_rating_standardized"].notna()]
        .groupby("legislative_strategy_standardized")["impact_rating_standardized"]
        .apply(lambda x: (x=="Very Impactful").sum())
    )
    avg_impact_ben = (
        filtered_df.dropna(subset=["impact_rating_score"])
        .groupby("beneficiary_category")["impact_rating_score"]
        .mean()
        .sort_values(ascending=False)
    )

    total_bills = len(filtered_df)
    total_high_impact = (filtered_df["impact_rating_standardized"]=="Very Impactful").sum()

    st.markdown("### Key Insights")
    st.write(f"**Total Bills Considered:** {total_bills}")
    st.write(f"**Total Very Impactful Bills:** {total_high_impact}")
    st.write(f"**Most Active Policy Domain:** {top_policy.index[0]} ({top_policy.iloc[0]} bills)" if not top_policy.empty else "No data")
    st.write(f"**Most Benefited Group:** {top_beneficiaries.index[0]} ({top_beneficiaries.iloc[0]} bills)" if not top_beneficiaries.empty else "No data")
    st.write(f"**Strategy Producing Most Very Impactful Bills:** {strategy_impact.idxmax() if not strategy_impact.empty else 'N/A'}")
    st.write(f"**Highest Average Impact (Beneficiary):** {avg_impact_ben.index[0]} ({avg_impact_ben.iloc[0]:.2f})" if not avg_impact_ben.empty else "N/A")
    st.markdown("---")

    col1, col2 = st.columns(2)

    # Policy Domain 
    with col1:
        st.markdown("### Policy Domain Activity")
        policy_agg = (
            filtered_df.groupby("policy_domain_standardized")
            .agg(
                Count=("bill_id","count"),
                avg_impact=("impact_rating_score","mean"),
                top_bills=("title", lambda x: "; ".join(x.head(5))),
                top_beneficiaries=("intended_beneficiaries_standardized", lambda x: ", ".join(x.value_counts().head(3).index)),
                recent_date=("status_date_y", lambda x: x.max().strftime("%Y-%m-%d")),
                bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5))))
            )
            .reset_index()
            .rename(columns={"policy_domain_standardized":"Policy Domain"})
        )
        policy_chart = (
            alt.Chart(policy_agg)
            .mark_bar()
            .encode(
                x=alt.X("Policy Domain:N", sort="-y", title="Policy Domain"),
                y=alt.Y("Count:Q", title="Number of Bills"),
                color=alt.Color("Count:Q", scale=alt.Scale(scheme="reds"), legend=None),
                tooltip=[
                    alt.Tooltip("Policy Domain:N"),
                    alt.Tooltip("Count:Q", title="Number of Bills"),
                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
                    alt.Tooltip("top_bills:N", title="Top Bills"),
                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
                    alt.Tooltip("bill_numbers:N", title="Bill Numbers")
                ]
            )
            .properties(height=400)
        )
        st.altair_chart(policy_chart, use_container_width=True)

    # Impact Distribution
    with col2:
        st.markdown("### Impact Distribution")
        impact_dist = (
            filtered_df[filtered_df["impact_rating_standardized"].notna()]["impact_rating_standardized"]
            .value_counts()
            .reindex(IMPACT_ORDER, fill_value=0)
            .reset_index()
        )
        impact_dist.columns = ["Impact Level", "Count"]

        impact_chart = (
            alt.Chart(impact_dist)
            .mark_bar()
            .encode(
                x=alt.X("Impact Level:N", sort=IMPACT_ORDER),
                y=alt.Y("Count:Q"),
                color=alt.Color("Count:Q", scale=alt.Scale(scheme="reds")),
                tooltip=[
                    alt.Tooltip("Impact Level:N"),
                    alt.Tooltip("Count:Q")
                ]
            )
            .properties(height=300)
        )
        st.altair_chart(impact_chart, use_container_width=True)

    # Strategy High Impact
    st.markdown("### Legislative Strategy: Very Impactful Bills")
    strategy_high_impact = (
        filtered_df[filtered_df["impact_rating_standardized"].notna()]
        .groupby("legislative_strategy_standardized")
        .agg(
            Very_Impactful_Bills=("impact_rating_standardized", lambda x: (x=="Very Impactful").sum()),
            top_bills=("title", lambda x: "; ".join(x.head(5))),
            top_beneficiaries=("intended_beneficiaries_standardized", lambda x: ", ".join(x.value_counts().head(3).index)),
            recent_date=("status_date_y", lambda x: x.max().strftime("%Y-%m-%d"))
        )
        .reset_index()
        .rename(columns={"legislative_strategy_standardized":"Strategy"})
    )

    strategy_chart = (
        alt.Chart(strategy_high_impact)
        .mark_bar()
        .encode(
            x=alt.X("Strategy:N", sort="-y", title="Strategy"),
            y=alt.Y("Very_Impactful_Bills:Q", title="Very Impactful Bills"),
            color=alt.Color("Very_Impactful_Bills:Q", scale=alt.Scale(scheme="orangered")),
            tooltip=[
                alt.Tooltip("Strategy:N"),
                alt.Tooltip("Very_Impactful_Bills:Q"),
                alt.Tooltip("top_bills:N", title="Top Bills"),
                alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
                alt.Tooltip("recent_date:N", title="Most Recent Bill")
            ]
        )
        .properties(height=400)
    )

    st.altair_chart(strategy_chart, use_container_width=True)

    # Impact by Category
    st.markdown("### Impact by Category")
    impact_cat = (
        filtered_df[
            filtered_df["impact_rating_standardized"].notna() &
            filtered_df["category_main_label"].notna()
        ]
        .groupby(["category_main_label", "impact_rating_standardized"])
        .agg(
            Count=("bill_id","count"),
            avg_impact=("impact_rating_score","mean"),
            top_bills=("title", lambda x: "; ".join(x.head(5))),
            top_beneficiaries=("intended_beneficiaries_standardized", lambda x: ", ".join(x.value_counts().head(3).index)),
            recent_date=("status_date_y", lambda x: x.max().strftime("%Y-%m-%d")),
            bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5))))
        )
        .reset_index()
    )

    if impact_cat.empty:
        st.write("No data available for impact by category.")
    else:
        top_categories = (
            impact_cat.groupby("category_main_label")["Count"]
            .sum()
            .sort_values(ascending=False)
            .head(15)
            .index.tolist()
        )
        impact_cat_top = impact_cat[impact_cat["category_main_label"].isin(top_categories)]

        impact_cat_chart = (
            alt.Chart(impact_cat_top)
            .mark_bar()
            .encode(
                y=alt.Y("category_main_label:N", sort=top_categories, title="Category"),
                x=alt.X("Count:Q", stack="zero", title="Number of Bills"),
                color=alt.Color("impact_rating_standardized:N", sort=IMPACT_ORDER, scale=alt.Scale(scheme="reds"), title="Impact Rating"),
                tooltip=[
                    alt.Tooltip("category_main_label:N", title="Category"),
                    alt.Tooltip("impact_rating_standardized:N", title="Impact Rating"),
                    alt.Tooltip("Count:Q", title="Number of Bills"),
                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
                    alt.Tooltip("top_bills:N", title="Top Bills"),
                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
                    alt.Tooltip("bill_numbers:N", title="Bill Numbers")
                ]
            )
            .properties(height=400)
        )

        st.altair_chart(impact_cat_chart, use_container_width=True)

    # Beneficiary Treemap
    st.markdown("### Beneficiary Coverage & Average Impact")
    ben_treemap_df = (
        filtered_df.dropna(subset=["beneficiary_category", "impact_rating_score"])
        .groupby("beneficiary_category")
        .agg(
            total_bills=("bill_id","count"),
            avg_impact=("impact_rating_score","mean"),
            top_bills=("title", lambda x: "; ".join(x.head(5))),
            recent_date=("status_date_y", lambda x: x.max().strftime("%Y-%m-%d")),
            bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5))))
        )
        .reset_index()
    )

    if not ben_treemap_df.empty:
        treemap = (
            alt.Chart(ben_treemap_df)
            .mark_rect()
            .encode(
                x=alt.X("total_bills:Q", title="Number of Bills"),
                y=alt.Y("beneficiary_category:N", sort="-x", title="Beneficiary Category"),
                size="total_bills:Q",
                color=alt.Color("avg_impact:Q", scale=alt.Scale(domain=[0,4], range=["#FFF176","#E53935"]), legend=alt.Legend(title="Average Impact Score")),
                tooltip=[
                    alt.Tooltip("beneficiary_category:N", title="Beneficiary"),
                    alt.Tooltip("total_bills:Q", title="Number of Bills"),
                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
                    alt.Tooltip("top_bills:N", title="Top Bills"),
                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
                    alt.Tooltip("bill_numbers:N", title="Bill Numbers")
                ]
            )
            .properties(height=400)
        )
        st.altair_chart(treemap, use_container_width=True)
    else:
        st.write("No beneficiary impact data available for selected filters.")