Spaces:

taylerErbe
/

Legislation_Explorer

Sleeping

App Files Files Community

taylerErbe committed on Feb 13

Commit

bb9abee

verified ·

1 Parent(s): fca8511

Upload 8 files

Browse files

Files changed (9) hide show

.gitattributes +3 -0
README.md +26 -0
app.py +803 -0
config.json +7 -0
faiss_index.bin +3 -0
features_with_allbilldata.parquet +3 -0
gitattributes +35 -0
metadata.parquet +3 -0
requirements.txt +6 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,3 @@

+faiss_index.bin filter=lfs diff=lfs merge=lfs -text
+features_with_allbilldata.parquet filter=lfs diff=lfs merge=lfs -text
+metadata.parquet filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,26 @@

+---
+title: Igpa Legislation Explorer
+emoji: 🚀
+colorFrom: red
+colorTo: red
+sdk: streamlit
+app_file: app.py
+app_port: 8501
+tags:
+- streamlit
+- faiss
+- semantic-search
+pinned: false
+short_description: IGPA semantic search and exploration of legislation
+---
+# IGPA Legislation Explorer
+This Streamlit app lets you perform semantic search over a corpus of legislation using a FAISS index and sentence-transformers embeddings. It supports:
+- Free-text queries over bill summaries
+- Filtering by intended beneficiaries, policy domain, impact rating, category, sub category, bill status, and status date range
+- Viewing bill summaries, key provisions, and similarity scores
+- Rating the accuracy of each bill's impact rating and downloading the collected feedback as CSV for further analysis
+To customize or extend the app, edit `app.py` at the root of this Space.

app.py ADDED Viewed

	@@ -0,0 +1,803 @@

+import os
+import json
+import numpy as np
+import pandas as pd
+import faiss
+import streamlit as st
+import altair as alt
+from sentence_transformers import SentenceTransformer
+import csv
+from datetime import datetime
+#Config
+DB_DIR = "."
+FEEDBACK_CSV = os.path.join(DB_DIR, "impact_feedback.csv")
+DEFAULT_TOP_K = 10
+IMPACT_ORDER = [
+    "Not Impactful",
+    "Slightly Impactful",
+    "Moderately Impactful",
+    "Very Impactful"
+]
+st.set_page_config(
+    page_title="IGPA Legislation Explorer",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+#Loading vector database
+@st.cache_resource
+def load_vector_db(db_dir: str = DB_DIR):
+    with open(os.path.join(db_dir, "config.json"), "r") as f:
+        cfg = json.load(f)
+    index = faiss.read_index(os.path.join(db_dir, "faiss_index.bin"))
+    meta = pd.read_parquet(os.path.join(db_dir, "metadata.parquet"))
+    if "vec_id" not in meta.columns:
+        meta = meta.reset_index().rename(columns={"index": "vec_id"})
+    model = SentenceTransformer(cfg["embedding_model_name"])
+    return index, meta, model, cfg
+# Load the index, metadata, embedding model and config used by the rest of the script.
+index, meta_df, embed_model, cfg = load_vector_db()
+# Column holding each bill's status date; parsed to datetimes so it can drive
+# the date-range filter (errors="coerce" turns unparseable values into NaT).
+DATE_COL = "status_date_y"
+meta_df[DATE_COL] = pd.to_datetime(
+    meta_df[DATE_COL],
+    errors="coerce"
+)
+# Default value for every sidebar filter. "All" means "do not filter on this
+# field"; the default date range spans the earliest to the latest status date.
+DEFAULT_FILTERS = {
+    "intended_beneficiary": "All",
+    "policy_domain": "All",
+    "impact_selected": "All",
+    "category_main": "All",
+    "category_sub": "All",
+    "status_desc": "All",
+    "date_range": (
+        meta_df[DATE_COL].min().date(),
+        meta_df[DATE_COL].max().date()
+    )
+}
+# Seed session state with any filter key that is not set yet; existing user
+# selections are left untouched.
+for key, value in DEFAULT_FILTERS.items():
+    if key not in st.session_state:
+        st.session_state[key] = value
+# Last search results (None until a search has produced matches).
+if "search_results" not in st.session_state:
+    st.session_state.search_results = None
+# Text of the most recently submitted query.
+if "current_query" not in st.session_state:
+    st.session_state.current_query = ""
+def embed_query(query: str):
+    return embed_model.encode(
+        [query],
+        normalize_embeddings=True,
+        convert_to_numpy=True
+    ).astype("float32")
+def impact_threshold(level):
+    if level not in IMPACT_ORDER:
+        return []
+    return IMPACT_ORDER[IMPACT_ORDER.index(level):]
+def append_feedback_row(
+    bill_id,
+    predicted_impact,
+    user_response,
+    corrected_impact=None,
+    path=FEEDBACK_CSV,
+):
+    try:
+        file_exists = os.path.isfile(path)
+        with open(path, "a", newline="", encoding="utf-8") as f:
+            writer = csv.writer(f)
+            if not file_exists:
+                writer.writerow(
+                    [
+                        "timestamp",
+                        "bill_id",
+                        "predicted_impact",
+                        "user_response",
+                        "corrected_impact",
+                    ]
+                )
+            writer.writerow(
+                [
+                    datetime.utcnow().isoformat(),
+                    bill_id,
+                    predicted_impact,
+                    user_response,
+                    corrected_impact if corrected_impact else "",
+                ]
+            )
+        st.sidebar.success(f"Feedback saved to: `{path}`")
+    except Exception as e:
+        st.error(f"Failed to save feedback: {str(e)}")
+def build_filter_mask(df, intended_beneficiary, policy_domain, impact_selected):
+    mask = pd.Series(True, index=df.index)
+    if intended_beneficiary != "All":
+        mask &= df["intended_beneficiaries_standardized"] == intended_beneficiary
+    if policy_domain != "All":
+        mask &= df["policy_domain_standardized"] == policy_domain
+    if impact_selected != "All":
+        allowed = impact_threshold(impact_selected)
+        mask &= df["impact_rating_standardized"].isin(allowed)
+    if st.session_state.category_main != "All":
+        mask &= df["category_main_label"] == st.session_state.category_main
+    if st.session_state.category_sub != "All":
+        mask &= df["category_sub_label"] == st.session_state.category_sub
+    if "status_desc" in st.session_state and st.session_state.status_desc != "All":
+        mask &= df["status_desc"] == st.session_state.status_desc
+    if "date_range" in st.session_state and st.session_state.date_range:
+        dr = st.session_state.date_range
+        if isinstance(dr, (tuple, list)) and len(dr) == 2:
+            start, end = dr
+        else:
+            start = end = dr
+        if end == start:
+            end = df[DATE_COL].max().date()
+        start = pd.to_datetime(start)
+        end = pd.to_datetime(end)
+        mask &= df[DATE_COL].between(start, end)
+    return mask
+def get_sorted_filter_options(df, col_name):
+    counts = df[col_name].dropna().value_counts()
+    sorted_vals = counts.index.tolist()
+    return ["All"] + sorted_vals
+def reset_filters():
+    for key, value in DEFAULT_FILTERS.items():
+        st.session_state[key] = value
+    st.rerun()
+#Filters
+with st.sidebar:
+    st.header("Filters")
+    if "history" not in st.session_state:
+        st.session_state.history = []
+    if st.button("Reset Filters"):
+        reset_filters()
+    intended_beneficiary = st.selectbox(
+        "Intended Beneficiary",
+        get_sorted_filter_options(meta_df, "intended_beneficiaries_standardized"),
+        key="intended_beneficiary"
+    )
+    policy_domain = st.selectbox(
+        "Policy Area",
+        get_sorted_filter_options(meta_df, "policy_domain_standardized"),
+        key="policy_domain"
+    )
+    impact_selected = st.selectbox(
+        "Impact Rating (≥ Selected Level)",
+        ["All"] + IMPACT_ORDER,
+        key="impact_selected"
+    )
+    category_main = st.selectbox(
+        "Category",
+        get_sorted_filter_options(meta_df, "category_main_label"),
+        key="category_main"
+    )
+    category_sub = st.selectbox(
+        "Sub Category",
+        get_sorted_filter_options(meta_df, "category_sub_label"),
+        key="category_sub"
+    )
+    top_k = st.slider("Number of results", 5, 50, DEFAULT_TOP_K, 5)
+    status_desc = st.selectbox(
+        "Bill Status",
+        ["All"] + sorted(meta_df["status_desc"].dropna().unique().tolist()),
+        key="status_desc"
+    )
+    st.subheader("Time Filter")
+    min_date = meta_df[DATE_COL].min().date()
+    max_date = meta_df[DATE_COL].max().date()
+    default_value = st.session_state.get("date_range", (min_date, max_date))
+    if isinstance(default_value, (tuple, list)):
+        if len(default_value) == 2:
+            start, end = default_value
+        else:
+            start = end = default_value[0]
+    else:
+        start = end = default_value
+    st.date_input(
+        "Status Date Range",
+        value=(start, end),
+        min_value=min_date,
+        max_value=max_date,
+        key="date_range"
+    )
+    if os.path.exists(FEEDBACK_CSV):
+        try:
+            df_feedback = pd.read_csv(FEEDBACK_CSV)
+            st.info(f" Feedback records: {len(df_feedback)}")
+            if st.button(" Download Feedback CSV"):
+                st.download_button(
+                    label="Download impact_feedback.csv",
+                    data=open(FEEDBACK_CSV, 'rb').read(),
+                    file_name="impact_feedback.csv",
+                    mime="text/csv"
+                )
+        except:
+            st.info("Feedback CSV ready (empty)")
+# Rows of meta_df that satisfy the current sidebar filters; used by both tabs.
+filtered_df = meta_df[
+    build_filter_mask(
+        meta_df,
+        st.session_state.intended_beneficiary,
+        st.session_state.policy_domain,
+        st.session_state.impact_selected
+    )
+]
+# Two top-level tabs: search/results and aggregate trend charts.
+tab_search, tab_trends = st.tabs(["Search & Results", "Trends & Insights"])
+#Search Tab
+with tab_search:
+    st.title("IGPA Legislation Explorer")
+    #Overview
+    # Four headline metrics computed over the currently filtered bills.
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric("Total Bills", len(filtered_df))
+    with col2:
+        st.metric(
+            "Policy Domains",
+            filtered_df["policy_domain_standardized"].nunique()
+        )
+    with col3:
+        st.metric(
+            "Beneficiary Groups",
+            filtered_df["intended_beneficiaries_standardized"].nunique()
+        )
+    with col4:
+        # Count of bills per impact level; reindex guarantees every level in
+        # IMPACT_ORDER is present (0 when absent) so the lookups below are safe.
+        impact_counts = (
+            filtered_df["impact_rating_standardized"]
+            .dropna()
+            .value_counts()
+            .reindex(IMPACT_ORDER, fill_value=0)
+        )
+        # NOTE(review): this shows the same number as "Total Bills" — confirm
+        # whether the count of rated bills was intended instead.
+        st.metric("Impact Breakdown", len(filtered_df))
+        st.markdown(
+            f"<div style='font-size:12px; color:#6b7280;'>"
+            f"Very Impactful: <b>{impact_counts['Very Impactful']}</b> | "
+            f"Moderately: <b>{impact_counts['Moderately Impactful']}</b> | "
+            f"Slightly: <b>{impact_counts['Slightly Impactful']}</b> | "
+            f"Not: <b>{impact_counts['Not Impactful']}</b>"
+            f"</div>",
+            unsafe_allow_html=True
+        )
+    #Most Impacted Beneficiary Categories
+    st.subheader("Most Impacted Beneficiary Categories")
+    # Per beneficiary category: mean impact score, bill count, the first five
+    # titles, and the three most frequent intended beneficiaries. Only the ten
+    # categories with the highest mean score are kept.
+    impact_df = (
+        filtered_df.dropna(subset=["beneficiary_category", "impact_rating_score"])
+        .groupby("beneficiary_category")
+        .agg(
+            avg_impact=("impact_rating_score", "mean"),
+            bills=("bill_id","count"),
+            top_bills=("title", lambda x: "; ".join(x.head(5))),
+            top_beneficiaries=("intended_beneficiaries_standardized", lambda x: ", ".join(x.value_counts().head(3).index))
+        )
+        .reset_index()
+        .sort_values("avg_impact", ascending=False)
+        .head(10)
+    )
+    # Skip the chart entirely when no rows survive the filters.
+    if not impact_df.empty:
+        st.altair_chart(
+            alt.Chart(impact_df)
+            .mark_bar()
+            .encode(
+                x=alt.X("beneficiary_category:N", sort="-y", title="Beneficiary Category"),
+                y=alt.Y("avg_impact:Q", title="Average Impact Score"),
+                color=alt.Color(
+                    "avg_impact:Q",
+                    scale=alt.Scale(domain=[0,4], range=["#FFF176","#E53935"]),
+                    legend=alt.Legend(title="Impact Severity")
+                ),
+                tooltip=[
+                    alt.Tooltip("beneficiary_category:N", title="Beneficiary"),
+                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
+                    alt.Tooltip("bills:Q", title="Number of Bills"),
+                    alt.Tooltip("top_bills:N", title="Top Bills"),
+                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries")
+                ]
+            )
+            .properties(height=350),
+            use_container_width=True
+        )
+    # Bills from Filters
+    st.subheader("Bills Matching Selected Filters")
+    # Maps metadata column names to the headers shown in the table.
+    display_cols = {
+        "bill_number": "Bill Number",
+        "title": "Title",
+        "description": "Description",
+        "policy_domain_standardized": "Policy Domain",
+        "category_main_label": "Category",
+        "intent_standardized": "Intent",
+        "legislative_goal_standardized": "Legislative Goal",
+        "beneficiary_category": "Beneficiary Group",
+        "intended_beneficiaries_standardized": "Intended Beneficiaries",
+        "potential_impact_raw": "Potential Impact",
+        "impact_rating_standardized": "Impact Rating",
+        "status_desc": "Status",
+        "full_text_url": "Bill Link"
+    }
+    # Keep only the columns that actually exist in the data to avoid a KeyError.
+    available_cols = {k: v for k, v in display_cols.items() if k in filtered_df.columns}
+    filter_bill_df = (
+        filtered_df[list(available_cols.keys())]
+        .rename(columns=available_cols)
+        .copy()
+    )
+    st.dataframe(
+        filter_bill_df,
+        use_container_width=True,
+        column_config={
+            "Bill Link": st.column_config.LinkColumn(
+                label="Bill Link",
+                display_text="Open Bill"
+            )
+        }
+    )
+    st.markdown("---")
+    #Search Bills
+    st.subheader("Search Bills")
+    query = st.text_area(
+        "Ask a question about legislation",
+        value=st.session_state.current_query,
+        height=80,
+        placeholder="Example: bills related to funding",
+        key="search_query_input"
+    )
+    search_clicked = st.button("Search", key="search_button")
+    if search_clicked and query.strip():
+        st.session_state.current_query = query
+        st.session_state.history.append({"query": query})
+        q_vec = embed_query(query)
+        n_search = min(len(meta_df), top_k*5)
+        scores, ids = index.search(q_vec, n_search)
+        ids, scores = ids[0], scores[0]
+        allowed = set(filtered_df.index)
+        kept = [(i,s) for i,s in zip(ids,scores) if i in allowed][:top_k]
+        if not kept:
+            st.warning("No results found.")
+            st.session_state.search_results = None
+        else:
+            results = meta_df.loc[[i for i,_ in kept]].copy()
+            results["similarity"] = [s for _,s in kept]
+            st.session_state.search_results = results
+    # Render stored results on every rerun, not only right after a search.
+    if st.session_state.search_results is not None:
+        results = st.session_state.search_results
+        #Filtered Results Table
+        st.subheader("Filtered Results Table")
+        # Columns shown in the compact review table, in display order.
+        review_cols = [
+            "bill_number",
+            "title",
+            "description",
+            "potential_impact_raw",
+            "increasing_aspects_standardized",
+            "decreasing_aspects_standardized",
+            "similarity",
+            "full_text_url"
+        ]
+        # Drop any column missing from the results to avoid a KeyError.
+        review_df = results[[c for c in review_cols if c in results.columns]].copy()
+        review_df.rename(
+            columns={
+                "bill_number": "Bill Number",
+                "title": "Title",
+                "description": "Description",
+                "potential_impact_raw": "Potential Impact",
+                "increasing_aspects_standardized": "Increasing Aspects",
+                "decreasing_aspects_standardized": "Decreasing Aspects",
+                "similarity": "Score",
+                "full_text_url": "Bill URL"
+            },
+            inplace=True
+        )
+        st.dataframe(
+            review_df,
+            use_container_width=True,
+            column_config={
+                "Bill URL": st.column_config.LinkColumn(
+                    "ILGA URL",
+                    display_text="Open bill"
+                )
+            }
+        )
+        st.markdown("---")
+        st.subheader("Filtered Results")
+        for idx, row in results.iterrows():
+            with st.container():
+                st.markdown(f"### Bill Number: {row['bill_number']}")
+                st.markdown(f"**Title:** {row['title']}")
+                st.write(row["description"])
+                if pd.notna(row.get("category_main_label")):
+                    st.write(f"**Main Category**: {row['category_main_label']}")
+                if pd.notna(row.get("category_sub_label")):
+                    st.write(f"**Sub Category**: {row['category_sub_label']}")
+                if pd.notna(row.get("llama_summary_raw")):
+                    st.markdown(f"**LLaMA Summary:** {row['llama_summary_raw']}")
+                info_text = (
+                    f"Session: {row.get('session','')} • "
+                    f"Chamber: {row.get('chamber','')} • "
+                    f"Impact: {row.get('impact_rating_standardized','')} • "
+                    f"Beneficiaries: {row.get('intended_beneficiaries_standardized','')} • "
+                    f"Domain: {row.get('policy_domain_standardized','')} • "
+                    f"Similarity: {row.get('similarity'):.3f}"
+                )
+                st.caption(info_text)
+                if pd.notna(row.get("full_text_url")):
+                    st.markdown(f"[🔗 View Full Bill]({row['full_text_url']})", unsafe_allow_html=True)
+                std_cols = [
+                    c for c in results.columns
+                    if c.endswith("_standardized") and c not in [
+                        "impact_rating_standardized",
+                        "increasing_aspects_standardized",
+                        "decreasing_aspects_standardized",
+                        "original_law_standardized"
+                    ]
+                ]
+                with st.expander("More Details"):
+                    for c in std_cols:
+                        val = row.get(c)
+                        if pd.notna(val) and str(val).strip():
+                            label = c.replace("_standardized","").replace("_"," ").title()
+                            st.write(f"**{label}**: {val}")
+                with st.expander("Similar Bills"):
+                    sim_df = results.iloc[:5][
+                        ["bill_number","title","description","full_text_url"]
+                    ].copy()
+                    st.dataframe(
+                        sim_df,
+                        use_container_width=True,
+                        column_config={
+                            "full_text_url": st.column_config.LinkColumn(
+                                "Bill Link",
+                                display_text="Open"
+                            )
+                        }
+                    )
+                #Impact rating feedbacK
+                with st.expander("👍👎 Rate Impact Accuracy", expanded=False):
+                    st.markdown("**Is this impact rating accurate?**")
+                    predicted_impact = row.get("impact_rating_standardized", "")
+                    bill_id_safe = str(row.get('bill_id', idx))
+                    # Check if feedback was already submitted for this bill
+                    feedback_submitted = st.session_state.get(f"feedback_done_{bill_id_safe}", False)
+                    if feedback_submitted:
+                        st.success("Thank you for your feedback!")
+                        st.caption(f"Bill: {row.get('bill_number', 'N/A')} | Saved to impact_feedback.csv")
+                    else:
+                        col1, col2 = st.columns(2)
+                        with col1:
+                            if st.button("👍 **Yes - Accurate**", key=f"yes_{bill_id_safe}", use_container_width=True):
+                                append_feedback_row(
+                                    bill_id=bill_id_safe,
+                                    predicted_impact=predicted_impact,
+                                    user_response="Yes",
+                                    corrected_impact=None,
+                                )
+                                st.session_state[f"feedback_done_{bill_id_safe}"] = True
+                                st.sidebar.success(f"Feedback saved for {row.get('bill_number', bill_id_safe)}")
+                                st.rerun()
+                        with col2:
+                            if st.button("👎 **No - Incorrect**", key=f"no_{bill_id_safe}", use_container_width=True):
+                                st.session_state[f"show_corrected_{bill_id_safe}"] = True
+                                st.rerun()
+                        if st.session_state.get(f"show_corrected_{bill_id_safe}", False):
+                            st.info(f"**What should the impact rating be instead?**")
+                            corrected_value = st.selectbox(
+                                "**Correct impact rating**",
+                                IMPACT_ORDER,
+                                key=f"corrected_{bill_id_safe}",
+                            )
+                            col_submit, col_cancel = st.columns([3, 1])
+                            with col_submit:
+                                if st.button("**Submit Feedback**", key=f"submit_{bill_id_safe}", type="primary"):
+                                    append_feedback_row(
+                                        bill_id=bill_id_safe,
+                                        predicted_impact=predicted_impact,
+                                        user_response="No",
+                                        corrected_impact=corrected_value,
+                                    )
+                                    st.session_state[f"feedback_done_{bill_id_safe}"] = True
+                                    st.session_state[f"show_corrected_{bill_id_safe}"] = False
+                                    st.sidebar.success(f"Feedback saved for {row.get('bill_number', bill_id_safe)}")
+                                    st.rerun()
+                            with col_cancel:
+                                if st.button("Cancel", key=f"cancel_{bill_id_safe}"):
+                                    st.session_state[f"show_corrected_{bill_id_safe}"] = False
+                                    st.rerun()
+    #Search History
+    # Lists the five most recent queries, newest first.
+    with st.sidebar.expander("Search History"):
+        for i,item in enumerate(reversed(st.session_state.history[-5:]),1):
+            st.write(f"{i}. {item.get('query','')}")
+# TRENDS TAB
+with tab_trends:
+    st.subheader("Trends & Insights")
+    # Key Insights
+    # Most frequent policy domain / beneficiary category among filtered bills.
+    top_policy = filtered_df["policy_domain_standardized"].value_counts().head(1)
+    top_beneficiaries = filtered_df["beneficiary_category"].value_counts().head(1)
+    # Number of "Very Impactful" bills per legislative strategy.
+    strategy_impact = (
+        filtered_df[filtered_df["impact_rating_standardized"].notna()]
+        .groupby("legislative_strategy_standardized")["impact_rating_standardized"]
+        .apply(lambda x: (x=="Very Impactful").sum())
+    )
+    # Mean impact score per beneficiary category, highest first.
+    avg_impact_ben = (
+        filtered_df.dropna(subset=["impact_rating_score"])
+        .groupby("beneficiary_category")["impact_rating_score"]
+        .mean()
+        .sort_values(ascending=False)
+    )
+    total_bills = len(filtered_df)
+    total_high_impact = (filtered_df["impact_rating_standardized"]=="Very Impactful").sum()
+    st.markdown("### Key Insights")
+    st.write(f"**Total Bills Considered:** {total_bills}")
+    st.write(f"**Total Very Impactful Bills:** {total_high_impact}")
+    # Each line below falls back to a placeholder when its series is empty.
+    st.write(f"**Most Active Policy Domain:** {top_policy.index[0]} ({top_policy.iloc[0]} bills)" if not top_policy.empty else "No data")
+    st.write(f"**Most Benefited Group:** {top_beneficiaries.index[0]} ({top_beneficiaries.iloc[0]} bills)" if not top_beneficiaries.empty else "No data")
+    st.write(f"**Strategy Producing Most Very Impactful Bills:** {strategy_impact.idxmax() if not strategy_impact.empty else 'N/A'}")
+    st.write(f"**Highest Average Impact (Beneficiary):** {avg_impact_ben.index[0]} ({avg_impact_ben.iloc[0]:.2f})" if not avg_impact_ben.empty else "N/A")
+    st.markdown("---")
+    col1, col2 = st.columns(2)
+    # Policy Domain
+    with col1:
+        st.markdown("### Policy Domain Activity")
+        policy_agg = (
+            filtered_df.groupby("policy_domain_standardized")
+            .agg(
+                Count=("bill_id","count"),
+                avg_impact=("impact_rating_score","mean"),
+                top_bills=("title", lambda x: "; ".join(x.head(5))),
+                top_beneficiaries=("intended_beneficiaries_standardized", lambda x: ", ".join(x.value_counts().head(3).index)),
+                recent_date=("status_date_y", lambda x: x.max().strftime("%Y-%m-%d")),
+                bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5))))
+            )
+            .reset_index()
+            .rename(columns={"policy_domain_standardized":"Policy Domain"})
+        )
+        policy_chart = (
+            alt.Chart(policy_agg)
+            .mark_bar()
+            .encode(
+                x=alt.X("Policy Domain:N", sort="-y", title="Policy Domain"),
+                y=alt.Y("Count:Q", title="Number of Bills"),
+                color=alt.Color("Count:Q", scale=alt.Scale(scheme="reds"), legend=None),
+                tooltip=[
+                    alt.Tooltip("Policy Domain:N"),
+                    alt.Tooltip("Count:Q", title="Number of Bills"),
+                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
+                    alt.Tooltip("top_bills:N", title="Top Bills"),
+                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
+                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
+                    alt.Tooltip("bill_numbers:N", title="Bill Numbers")
+                ]
+            )
+            .properties(height=400)
+        )
+        st.altair_chart(policy_chart, use_container_width=True)
+    # Impact Distribution
+    with col2:
+        st.markdown("### Impact Distribution")
+        # Bill count per impact level; reindex keeps every level in
+        # IMPACT_ORDER (0 when absent) and fixes the row order.
+        impact_dist = (
+            filtered_df[filtered_df["impact_rating_standardized"].notna()]["impact_rating_standardized"]
+            .value_counts()
+            .reindex(IMPACT_ORDER, fill_value=0)
+            .reset_index()
+        )
+        # Rename the two columns produced by reset_index for charting.
+        impact_dist.columns = ["Impact Level", "Count"]
+        impact_chart = (
+            alt.Chart(impact_dist)
+            .mark_bar()
+            .encode(
+                x=alt.X("Impact Level:N", sort=IMPACT_ORDER),
+                y=alt.Y("Count:Q"),
+                color=alt.Color("Count:Q", scale=alt.Scale(scheme="reds")),
+                tooltip=[
+                    alt.Tooltip("Impact Level:N"),
+                    alt.Tooltip("Count:Q")
+                ]
+            )
+            .properties(height=300)
+        )
+        st.altair_chart(impact_chart, use_container_width=True)
+    # Strategy High Impact
+    st.markdown("### Legislative Strategy: Very Impactful Bills")
+    strategy_high_impact = (
+        filtered_df[filtered_df["impact_rating_standardized"].notna()]
+        .groupby("legislative_strategy_standardized")
+        .agg(
+            Very_Impactful_Bills=("impact_rating_standardized", lambda x: (x=="Very Impactful").sum()),
+            top_bills=("title", lambda x: "; ".join(x.head(5))),
+            top_beneficiaries=("intended_beneficiaries_standardized", lambda x: ", ".join(x.value_counts().head(3).index)),
+            recent_date=("status_date_y", lambda x: x.max().strftime("%Y-%m-%d"))
+        )
+        .reset_index()
+        .rename(columns={"legislative_strategy_standardized":"Strategy"})
+    )
+    strategy_chart = (
+        alt.Chart(strategy_high_impact)
+        .mark_bar()
+        .encode(
+            x=alt.X("Strategy:N", sort="-y", title="Strategy"),
+            y=alt.Y("Very_Impactful_Bills:Q", title="Very Impactful Bills"),
+            color=alt.Color("Very_Impactful_Bills:Q", scale=alt.Scale(scheme="orangered")),
+            tooltip=[
+                alt.Tooltip("Strategy:N"),
+                alt.Tooltip("Very_Impactful_Bills:Q"),
+                alt.Tooltip("top_bills:N", title="Top Bills"),
+                alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
+                alt.Tooltip("recent_date:N", title="Most Recent Bill")
+            ]
+        )
+        .properties(height=400)
+    )
+    st.altair_chart(strategy_chart, use_container_width=True)
+    # Impact by Category
+    st.markdown("### Impact by Category")
+    impact_cat = (
+        filtered_df[
+            filtered_df["impact_rating_standardized"].notna() &
+            filtered_df["category_main_label"].notna()
+        ]
+        .groupby(["category_main_label", "impact_rating_standardized"])
+        .agg(
+            Count=("bill_id","count"),
+            avg_impact=("impact_rating_score","mean"),
+            top_bills=("title", lambda x: "; ".join(x.head(5))),
+            top_beneficiaries=("intended_beneficiaries_standardized", lambda x: ", ".join(x.value_counts().head(3).index)),
+            recent_date=("status_date_y", lambda x: x.max().strftime("%Y-%m-%d")),
+            bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5))))
+        )
+        .reset_index()
+    )
+    if impact_cat.empty:
+        st.write("No data available for impact by category.")
+    else:
+        top_categories = (
+            impact_cat.groupby("category_main_label")["Count"]
+            .sum()
+            .sort_values(ascending=False)
+            .head(15)
+            .index.tolist()
+        )
+        impact_cat_top = impact_cat[impact_cat["category_main_label"].isin(top_categories)]
+        impact_cat_chart = (
+            alt.Chart(impact_cat_top)
+            .mark_bar()
+            .encode(
+                y=alt.Y("category_main_label:N", sort=top_categories, title="Category"),
+                x=alt.X("Count:Q", stack="zero", title="Number of Bills"),
+                color=alt.Color("impact_rating_standardized:N", sort=IMPACT_ORDER, scale=alt.Scale(scheme="reds"), title="Impact Rating"),
+                tooltip=[
+                    alt.Tooltip("category_main_label:N", title="Category"),
+                    alt.Tooltip("impact_rating_standardized:N", title="Impact Rating"),
+                    alt.Tooltip("Count:Q", title="Number of Bills"),
+                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
+                    alt.Tooltip("top_bills:N", title="Top Bills"),
+                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
+                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
+                    alt.Tooltip("bill_numbers:N", title="Bill Numbers")
+                ]
+            )
+            .properties(height=400)
+        )
+        st.altair_chart(impact_cat_chart, use_container_width=True)
+    # Beneficiary Treemap
+    st.markdown("### Beneficiary Coverage & Average Impact")
+    ben_treemap_df = (
+        filtered_df.dropna(subset=["beneficiary_category", "impact_rating_score"])
+        .groupby("beneficiary_category")
+        .agg(
+            total_bills=("bill_id","count"),
+            avg_impact=("impact_rating_score","mean"),
+            top_bills=("title", lambda x: "; ".join(x.head(5))),
+            recent_date=("status_date_y", lambda x: x.max().strftime("%Y-%m-%d")),
+            bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5))))
+        )
+        .reset_index()
+    )
+    if not ben_treemap_df.empty:
+        treemap = (
+            alt.Chart(ben_treemap_df)
+            .mark_rect()
+            .encode(
+                x=alt.X("total_bills:Q", title="Number of Bills"),
+                y=alt.Y("beneficiary_category:N", sort="-x", title="Beneficiary Category"),
+                size="total_bills:Q",
+                color=alt.Color("avg_impact:Q", scale=alt.Scale(domain=[0,4], range=["#FFF176","#E53935"]), legend=alt.Legend(title="Average Impact Score")),
+                tooltip=[
+                    alt.Tooltip("beneficiary_category:N", title="Beneficiary"),
+                    alt.Tooltip("total_bills:Q", title="Number of Bills"),
+                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
+                    alt.Tooltip("top_bills:N", title="Top Bills"),
+                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
+                    alt.Tooltip("bill_numbers:N", title="Bill Numbers")
+                ]
+            )
+            .properties(height=400)
+        )
+        st.altair_chart(treemap, use_container_width=True)
+    else:
+        st.write("No beneficiary impact data available for selected filters.")

config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "embedding_model_name": "all-MiniLM-L6-v2",
+  "text_column": "llama_summary_raw",
+  "id_column": "bill_id",
+  "embedding_dimension": 384,
+  "top_k_default": 10
+}

faiss_index.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ada3c96d32ced3e164cd9492ac0c0173aee511d7145dad24d3ca17f2709e44c0
+size 7475757

features_with_allbilldata.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06ae4c4e38fdc149676ee7504d9bdf100786b1583f68a267b56f9b616951550c
+size 23351162

gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

metadata.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f5a5bfeec0216df7642ccee3a4ec410277a554daa369cf11c58099490fdfe7
+size 23370903

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+pandas
+numpy
+faiss-cpu
+sentence-transformers
+pyarrow