Spaces:

taylerErbe
/

Legislation_Explorer

Sleeping

App Files Files Community

Legislation_Explorer / app.py

taylerErbe

Upload 8 files

bb9abee verified about 2 months ago

raw

history blame contribute delete

30.8 kB

	import os
	import json
	import numpy as np
	import pandas as pd
	import faiss
	import streamlit as st
	import altair as alt
	from sentence_transformers import SentenceTransformer
	import csv
	from datetime import datetime

	# Config
	# Paths are resolved relative to DB_DIR (the current working directory).
	DB_DIR = "."
	FEEDBACK_CSV = os.path.join(DB_DIR, "impact_feedback.csv")
	DEFAULT_TOP_K = 10

	# Impact labels in ascending order; impact_threshold() slices this list,
	# so the position of a label is its rank.
	IMPACT_ORDER = [
	    "Not Impactful",
	    "Slightly Impactful",
	    "Moderately Impactful",
	    "Very Impactful",
	]

	# Page-level Streamlit settings.
	st.set_page_config(
	    initial_sidebar_state="expanded",
	    layout="wide",
	    page_title="IGPA Legislation Explorer",
	)

	# Loading vector database
	@st.cache_resource
	def load_vector_db(db_dir: str = DB_DIR):
	    """Load the FAISS index, bill metadata and embedding model from *db_dir*.

	    Reads ``config.json``, ``faiss_index.bin`` and ``metadata.parquet`` from
	    the directory and instantiates the sentence-transformer named by the
	    ``embedding_model_name`` config key. The result is cached through
	    ``st.cache_resource``.

	    Args:
	        db_dir: Directory holding the three artefacts.

	    Returns:
	        Tuple ``(index, meta, model, cfg)``: the FAISS index, the metadata
	        DataFrame, the SentenceTransformer instance and the parsed config.
	    """
	    # JSON is UTF-8 by specification; do not rely on the platform default.
	    with open(os.path.join(db_dir, "config.json"), "r", encoding="utf-8") as f:
	        cfg = json.load(f)

	    index = faiss.read_index(os.path.join(db_dir, "faiss_index.bin"))
	    meta = pd.read_parquet(os.path.join(db_dir, "metadata.parquet"))

	    # Materialise the row index as a ``vec_id`` column when the file lacks one.
	    # NOTE(review): the rename only matches an unnamed index (column "index");
	    # a named index would leave ``vec_id`` missing — confirm against the data.
	    if "vec_id" not in meta.columns:
	        meta = meta.reset_index().rename(columns={"index": "vec_id"})

	    model = SentenceTransformer(cfg["embedding_model_name"])
	    return index, meta, model, cfg

	# Pull the cached resources into module-level names used by the rest of the script.
	index, meta_df, embed_model, cfg = load_vector_db()

	# Parse the status-date column; values that cannot be parsed become NaT.
	DATE_COL = "status_date_y"
	meta_df[DATE_COL] = pd.to_datetime(meta_df[DATE_COL], errors="coerce")

	# Default widget values; the default date range spans the whole dataset.
	_status_dates = meta_df[DATE_COL]
	DEFAULT_FILTERS = {
	    "intended_beneficiary": "All",
	    "policy_domain": "All",
	    "impact_selected": "All",
	    "category_main": "All",
	    "category_sub": "All",
	    "status_desc": "All",
	    "date_range": (_status_dates.min().date(), _status_dates.max().date()),
	}

	# Seed session state on first run only; values from earlier reruns are kept.
	_initial_state = {**DEFAULT_FILTERS, "search_results": None, "current_query": ""}
	for _name, _default in _initial_state.items():
	    if _name not in st.session_state:
	        st.session_state[_name] = _default

	def embed_query(query: str):
	    """Encode *query* with the shared embedding model.

	    The query is passed as a one-element batch with
	    ``normalize_embeddings=True`` and the result is cast to ``float32``.
	    """
	    encoded = embed_model.encode(
	        [query],
	        convert_to_numpy=True,
	        normalize_embeddings=True,
	    )
	    return encoded.astype("float32")

	def impact_threshold(level, order=None):
	    """Return the impact labels ranked at or above *level*.

	    Args:
	        level: Impact label used as the lower bound.
	        order: Labels sorted from least to most impactful. Defaults to the
	            module-level ``IMPACT_ORDER``.

	    Returns:
	        A new list holding *level* and every label after it in *order*,
	        or ``[]`` when *level* is not one of the labels.
	    """
	    ranked = list(IMPACT_ORDER if order is None else order)
	    try:
	        # One scan: index() both tests membership and locates the cut point.
	        return ranked[ranked.index(level):]
	    except ValueError:
	        return []

	def append_feedback_row(
	    bill_id,
	    predicted_impact,
	    user_response,
	    corrected_impact=None,
	    path=FEEDBACK_CSV,
	):
	    """Append one impact-rating feedback record to the CSV at *path*.

	    A header row is written first when the file is missing or empty. On
	    success a confirmation is shown in the sidebar; any failure while
	    writing is reported with ``st.error`` instead of being raised.

	    Args:
	        bill_id: Identifier of the bill being rated.
	        predicted_impact: Impact rating that was shown to the user.
	        user_response: The user's verdict (callers pass "Yes" or "No").
	        corrected_impact: Rating supplied by the user; stored as an empty
	            string when falsy.
	        path: Destination CSV file.
	    """
	    from datetime import timezone  # only needed for the timestamp below

	    try:
	        # An existing but zero-byte file still needs its header row.
	        needs_header = not os.path.isfile(path) or os.path.getsize(path) == 0
	        # Same naive-UTC ISO string as before, without the deprecated utcnow().
	        stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
	        with open(path, "a", newline="", encoding="utf-8") as f:
	            writer = csv.writer(f)
	            if needs_header:
	                writer.writerow(
	                    [
	                        "timestamp",
	                        "bill_id",
	                        "predicted_impact",
	                        "user_response",
	                        "corrected_impact",
	                    ]
	                )
	            writer.writerow(
	                [
	                    stamp,
	                    bill_id,
	                    predicted_impact,
	                    user_response,
	                    corrected_impact if corrected_impact else "",
	                ]
	            )
	    except Exception as e:
	        # UI boundary: surface the problem to the user rather than crash the app.
	        st.error(f"Failed to save feedback: {str(e)}")
	    else:
	        st.sidebar.success(f"Feedback saved to: `{path}`")

	def build_filter_mask(df, intended_beneficiary, policy_domain, impact_selected):
	    """Build a boolean row mask for *df* from the active filters.

	    Beneficiary, policy domain and impact arrive as arguments; category,
	    sub-category, bill status and date range are read from
	    ``st.session_state``. A value of ``"All"`` disables that filter.

	    Args:
	        df: Metadata frame to filter.
	        intended_beneficiary: Exact beneficiary label, or ``"All"``.
	        policy_domain: Exact policy-domain label, or ``"All"``.
	        impact_selected: Minimum impact label, or ``"All"``.

	    Returns:
	        Boolean Series aligned with ``df.index``.
	    """
	    state = st.session_state
	    mask = pd.Series(True, index=df.index)

	    if intended_beneficiary != "All":
	        mask &= df["intended_beneficiaries_standardized"] == intended_beneficiary
	    if policy_domain != "All":
	        mask &= df["policy_domain_standardized"] == policy_domain
	    if impact_selected != "All":
	        # Keep the selected level and everything ranked above it.
	        mask &= df["impact_rating_standardized"].isin(impact_threshold(impact_selected))
	    if state.category_main != "All":
	        mask &= df["category_main_label"] == state.category_main
	    if state.category_sub != "All":
	        mask &= df["category_sub_label"] == state.category_sub
	    if "status_desc" in state and state.status_desc != "All":
	        mask &= df["status_desc"] == state.status_desc

	    dr = state.date_range if "date_range" in state else None
	    if dr:
	        if isinstance(dr, (tuple, list)):
	            # The range picker yields a 1-tuple while only the first date has
	            # been chosen; unpack it instead of treating the tuple as a date.
	            start = dr[0]
	            end = dr[1] if len(dr) == 2 else dr[0]
	        else:
	            start = end = dr
	        # A single day (or half-finished range) is read as "from that day on".
	        if end == start:
	            end = df[DATE_COL].max().date()

	        # between() is False for NaT, so undated rows drop out here.
	        mask &= df[DATE_COL].between(pd.to_datetime(start), pd.to_datetime(end))
	    return mask

	def get_sorted_filter_options(df, col_name):
	    """Return ``"All"`` followed by the non-null values of *col_name*, most frequent first."""
	    non_null = df[col_name].dropna()
	    by_frequency = non_null.value_counts().index.tolist()
	    return ["All", *by_frequency]

	def reset_filters():
	    """Write every default from ``DEFAULT_FILTERS`` back into session state, then rerun."""
	    for name in DEFAULT_FILTERS:
	        st.session_state[name] = DEFAULT_FILTERS[name]
	    st.rerun()

	# Filters
	with st.sidebar:
	    st.header("Filters")
	    if "history" not in st.session_state:
	        st.session_state.history = []
	    # Placed before the filter widgets so the defaults are written to
	    # session state before any widget with those keys is created in this run.
	    if st.button("Reset Filters"):
	        reset_filters()

	    intended_beneficiary = st.selectbox(
	        "Intended Beneficiary",
	        get_sorted_filter_options(meta_df, "intended_beneficiaries_standardized"),
	        key="intended_beneficiary",
	    )

	    policy_domain = st.selectbox(
	        "Policy Area",
	        get_sorted_filter_options(meta_df, "policy_domain_standardized"),
	        key="policy_domain",
	    )

	    impact_selected = st.selectbox(
	        "Impact Rating (≥ Selected Level)",
	        ["All"] + IMPACT_ORDER,
	        key="impact_selected",
	    )

	    category_main = st.selectbox(
	        "Category",
	        get_sorted_filter_options(meta_df, "category_main_label"),
	        key="category_main",
	    )

	    category_sub = st.selectbox(
	        "Sub Category",
	        get_sorted_filter_options(meta_df, "category_sub_label"),
	        key="category_sub",
	    )

	    top_k = st.slider("Number of results", 5, 50, DEFAULT_TOP_K, 5)

	    status_desc = st.selectbox(
	        "Bill Status",
	        ["All"] + sorted(meta_df["status_desc"].dropna().unique().tolist()),
	        key="status_desc",
	    )

	    st.subheader("Time Filter")

	    min_date = meta_df[DATE_COL].min().date()
	    max_date = meta_df[DATE_COL].max().date()

	    default_value = st.session_state.get("date_range", (min_date, max_date))

	    # Normalise whatever is stored (2-tuple, 1-tuple, empty tuple or a bare
	    # date) into a (start, end) pair for the picker.
	    if isinstance(default_value, (tuple, list)):
	        if len(default_value) == 2:
	            start, end = default_value
	        elif default_value:
	            start = end = default_value[0]
	        else:
	            # Empty selection: fall back to the full range instead of indexing [0].
	            start, end = min_date, max_date
	    else:
	        start = end = default_value

	    st.date_input(
	        "Status Date Range",
	        value=(start, end),
	        min_value=min_date,
	        max_value=max_date,
	        key="date_range",
	    )

	    # Feedback export. NOTE(review): assumed to belong to the sidebar — the
	    # source indentation was lost; confirm the intended placement.
	    if os.path.exists(FEEDBACK_CSV):
	        try:
	            df_feedback = pd.read_csv(FEEDBACK_CSV)
	            st.info(f" Feedback records: {len(df_feedback)}")
	            if st.button(" Download Feedback CSV"):
	                # Close the handle deterministically rather than leaking it.
	                with open(FEEDBACK_CSV, "rb") as fh:
	                    feedback_bytes = fh.read()
	                st.download_button(
	                    label="Download impact_feedback.csv",
	                    data=feedback_bytes,
	                    file_name="impact_feedback.csv",
	                    mime="text/csv",
	                )
	        except Exception:
	            # Best effort (e.g. an empty file pandas cannot parse); unlike a
	            # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
	            st.info("Feedback CSV ready (empty)")

	# Apply the current sidebar selections to the full metadata table.
	_active_mask = build_filter_mask(
	    meta_df,
	    st.session_state.intended_beneficiary,
	    st.session_state.policy_domain,
	    st.session_state.impact_selected,
	)
	filtered_df = meta_df[_active_mask]

	# The two top-level views of the app.
	tab_search, tab_trends = st.tabs(["Search & Results", "Trends & Insights"])

	# Search Tab
	with tab_search:
	    st.title("IGPA Legislation Explorer")

	    # Overview: four headline metrics computed on the filtered bills.
	    col1, col2, col3, col4 = st.columns(4)

	    with col1:
	        st.metric("Total Bills", len(filtered_df))

	    with col2:
	        st.metric(
	            "Policy Domains",
	            filtered_df["policy_domain_standardized"].nunique(),
	        )

	    with col3:
	        st.metric(
	            "Beneficiary Groups",
	            filtered_df["intended_beneficiaries_standardized"].nunique(),
	        )

	    with col4:
	        # Count per impact label; labels absent from the data show as 0.
	        impact_counts = (
	            filtered_df["impact_rating_standardized"]
	            .dropna()
	            .value_counts()
	            .reindex(IMPACT_ORDER, fill_value=0)
	        )
	        st.metric("Impact Breakdown", len(filtered_df))
	        # Plain "|" separators: the former "\|" was an invalid escape sequence
	        # and put a literal backslash into the HTML.
	        st.markdown(
	            f"<div style='font-size:12px; color:#6b7280;'>"
	            f"Very Impactful: <b>{impact_counts['Very Impactful']}</b> | "
	            f"Moderately: <b>{impact_counts['Moderately Impactful']}</b> | "
	            f"Slightly: <b>{impact_counts['Slightly Impactful']}</b> | "
	            f"Not: <b>{impact_counts['Not Impactful']}</b>"
	            f"</div>",
	            unsafe_allow_html=True,
	        )

	# Most Impacted Beneficiary Categories
	# Re-entering the tab container appends below what it already holds.
	with tab_search:
	    st.subheader("Most Impacted Beneficiary Categories")

	    # Ten beneficiary categories with the highest mean impact score.
	    impact_df = (
	        filtered_df.dropna(subset=["beneficiary_category", "impact_rating_score"])
	        .groupby("beneficiary_category")
	        .agg(
	            avg_impact=("impact_rating_score", "mean"),
	            bills=("bill_id", "count"),
	            # Drop missing titles and stringify so join() cannot hit a NaN float.
	            top_bills=("title", lambda x: "; ".join(x.dropna().astype(str).head(5))),
	            top_beneficiaries=(
	                "intended_beneficiaries_standardized",
	                lambda x: ", ".join(map(str, x.value_counts().head(3).index)),
	            ),
	        )
	        .reset_index()
	        .sort_values("avg_impact", ascending=False)
	        .head(10)
	    )

	    if not impact_df.empty:
	        st.altair_chart(
	            alt.Chart(impact_df)
	            .mark_bar()
	            .encode(
	                x=alt.X("beneficiary_category:N", sort="-y", title="Beneficiary Category"),
	                y=alt.Y("avg_impact:Q", title="Average Impact Score"),
	                color=alt.Color(
	                    "avg_impact:Q",
	                    scale=alt.Scale(domain=[0, 4], range=["#FFF176", "#E53935"]),
	                    legend=alt.Legend(title="Impact Severity"),
	                ),
	                tooltip=[
	                    alt.Tooltip("beneficiary_category:N", title="Beneficiary"),
	                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
	                    alt.Tooltip("bills:Q", title="Number of Bills"),
	                    alt.Tooltip("top_bills:N", title="Top Bills"),
	                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
	                ],
	            )
	            .properties(height=350),
	            use_container_width=True,
	        )

	# Bills from Filters
	# Re-entering the tab container appends below what it already holds.
	with tab_search:
	    st.subheader("Bills Matching Selected Filters")

	    # Source column -> header shown in the table.
	    display_cols = {
	        "bill_number": "Bill Number",
	        "title": "Title",
	        "description": "Description",
	        "policy_domain_standardized": "Policy Domain",
	        "category_main_label": "Category",
	        "intent_standardized": "Intent",
	        "legislative_goal_standardized": "Legislative Goal",
	        "beneficiary_category": "Beneficiary Group",
	        "intended_beneficiaries_standardized": "Intended Beneficiaries",
	        "potential_impact_raw": "Potential Impact",
	        "impact_rating_standardized": "Impact Rating",
	        "status_desc": "Status",
	        "full_text_url": "Bill Link",
	    }

	    # Only columns that actually exist in the data are shown.
	    available_cols = {
	        source: header
	        for source, header in display_cols.items()
	        if source in filtered_df.columns
	    }

	    present = list(available_cols.keys())
	    filter_bill_df = filtered_df[present].rename(columns=available_cols).copy()

	    link_column = st.column_config.LinkColumn(
	        label="Bill Link",
	        display_text="Open Bill",
	    )
	    st.dataframe(
	        filter_bill_df,
	        use_container_width=True,
	        column_config={"Bill Link": link_column},
	    )

	    st.markdown("---")

	# Search Bills
	# Re-entering the tab container appends below what it already holds.
	with tab_search:
	    st.subheader("Search Bills")
	    query = st.text_area(
	        "Ask a question about legislation",
	        value=st.session_state.current_query,
	        height=80,
	        placeholder="Example: bills related to funding",
	        key="search_query_input",
	    )

	    search_clicked = st.button("Search", key="search_button")

	    if search_clicked and query.strip():
	        st.session_state.current_query = query
	        st.session_state.history.append({"query": query})

	        # Rows that pass the sidebar filters.
	        # NOTE(review): assumes FAISS ids coincide with meta_df index labels — confirm.
	        allowed = set(filtered_df.index)
	        kept = []

	        if allowed:
	            q_vec = embed_query(query)
	            max_pool = len(meta_df)
	            pool = min(max_pool, top_k * 5)
	            # The index is searched unfiltered and filtered afterwards, so a
	            # fixed pool can miss valid hits under restrictive filters. Widen
	            # the pool until top_k hits survive or every row was considered.
	            while True:
	                scores, ids = index.search(q_vec, pool)
	                kept = [
	                    (i, s)
	                    for i, s in zip(ids[0], scores[0])
	                    # FAISS pads missing neighbours with id -1.
	                    if i >= 0 and i in allowed
	                ][:top_k]
	                if len(kept) >= top_k or pool >= max_pool:
	                    break
	                pool = min(max_pool, pool * 4)

	        if not kept:
	            st.warning("No results found.")
	            st.session_state.search_results = None
	        else:
	            results = meta_df.loc[[i for i, _ in kept]].copy()
	            results["similarity"] = [s for _, s in kept]
	            st.session_state.search_results = results

	# Results of the last search: summary table, per-bill cards and feedback.
	# Re-entering the tab container appends below what it already holds.
	with tab_search:
	    if st.session_state.search_results is not None:
	        results = st.session_state.search_results

	        # Filtered Results Table
	        st.subheader("Filtered Results Table")
	        # Source column -> header; only columns present in the results are used.
	        review_labels = {
	            "bill_number": "Bill Number",
	            "title": "Title",
	            "description": "Description",
	            "potential_impact_raw": "Potential Impact",
	            "increasing_aspects_standardized": "Increasing Aspects",
	            "decreasing_aspects_standardized": "Decreasing Aspects",
	            "similarity": "Score",
	            "full_text_url": "Bill URL",
	        }
	        review_df = (
	            results[[c for c in review_labels if c in results.columns]]
	            .copy()
	            .rename(columns=review_labels)
	        )

	        st.dataframe(
	            review_df,
	            use_container_width=True,
	            column_config={
	                "Bill URL": st.column_config.LinkColumn(
	                    "ILGA URL",
	                    display_text="Open bill",
	                )
	            },
	        )

	        st.markdown("---")

	        st.subheader("Filtered Results")

	        # Loop-invariant: extra "*_standardized" columns listed under "More Details".
	        excluded_std = {
	            "impact_rating_standardized",
	            "increasing_aspects_standardized",
	            "decreasing_aspects_standardized",
	            "original_law_standardized",
	        }
	        std_cols = [
	            c for c in results.columns
	            if c.endswith("_standardized") and c not in excluded_std
	        ]

	        # Loop-invariant: the first five hits, shown under every bill.
	        # NOTE(review): this list is identical for each bill and may include
	        # the bill itself — confirm that is the intended "similar bills" view.
	        sim_df = results.iloc[:5][
	            ["bill_number", "title", "description", "full_text_url"]
	        ].copy()

	        for idx, row in results.iterrows():
	            with st.container():
	                st.markdown(f"### Bill Number: {row['bill_number']}")
	                st.markdown(f"Title: {row['title']}")
	                st.write(row["description"])

	                if pd.notna(row.get("category_main_label")):
	                    st.write(f"Main Category: {row['category_main_label']}")

	                if pd.notna(row.get("category_sub_label")):
	                    st.write(f"Sub Category: {row['category_sub_label']}")

	                if pd.notna(row.get("llama_summary_raw")):
	                    st.markdown(f"LLaMA Summary: {row['llama_summary_raw']}")

	                info_text = (
	                    f"Session: {row.get('session','')} • "
	                    f"Chamber: {row.get('chamber','')} • "
	                    f"Impact: {row.get('impact_rating_standardized','')} • "
	                    f"Beneficiaries: {row.get('intended_beneficiaries_standardized','')} • "
	                    f"Domain: {row.get('policy_domain_standardized','')} • "
	                    f"Similarity: {row.get('similarity'):.3f}"
	                )
	                st.caption(info_text)

	                if pd.notna(row.get("full_text_url")):
	                    # A markdown link needs no raw HTML, so do not enable it
	                    # for a data-derived string.
	                    st.markdown(f"[🔗 View Full Bill]({row['full_text_url']})")

	                with st.expander("More Details"):
	                    for c in std_cols:
	                        val = row.get(c)
	                        if pd.notna(val) and str(val).strip():
	                            label = c.replace("_standardized", "").replace("_", " ").title()
	                            st.write(f"{label}: {val}")

	                with st.expander("Similar Bills"):
	                    st.dataframe(
	                        sim_df,
	                        use_container_width=True,
	                        column_config={
	                            "full_text_url": st.column_config.LinkColumn(
	                                "Bill Link",
	                                display_text="Open",
	                            )
	                        },
	                    )

	                # Impact rating feedback
	                with st.expander("👍👎 Rate Impact Accuracy", expanded=False):
	                    st.markdown("Is this impact rating accurate?")
	                    predicted_impact = row.get("impact_rating_standardized", "")
	                    # Falls back to the row label when the row has no bill_id.
	                    bill_id_safe = str(row.get('bill_id', idx))

	                    # Check if feedback was already submitted for this bill
	                    feedback_submitted = st.session_state.get(f"feedback_done_{bill_id_safe}", False)

	                    if feedback_submitted:
	                        st.success("Thank you for your feedback!")
	                        # Plain "|": the former "\|" was an invalid escape sequence.
	                        st.caption(f"Bill: {row.get('bill_number', 'N/A')} | Saved to impact_feedback.csv")
	                    else:
	                        col1, col2 = st.columns(2)
	                        with col1:
	                            if st.button("👍 Yes - Accurate", key=f"yes_{bill_id_safe}", use_container_width=True):
	                                append_feedback_row(
	                                    bill_id=bill_id_safe,
	                                    predicted_impact=predicted_impact,
	                                    user_response="Yes",
	                                    corrected_impact=None,
	                                )
	                                st.session_state[f"feedback_done_{bill_id_safe}"] = True
	                                st.sidebar.success(f"Feedback saved for {row.get('bill_number', bill_id_safe)}")
	                                st.rerun()

	                        with col2:
	                            if st.button("👎 No - Incorrect", key=f"no_{bill_id_safe}", use_container_width=True):
	                                # Reveal the correction form on the next run.
	                                st.session_state[f"show_corrected_{bill_id_safe}"] = True
	                                st.rerun()

	                        if st.session_state.get(f"show_corrected_{bill_id_safe}", False):
	                            st.info("What should the impact rating be instead?")
	                            corrected_value = st.selectbox(
	                                "Correct impact rating",
	                                IMPACT_ORDER,
	                                key=f"corrected_{bill_id_safe}",
	                            )

	                            col_submit, col_cancel = st.columns([3, 1])
	                            with col_submit:
	                                if st.button("Submit Feedback", key=f"submit_{bill_id_safe}", type="primary"):
	                                    append_feedback_row(
	                                        bill_id=bill_id_safe,
	                                        predicted_impact=predicted_impact,
	                                        user_response="No",
	                                        corrected_impact=corrected_value,
	                                    )
	                                    st.session_state[f"feedback_done_{bill_id_safe}"] = True
	                                    st.session_state[f"show_corrected_{bill_id_safe}"] = False
	                                    st.sidebar.success(f"Feedback saved for {row.get('bill_number', bill_id_safe)}")
	                                    st.rerun()
	                            with col_cancel:
	                                if st.button("Cancel", key=f"cancel_{bill_id_safe}"):
	                                    st.session_state[f"show_corrected_{bill_id_safe}"] = False
	                                    st.rerun()

	# Search History
	# Lists the five most recent queries, newest first.
	with st.sidebar.expander("Search History"):
	    newest_first = st.session_state.history[-5:][::-1]
	    for position, entry in enumerate(newest_first, start=1):
	        st.write(f"{position}. {entry.get('query','')}")


	# TRENDS TAB
	with tab_trends:
	    st.subheader("Trends & Insights")

	    # Key Insights
	    top_policy = filtered_df["policy_domain_standardized"].value_counts().head(1)
	    top_beneficiaries = filtered_df["beneficiary_category"].value_counts().head(1)
	    # Number of "Very Impactful" bills per legislative strategy.
	    strategy_impact = (
	        filtered_df[filtered_df["impact_rating_standardized"].notna()]
	        .groupby("legislative_strategy_standardized")["impact_rating_standardized"]
	        .apply(lambda x: (x == "Very Impactful").sum())
	    )
	    # Mean impact score per beneficiary category, highest first.
	    avg_impact_ben = (
	        filtered_df.dropna(subset=["impact_rating_score"])
	        .groupby("beneficiary_category")["impact_rating_score"]
	        .mean()
	        .sort_values(ascending=False)
	    )

	    total_bills = len(filtered_df)
	    total_high_impact = (filtered_df["impact_rating_standardized"] == "Very Impactful").sum()

	    # idxmax() names a strategy even when every count is zero; report "N/A"
	    # when no strategy produced a Very Impactful bill.
	    if strategy_impact.empty or strategy_impact.max() == 0:
	        best_strategy = "N/A"
	    else:
	        best_strategy = strategy_impact.idxmax()

	    st.markdown("### Key Insights")
	    st.write(f"Total Bills Considered: {total_bills}")
	    st.write(f"Total Very Impactful Bills: {total_high_impact}")
	    st.write(f"Most Active Policy Domain: {top_policy.index[0]} ({top_policy.iloc[0]} bills)" if not top_policy.empty else "No data")
	    st.write(f"Most Benefited Group: {top_beneficiaries.index[0]} ({top_beneficiaries.iloc[0]} bills)" if not top_beneficiaries.empty else "No data")
	    st.write(f"Strategy Producing Most Very Impactful Bills: {best_strategy}")
	    st.write(f"Highest Average Impact (Beneficiary): {avg_impact_ben.index[0]} ({avg_impact_ben.iloc[0]:.2f})" if not avg_impact_ben.empty else "N/A")
	    st.markdown("---")

	# Policy-domain activity and impact distribution, side by side.
	# Re-entering the tab container appends below what it already holds.
	with tab_trends:
	    col1, col2 = st.columns(2)

	    # Policy Domain
	    with col1:
	        st.markdown("### Policy Domain Activity")
	        policy_agg = (
	            filtered_df.groupby("policy_domain_standardized")
	            .agg(
	                Count=("bill_id", "count"),
	                avg_impact=("impact_rating_score", "mean"),
	                # Drop missing titles and stringify so join() cannot hit a NaN float.
	                top_bills=("title", lambda x: "; ".join(x.dropna().astype(str).head(5))),
	                top_beneficiaries=(
	                    "intended_beneficiaries_standardized",
	                    lambda x: ", ".join(map(str, x.value_counts().head(3).index)),
	                ),
	                # NaT has no strftime(); a group without any date yields "".
	                recent_date=(
	                    "status_date_y",
	                    lambda x: x.max().strftime("%Y-%m-%d") if x.notna().any() else "",
	                ),
	                bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5)))),
	            )
	            .reset_index()
	            .rename(columns={"policy_domain_standardized": "Policy Domain"})
	        )
	        policy_chart = (
	            alt.Chart(policy_agg)
	            .mark_bar()
	            .encode(
	                x=alt.X("Policy Domain:N", sort="-y", title="Policy Domain"),
	                y=alt.Y("Count:Q", title="Number of Bills"),
	                color=alt.Color("Count:Q", scale=alt.Scale(scheme="reds"), legend=None),
	                tooltip=[
	                    alt.Tooltip("Policy Domain:N"),
	                    alt.Tooltip("Count:Q", title="Number of Bills"),
	                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
	                    alt.Tooltip("top_bills:N", title="Top Bills"),
	                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
	                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
	                    alt.Tooltip("bill_numbers:N", title="Bill Numbers"),
	                ],
	            )
	            .properties(height=400)
	        )
	        st.altair_chart(policy_chart, use_container_width=True)

	    # Impact Distribution
	    with col2:
	        st.markdown("### Impact Distribution")
	        # One row per impact label in IMPACT_ORDER; absent labels count as 0.
	        impact_dist = (
	            filtered_df[filtered_df["impact_rating_standardized"].notna()]["impact_rating_standardized"]
	            .value_counts()
	            .reindex(IMPACT_ORDER, fill_value=0)
	            .reset_index()
	        )
	        impact_dist.columns = ["Impact Level", "Count"]

	        impact_chart = (
	            alt.Chart(impact_dist)
	            .mark_bar()
	            .encode(
	                x=alt.X("Impact Level:N", sort=IMPACT_ORDER),
	                y=alt.Y("Count:Q"),
	                color=alt.Color("Count:Q", scale=alt.Scale(scheme="reds")),
	                tooltip=[
	                    alt.Tooltip("Impact Level:N"),
	                    alt.Tooltip("Count:Q"),
	                ],
	            )
	            .properties(height=300)
	        )
	        st.altair_chart(impact_chart, use_container_width=True)

	# Strategy High Impact
	# Re-entering the tab container appends below what it already holds.
	with tab_trends:
	    st.markdown("### Legislative Strategy: Very Impactful Bills")
	    strategy_high_impact = (
	        filtered_df[filtered_df["impact_rating_standardized"].notna()]
	        .groupby("legislative_strategy_standardized")
	        .agg(
	            Very_Impactful_Bills=(
	                "impact_rating_standardized",
	                lambda x: (x == "Very Impactful").sum(),
	            ),
	            # Drop missing titles and stringify so join() cannot hit a NaN float.
	            top_bills=("title", lambda x: "; ".join(x.dropna().astype(str).head(5))),
	            top_beneficiaries=(
	                "intended_beneficiaries_standardized",
	                lambda x: ", ".join(map(str, x.value_counts().head(3).index)),
	            ),
	            # NaT has no strftime(); a group without any date yields "".
	            recent_date=(
	                "status_date_y",
	                lambda x: x.max().strftime("%Y-%m-%d") if x.notna().any() else "",
	            ),
	        )
	        .reset_index()
	        .rename(columns={"legislative_strategy_standardized": "Strategy"})
	    )

	    strategy_chart = (
	        alt.Chart(strategy_high_impact)
	        .mark_bar()
	        .encode(
	            x=alt.X("Strategy:N", sort="-y", title="Strategy"),
	            y=alt.Y("Very_Impactful_Bills:Q", title="Very Impactful Bills"),
	            color=alt.Color("Very_Impactful_Bills:Q", scale=alt.Scale(scheme="orangered")),
	            tooltip=[
	                alt.Tooltip("Strategy:N"),
	                alt.Tooltip("Very_Impactful_Bills:Q"),
	                alt.Tooltip("top_bills:N", title="Top Bills"),
	                alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
	                alt.Tooltip("recent_date:N", title="Most Recent Bill"),
	            ],
	        )
	        .properties(height=400)
	    )

	    st.altair_chart(strategy_chart, use_container_width=True)

	# Impact by Category
	# Re-entering the tab container appends below what it already holds.
	with tab_trends:
	    st.markdown("### Impact by Category")
	    # One row per (category, impact rating) pair among fully labelled bills.
	    impact_cat = (
	        filtered_df[
	            filtered_df["impact_rating_standardized"].notna()
	            & filtered_df["category_main_label"].notna()
	        ]
	        .groupby(["category_main_label", "impact_rating_standardized"])
	        .agg(
	            Count=("bill_id", "count"),
	            avg_impact=("impact_rating_score", "mean"),
	            # Drop missing titles and stringify so join() cannot hit a NaN float.
	            top_bills=("title", lambda x: "; ".join(x.dropna().astype(str).head(5))),
	            top_beneficiaries=(
	                "intended_beneficiaries_standardized",
	                lambda x: ", ".join(map(str, x.value_counts().head(3).index)),
	            ),
	            # NaT has no strftime(); a group without any date yields "".
	            recent_date=(
	                "status_date_y",
	                lambda x: x.max().strftime("%Y-%m-%d") if x.notna().any() else "",
	            ),
	            bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5)))),
	        )
	        .reset_index()
	    )

	    if impact_cat.empty:
	        st.write("No data available for impact by category.")
	    else:
	        # Limit the chart to the 15 categories with the most bills.
	        top_categories = (
	            impact_cat.groupby("category_main_label")["Count"]
	            .sum()
	            .sort_values(ascending=False)
	            .head(15)
	            .index.tolist()
	        )
	        impact_cat_top = impact_cat[impact_cat["category_main_label"].isin(top_categories)]

	        impact_cat_chart = (
	            alt.Chart(impact_cat_top)
	            .mark_bar()
	            .encode(
	                y=alt.Y("category_main_label:N", sort=top_categories, title="Category"),
	                x=alt.X("Count:Q", stack="zero", title="Number of Bills"),
	                color=alt.Color(
	                    "impact_rating_standardized:N",
	                    sort=IMPACT_ORDER,
	                    scale=alt.Scale(scheme="reds"),
	                    title="Impact Rating",
	                ),
	                tooltip=[
	                    alt.Tooltip("category_main_label:N", title="Category"),
	                    alt.Tooltip("impact_rating_standardized:N", title="Impact Rating"),
	                    alt.Tooltip("Count:Q", title="Number of Bills"),
	                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
	                    alt.Tooltip("top_bills:N", title="Top Bills"),
	                    alt.Tooltip("top_beneficiaries:N", title="Top Beneficiaries"),
	                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
	                    alt.Tooltip("bill_numbers:N", title="Bill Numbers"),
	                ],
	            )
	            .properties(height=400)
	        )

	        st.altair_chart(impact_cat_chart, use_container_width=True)

	# Beneficiary Treemap
	# Re-entering the tab container appends below what it already holds.
	with tab_trends:
	    st.markdown("### Beneficiary Coverage & Average Impact")
	    # Bill count and mean impact score per beneficiary category.
	    ben_treemap_df = (
	        filtered_df.dropna(subset=["beneficiary_category", "impact_rating_score"])
	        .groupby("beneficiary_category")
	        .agg(
	            total_bills=("bill_id", "count"),
	            avg_impact=("impact_rating_score", "mean"),
	            # Drop missing titles and stringify so join() cannot hit a NaN float.
	            top_bills=("title", lambda x: "; ".join(x.dropna().astype(str).head(5))),
	            # NaT has no strftime(); a group without any date yields "".
	            recent_date=(
	                "status_date_y",
	                lambda x: x.max().strftime("%Y-%m-%d") if x.notna().any() else "",
	            ),
	            bill_numbers=("bill_number", lambda x: ", ".join(map(str, x.head(5)))),
	        )
	        .reset_index()
	    )

	    if not ben_treemap_df.empty:
	        treemap = (
	            alt.Chart(ben_treemap_df)
	            .mark_rect()
	            .encode(
	                x=alt.X("total_bills:Q", title="Number of Bills"),
	                y=alt.Y("beneficiary_category:N", sort="-x", title="Beneficiary Category"),
	                size="total_bills:Q",
	                color=alt.Color(
	                    "avg_impact:Q",
	                    scale=alt.Scale(domain=[0, 4], range=["#FFF176", "#E53935"]),
	                    legend=alt.Legend(title="Average Impact Score"),
	                ),
	                tooltip=[
	                    alt.Tooltip("beneficiary_category:N", title="Beneficiary"),
	                    alt.Tooltip("total_bills:Q", title="Number of Bills"),
	                    alt.Tooltip("avg_impact:Q", format=".2f", title="Average Impact"),
	                    alt.Tooltip("top_bills:N", title="Top Bills"),
	                    alt.Tooltip("recent_date:N", title="Most Recent Bill"),
	                    alt.Tooltip("bill_numbers:N", title="Bill Numbers"),
	                ],
	            )
	            .properties(height=400)
	        )
	        st.altair_chart(treemap, use_container_width=True)
	    else:
	        st.write("No beneficiary impact data available for selected filters.")