Spaces:

ralate2
/

UniversityofIllinoisSystem_LoveDataWeek2026

Sleeping

App Files Files Community

UniversityofIllinoisSystem_LoveDataWeek2026 / app.py

ralate2

Update app.py

70451a9 verified 3 months ago

raw

history blame contribute delete

40.5 kB




	# app.py — U of I Legislation Impact Dashboard (HF Spaces friendly)
	# ---------------------------------------------------------------

	import os
	import re
	import ast
	import numpy as np
	import pandas as pd
	import streamlit as st
	import plotly.express as px

	# NEW (RAG)
	from sentence_transformers import SentenceTransformer
	from transformers import pipeline
	from sklearn.metrics.pairwise import cosine_similarity
	from sklearn.feature_extraction.text import TfidfVectorizer

	# Set before any tokenizer work happens in this process.
	# NOTE(review): presumably this silences the Hugging Face tokenizers
	# parallelism/fork warning — confirm it is still needed while the RAG
	# section below is commented out.
	os.environ["TOKENIZERS_PARALLELISM"] = "false"

	# -----------------------------
	# Page config
	# -----------------------------
	# Wide layout with the sidebar collapsed; the sidebar is additionally hidden
	# by the CSS injected further down.
	st.set_page_config(
	page_title="U of I Legislation Impact Dashboard",
	page_icon="📊",
	layout="wide",
	initial_sidebar_state="collapsed",
	)

	# -----------------------------
	# U of I palette (ONLY)
	# -----------------------------
	# These hex values are interpolated into the CSS f-strings and passed to the
	# Plotly figures below, so every visual element draws from the same palette.
	ILLINI_BLUE = "#13294B"  # banner / section-title background, KPI values, "Enacted" bars
	ILLINI_ORANGE = "#FF552E"  # KPI accent, trend line, "Pending" bars, source link
	ILLINI_ALT_BLUE = "#1E3877"  # secondary chart colour (treemap, pies, beneficiary bars)
	ILLINI_LIGHT = "#E8EDF5"  # bottom colour of the page background gradient
	TEXT_DARK = "#0B1220"  # chart font colour and panel body text

	# -----------------------------
	# CSS (title + tight spacing + full-width buttons)
	# -----------------------------
	# Inject the base stylesheet as raw HTML. The string is an f-string, so CSS
	# braces are doubled ({{ }}) and single-brace fields pull in the palette
	# constants. It defines the classes used by the HTML snippets rendered later
	# (.uofi-banner, .uofi-sub, .kpi-*, .section-title, .panel) and tweaks a few
	# Streamlit containers addressed through their data-testid attributes.
	st.markdown(
	f"""
	<style>
	.block-container {{
	padding-top: 2.0rem !important;
	padding-bottom: 1.0rem !important;
	}}
	header[data-testid="stHeader"] {{
	height: 0.25rem;
	}}
	.main {{
	background: linear-gradient(180deg, #FFFFFF 0%, {ILLINI_LIGHT} 100%);
	}}
	.uofi-banner {{
	margin-top: 0.65rem;
	background: {ILLINI_BLUE};
	color: white;
	padding: 20px 22px;
	border-radius: 14px;
	font-weight: 950;
	font-size: clamp(24px, 2.8vw, 34px);
	letter-spacing: 0.2px;
	margin-bottom: 8px;
	box-shadow: 0 8px 22px rgba(19,41,75,0.18);
	white-space: normal !important;
	overflow: visible !important;
	line-height: 1.18;
	word-break: break-word;
	}}
	.uofi-sub {{
	margin-top: 8px;
	font-size: 13px;
	opacity: 0.92;
	font-weight: 600;
	white-space: normal !important;
	overflow: visible !important;
	}}
	/* KPI cards */
	.kpi-card {{
	background: white;
	border-radius: 14px;
	padding: 12px 12px;
	border: 1px solid rgba(19,41,75,0.10);
	box-shadow: 0 8px 18px rgba(19,41,75,0.08);
	min-height: 84px;
	}}
	.kpi-title {{
	font-size: 13px;
	color: rgba(11,18,32,0.72);
	font-weight: 900;
	}}
	.kpi-value {{
	font-size: 26px;
	font-weight: 950;
	color: {ILLINI_BLUE};
	margin-top: 2px;
	}}
	.kpi-foot {{
	font-size: 12px;
	color: rgba(11,18,32,0.60);
	margin-top: 2px;
	}}
	.kpi-accent {{
	color: {ILLINI_ORANGE};
	font-weight: 950;
	}}
	/* Section titles */
	.section-title {{
	font-size: 15px;
	font-weight: 950;
	color: white;
	background: {ILLINI_BLUE};
	padding: 8px 10px;
	border-radius: 12px;
	margin: 4px 0 8px 0;
	box-shadow: 0 8px 18px rgba(19,41,75,0.10);
	}}
	/* Panels */
	.panel {{
	background: white;
	border-radius: 14px;
	padding: 10px;
	border: 1px solid rgba(19,41,75,0.10);
	box-shadow: 0 8px 18px rgba(19,41,75,0.08);
	}}
	.stVerticalBlock {{
	gap: 0.28rem !important;
	}}
	div[data-testid="stDataFrame"] * {{
	white-space: normal !important;
	}}
	/* Full width download button without deprecated args */
	div[data-testid="stDownloadButton"] > button {{
	width: 100% !important;
	}}
	</style>
	""",
	unsafe_allow_html=True,
	)

	# Second stylesheet: enlarges and bolds the widget labels, slider values,
	# select boxes and multiselect chips used by the filter row, and pads the
	# layout columns.
	# NOTE(review): the last rule hides the sidebar entirely
	# (section[data-testid="stSidebar"] -> display: none); the CSS comment above
	# it talks about top whitespace, which does not describe what the rule does.
	st.markdown(
	f"""
	<style>

	/* ============================
	FILTER ROW: bigger labels + bold
	============================ */

	/* Make ALL widget labels bigger + bold */
	div[data-testid="stWidgetLabel"] > label {{
	font-size: 16px !important;
	font-weight: 900 !important;
	color: {ILLINI_BLUE} !important;
	}}

	/* Increase spacing between the 3 filter columns */
	div[data-testid="column"] {{
	padding-left: 8px !important;
	padding-right: 8px !important;
	}}

	/* Make the Year range slider values (2019 / 2026) more readable */
	div[data-testid="stSlider"] {{
	font-size: 14px !important;
	font-weight: 700 !important;
	}}

	/* Make selectbox + multiselect text slightly bigger */
	div[data-baseweb="select"] * {{
	font-size: 15px !important;
	font-weight: 700 !important;
	}}

	/* Multi-select chips ("Pending", "Enacted") bolder */
	span[data-baseweb="tag"] {{
	font-weight: 900 !important;
	font-size: 14px !important;
	}}

	/* Reduce the weird extra top whitespace around widgets */
	section[data-testid="stSidebar"] {{
	display: none !important;
	}}

	</style>
	""",
	unsafe_allow_html=True,
	)



	# -----------------------------
	# Helpers
	# -----------------------------
	def safe_col(df, candidates):
	    """Return the first name in *candidates* that is a column of *df*.

	    Candidates are checked in the order given; ``None`` is returned when
	    none of them is present.
	    """
	    return next((name for name in candidates if name in df.columns), None)

	def chamber_from_bill_id(bid):
	    """Map a bill id to ``"Senate"`` / ``"House"`` using its second token.

	    The id is split on whitespace and the second token is looked up
	    (``"S"`` -> Senate, ``"H"`` -> House). Missing ids, ids with fewer than
	    two tokens, and unrecognised codes all yield ``np.nan``.
	    """
	    if pd.isna(bid):
	        return np.nan

	    tokens = str(bid).strip().split()
	    if len(tokens) < 2:
	        return np.nan

	    chamber_by_code = {"S": "Senate", "H": "House"}
	    return chamber_by_code.get(tokens[1], np.nan)

	def party_from_author(author):
	    """Extract the party code from an author string ending in ``(D)`` or ``(R)``.

	    The match is case-insensitive and tolerates trailing whitespace; the
	    result is upper-cased. Missing input or a string without a trailing
	    ``(D)`` / ``(R)`` marker yields ``np.nan``.

	    NOTE(review): assumes the party is written in parentheses at the end of
	    the author field (e.g. ``"Smith (D)"``) — confirm against the dataset.
	    """
	    if pd.isna(author):
	        return np.nan
	    # The previous pattern used `$` anchors where escaped parentheses were
	    # intended and an escaped (literal) pipe, so it could never extract a party.
	    m = re.search(r"\((D|R)\)\s*$", str(author).strip(), flags=re.I)
	    return m.group(1).upper() if m else np.nan

	def to_dt(series):
	    """Parse *series* with ``pd.to_datetime``; unparseable values are coerced to NaT."""
	    parsed = pd.to_datetime(series, errors="coerce")
	    return parsed

	def style_plotly(fig):
	    """Apply the dashboard's shared layout to *fig* and return it.

	    Calls ``fig.update_layout`` once with the white template, dark 11px
	    font, tight margins, a horizontal legend anchored at the top-right
	    above the plot, and a 14px title font.
	    """
	    legend_cfg = {
	        "orientation": "h",
	        "yanchor": "bottom",
	        "y": 1.02,
	        "xanchor": "right",
	        "x": 1,
	        "font": {"size": 10},
	    }
	    layout_cfg = {
	        "template": "plotly_white",
	        "font": {"color": TEXT_DARK, "size": 11},
	        "margin": {"l": 10, "r": 10, "t": 46, "b": 10},
	        "legend": legend_cfg,
	        "title": {"font": {"size": 14}},
	    }
	    fig.update_layout(**layout_cfg)
	    return fig

	def clean_text(x):
	    """Return *x* as a single-line string with whitespace runs collapsed.

	    Missing values (as judged by ``pd.isna``) become the empty string.
	    """
	    if pd.isna(x):
	        return ""
	    collapsed = re.sub(r"\s+", " ", str(x))
	    return collapsed.strip()

	def parse_listish(x):
	    """Coerce a list-like cell value into a list of non-empty, stripped strings.

	    Accepted inputs:
	    * a real ``list`` — items are stringified and stripped, blanks dropped;
	    * a string holding a Python list literal (``"['a', 'b']"``);
	    * any other string — split on commas.
	    Missing values and blank strings yield ``[]``.
	    """
	    def _clean(items):
	        # Stringify, strip, and drop entries that end up empty.
	        return [str(t).strip() for t in items if str(t).strip()]

	    # Lists must be handled before pd.isna(): on a list pd.isna returns an
	    # array, and using that array in `if` raises for multi-element lists.
	    if isinstance(x, list):
	        return _clean(x)
	    if pd.isna(x):
	        return []

	    s = str(x).strip()
	    if not s:
	        return []

	    try:
	        v = ast.literal_eval(s)
	    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	        # Not a Python literal: fall through to comma splitting (best effort).
	        v = None
	    if isinstance(v, list):
	        return _clean(v)

	    return [t.strip() for t in s.split(",") if t.strip()]

	def enforce_two_sentences(text: str) -> str:
	    """Return at most the first two sentences of *text*, joined by one space.

	    Sentences are split on whitespace that follows ``.``, ``!`` or ``?``.
	    ``None`` or empty input yields the empty string.
	    """
	    cleaned = (text or "").strip()
	    pieces = (chunk.strip() for chunk in re.split(r"(?<=[.!?])\s+", cleaned))
	    kept = [chunk for chunk in pieces if chunk][:2]
	    return " ".join(kept).strip()

	# -----------------------------
	# Load data (Viz-ready)
	# -----------------------------
	@st.cache_data(show_spinner=False)
	def load_data():
	    """Load the dashboard CSV from the first candidate path that exists.

	    Returns:
	        A ``(DataFrame, file_name)`` tuple, where ``file_name`` is the base
	        name of the file that was read.

	    Raises:
	        FileNotFoundError: if none of the candidate paths exists.
	    """
	    # Checked in priority order: viz-ready file first, then the raw export,
	    # each in the working directory and then under /mnt/data.
	    search_paths = (
	        "illinois_legislation_VIZ_READY.csv",
	        "/mnt/data/illinois_legislation_VIZ_READY.csv",
	        "illinois_postsecondary_legislation.csv",
	        "/mnt/data/illinois_postsecondary_legislation.csv",
	    )
	    found = next((p for p in search_paths if os.path.exists(p)), None)
	    if found is None:
	        raise FileNotFoundError("Could not find the viz-ready CSV in the app directory.")
	    return pd.read_csv(found), os.path.basename(found)

	raw, filename = load_data()
	# Work on a copy so the cached frame returned by load_data() is never mutated.
	df = raw.copy()

	# -----------------------------
	# Enforce 2019–2026 + derive minimal helpers if missing
	# -----------------------------
	# "year" is the one column the app cannot derive; stop rendering without it.
	if "year" not in df.columns:
	    st.error("Missing required column: year")
	    st.stop()

	# Non-numeric years become NaN and therefore fall outside the window below.
	df["year"] = pd.to_numeric(df["year"], errors="coerce")
	in_window = df["year"].between(2019, 2026, inclusive="both")
	df = df[in_window].copy()

	# chamber: derive from bill_id when the column is absent, otherwise leave as loaded
	if "chamber" not in df.columns:
	    df["chamber"] = (
	        df["bill_id"].apply(chamber_from_bill_id) if "bill_id" in df.columns else np.nan
	    )

	# dates: pick the first available last-action-date column
	date_col = safe_col(
	    df,
	    ["last_action_date_parsed", "last_action_date_dt", "last_action_date_clean", "last_action_date"],
	)
	if date_col is None:
	    st.error("Missing last action date column (last_action_date*)")
	    st.stop()

	# "last_action_date_parsed" is the first candidate above, so date_col is that
	# column whenever it exists; parsing df[date_col] therefore covers both the
	# "already present" and the "derive from another column" cases.
	df["last_action_date_parsed"] = to_dt(df[date_col])

	# Fixed reference date used for all recency calculations.
	NOW = pd.Timestamp("2026-01-23")
	elapsed = NOW - df["last_action_date_parsed"]
	df["days_since_last_action"] = elapsed.dt.days
	df["is_recent_90d"] = df["days_since_last_action"].between(0, 90, inclusive="both")

	# status stage: use an existing stage/label column, otherwise derive
	# "Enacted" / "Pending" from the free-text status column.
	status_stage_col = safe_col(df, ["status_stage", "status_label"])
	if status_stage_col is None:
	    status_stage_col = "status_stage"
	    if "status" in df.columns:
	        s = df["status"].fillna("").astype(str).str.lower()
	        # Plain `|` alternation: the previous pattern escaped the pipes, which
	        # made them literal characters so "Enacted" was never matched.
	        is_enacted = s.str.contains("enacted|signed|public act", regex=True)
	        is_pending = s.str.contains("pending", regex=False)
	        df["status_stage"] = np.where(
	            is_enacted, "Enacted",
	            np.where(is_pending, "Pending", np.nan),
	        )
	    else:
	        df["status_stage"] = np.nan

	# Normalise to stripped strings; rows without a recognised stage end up as
	# the text "nan" and are excluded later by the Pending/Enacted status filter.
	df[status_stage_col] = df[status_stage_col].astype(str).str.strip()
	df[status_stage_col] = df[status_stage_col].replace({"pending": "Pending", "enacted": "Enacted"})

	# party: prefer the prepared column, otherwise parse it out of the author field
	party_col = safe_col(df, ["primary_author_party"])
	if party_col is None:
	    party_col = "primary_author_party"
	    a_col = safe_col(df, ["author_clean", "author"])
	    df[party_col] = df[a_col].apply(party_from_author) if a_col else np.nan

	# standardized title bucket: fall back to the raw title (or a placeholder)
	title_bucket_col = safe_col(df, ["title_std_bucket", "title_standardized", "title_nlp", "title_bucket"])
	if title_bucket_col is None:
	    title_bucket_col = "title_std_bucket"
	    if "title" in df.columns:
	        df[title_bucket_col] = df["title"].fillna("").astype(str)
	    else:
	        df[title_bucket_col] = "No Title"

	# policy: when the bucket column is missing, create a placeholder column and
	# point policy_col at it. policy_col is used for the treemap, the impact view
	# and the watchlist, so it must name a column that actually exists
	# (the previous fallback name was not a column of df and raised KeyError).
	policy_col = safe_col(df, ["policy_area_bucket"])
	if policy_col is None:
	    df["policy_area_bucket"] = "Not Available"
	    policy_col = "policy_area_bucket"

	# stakeholder + beneficiary: first matching column wins, else a placeholder column
	stake_col = safe_col(
	    df,
	    ["stakeholder_group", "stakeholder_group_bucket", "stakeholder_bucket", "stakeholders_bucket", "stakeholder"],
	)
	if stake_col is None:
	    stake_col = "stakeholder_group"
	    df[stake_col] = "Not Available"

	benef_col = safe_col(
	    df,
	    [
	        "beneficiary_type",
	        "beneficiary_bucket",
	        "beneficiary_category",
	        "intended_beneficiaries_standardized_final",
	        "intended_beneficiaries_bucket",
	    ],
	)
	if benef_col is None:
	    benef_col = "beneficiary_type"
	    df[benef_col] = "Not Available"

	# optional fields used in RAG / watchlist
	if "bill_age_days" not in df.columns:
	    # No separate age column: reuse days since the last action.
	    df["bill_age_days"] = df["days_since_last_action"]

	if "action_recency_bucket" not in df.columns:
	    bins = [-1, 30, 90, 180, 365, 999999]
	    labels = ["0–30d", "31–90d", "91–180d", "181–365d", "365d+"]
	    # Undated rows are filled with the upper edge so they land in "365d+".
	    days_filled = df["days_since_last_action"].fillna(999999)
	    df["action_recency_bucket"] = pd.cut(days_filled, bins=bins, labels=labels)

	# Text columns that later code reads unconditionally; default them to "".
	for c in ["status_step", "pending_committee_name", "pending_chamber"]:
	    if c not in df.columns:
	        df[c] = ""

	if "author_party_combo" not in df.columns:
	    df["author_party_combo"] = df[party_col].fillna("").astype(str)

	if "sponsor_count" not in df.columns:
	    df["sponsor_count"] = np.nan

	# -----------------------------
	# Title spacer + Header
	# -----------------------------
	# 6px spacer above the banner.
	st.markdown("<div style='height:6px'></div>", unsafe_allow_html=True)

	# Banner: dashboard title plus a subtitle listing the sections in page order.
	# Styled by the .uofi-banner / .uofi-sub classes from the injected CSS.
	st.markdown(
	"""
	<div class="uofi-banner">
	U of I Legislation Impact Dashboard
	<div class="uofi-sub">
	2019–2026 • Trends → Party share → Executive themes → Stakeholders → Beneficiaries → Impact view → Watchlist → Policy Domain Summary at a Glance
	</div>
	</div>
	""",
	unsafe_allow_html=True,
	)

	# -----------------------------
	# Public dataset link (NCSL)
	# -----------------------------
	# Attribution panel linking to the NCSL database; the link opens in a new tab
	# (target="_blank"). Colours come from the palette constants via the f-string.
	st.markdown(
	f"""
	<div class="panel">
	<div style="font-weight:900; color:{ILLINI_BLUE}; margin-bottom:6px;"> Data Source Link </div>
	<div style="color:{TEXT_DARK}; font-size:13px; line-height:1.35;">
	This dashboard is made using the NCSL Postsecondary Legislation Database.
	<br/>
	<a href="https://www.ncsl.org/education/postsecondary-legislation-database" target="_blank"
	style="color:{ILLINI_ORANGE}; font-weight:900; text-decoration:none;">
	Open NCSL Postsecondary Legislation Database ↗
	</a>
	</div>
	</div>
	""",
	unsafe_allow_html=True,
	)
	# 28px spacer between the source panel and the filter row.
	st.markdown("<div style='height:28px'></div>", unsafe_allow_html=True)

	# -----------------------------
	# Filters (Year, Chamber, Status)
	# -----------------------------
	f1, f2, f3 = st.columns([2.0, 2.0, 2.0], gap="large")

	with f1:
	    # Default the slider to the span of years present, clamped to 2019–2026.
	    years = sorted(int(y) for y in df["year"].dropna().unique())
	    if years:
	        ymin, ymax = min(years), max(years)
	    else:
	        ymin, ymax = 2019, 2026
	    year_range = st.slider("Year range", 2019, 2026, (max(2019, ymin), min(2026, ymax)))

	with f2:
	    # Offer only the chambers that actually occur in the data.
	    present_chambers = df["chamber"].dropna().unique().tolist()
	    chamber_opts = ["All"] + [c for c in ["House", "Senate"] if c in present_chambers]
	    sel_chamber = st.selectbox("Chamber", chamber_opts, index=0)

	with f3:
	    status_opts = ["Pending", "Enacted"]
	    sel_status = st.multiselect("Status", options=status_opts, default=status_opts)

	st.caption(f"Dashboard dataset: {filename}")

	# Apply filters: combine the three selections into one row mask, then take a
	# copy so later column assignments never touch df.
	keep_rows = df["year"].between(year_range[0], year_range[1], inclusive="both")
	if sel_chamber != "All":
	    keep_rows = keep_rows & (df["chamber"] == sel_chamber)
	keep_rows = keep_rows & df[status_stage_col].isin(sel_status)
	f = df[keep_rows].copy()

	# -----------------------------
	# KPIs
	# -----------------------------
	is_enacted = f[status_stage_col] == "Enacted"
	is_pending = f[status_stage_col] == "Pending"

	total_bills = len(f)
	recent_90 = int(f["is_recent_90d"].fillna(False).sum())
	enacted_ct = int(is_enacted.sum())
	pending_ct = int(is_pending.sum())
	# Share of enacted bills among enacted + pending; 0.0 when neither is present.
	enact_rate = (enacted_ct / (enacted_ct + pending_ct)) if (enacted_ct + pending_ct) > 0 else 0.0
	stuck_pending_365 = int((is_pending & (f["days_since_last_action"] > 365)).sum())

	k1, k2, k3, k4 = st.columns(4, gap="small")

	# One (column, html) pair per card, rendered left to right.
	kpi_cards = [
	    (
	        k1,
	        f"""
	<div class="kpi-card">
	<div class="kpi-title">Bills in View</div>
	<div class="kpi-value">{total_bills:,}</div>
	<div class="kpi-foot">Filtered cohort</div>
	</div>
	""",
	    ),
	    (
	        k2,
	        f"""
	<div class="kpi-card">
	<div class="kpi-title">Recent (≤ 90 Days)</div>
	<div class="kpi-value"><span class="kpi-accent">{recent_90:,}</span></div>
	<div class="kpi-foot">Latest movement / momentum</div>
	</div>
	""",
	    ),
	    (
	        k3,
	        f"""
	<div class="kpi-card">
	<div class="kpi-title">Enactment Rate</div>
	<div class="kpi-value">{enact_rate*100:,.1f}%</div>
	<div class="kpi-foot">{enacted_ct:,} enacted vs {pending_ct:,} pending</div>
	</div>
	""",
	    ),
	    (
	        k4,
	        f"""
	<div class="kpi-card">
	<div class="kpi-title">Stuck at Pending Stage (> 365 Days)</div>
	<div class="kpi-value">{stuck_pending_365:,}</div>
	<div class="kpi-foot">Aging bills needing attention</div>
	</div>
	""",
	    ),
	]
	for kpi_slot, kpi_html in kpi_cards:
	    with kpi_slot:
	        st.markdown(kpi_html, unsafe_allow_html=True)

	# -----------------------------
	# Row 1: Trend + Party donut
	# -----------------------------
	c1, c2 = st.columns([1.55, 1.0], gap="small")

	with c1:
	    st.markdown('<div class="section-title">Trend: Bills Over Time (Monthly)</div>', unsafe_allow_html=True)

	    # Count bills per calendar month of their last action date.
	    dated = f.dropna(subset=["last_action_date_parsed"])
	    month_key = dated["last_action_date_parsed"].dt.to_period("M").astype(str)
	    ts = dated.assign(ym=month_key).groupby("ym").size().reset_index(name="bills")

	    if not ts.empty:
	        # Sort chronologically via a parsed helper column; the axis itself
	        # uses the "YYYY-MM" strings.
	        ts["ym_dt"] = pd.to_datetime(ts["ym"], errors="coerce")
	        ts = ts.sort_values("ym_dt")

	        fig = px.line(
	            ts,
	            x="ym",
	            y="bills",
	            markers=True,
	            title="Bills per month (by last action date)",
	            color_discrete_sequence=[ILLINI_ORANGE],
	        )
	        fig = style_plotly(fig)
	        fig.update_xaxes(title="", tickangle=0)
	        fig.update_yaxes(title="Bills")
	        st.plotly_chart(fig, width="stretch")
	    else:
	        st.info("No dated bills found for the selected filters.")

	with c2:
	    st.markdown('<div class="section-title">Democrat vs Republican Share</div>', unsafe_allow_html=True)

	    # Keep only rows whose primary author party is D or R.
	    p = f[party_col].fillna("").astype(str).str.upper()
	    p = p[p.isin(["D", "R"])]

	    if not p.empty:
	        pie_df = p.value_counts().reset_index()
	        pie_df.columns = ["party", "count"]
	        fig = px.pie(
	            pie_df,
	            names="party",
	            values="count",
	            hole=0.58,
	            title="Primary author party (D vs R)",
	            color="party",
	            color_discrete_map={"D": ILLINI_BLUE, "R": ILLINI_ORANGE},
	        )
	        fig = style_plotly(fig)
	        fig.update_traces(textposition="inside", textinfo="percent+label", textfont=dict(size=11))
	        st.plotly_chart(fig, width="stretch")
	    else:
	        st.info("Party share not available for this filtered view.")

	# -----------------------------
	# Row 2: Policy treemap + Stakeholder pie
	# -----------------------------
	# r2a is the left column (stakeholders) and r2b the right (policy areas);
	# the policy block is written first but renders on the right.
	r2a, r2b = st.columns([1.05, 1.2], gap="small")

	with r2b:
	    st.markdown('<div class="section-title">Executive Themes: Policy Areas</div>', unsafe_allow_html=True)

	    # Drop missing values BEFORE casting to str; casting first can turn NaN
	    # into the literal text "nan", which then survives dropna() and is
	    # counted as a category.
	    policy_counts = (
	        f[policy_col].dropna().astype(str).str.strip().replace({"": np.nan}).dropna()
	        .value_counts().head(9).reset_index()
	    )
	    policy_counts.columns = ["policy_area_bucket", "bills"]

	    if policy_counts.empty:
	        st.info("Policy areas not available for this filtered view.")
	    else:
	        fig1 = px.treemap(
	            policy_counts,
	            path=["policy_area_bucket"],
	            values="bills",
	            title="Policy area concentration",
	            color_discrete_sequence=[ILLINI_BLUE, ILLINI_ALT_BLUE, ILLINI_ORANGE],
	        )
	        fig1 = style_plotly(fig1)
	        st.plotly_chart(fig1, width="stretch")

	with r2a:
	    st.markdown('<div class="section-title">Stakeholder Themes: Who is affected?</div>', unsafe_allow_html=True)

	    # Same missing-value handling as for the policy counts above.
	    stake_counts = (
	        f[stake_col].dropna().astype(str).str.strip().replace({"": np.nan}).dropna()
	        .value_counts().head(5).reset_index()
	    )
	    stake_counts.columns = ["stakeholder_group", "bills"]

	    if stake_counts.empty:
	        st.info("Stakeholder themes not available for this filtered view.")
	    else:
	        fig7 = px.pie(
	            stake_counts,
	            names="stakeholder_group",
	            values="bills",
	            title="Stakeholder share (pie)",
	            color_discrete_sequence=[ILLINI_ALT_BLUE, ILLINI_BLUE, ILLINI_ORANGE, ILLINI_ALT_BLUE, ILLINI_BLUE],
	        )
	        fig7 = style_plotly(fig7)
	        # Labels are drawn inside the slices, so the legend is redundant.
	        fig7.update_layout(showlegend=False)
	        fig7.update_traces(textposition="inside", textinfo="percent+label", textfont=dict(size=12))
	        st.plotly_chart(fig7, width="stretch")

	# -----------------------------
	# Row 3: Beneficiary bar + Impact stacked
	# -----------------------------
	r3a, r3b = st.columns([1.0, 1.55], gap="small")

	with r3a:
	    st.markdown('<div class="section-title">Beneficiary Themes: Who benefits?</div>', unsafe_allow_html=True)

	    # Drop missing values BEFORE casting to str so NaN cannot become a
	    # literal "nan" category.
	    benef_counts = (
	        f[benef_col].dropna().astype(str).str.strip().replace({"": np.nan}).dropna()
	        .value_counts().head(8).reset_index()
	    )
	    benef_counts.columns = ["beneficiary_type", "bills"]

	    if benef_counts.empty:
	        st.info("Beneficiary themes not available for this filtered view.")
	    else:
	        fig8 = px.bar(
	            # Ascending sort puts the largest bar at the top of a horizontal chart.
	            benef_counts.sort_values("bills", ascending=True),
	            x="bills", y="beneficiary_type",
	            orientation="h",
	            title="Beneficiary types",
	            color_discrete_sequence=[ILLINI_ALT_BLUE],
	        )
	        fig8 = style_plotly(fig8)
	        fig8.update_yaxes(title="", automargin=True)
	        fig8.update_xaxes(title="Bills")
	        st.plotly_chart(fig8, width="stretch")

	with r3b:
	    st.markdown('<div class="section-title">Impact View: Pending vs Enacted by Policy Area</div>', unsafe_allow_html=True)

	    # Cleaned policy labels (missing and blank rows removed). The same cleaned
	    # values are used both to pick the top 12 areas and to group the rows;
	    # previously the top list was built from stripped labels but matched
	    # against the raw column, so padded labels silently dropped out.
	    policy_clean = (
	        f[policy_col].dropna().astype(str).str.strip().replace({"": np.nan}).dropna()
	    )
	    top_policy = policy_clean.value_counts().head(12).index.tolist()

	    impact_rows = f.loc[policy_clean.index].copy()
	    impact_rows[policy_col] = policy_clean

	    stage_policy = (
	        impact_rows[impact_rows[policy_col].isin(top_policy)]
	        .groupby([policy_col, status_stage_col])
	        .size()
	        .reset_index(name="bills")
	    )

	    if stage_policy.empty:
	        st.info("Not enough data to build the impact view for this filtered view.")
	    else:
	        # Order the x-axis by total bills per policy area, largest first.
	        order = (
	            stage_policy.groupby(policy_col)["bills"].sum()
	            .sort_values(ascending=False)
	            .index.tolist()
	        )

	        fig_u1 = px.bar(
	            stage_policy,
	            x=policy_col,
	            y="bills",
	            color=status_stage_col,
	            barmode="stack",
	            category_orders={policy_col: order},
	            title="Status composition by policy area",
	            color_discrete_map={"Pending": ILLINI_ORANGE, "Enacted": ILLINI_BLUE},
	        )
	        fig_u1 = style_plotly(fig_u1)
	        fig_u1.update_xaxes(title="", tickangle=25)
	        fig_u1.update_yaxes(title="Bills")
	        st.plotly_chart(fig_u1, width="stretch")

	# -----------------------------
	# Watchlist table
	# -----------------------------
	st.markdown('<div class="section-title">Watchlist: Most Recent Pending Bills</div>', unsafe_allow_html=True)

	# Pending bills with a usable last-action date, newest first.
	watch = f[f[status_stage_col] == "Pending"].copy()
	watch["last_action_date_parsed"] = pd.to_datetime(watch["last_action_date_parsed"], errors="coerce")
	watch = watch.dropna(subset=["last_action_date_parsed"])
	watch = watch.sort_values("last_action_date_parsed", ascending=False)

	# Columns to show, limited to those that exist in the frame.
	wanted_cols = [
	    "bill_id", "year", "chamber",
	    policy_col,
	    "status", "status_step",
	    "last_action_date_parsed",
	    "primary_author_party",
	]
	watch_cols = [c for c in wanted_cols if c in watch.columns]

	# Wide text rendering for the summary / title-bucket columns when displayed.
	# NOTE(review): neither name is in wanted_cols above, so this mapping looks
	# like it is always empty — confirm whether those columns should be shown.
	col_cfg_watch = {
	    name: st.column_config.TextColumn(name, width="large")
	    for name in ("summary", title_bucket_col)
	    if name in watch_cols
	}

	st.dataframe(
	    watch.head(15)[watch_cols],
	    width="stretch",
	    hide_index=True,
	    column_config=col_cfg_watch,
	    height=380,
	)

	# # -----------------------------
	# # Local RAG (UPDATED: BART-style RAG summary like your example)
	# # -----------------------------
	# st.markdown('<div class="section-title">U of I Next Steps (Local RAG)</div>', unsafe_allow_html=True)
	# st.caption("Ask a question to see top matching bills + a RAG-generated overall summary .")

	# # ---- RAG helpers (kept local to avoid touching other app parts)
	# def clean_rag_summary(text: str) -> str:
	# text = re.sub(r"(?i)(here is\|here are) the requested output[s][:]", "", text)
	# text = re.sub(r"(?i)let me know if you'd like.*", "", text)
	# text = re.sub(r"(?i)trend summary[:]*", "", text)
	# text = re.sub(r"(?i)actionable insight[:]*", "", text)
	# return (text or "").strip()

	# @st.cache_resource(show_spinner=False)
	# def load_rag_models():
	# embed_model = SentenceTransformer("all-MiniLM-L6-v2")
	# summarizer = pipeline(
	# "summarization",
	# model="facebook/bart-large-cnn",
	# tokenizer="facebook/bart-large-cnn",
	# )
	# return embed_model, summarizer

	# @st.cache_data(show_spinner=False)
	# def compute_embeddings(texts_tuple):
	# # cache friendly: tuple of strings
	# embed_model = SentenceTransformer("all-MiniLM-L6-v2")
	# texts = list(texts_tuple)
	# return embed_model.encode(texts, show_progress_bar=False)

	# def semantic_search(query, embeddings, model, threshold=0.45):
	# q_emb = model.encode([query])
	# sims = cosine_similarity(q_emb, embeddings)[0]
	# return [(i, s) for i, s in enumerate(sims) if s > threshold]

	# def rag_summarize(texts, summarizer, top_k=6):
	# if not texts:
	# return "No relevant content to summarize."
	# vect = TfidfVectorizer(stop_words="english")
	# m = vect.fit_transform(texts)
	# mean_vec = m.mean(axis=0).A
	# scores = cosine_similarity(mean_vec, m).flatten()
	# top_indices = scores.argsort()[::-1][:top_k]
	# ctx = "\n".join(texts[i] for i in top_indices)
	# prompt = "summarize: " + ctx[:1200]
	# out = summarizer(prompt, max_length=220, min_length=80, do_sample=False)
	# return clean_rag_summary(out[0]["summary_text"])

	# # Build combined text (uses your viz-ready columns)
	# rag_df = f.copy()

	# # Make cache hashing safe: convert list objects to strings (prevents "unhashable type: 'list'")
	# for col in rag_df.columns:
	# try:
	# has_list = rag_df[col].apply(lambda x: isinstance(x, list)).any()
	# except Exception:
	# has_list = False
	# if has_list:
	# rag_df[col] = rag_df[col].apply(lambda x: ", ".join(map(str, x)) if isinstance(x, list) else x)

	# # topics list string (optional)
	# if "topics_list" in rag_df.columns:
	# rag_df["topic_list"] = rag_df["topics_list"].apply(parse_listish)
	# elif "topics" in rag_df.columns:
	# rag_df["topic_list"] = rag_df["topics"].apply(parse_listish)
	# else:
	# rag_df["topic_list"] = [[] for _ in range(len(rag_df))]

	# def build_combined_text(row):
	# parts = [
	# f"Policy area: {clean_text(row.get(policy_col,''))}",
	# f"Status: {clean_text(row.get(status_stage_col,''))} \| Step: {clean_text(row.get('status_step',''))}",
	# f"Pending: {clean_text(row.get('pending_chamber',''))} \| {clean_text(row.get('pending_committee_name',''))}",
	# f"Recency: {clean_text(row.get('action_recency_bucket',''))} \| Age days: {clean_text(row.get('bill_age_days',''))} \| Recent90: {clean_text(row.get('is_recent_90d',''))}",
	# f"Sponsors: {clean_text(row.get('sponsor_count',''))} \| Parties: {clean_text(row.get('author_party_combo',''))} \| Primary: {clean_text(row.get(party_col,''))}",
	# f"Theme: {clean_text(row.get(title_bucket_col,''))}",
	# f"Title: {clean_text(row.get('title',''))}",
	# f"Summary: {clean_text(row.get('summary',''))}",
	# f"Topics: {', '.join(row.get('topic_list', []))}",
	# ]
	# return "\n".join([p for p in parts if p and p.strip()])

	# rag_df["combined_text"] = rag_df.apply(build_combined_text, axis=1)

	# query = st.text_input("Ask a question (examples: tuition, financial aid, transfer credits, campus safety):")

	# if query and query.strip():
	# if rag_df.empty:
	# st.warning("No bills match your current filters. Expand the filters and try again.")
	# else:
	# embed_model, summarizer = load_rag_models()

	# texts = rag_df["combined_text"].fillna("").astype(str).tolist()
	# texts_tuple = tuple(texts)
	# embs = compute_embeddings(texts_tuple)

	# res = semantic_search(query, embs, embed_model, threshold=0.45)

	# if not res:
	# st.warning("No strong matches found. Try simpler keywords (e.g., “tuition”, “loan”, “safety”).")
	# else:
	# st.subheader("Top Matching Bills")
	# collected = []

	# # show top 8 matches
	# for idx, score in sorted(res, key=lambda x: x[1], reverse=True)[:8]:
	# row = rag_df.iloc[idx]
	# bill_id = clean_text(row.get("bill_id", ""))
	# yr = row.get("year", "")
	# theme = clean_text(row.get(title_bucket_col, ""))
	# pol = clean_text(row.get(policy_col, ""))
	# stat = clean_text(row.get(status_stage_col, ""))
	# step = clean_text(row.get("status_step", ""))
	# summ = clean_text(row.get("summary", ""))

	# # url optional if present
	# url_col = safe_col(rag_df, ["url", "bill_url", "legiscan_url", "full_url"])
	# url = clean_text(row.get(url_col, "")) if url_col else ""

	# st.markdown(
	# f"{bill_id} • {yr} • Score: {score:.2f}\n\n"
	# f"- Policy area: {pol}\n"
	# f"- Status/Step: {stat} / {step}\n"
	# f"- Theme: {theme}\n"
	# f"- Summary: {summ[:260]}{'…' if len(summ) > 260 else ''}\n"
	# )

	# if url:
	# st.markdown(f"[View bill source ↗]({url})")

	# st.divider()
	# collected.append(row["combined_text"])

	# st.subheader("RAG-Generated Overall Summary")
	# summary = rag_summarize(collected, summarizer, top_k=6)
	# st.success(summary)
	# else:
	# st.caption("Enter a question to generate top matches and an overall summary.")

	# # -----------------------------
	# # U of I Next Steps
	# # -----------------------------
	# st.markdown('<div class="section-title">U of I Next Steps</div>', unsafe_allow_html=True)
	# st.markdown("<div style='height:20px;'></div>", unsafe_allow_html=True)
	# st.caption("Select a policy area domain to view a 2-sentence UofI next-step recommendation.")

	# UOFI_NEXT_STEPS = {
	# "Admissions & Enrollment": (
	# "UofI should review how proposed admission, transfer, and enrollment rules could shift student access across UIUC/UIC/UIS and update campus admissions guidance accordingly. "
	# "Assign Enrollment Management owners to track committee movement and prepare impact brief on capacity, equity, and implementation timelines."
	# ),
	# "Appropriations & Budget": (
	# "UofI should assess fiscal impact (state funding, grants, cost mandates) and prepare budget scenarios for enacted vs pending bills affecting higher education finance. "
	# "Finance + Government Relations should coordinate leadership updates and ready a funding strategy, including compliance costs and implementation staffing."
	# ),
	# "Athletics & NIL": (
	# "UofI should evaluate NIL/athletics policy changes for compliance, student-athlete protections, and program risk exposure across campuses. "
	# "Assign Athletics Compliance + Legal Counsel to track pending bills and draft an implementation checklist for guidance, reporting, and student support."
	# ),
	# "Campus Safety & Title IX": (
	# "UofI should prioritize compliance readiness by mapping bill requirements to Title IX, Clery, campus safety, and student conduct procedures. "
	# "Assign System Legal + Student Affairs to monitor where bills are stuck (committee/chamber) and prepare standardized policy + training updates for rapid rollout if enacted."
	# ),
	# "Data/Reporting/Accountability": (
	# "UofI should identify required reporting fields, data owners, and system-wide definitions to ensure campus submissions remain consistent and auditable. "
	# "Assign institutional research/data governance leads to build a reporting playbook and proactively identify feasibility risks and data gaps."
	# ),
	# "Dual Credit & College Readiness": (
	# "UofI should assess impact on dual credit pathways, transfer alignment, and readiness programs to protect access and reduce credit loss for incoming students. "
	# "Assign Academic Affairs and Registrar leadership to prepare policy guidance for partner districts and articulation updates."
	# ),
	# "Financial Aid & Scholarships": (
	# "UofI should model impacts to student affordability and aid administration workflows (eligibility rules, award calculations, compliance updates). "
	# "Assign Financial Aid + Student Affairs to track pending movement and draft student-facing communications plus operational readiness steps."
	# ),
	# "Governance & Oversight": (
	# "UofI should evaluate governance-related bills for impacts on institutional autonomy, board authority, and internal approval processes. "
	# "Assign System Administration + Legal to prepare leadership briefings and recommended positions for advocacy and compliance planning."
	# ),
	# "Mental Health & Wellness": (
	# "UofI should assess staffing, service capacity, and mandated program requirements for student mental health and wellness initiatives. "
	# "Assign Student Affairs + Counseling leadership to map implementation needs and create a rollout plan with measurable outcomes and funding requirements."
	# ),
	# "Other Postsecondary Policy": (
	# "UofI should treat this as a catch-all risk bucket and prioritize rapid triage based on cost, compliance urgency, and student impact. "
	# "Assign Government Relations to flag high-risk items early and route them to the correct campus owner with a short impact summary."
	# ),
	# "Student Rights & Protections": (
	# "UofI should review potential changes affecting student protections, grievance processes, discrimination policy, and academic rights to ensure consistent campus-level enforcement. "
	# "Assign Legal + Student Affairs to create standardized guidance and prepare training updates if enacted."
	# ),
	# "Tax Credits & Deductions": (
	# "UofI should assess how tax policy changes could influence affordability, workforce incentives, and student/family financial behavior. "
	# "Assign Finance + Financial Aid to prepare a summary for leadership and update external guidance if student-facing impacts are significant."
	# ),
	# "Tuition & Fees": (
	# "UofI should model revenue impact and operational risk of tuition or fee restrictions, including differential tuition, program fees, and campus budgeting constraints. "
	# "Assign Finance + Provost offices to prepare policy scenarios and a communication plan for students and stakeholders."
	# ),
	# "Workforce & Career Readiness": (
	# "UofI should evaluate effects on workforce pipelines, internships, credentialing, and employer partnerships across programs and campuses. "
	# "Assign Career Services + Academic Affairs to align implementation plans, strengthen employer engagement, and track outcomes tied to statewide workforce goals."
	# ),
	# }

	# # Build the policy list from what's present in the filtered view, so it's always aligned with filters
	# policy_options = sorted(
	# f[policy_col].dropna().astype(str).str.strip().replace({"": np.nan}).dropna().unique().tolist()
	# )

	# if not policy_options:
	# st.info("No policy areas available in the current filtered view.")
	# else:
	# sel_policy = st.selectbox("Policy area domain", policy_options, index=0)

	# # Auto-show summary immediately when user selects a domain
	# # (No button needed; if you prefer a button, I can add it back.)
	# summary = UOFI_NEXT_STEPS.get(
	# sel_policy,
	# "No hardcoded UofI next-step summary is available for this policy area yet."
	# )
	# st.success(summary)


	# st.markdown("---")
	# st.caption("Download the filtered dataset used to build this dashboard:")

	# -----------------------------
	# Policy Domain Summary at a Glance
	# -----------------------------
	# Section heading (uses the "section-title" CSS class) followed by a 20px
	# spacer div; both are raw HTML, so unsafe_allow_html must be enabled.
	for _header_html in (
	    '<div class="section-title">Policy Domain Summary at a Glance</div>',
	    "<div style='height:20px;'></div>",
	):
	    st.markdown(_header_html, unsafe_allow_html=True)

	# Short instruction shown directly above the domain selector.
	st.caption("Select a policy area domain to view a brief 2-sentence summary.")

	# Maps each policy-area label to its two-sentence description.
	# Each row below is (label, first sentence, second sentence); the first
	# sentence carries its own trailing space, so the pieces are concatenated
	# as-is. Row order defines the dict's insertion order.
	POLICY_DOMAIN_SUMMARY = {
	    domain: "".join(sentences)
	    for domain, *sentences in (
	        (
	            "Admissions & Enrollment",
	            "This domain includes legislation related to admission requirements, transfer policies, enrollment rules, and institutional access pathways. ",
	            "It covers processes that affect student entry, eligibility standards, and campus-level enrollment administration.",
	        ),
	        (
	            "Appropriations & Budget",
	            "This domain covers legislation affecting higher education funding, appropriations, budget structures, and fiscal mandates. ",
	            "It includes items connected to state allocations, compliance costs, program funding, and financial reporting requirements.",
	        ),
	        (
	            "Athletics & NIL",
	            "This domain includes legislation related to intercollegiate athletics governance, student-athlete participation, and NIL policies. ",
	            "It involves compliance requirements, athlete protections, disclosures, and institutional responsibilities.",
	        ),
	        (
	            "Campus Safety & Title IX",
	            "This domain includes legislation tied to campus safety policies, Title IX requirements, Clery reporting, and student conduct procedures. ",
	            "It covers institutional reporting expectations, investigations, training standards, and procedural compliance.",
	        ),
	        (
	            "Data/Reporting/Accountability",
	            "This domain includes legislation involving institutional reporting, data collection standards, accountability measures, and audit requirements. ",
	            "It relates to performance metrics, public disclosures, compliance reporting, and system-wide data governance considerations.",
	        ),
	        (
	            "Dual Credit & College Readiness",
	            "This domain includes legislation related to dual credit programs, college readiness initiatives, and partnerships with K–12 districts. ",
	            "It covers articulation policies, credit transfer alignment, eligibility requirements, and academic pathway structures.",
	        ),
	        (
	            "Financial Aid & Scholarships",
	            "This domain includes legislation tied to financial aid eligibility, scholarship programs, award administration, and student affordability. ",
	            "It addresses funding mechanisms, qualifying criteria, program rules, and aid-related reporting requirements.",
	        ),
	        (
	            "Governance & Oversight",
	            "This domain includes legislation affecting institutional governance structures, oversight authority, and board-related processes. ",
	            "It involves decision-making frameworks, administrative authority, compliance monitoring, and policy control responsibilities.",
	        ),
	        (
	            "Mental Health & Wellness",
	            "This domain includes legislation related to student mental health services, wellness resources, and mandated program initiatives. ",
	            "It covers service capacity, staffing requirements, program reporting, and institutional support frameworks.",
	        ),
	        (
	            "Other Postsecondary Policy",
	            "This domain groups postsecondary legislation that does not fit cleanly into the other standard categories. ",
	            "It includes mixed policy areas spanning compliance, student programs, operations, or administrative requirements.",
	        ),
	        (
	            "Student Rights & Protections",
	            "This domain includes legislation affecting student rights, protections, grievance processes, and academic policy standards. ",
	            "It covers nondiscrimination policy, procedural safeguards, enforcement expectations, and campus-level student support rules.",
	        ),
	        (
	            "Tax Credits & Deductions",
	            "This domain includes legislation related to tax credits, deductions, and financial incentives connected to education costs. ",
	            "It influences affordability mechanisms, eligibility definitions, and financial guidance associated with postsecondary participation.",
	        ),
	        (
	            "Tuition & Fees",
	            "This domain includes legislation affecting tuition structures, mandatory fees, program charges, and cost-setting constraints. ",
	            "It includes provisions related to fee caps, tuition regulation, and campus revenue planning requirements.",
	        ),
	        (
	            "Workforce & Career Readiness",
	            "This domain includes legislation tied to workforce pipelines, credentialing programs, internships, and employer partnerships. ",
	            "It connects higher education programs to statewide workforce priorities, training requirements, and outcomes tracking.",
	        ),
	    )
	}

	# Derive the selector choices from the filtered frame so the list always
	# matches the active filters: drop missing values, normalise to stripped
	# strings, discard blanks, then de-duplicate and sort.
	_policy_labels = f[policy_col].dropna().astype(str).str.strip()
	_policy_labels = _policy_labels.replace({"": np.nan}).dropna()
	policy_options = sorted(_policy_labels.unique().tolist())

	if policy_options:
	    sel_policy = st.selectbox("Policy area domain", policy_options, index=0)

	    # The summary is rendered as soon as a domain is selected; labels with
	    # no entry in POLICY_DOMAIN_SUMMARY fall back to a placeholder message.
	    # NOTE(review): kept inside this branch because sel_policy is only
	    # assigned here.
	    summary = POLICY_DOMAIN_SUMMARY.get(
	        sel_policy,
	        "No 2-sentence summary is available for this policy area yet.",
	    )
	    st.success(summary)
	else:
	    st.info("No policy areas available in the current filtered view.")

	# Horizontal rule separating the summary section from the download area.
	st.markdown("---")
	st.caption("Download the filtered dataset used to build this dashboard:")

	# Serialise the filtered frame to CSV text without the index column and
	# encode it as UTF-8 bytes for the download button.
	_csv_payload = f.to_csv(index=False).encode("utf-8")
	st.download_button(
	    label="⬇️ Download filtered dashboard data (CSV)",
	    data=_csv_payload,
	    file_name="uofi_legislation_filtered_2019_2026.csv",
	    mime="text/csv",
	)

	# Page footer.
	st.caption("Love Data Week 2026 • University of Illinois System • Streamlit (HF Spaces)")