# Provenance: Hugging Face Spaces commit 70451a9 ("Update app.py") by ralate2 (verified).
# app.py — U of I Legislation Impact Dashboard (HF Spaces friendly)
# ---------------------------------------------------------------
import os
import re
import ast
import numpy as np
import pandas as pd
import streamlit as st
import plotly.express as px
# NEW (RAG)
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
# NOTE(review): presumably silences the Hugging Face tokenizers parallelism
# warning — confirm it is still needed while the RAG section is commented out.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# -----------------------------
# Page config
# -----------------------------
# Wide layout, sidebar collapsed on first load.
st.set_page_config(
page_title="U of I Legislation Impact Dashboard",
page_icon="📊",
layout="wide",
initial_sidebar_state="collapsed",
)
# -----------------------------
# U of I palette (ONLY)
# -----------------------------
# Colours interpolated into the CSS/HTML blocks and passed to the Plotly charts.
ILLINI_BLUE = "#13294B"  # banner and section-title background, KPI value colour
ILLINI_ORANGE = "#FF552E"  # accent colour: KPI accent, links, trend line, "Pending" bars
ILLINI_ALT_BLUE = "#1E3877"  # secondary blue used in chart colour sequences
ILLINI_LIGHT = "#E8EDF5"  # end colour of the page background gradient
TEXT_DARK = "#0B1220"  # text colour for the data-source panel and Plotly fonts
# -----------------------------
# CSS (title + tight spacing + full-width buttons)
# -----------------------------
# Inject the global stylesheet. The literal is an f-string so the palette
# constants can be interpolated, which is why literal CSS braces are doubled.
# It styles the banner, KPI cards, section titles, panels, dataframe cell
# wrapping, vertical block spacing and the download button width.
st.markdown(
f"""
<style>
.block-container {{
padding-top: 2.0rem !important;
padding-bottom: 1.0rem !important;
}}
header[data-testid="stHeader"] {{
height: 0.25rem;
}}
.main {{
background: linear-gradient(180deg, #FFFFFF 0%, {ILLINI_LIGHT} 100%);
}}
.uofi-banner {{
margin-top: 0.65rem;
background: {ILLINI_BLUE};
color: white;
padding: 20px 22px;
border-radius: 14px;
font-weight: 950;
font-size: clamp(24px, 2.8vw, 34px);
letter-spacing: 0.2px;
margin-bottom: 8px;
box-shadow: 0 8px 22px rgba(19,41,75,0.18);
white-space: normal !important;
overflow: visible !important;
line-height: 1.18;
word-break: break-word;
}}
.uofi-sub {{
margin-top: 8px;
font-size: 13px;
opacity: 0.92;
font-weight: 600;
white-space: normal !important;
overflow: visible !important;
}}
/* KPI cards */
.kpi-card {{
background: white;
border-radius: 14px;
padding: 12px 12px;
border: 1px solid rgba(19,41,75,0.10);
box-shadow: 0 8px 18px rgba(19,41,75,0.08);
min-height: 84px;
}}
.kpi-title {{
font-size: 13px;
color: rgba(11,18,32,0.72);
font-weight: 900;
}}
.kpi-value {{
font-size: 26px;
font-weight: 950;
color: {ILLINI_BLUE};
margin-top: 2px;
}}
.kpi-foot {{
font-size: 12px;
color: rgba(11,18,32,0.60);
margin-top: 2px;
}}
.kpi-accent {{
color: {ILLINI_ORANGE};
font-weight: 950;
}}
/* Section titles */
.section-title {{
font-size: 15px;
font-weight: 950;
color: white;
background: {ILLINI_BLUE};
padding: 8px 10px;
border-radius: 12px;
margin: 4px 0 8px 0;
box-shadow: 0 8px 18px rgba(19,41,75,0.10);
}}
/* Panels */
.panel {{
background: white;
border-radius: 14px;
padding: 10px;
border: 1px solid rgba(19,41,75,0.10);
box-shadow: 0 8px 18px rgba(19,41,75,0.08);
}}
.stVerticalBlock {{
gap: 0.28rem !important;
}}
div[data-testid="stDataFrame"] * {{
white-space: normal !important;
}}
/* Full width download button without deprecated args */
div[data-testid="stDownloadButton"] > button {{
width: 100% !important;
}}
</style>
""",
unsafe_allow_html=True,
)
# Second stylesheet: typography for widget labels, the slider, select boxes and
# multiselect chips, plus horizontal padding on columns.
# NOTE: the final rule sets `display: none` on the sidebar element, i.e. it hides
# the sidebar entirely; the in-CSS comment above it ("extra top whitespace")
# does not describe what the rule does.
st.markdown(
f"""
<style>
/* ============================
FILTER ROW: bigger labels + bold
============================ */
/* Make ALL widget labels bigger + bold */
div[data-testid="stWidgetLabel"] > label {{
font-size: 16px !important;
font-weight: 900 !important;
color: {ILLINI_BLUE} !important;
}}
/* Increase spacing between the 3 filter columns */
div[data-testid="column"] {{
padding-left: 8px !important;
padding-right: 8px !important;
}}
/* Make the Year range slider values (2019 / 2026) more readable */
div[data-testid="stSlider"] {{
font-size: 14px !important;
font-weight: 700 !important;
}}
/* Make selectbox + multiselect text slightly bigger */
div[data-baseweb="select"] * {{
font-size: 15px !important;
font-weight: 700 !important;
}}
/* Multi-select chips ("Pending", "Enacted") bolder */
span[data-baseweb="tag"] {{
font-weight: 900 !important;
font-size: 14px !important;
}}
/* Reduce the weird extra top whitespace around widgets */
section[data-testid="stSidebar"] {{
display: none !important;
}}
</style>
""",
unsafe_allow_html=True,
)
# -----------------------------
# Helpers
# -----------------------------
def safe_col(df, candidates):
    """Return the first entry of ``candidates`` that is a column of ``df``.

    Candidates are tried in the order given. ``None`` is returned when no
    candidate is present (including when ``candidates`` is empty).
    """
    available = df.columns
    return next((name for name in candidates if name in available), None)
def chamber_from_bill_id(bid):
    """Map a whitespace-separated bill identifier to a chamber name.

    The second token of the identifier is inspected: ``"S"`` yields
    ``"Senate"`` and ``"H"`` yields ``"House"`` (case-sensitive). Missing
    input, identifiers with fewer than two tokens, and any other second
    token yield ``np.nan``.
    """
    if pd.isna(bid):
        return np.nan
    tokens = str(bid).strip().split()
    if len(tokens) < 2:
        return np.nan
    chamber_code = tokens[1]
    if chamber_code == "S":
        return "Senate"
    if chamber_code == "H":
        return "House"
    return np.nan
def party_from_author(author):
    """Extract the party letter from an author string ending in ``(D)`` or ``(R)``.

    The parenthesised letter must be the last thing in the string (trailing
    whitespace is ignored); matching is case-insensitive and the result is
    upper-cased. Missing input or a string without such a suffix gives
    ``np.nan``.
    """
    if pd.isna(author):
        return np.nan
    author_text = str(author).strip()
    found = re.search(r"\((D|R)\)\s*$", author_text, flags=re.I)
    return found.group(1).upper() if found else np.nan
def to_dt(series):
    """Parse ``series`` with ``pd.to_datetime``; unparseable entries become ``NaT``."""
    parsed = pd.to_datetime(series, errors="coerce")
    return parsed
def style_plotly(fig):
    """Apply the dashboard's shared layout to ``fig`` and return the same figure.

    Sets the ``plotly_white`` template, font colour/size, tight margins, a
    horizontal legend anchored above the plot at the right edge, and the
    title font size. The figure is updated in place via ``update_layout``.
    """
    legend_cfg = {
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
        "font": {"size": 10},
    }
    fig.update_layout(
        template="plotly_white",
        font={"color": TEXT_DARK, "size": 11},
        margin={"l": 10, "r": 10, "t": 46, "b": 10},
        legend=legend_cfg,
        title={"font": {"size": 14}},
    )
    return fig
def clean_text(x):
    """Return ``x`` as a string with whitespace runs collapsed to single spaces.

    Missing values (as judged by ``pd.isna``) become the empty string; the
    result is stripped of leading and trailing whitespace.
    """
    if pd.isna(x):
        return ""
    collapsed = re.sub(r"\s+", " ", str(x))
    return collapsed.strip()
def parse_listish(x):
    """Coerce a list-like cell value into a list of non-empty, stripped strings.

    Accepted inputs:
      * a real ``list`` — each item is stringified and stripped;
      * a string holding a Python list literal (e.g. ``"['a', 'b']"``);
      * any other string — split on commas;
      * a missing value (``None`` / ``NaN``) or blank string — ``[]``.

    Empty items are dropped in every case.
    """

    def _clean(items):
        # Stringify, strip, and discard anything that ends up empty.
        return [text for text in (str(item).strip() for item in items) if text]

    # Lists must be handled before pd.isna(): on a list, pd.isna() returns an
    # element-wise array whose truth value is ambiguous (ValueError for 2+ items).
    if isinstance(x, list):
        return _clean(x)
    if pd.isna(x):
        return []
    s = str(x).strip()
    if not s:
        return []
    try:
        v = ast.literal_eval(s)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal: fall through to the comma-split path below.
        v = None
    if isinstance(v, list):
        return _clean(v)
    return _clean(s.split(","))
def enforce_two_sentences(text: str) -> str:
    """Truncate ``text`` to at most its first two sentences.

    Sentences are split on whitespace that follows ``.``, ``!`` or ``?``.
    ``None`` or empty input yields the empty string; the kept sentences are
    re-joined with a single space.
    """
    cleaned = (text or "").strip()
    pieces = (piece.strip() for piece in re.split(r"(?<=[.!?])\s+", cleaned))
    non_empty = [piece for piece in pieces if piece]
    return " ".join(non_empty[:2]).strip()
# -----------------------------
# Load data (Viz-ready)
# -----------------------------
@st.cache_data(show_spinner=False)
def load_data():
    """Load the dashboard CSV from the first candidate path that exists.

    Returns:
        A ``(DataFrame, basename)`` tuple: the parsed CSV and the file name
        (without directory) it was read from.

    Raises:
        FileNotFoundError: if none of the candidate paths exists.
    """
    # Checked in order; the viz-ready export is preferred over the raw export.
    search_paths = (
        "illinois_legislation_VIZ_READY.csv",
        "/mnt/data/illinois_legislation_VIZ_READY.csv",
        "illinois_postsecondary_legislation.csv",
        "/mnt/data/illinois_postsecondary_legislation.csv",
    )
    found = next((p for p in search_paths if os.path.exists(p)), None)
    if found is None:
        raise FileNotFoundError("Could not find the viz-ready CSV in the app directory.")
    return pd.read_csv(found), os.path.basename(found)
# Load the CSV and derive every column/column-name variable the dashboard
# sections below rely on. After this block the following names are defined:
# df, filename, date_col, status_stage_col, party_col, title_bucket_col,
# policy_col, stake_col, benef_col, NOW.
raw, filename = load_data()
df = raw.copy()
# -----------------------------
# Enforce 2019–2026 + derive minimal helpers if missing
# -----------------------------
if "year" not in df.columns:
    st.error("Missing required column: year")
    st.stop()
df["year"] = pd.to_numeric(df["year"], errors="coerce")
df = df[df["year"].between(2019, 2026, inclusive="both")].copy()
# chamber
if "chamber" not in df.columns:
    if "bill_id" in df.columns:
        df["chamber"] = df["bill_id"].apply(chamber_from_bill_id)
    else:
        df["chamber"] = np.nan
# dates
date_col = safe_col(df, ["last_action_date_parsed", "last_action_date_dt", "last_action_date_clean", "last_action_date"])
if date_col is None:
    st.error("Missing last action date column (last_action_date*)")
    st.stop()
# safe_col() prefers "last_action_date_parsed", so this both (re)parses an
# existing parsed column and derives it from a fallback column when absent.
df["last_action_date_parsed"] = to_dt(df[date_col])
# Fixed reference date: recency metrics are measured against this timestamp,
# not against the day the app is run.
NOW = pd.Timestamp("2026-01-23")
df["days_since_last_action"] = (NOW - df["last_action_date_parsed"]).dt.days
df["is_recent_90d"] = df["days_since_last_action"].between(0, 90, inclusive="both")
# status stage
status_stage_col = safe_col(df, ["status_stage", "status_label"])
if status_stage_col is None:
    status_stage_col = "status_stage"
    if "status" in df.columns:
        s = df["status"].fillna("").astype(str).str.lower()
        # "Enacted" keywords win over "pending" when both appear in the text.
        df["status_stage"] = np.where(
            s.str.contains("enacted|signed|public act"), "Enacted",
            np.where(s.str.contains("pending"), "Pending", np.nan),
        )
    else:
        df["status_stage"] = np.nan
# Canonicalise the two labels the filters and charts compare against. Matching
# is case-insensitive so "PENDING"/"enacted" etc. are not silently filtered out;
# every other value is kept as its stripped string form.
stage_text = df[status_stage_col].astype(str).str.strip()
canonical_stage = stage_text.str.lower().map({"pending": "Pending", "enacted": "Enacted"})
df[status_stage_col] = canonical_stage.fillna(stage_text)
# party
party_col = safe_col(df, ["primary_author_party"])
if party_col is None:
    party_col = "primary_author_party"
    a_col = safe_col(df, ["author_clean", "author"])
    if a_col:
        df["primary_author_party"] = df[a_col].apply(party_from_author)
    else:
        df["primary_author_party"] = np.nan
# standardized title bucket
title_bucket_col = safe_col(df, ["title_std_bucket", "title_standardized", "title_nlp", "title_bucket"])
if title_bucket_col is None:
    df["title_std_bucket"] = df["title"].fillna("").astype(str) if "title" in df.columns else "No Title"
    title_bucket_col = "title_std_bucket"
# policy
policy_col = safe_col(df, ["policy_area_bucket"])
if policy_col is None:
    df["policy_area_bucket"] = "Not Available"
    # Must name the placeholder column just created: policy_col is used to index
    # the frame in the chart sections, so a non-existent name raises KeyError.
    policy_col = "policy_area_bucket"
# stakeholder + beneficiary
stake_col = safe_col(df, ["stakeholder_group", "stakeholder_group_bucket", "stakeholder_bucket", "stakeholders_bucket", "stakeholder"])
if stake_col is None:
    df["stakeholder_group"] = "Not Available"
    stake_col = "stakeholder_group"
benef_col = safe_col(df, ["beneficiary_type", "beneficiary_bucket", "beneficiary_category", "intended_beneficiaries_standardized_final", "intended_beneficiaries_bucket"])
if benef_col is None:
    df["beneficiary_type"] = "Not Available"
    benef_col = "beneficiary_type"
# optional fields used in RAG / watchlist
if "bill_age_days" not in df.columns:
    df["bill_age_days"] = df["days_since_last_action"]
if "action_recency_bucket" not in df.columns:
    bins = [-1, 30, 90, 180, 365, 999999]
    labels = ["0–30d", "31–90d", "91–180d", "181–365d", "365d+"]
    # Undated bills are pushed into the oldest bucket via the 999999 fill value.
    df["action_recency_bucket"] = pd.cut(df["days_since_last_action"].fillna(999999), bins=bins, labels=labels)
for c in ["status_step", "pending_committee_name", "pending_chamber"]:
    if c not in df.columns:
        df[c] = ""
if "author_party_combo" not in df.columns:
    df["author_party_combo"] = df[party_col].fillna("").astype(str)
if "sponsor_count" not in df.columns:
    df["sponsor_count"] = np.nan
# -----------------------------
# Title spacer + Header
# -----------------------------
# 6px spacer above the banner.
st.markdown("<div style='height:6px'></div>", unsafe_allow_html=True)
# Title banner; the subtitle lists the dashboard sections in on-page order.
st.markdown(
"""
<div class="uofi-banner">
U of I Legislation Impact Dashboard
<div class="uofi-sub">
2019–2026 • Trends → Party share → Executive themes → Stakeholders → Beneficiaries → Impact view → Watchlist → Policy Domain Summary at a Glance
</div>
</div>
""",
unsafe_allow_html=True,
)
# -----------------------------
# Public dataset link (NCSL)
# -----------------------------
# Panel crediting the data source; the link opens in a new tab (target="_blank").
st.markdown(
f"""
<div class="panel">
<div style="font-weight:900; color:{ILLINI_BLUE}; margin-bottom:6px;"> Data Source Link </div>
<div style="color:{TEXT_DARK}; font-size:13px; line-height:1.35;">
This dashboard is made using the NCSL Postsecondary Legislation Database.
<br/>
<a href="https://www.ncsl.org/education/postsecondary-legislation-database" target="_blank"
style="color:{ILLINI_ORANGE}; font-weight:900; text-decoration:none;">
Open NCSL Postsecondary Legislation Database ↗
</a>
</div>
</div>
""",
unsafe_allow_html=True,
)
# 28px spacer between the source panel and the filter row.
st.markdown("<div style='height:28px'></div>", unsafe_allow_html=True)
# -----------------------------
# Filters (Year, Chamber, Status)
# -----------------------------
# Three equal-width filter widgets; their selections produce `f`, the filtered
# frame every section below reads from.
f1, f2, f3 = st.columns([2.0, 2.0, 2.0], gap="large")
with f1:
    # Default slider span = years present in the data, clamped to 2019–2026.
    years = sorted(int(y) for y in df["year"].dropna().unique())
    if years:
        ymin, ymax = min(years), max(years)
    else:
        ymin, ymax = 2019, 2026
    default_span = (max(2019, ymin), min(2026, ymax))
    year_range = st.slider("Year range", 2019, 2026, default_span)
with f2:
    # Only offer chambers that actually occur in the data.
    chambers_present = df["chamber"].dropna().unique().tolist()
    chamber_opts = ["All"]
    chamber_opts.extend(name for name in ("House", "Senate") if name in chambers_present)
    sel_chamber = st.selectbox("Chamber", chamber_opts, index=0)
with f3:
    status_opts = ["Pending", "Enacted"]
    sel_status = st.multiselect("Status", options=status_opts, default=status_opts)
st.caption(f"Dashboard dataset: {filename}")
# Apply filters
row_mask = df["year"].between(year_range[0], year_range[1], inclusive="both")
if sel_chamber != "All":
    row_mask = row_mask & (df["chamber"] == sel_chamber)
row_mask = row_mask & df[status_stage_col].isin(sel_status)
f = df[row_mask].copy()
# -----------------------------
# KPIs
# -----------------------------
# Headline numbers for the filtered cohort `f`.
total_bills = len(f)
recent_90 = int(f["is_recent_90d"].fillna(False).sum())
stage_values = f[status_stage_col]
is_pending = stage_values == "Pending"
enacted_ct = int((stage_values == "Enacted").sum())
pending_ct = int(is_pending.sum())
# Rate is enacted / (enacted + pending); 0.0 when the cohort has neither.
decided_ct = enacted_ct + pending_ct
enact_rate = enacted_ct / decided_ct if decided_ct > 0 else 0.0
stuck_pending_365 = int((is_pending & (f["days_since_last_action"] > 365)).sum())
# One card per column, left to right.
k1, k2, k3, k4 = st.columns(4, gap="small")
with k1:
    st.markdown(
        f"""
<div class="kpi-card">
<div class="kpi-title">Bills in View</div>
<div class="kpi-value">{total_bills:,}</div>
<div class="kpi-foot">Filtered cohort</div>
</div>
""",
        unsafe_allow_html=True,
    )
with k2:
    st.markdown(
        f"""
<div class="kpi-card">
<div class="kpi-title">Recent (≤ 90 Days)</div>
<div class="kpi-value"><span class="kpi-accent">{recent_90:,}</span></div>
<div class="kpi-foot">Latest movement / momentum</div>
</div>
""",
        unsafe_allow_html=True,
    )
with k3:
    st.markdown(
        f"""
<div class="kpi-card">
<div class="kpi-title">Enactment Rate</div>
<div class="kpi-value">{enact_rate*100:,.1f}%</div>
<div class="kpi-foot">{enacted_ct:,} enacted vs {pending_ct:,} pending</div>
</div>
""",
        unsafe_allow_html=True,
    )
with k4:
    st.markdown(
        f"""
<div class="kpi-card">
<div class="kpi-title">Stuck at Pending Stage (&gt; 365 Days)</div>
<div class="kpi-value">{stuck_pending_365:,}</div>
<div class="kpi-foot">Aging bills needing attention</div>
</div>
""",
        unsafe_allow_html=True,
    )
# -----------------------------
# Row 1: Trend + Party donut
# -----------------------------
# Row 1: monthly bill counts (left) and D-vs-R author share (right).
c1, c2 = st.columns([1.55, 1.0], gap="small")
with c1:
    st.markdown('<div class="section-title">Trend: Bills Over Time (Monthly)</div>', unsafe_allow_html=True)
    # Bucket dated bills by the calendar month of their last action.
    dated_bills = f.dropna(subset=["last_action_date_parsed"])
    month_key = dated_bills["last_action_date_parsed"].dt.to_period("M").astype(str)
    ts = dated_bills.assign(ym=month_key).groupby("ym").size().reset_index(name="bills")
    if ts.empty:
        st.info("No dated bills found for the selected filters.")
    else:
        # Sort chronologically via a parsed helper column; the axis shows "ym".
        ts["ym_dt"] = pd.to_datetime(ts["ym"], errors="coerce")
        ts = ts.sort_values("ym_dt")
        fig = px.line(
            ts,
            x="ym",
            y="bills",
            markers=True,
            title="Bills per month (by last action date)",
            color_discrete_sequence=[ILLINI_ORANGE],
        )
        fig = style_plotly(fig)
        fig.update_xaxes(title="", tickangle=0)
        fig.update_yaxes(title="Bills")
        st.plotly_chart(fig, width="stretch")
with c2:
    st.markdown('<div class="section-title">Democrat vs Republican Share</div>', unsafe_allow_html=True)
    # Keep only rows whose primary author party is exactly D or R.
    party_codes = f[party_col].fillna("").astype(str).str.upper()
    p = party_codes[party_codes.isin(["D", "R"])]
    if p.empty:
        st.info("Party share not available for this filtered view.")
    else:
        pie_df = p.value_counts().rename_axis("party").reset_index(name="count")
        fig = px.pie(
            pie_df,
            names="party",
            values="count",
            hole=0.58,
            title="Primary author party (D vs R)",
            color="party",
            color_discrete_map={"D": ILLINI_BLUE, "R": ILLINI_ORANGE},
        )
        fig = style_plotly(fig)
        fig.update_traces(textposition="inside", textinfo="percent+label", textfont=dict(size=11))
        st.plotly_chart(fig, width="stretch")
# -----------------------------
# Row 2: Policy treemap + Stakeholder pie
# -----------------------------
# Row 2: stakeholder pie (left column, r2a) and policy-area treemap (right
# column, r2b). The column objects fix the on-page position, so the order of
# the two `with` blocks does not affect layout.
r2a, r2b = st.columns([1.05, 1.2], gap="small")
with r2b:
    st.markdown('<div class="section-title">Executive Themes: Policy Areas</div>', unsafe_allow_html=True)
    # Drop missing values BEFORE casting to str: otherwise NaN becomes the
    # literal label "nan" and is charted as if it were a real policy area.
    policy_labels = f[policy_col].dropna().astype(str).str.strip()
    policy_counts = policy_labels[policy_labels != ""].value_counts().head(9).reset_index()
    policy_counts.columns = ["policy_area_bucket", "bills"]
    if policy_counts.empty:
        st.info("Policy areas not available for this filtered view.")
    else:
        fig1 = px.treemap(
            policy_counts,
            path=["policy_area_bucket"],
            values="bills",
            title="Policy area concentration",
            color_discrete_sequence=[ILLINI_BLUE, ILLINI_ALT_BLUE, ILLINI_ORANGE],
        )
        fig1 = style_plotly(fig1)
        st.plotly_chart(fig1, width="stretch")
with r2a:
    st.markdown('<div class="section-title">Stakeholder Themes: Who is affected?</div>', unsafe_allow_html=True)
    # Same missing-value handling as the policy chart above; top 5 groups only.
    stake_labels = f[stake_col].dropna().astype(str).str.strip()
    stake_counts = stake_labels[stake_labels != ""].value_counts().head(5).reset_index()
    stake_counts.columns = ["stakeholder_group", "bills"]
    if stake_counts.empty:
        st.info("Stakeholder themes not available for this filtered view.")
    else:
        fig7 = px.pie(
            stake_counts,
            names="stakeholder_group",
            values="bills",
            title="Stakeholder share (pie)",
            color_discrete_sequence=[ILLINI_ALT_BLUE, ILLINI_BLUE, ILLINI_ORANGE, ILLINI_ALT_BLUE, ILLINI_BLUE],
        )
        fig7 = style_plotly(fig7)
        # Labels are drawn inside the slices, so the legend is redundant.
        fig7.update_layout(showlegend=False)
        fig7.update_traces(textposition="inside", textinfo="percent+label", textfont=dict(size=12))
        st.plotly_chart(fig7, width="stretch")
# -----------------------------
# Row 3: Beneficiary bar + Impact stacked
# -----------------------------
# Row 3: beneficiary bar chart (left) and pending-vs-enacted stacked bars per
# policy area (right).
r3a, r3b = st.columns([1.0, 1.55], gap="small")
with r3a:
    st.markdown('<div class="section-title">Beneficiary Themes: Who benefits?</div>', unsafe_allow_html=True)
    # Drop missing values BEFORE casting to str so NaN is not counted as the
    # literal category "nan"; top 8 beneficiary types only.
    benef_labels = f[benef_col].dropna().astype(str).str.strip()
    benef_counts = benef_labels[benef_labels != ""].value_counts().head(8).reset_index()
    benef_counts.columns = ["beneficiary_type", "bills"]
    if benef_counts.empty:
        st.info("Beneficiary themes not available for this filtered view.")
    else:
        # Ascending sort puts the largest bar at the top of a horizontal chart.
        fig8 = px.bar(
            benef_counts.sort_values("bills", ascending=True),
            x="bills", y="beneficiary_type",
            orientation="h",
            title="Beneficiary types",
            color_discrete_sequence=[ILLINI_ALT_BLUE],
        )
        fig8 = style_plotly(fig8)
        fig8.update_yaxes(title="", automargin=True)
        fig8.update_xaxes(title="Bills")
        st.plotly_chart(fig8, width="stretch")
with r3b:
    st.markdown('<div class="section-title">Impact View: Pending vs Enacted by Policy Area</div>', unsafe_allow_html=True)
    # Normalise the policy labels once and use the SAME cleaned values for both
    # the top-12 ranking and the grouping; ranking on stripped labels but
    # filtering on raw ones would drop rows whose label has stray whitespace.
    policy_text = f[policy_col].astype(str).str.strip()
    has_policy = f[policy_col].notna() & (policy_text != "")
    impact_rows = f[has_policy].copy()
    # Positional assignment: impact_rows and the masked labels share row order.
    impact_rows[policy_col] = policy_text[has_policy].to_numpy()
    top_policy = impact_rows[policy_col].value_counts().head(12).index.tolist()
    stage_policy = (
        impact_rows[impact_rows[policy_col].isin(top_policy)]
        .groupby([policy_col, status_stage_col])
        .size()
        .reset_index(name="bills")
    )
    if stage_policy.empty:
        st.info("Not enough data to build the impact view for this filtered view.")
    else:
        # Order the x axis by total bills per policy area, largest first.
        order = (
            stage_policy.groupby(policy_col)["bills"].sum()
            .sort_values(ascending=False)
            .index.tolist()
        )
        fig_u1 = px.bar(
            stage_policy,
            x=policy_col,
            y="bills",
            color=status_stage_col,
            barmode="stack",
            category_orders={policy_col: order},
            title="Status composition by policy area",
            color_discrete_map={"Pending": ILLINI_ORANGE, "Enacted": ILLINI_BLUE},
        )
        fig_u1 = style_plotly(fig_u1)
        fig_u1.update_xaxes(title="", tickangle=25)
        fig_u1.update_yaxes(title="Bills")
        st.plotly_chart(fig_u1, width="stretch")
# -----------------------------
# Watchlist table
# -----------------------------
# Table of the 15 pending bills with the most recent last-action date.
st.markdown('<div class="section-title">Watchlist: Most Recent Pending Bills</div>', unsafe_allow_html=True)
pending_rows = f[status_stage_col] == "Pending"
watch = f[pending_rows].copy()
watch["last_action_date_parsed"] = pd.to_datetime(watch["last_action_date_parsed"], errors="coerce")
watch = watch.dropna(subset=["last_action_date_parsed"])
watch = watch.sort_values("last_action_date_parsed", ascending=False)
# Show only the requested columns that exist in the data, in this order.
wanted_cols = [
    "bill_id",
    "year",
    "chamber",
    policy_col,
    "status",
    "status_step",
    "last_action_date_parsed",
    "primary_author_party",
]
watch_cols = [name for name in wanted_cols if name in watch.columns]
# Wide text columns for summary/title, applied only if those columns are shown.
# NOTE: neither is in the list above, so this currently leaves the config empty.
col_cfg_watch = {}
for wide_col in ("summary", title_bucket_col):
    if wide_col in watch_cols:
        col_cfg_watch[wide_col] = st.column_config.TextColumn(wide_col, width="large")
st.dataframe(
    watch[watch_cols].head(15),
    width="stretch",
    hide_index=True,
    column_config=col_cfg_watch,
    height=380,
)
# # -----------------------------
# # Local RAG (UPDATED: BART-style RAG summary like your example)
# # -----------------------------
# st.markdown('<div class="section-title">U of I Next Steps (Local RAG)</div>', unsafe_allow_html=True)
# st.caption("Ask a question to see top matching bills + a RAG-generated overall summary .")
# # ---- RAG helpers (kept local to avoid touching other app parts)
# def clean_rag_summary(text: str) -> str:
# text = re.sub(r"(?i)(here is|here are) the requested output[s]*[:]*", "", text)
# text = re.sub(r"(?i)let me know if you'd like.*", "", text)
# text = re.sub(r"(?i)trend summary[:]*", "", text)
# text = re.sub(r"(?i)actionable insight[:]*", "", text)
# return (text or "").strip()
# @st.cache_resource(show_spinner=False)
# def load_rag_models():
# embed_model = SentenceTransformer("all-MiniLM-L6-v2")
# summarizer = pipeline(
# "summarization",
# model="facebook/bart-large-cnn",
# tokenizer="facebook/bart-large-cnn",
# )
# return embed_model, summarizer
# @st.cache_data(show_spinner=False)
# def compute_embeddings(texts_tuple):
# # cache friendly: tuple of strings
# embed_model = SentenceTransformer("all-MiniLM-L6-v2")
# texts = list(texts_tuple)
# return embed_model.encode(texts, show_progress_bar=False)
# def semantic_search(query, embeddings, model, threshold=0.45):
# q_emb = model.encode([query])
# sims = cosine_similarity(q_emb, embeddings)[0]
# return [(i, s) for i, s in enumerate(sims) if s > threshold]
# def rag_summarize(texts, summarizer, top_k=6):
# if not texts:
# return "No relevant content to summarize."
# vect = TfidfVectorizer(stop_words="english")
# m = vect.fit_transform(texts)
# mean_vec = m.mean(axis=0).A
# scores = cosine_similarity(mean_vec, m).flatten()
# top_indices = scores.argsort()[::-1][:top_k]
# ctx = "\n".join(texts[i] for i in top_indices)
# prompt = "summarize: " + ctx[:1200]
# out = summarizer(prompt, max_length=220, min_length=80, do_sample=False)
# return clean_rag_summary(out[0]["summary_text"])
# # Build combined text (uses your viz-ready columns)
# rag_df = f.copy()
# # Make cache hashing safe: convert list objects to strings (prevents "unhashable type: 'list'")
# for col in rag_df.columns:
# try:
# has_list = rag_df[col].apply(lambda x: isinstance(x, list)).any()
# except Exception:
# has_list = False
# if has_list:
# rag_df[col] = rag_df[col].apply(lambda x: ", ".join(map(str, x)) if isinstance(x, list) else x)
# # topics list string (optional)
# if "topics_list" in rag_df.columns:
# rag_df["topic_list"] = rag_df["topics_list"].apply(parse_listish)
# elif "topics" in rag_df.columns:
# rag_df["topic_list"] = rag_df["topics"].apply(parse_listish)
# else:
# rag_df["topic_list"] = [[] for _ in range(len(rag_df))]
# def build_combined_text(row):
# parts = [
# f"Policy area: {clean_text(row.get(policy_col,''))}",
# f"Status: {clean_text(row.get(status_stage_col,''))} | Step: {clean_text(row.get('status_step',''))}",
# f"Pending: {clean_text(row.get('pending_chamber',''))} | {clean_text(row.get('pending_committee_name',''))}",
# f"Recency: {clean_text(row.get('action_recency_bucket',''))} | Age days: {clean_text(row.get('bill_age_days',''))} | Recent90: {clean_text(row.get('is_recent_90d',''))}",
# f"Sponsors: {clean_text(row.get('sponsor_count',''))} | Parties: {clean_text(row.get('author_party_combo',''))} | Primary: {clean_text(row.get(party_col,''))}",
# f"Theme: {clean_text(row.get(title_bucket_col,''))}",
# f"Title: {clean_text(row.get('title',''))}",
# f"Summary: {clean_text(row.get('summary',''))}",
# f"Topics: {', '.join(row.get('topic_list', []))}",
# ]
# return "\n".join([p for p in parts if p and p.strip()])
# rag_df["combined_text"] = rag_df.apply(build_combined_text, axis=1)
# query = st.text_input("Ask a question (examples: tuition, financial aid, transfer credits, campus safety):")
# if query and query.strip():
# if rag_df.empty:
# st.warning("No bills match your current filters. Expand the filters and try again.")
# else:
# embed_model, summarizer = load_rag_models()
# texts = rag_df["combined_text"].fillna("").astype(str).tolist()
# texts_tuple = tuple(texts)
# embs = compute_embeddings(texts_tuple)
# res = semantic_search(query, embs, embed_model, threshold=0.45)
# if not res:
# st.warning("No strong matches found. Try simpler keywords (e.g., “tuition”, “loan”, “safety”).")
# else:
# st.subheader("Top Matching Bills")
# collected = []
# # show top 8 matches
# for idx, score in sorted(res, key=lambda x: x[1], reverse=True)[:8]:
# row = rag_df.iloc[idx]
# bill_id = clean_text(row.get("bill_id", ""))
# yr = row.get("year", "")
# theme = clean_text(row.get(title_bucket_col, ""))
# pol = clean_text(row.get(policy_col, ""))
# stat = clean_text(row.get(status_stage_col, ""))
# step = clean_text(row.get("status_step", ""))
# summ = clean_text(row.get("summary", ""))
# # url optional if present
# url_col = safe_col(rag_df, ["url", "bill_url", "legiscan_url", "full_url"])
# url = clean_text(row.get(url_col, "")) if url_col else ""
# st.markdown(
# f"**{bill_id}** • **{yr}** • **Score:** {score:.2f}\n\n"
# f"- **Policy area:** {pol}\n"
# f"- **Status/Step:** {stat} / {step}\n"
# f"- **Theme:** {theme}\n"
# f"- **Summary:** {summ[:260]}{'…' if len(summ) > 260 else ''}\n"
# )
# if url:
# st.markdown(f"[View bill source ↗]({url})")
# st.divider()
# collected.append(row["combined_text"])
# st.subheader("RAG-Generated Overall Summary")
# summary = rag_summarize(collected, summarizer, top_k=6)
# st.success(summary)
# else:
# st.caption("Enter a question to generate top matches and an overall summary.")
# # -----------------------------
# # U of I Next Steps
# # -----------------------------
# st.markdown('<div class="section-title">U of I Next Steps</div>', unsafe_allow_html=True)
# st.markdown("<div style='height:20px;'></div>", unsafe_allow_html=True)
# st.caption("Select a policy area domain to view a 2-sentence UofI next-step recommendation.")
# UOFI_NEXT_STEPS = {
# "Admissions & Enrollment": (
# "UofI should review how proposed admission, transfer, and enrollment rules could shift student access across UIUC/UIC/UIS and update campus admissions guidance accordingly. "
# "Assign Enrollment Management owners to track committee movement and prepare impact brief on capacity, equity, and implementation timelines."
# ),
# "Appropriations & Budget": (
# "UofI should assess fiscal impact (state funding, grants, cost mandates) and prepare budget scenarios for enacted vs pending bills affecting higher education finance. "
# "Finance + Government Relations should coordinate leadership updates and ready a funding strategy, including compliance costs and implementation staffing."
# ),
# "Athletics & NIL": (
# "UofI should evaluate NIL/athletics policy changes for compliance, student-athlete protections, and program risk exposure across campuses. "
# "Assign Athletics Compliance + Legal Counsel to track pending bills and draft an implementation checklist for guidance, reporting, and student support."
# ),
# "Campus Safety & Title IX": (
# "UofI should prioritize compliance readiness by mapping bill requirements to Title IX, Clery, campus safety, and student conduct procedures. "
# "Assign System Legal + Student Affairs to monitor where bills are stuck (committee/chamber) and prepare standardized policy + training updates for rapid rollout if enacted."
# ),
# "Data/Reporting/Accountability": (
# "UofI should identify required reporting fields, data owners, and system-wide definitions to ensure campus submissions remain consistent and auditable. "
# "Assign institutional research/data governance leads to build a reporting playbook and proactively identify feasibility risks and data gaps."
# ),
# "Dual Credit & College Readiness": (
# "UofI should assess impact on dual credit pathways, transfer alignment, and readiness programs to protect access and reduce credit loss for incoming students. "
# "Assign Academic Affairs and Registrar leadership to prepare policy guidance for partner districts and articulation updates."
# ),
# "Financial Aid & Scholarships": (
# "UofI should model impacts to student affordability and aid administration workflows (eligibility rules, award calculations, compliance updates). "
# "Assign Financial Aid + Student Affairs to track pending movement and draft student-facing communications plus operational readiness steps."
# ),
# "Governance & Oversight": (
# "UofI should evaluate governance-related bills for impacts on institutional autonomy, board authority, and internal approval processes. "
# "Assign System Administration + Legal to prepare leadership briefings and recommended positions for advocacy and compliance planning."
# ),
# "Mental Health & Wellness": (
# "UofI should assess staffing, service capacity, and mandated program requirements for student mental health and wellness initiatives. "
# "Assign Student Affairs + Counseling leadership to map implementation needs and create a rollout plan with measurable outcomes and funding requirements."
# ),
# "Other Postsecondary Policy": (
# "UofI should treat this as a catch-all risk bucket and prioritize rapid triage based on cost, compliance urgency, and student impact. "
# "Assign Government Relations to flag high-risk items early and route them to the correct campus owner with a short impact summary."
# ),
# "Student Rights & Protections": (
# "UofI should review potential changes affecting student protections, grievance processes, discrimination policy, and academic rights to ensure consistent campus-level enforcement. "
# "Assign Legal + Student Affairs to create standardized guidance and prepare training updates if enacted."
# ),
# "Tax Credits & Deductions": (
# "UofI should assess how tax policy changes could influence affordability, workforce incentives, and student/family financial behavior. "
# "Assign Finance + Financial Aid to prepare a summary for leadership and update external guidance if student-facing impacts are significant."
# ),
# "Tuition & Fees": (
# "UofI should model revenue impact and operational risk of tuition or fee restrictions, including differential tuition, program fees, and campus budgeting constraints. "
# "Assign Finance + Provost offices to prepare policy scenarios and a communication plan for students and stakeholders."
# ),
# "Workforce & Career Readiness": (
# "UofI should evaluate effects on workforce pipelines, internships, credentialing, and employer partnerships across programs and campuses. "
# "Assign Career Services + Academic Affairs to align implementation plans, strengthen employer engagement, and track outcomes tied to statewide workforce goals."
# ),
# }
# # Build the policy list from what's present in the filtered view, so it's always aligned with filters
# policy_options = sorted(
# f[policy_col].dropna().astype(str).str.strip().replace({"": np.nan}).dropna().unique().tolist()
# )
# if not policy_options:
# st.info("No policy areas available in the current filtered view.")
# else:
# sel_policy = st.selectbox("Policy area domain", policy_options, index=0)
# # Auto-show summary immediately when user selects a domain
# # (No button needed; if you prefer a button, I can add it back.)
# summary = UOFI_NEXT_STEPS.get(
# sel_policy,
# "No hardcoded UofI next-step summary is available for this policy area yet."
# )
# st.success(summary)
# st.markdown("---")
# st.caption("Download the filtered dataset used to build this dashboard:")
# -----------------------------
# Policy Domain Summary at a Glance
# -----------------------------
# Section title, a 20px spacer, then the helper caption for the domain summary.
st.markdown('<div class="section-title">Policy Domain Summary at a Glance</div>', unsafe_allow_html=True)
st.markdown("<div style='height:20px;'></div>", unsafe_allow_html=True)
st.caption("Select a policy area domain to view a brief 2-sentence summary.")
# (policy domain label, first sentence, second sentence) — one row per domain.
# The two sentences are joined with a single space to form the blurb shown
# when that domain is picked in the selector.
_DOMAIN_SENTENCE_PAIRS = (
    (
        "Admissions & Enrollment",
        "This domain includes legislation related to admission requirements, transfer policies, enrollment rules, and institutional access pathways.",
        "It covers processes that affect student entry, eligibility standards, and campus-level enrollment administration.",
    ),
    (
        "Appropriations & Budget",
        "This domain covers legislation affecting higher education funding, appropriations, budget structures, and fiscal mandates.",
        "It includes items connected to state allocations, compliance costs, program funding, and financial reporting requirements.",
    ),
    (
        "Athletics & NIL",
        "This domain includes legislation related to intercollegiate athletics governance, student-athlete participation, and NIL policies.",
        "It involves compliance requirements, athlete protections, disclosures, and institutional responsibilities.",
    ),
    (
        "Campus Safety & Title IX",
        "This domain includes legislation tied to campus safety policies, Title IX requirements, Clery reporting, and student conduct procedures.",
        "It covers institutional reporting expectations, investigations, training standards, and procedural compliance.",
    ),
    (
        "Data/Reporting/Accountability",
        "This domain includes legislation involving institutional reporting, data collection standards, accountability measures, and audit requirements.",
        "It relates to performance metrics, public disclosures, compliance reporting, and system-wide data governance considerations.",
    ),
    (
        "Dual Credit & College Readiness",
        "This domain includes legislation related to dual credit programs, college readiness initiatives, and partnerships with K–12 districts.",
        "It covers articulation policies, credit transfer alignment, eligibility requirements, and academic pathway structures.",
    ),
    (
        "Financial Aid & Scholarships",
        "This domain includes legislation tied to financial aid eligibility, scholarship programs, award administration, and student affordability.",
        "It addresses funding mechanisms, qualifying criteria, program rules, and aid-related reporting requirements.",
    ),
    (
        "Governance & Oversight",
        "This domain includes legislation affecting institutional governance structures, oversight authority, and board-related processes.",
        "It involves decision-making frameworks, administrative authority, compliance monitoring, and policy control responsibilities.",
    ),
    (
        "Mental Health & Wellness",
        "This domain includes legislation related to student mental health services, wellness resources, and mandated program initiatives.",
        "It covers service capacity, staffing requirements, program reporting, and institutional support frameworks.",
    ),
    (
        "Other Postsecondary Policy",
        "This domain groups postsecondary legislation that does not fit cleanly into the other standard categories.",
        "It includes mixed policy areas spanning compliance, student programs, operations, or administrative requirements.",
    ),
    (
        "Student Rights & Protections",
        "This domain includes legislation affecting student rights, protections, grievance processes, and academic policy standards.",
        "It covers nondiscrimination policy, procedural safeguards, enforcement expectations, and campus-level student support rules.",
    ),
    (
        "Tax Credits & Deductions",
        "This domain includes legislation related to tax credits, deductions, and financial incentives connected to education costs.",
        "It influences affordability mechanisms, eligibility definitions, and financial guidance associated with postsecondary participation.",
    ),
    (
        "Tuition & Fees",
        "This domain includes legislation affecting tuition structures, mandatory fees, program charges, and cost-setting constraints.",
        "It includes provisions related to fee caps, tuition regulation, and campus revenue planning requirements.",
    ),
    (
        "Workforce & Career Readiness",
        "This domain includes legislation tied to workforce pipelines, credentialing programs, internships, and employer partnerships.",
        "It connects higher education programs to statewide workforce priorities, training requirements, and outcomes tracking.",
    ),
)

# Policy-area label -> two-sentence description, in the same order as the table above.
POLICY_DOMAIN_SUMMARY = {
    domain_label: f"{opening_sentence} {closing_sentence}"
    for domain_label, opening_sentence, closing_sentence in _DOMAIN_SENTENCE_PAIRS
}
# Build the policy list from what's present in the filtered view, so it's always aligned with filters.
# Values are stringified and stripped; blank strings are turned into NaN and dropped so that
# an empty label never shows up as a selectable option.
policy_options = sorted(
    f[policy_col].dropna().astype(str).str.strip().replace({"": np.nan}).dropna().unique().tolist()
)
if not policy_options:
    st.info("No policy areas available in the current filtered view.")
else:
    sel_policy = st.selectbox("Policy area domain", policy_options, index=0)
    # The summary is shown as soon as a domain is selected (no button).
    # `summary` is always a string: either the mapped blurb or the fallback notice.
    has_summary = sel_policy in POLICY_DOMAIN_SUMMARY
    summary = POLICY_DOMAIN_SUMMARY.get(
        sel_policy,
        "No 2-sentence summary is available for this policy area yet.",
    )
    if has_summary:
        st.success(summary)
    else:
        # A missing summary is a notice, not a success: use the same neutral
        # info styling as the "no policy areas" message above.
        st.info(summary)
# Horizontal rule between the summary section and the export controls.
st.markdown("---")
st.caption("Download the filtered dataset used to build this dashboard:")
# Serialize the filtered frame `f` without its index column and hand the
# download button UTF-8 encoded bytes.
_filtered_csv_bytes = f.to_csv(index=False).encode("utf-8")
st.download_button(
    label="⬇️ Download filtered dashboard data (CSV)",
    data=_filtered_csv_bytes,
    file_name="uofi_legislation_filtered_2019_2026.csv",
    mime="text/csv",
)
# Footer credit line.
st.caption("Love Data Week 2026 • University of Illinois System • Streamlit (HF Spaces)")