Spaces:

VAILL
/

legislation-tracker

Running on CPU Upgrade

App Files Files Community

legislation-tracker / utils /data_loader.py

ramanna

Upload utils/data_loader.py with huggingface_hub

0f65484 verified 3 days ago

raw

history blame contribute delete

4.88 kB

	"""
	utils/data_loader.py
	--------------------
	Shared data loading functions used across all pages.
	"""

	import json
	import time
	from pathlib import Path

	import pandas as pd
	import streamlit as st


	# Root folder for the data artifacts; a relative path, so it is resolved
	# against the process's current working directory.
	DATA_DIR = Path("data")
	# Individual JSON files under DATA_DIR.
	BILLS_FILE = DATA_DIR.joinpath("known_bills_visualize.json")
	SUMMARIES_FILE = DATA_DIR.joinpath("bill_summaries.json")
	QUESTIONS_FILE = DATA_DIR.joinpath("bill_suggested_questions.json")
	REPORTS_FILE = DATA_DIR.joinpath("bill_reports.json")
	CALENDAR_FILE = DATA_DIR.joinpath("bill_calendar.json")
	# Sub-directories under DATA_DIR.
	NEWSLETTER_DIR = DATA_DIR.joinpath("newsletter_drafts")
	CHANGES_DIR = DATA_DIR.joinpath("weekly_changes")


	@st.cache_data(show_spinner=False)
	def load_bills() -> pd.DataFrame:
	    """Read the main bills JSON file into a DataFrame.

	    The ``last_action_date`` and ``lastUpdatedAt`` columns, when present, are
	    converted to datetimes; values that cannot be parsed become ``NaT``.

	    Returns:
	        The bills table, or an empty DataFrame when BILLS_FILE does not exist
	        or when reading/parsing fails (the failure is shown via ``st.error``).
	    """
	    if BILLS_FILE.exists():
	        try:
	            with BILLS_FILE.open("r", encoding="utf-8") as handle:
	                frame = pd.DataFrame(json.load(handle))
	            for date_col in ("last_action_date", "lastUpdatedAt"):
	                if date_col in frame.columns:
	                    frame[date_col] = pd.to_datetime(frame[date_col], errors="coerce")
	            return frame
	        except Exception as exc:
	            st.error(f"Error loading bills: {exc}")
	    # Reached when the file is missing or loading failed.
	    return pd.DataFrame()


	@st.cache_data(show_spinner=False)
	def load_summaries() -> dict:
	    """Load pre-generated bill summaries keyed by state_billnumber.

	    Loading is best-effort: problems with the file yield an empty mapping
	    rather than an exception.

	    Returns:
	        The parsed JSON object from SUMMARIES_FILE, or ``{}`` when the file
	        is missing, unreadable, not valid JSON, or its top-level value is
	        not a JSON object.
	    """
	    try:
	        with SUMMARIES_FILE.open("r", encoding="utf-8") as f:
	            data = json.load(f)
	    except (OSError, ValueError):
	        # OSError: file missing or unreadable. ValueError: malformed JSON or
	        # undecodable bytes (JSONDecodeError and UnicodeDecodeError are both
	        # ValueError subclasses). Anything else is a real bug and propagates.
	        return {}
	    # Consumers call dict methods on the result (see get_summary), so a
	    # top-level list or scalar is treated the same as "no data".
	    return data if isinstance(data, dict) else {}


	@st.cache_data(show_spinner=False)
	def load_suggested_questions() -> dict:
	    """Load pre-generated suggested questions keyed by state_billnumber.

	    Loading is best-effort: problems with the file yield an empty mapping
	    rather than an exception.

	    Returns:
	        The parsed JSON object from QUESTIONS_FILE, or ``{}`` when the file
	        is missing, unreadable, not valid JSON, or its top-level value is
	        not a JSON object.
	    """
	    try:
	        with QUESTIONS_FILE.open("r", encoding="utf-8") as f:
	            data = json.load(f)
	    except (OSError, ValueError):
	        # OSError: file missing or unreadable. ValueError: malformed JSON or
	        # undecodable bytes (JSONDecodeError and UnicodeDecodeError are both
	        # ValueError subclasses). Anything else is a real bug and propagates.
	        return {}
	    # Consumers call dict methods on the result (see
	    # get_suggested_questions), so a top-level list or scalar is treated
	    # the same as "no data".
	    return data if isinstance(data, dict) else {}


	@st.cache_data(show_spinner=False)
	def load_reports() -> dict:
	    """Load pre-generated bill reports keyed by bill_id.

	    REPORTS_FILE is read as a JSON list of records, each carrying a
	    ``bill_id`` and a ``report_markdown`` field. Loading is best-effort:
	    file-level problems yield an empty mapping, and individual records that
	    are malformed are skipped so they cannot discard the valid ones.

	    Returns:
	        ``{bill_id: report_markdown}`` for every well-formed record, or
	        ``{}`` when the file is missing, unreadable, not valid JSON, or its
	        top-level value is not a list.
	    """
	    try:
	        with REPORTS_FILE.open("r", encoding="utf-8") as f:
	            data = json.load(f)
	    except (OSError, ValueError):
	        # OSError: file missing or unreadable. ValueError: malformed JSON or
	        # undecodable bytes. Anything else is a real bug and propagates.
	        return {}
	    if not isinstance(data, list):
	        return {}
	    reports = {}
	    for record in data:
	        if not isinstance(record, dict):
	            continue
	        try:
	            reports[record["bill_id"]] = record["report_markdown"]
	        except (KeyError, TypeError):
	            # KeyError: a required field is absent. TypeError: the id is an
	            # unhashable JSON value (list/object). Skip just this record.
	            continue
	    return reports


	@st.cache_data(show_spinner=False)
	def load_calendar() -> list:
	    """Load pre-computed legislative calendar events.

	    Loading is best-effort: a missing, unreadable, or malformed file yields
	    an empty list rather than an exception.

	    Returns:
	        The parsed JSON content of CALENDAR_FILE, or ``[]`` on failure.
	        NOTE(review): the top-level JSON value is returned unchanged and is
	        assumed to be a list -- confirm against whatever writes the file.
	    """
	    try:
	        with CALENDAR_FILE.open("r", encoding="utf-8") as f:
	            return json.load(f)
	    except (OSError, ValueError):
	        # OSError: file missing or unreadable. ValueError: malformed JSON or
	        # undecodable bytes (JSONDecodeError and UnicodeDecodeError are both
	        # ValueError subclasses). Anything else is a real bug and propagates.
	        return []


	def get_summary(bill_data: dict, summaries: dict) -> str:
	    """Return the stored summary text for a bill, or "" if none is usable.

	    The lookup key is ``"<state>_<bill_number>"`` built from ``bill_data``
	    (missing fields contribute an empty string). An entry that is not a
	    dict, has an empty summary, or whose summary starts with ``"ERROR:"``
	    results in an empty string.
	    """
	    state = bill_data.get("state", "")
	    number = bill_data.get("bill_number", "")
	    record = summaries.get(f"{state}_{number}", {})
	    if not isinstance(record, dict):
	        return ""
	    text = record.get("summary", "")
	    # Summaries beginning with "ERROR:" are treated as absent.
	    if text and not text.startswith("ERROR:"):
	        return text
	    return ""


	def get_suggested_questions(bill_data: dict, questions: dict) -> list:
	    """Return the suggested questions for a bill, with generic fallbacks.

	    The lookup key is ``"<state>_<bill_number>"`` built from ``bill_data``
	    (missing fields contribute an empty string). When the entry is not a
	    dict or holds no questions, a fixed list of five generic questions is
	    returned instead.
	    """
	    lookup_key = "{}_{}".format(
	        bill_data.get("state", ""), bill_data.get("bill_number", "")
	    )
	    record = questions.get(lookup_key, {})
	    stored = record.get("suggested_questions", []) if isinstance(record, dict) else []
	    # An empty/absent stored list falls through to the generic defaults.
	    return stored or [
	        "What are the key definitions in this bill?",
	        "What are the enforcement mechanisms?",
	        "Who does this bill apply to?",
	        "What are the compliance requirements?",
	        "What penalties are specified?",
	    ]


	def get_report(bill_data: dict, reports: dict) -> str:
	    """Return the stored report for a bill, or "" if none is usable.

	    The bill's ``bill_id`` is converted to a string before the lookup, so
	    ``reports`` is searched by string key. A missing or empty report, or one
	    whose text starts with ``"ERROR:"``, results in an empty string.
	    """
	    lookup_id = str(bill_data.get("bill_id", ""))
	    text = reports.get(lookup_id, "")
	    usable = bool(text) and not str(text).startswith("ERROR:")
	    return text if usable else ""


	def get_last_updated(df: pd.DataFrame) -> str:
	    """Describe the newest ``lastUpdatedAt`` value as a small HTML snippet.

	    Args:
	        df: Table whose ``lastUpdatedAt`` column, if present, holds datetime
	            values (``load_bills`` converts that column with
	            ``pd.to_datetime``).

	    Returns:
	        ``"N/A"`` when the column is absent or contains no non-null values.
	        Otherwise ``"YYYY-MM-DD <span ...>(Today)</span>"`` when the newest
	        timestamp is less than one full day old (or lies in the future), or
	        ``"YYYY-MM-DD <span ...>(<n>d ago)</span>"`` for older data. The
	        age text is always rendered in the fixed color ``#CFB991``.
	    """
	    if "lastUpdatedAt" not in df.columns:
	        return "N/A"
	    timestamps = df["lastUpdatedAt"].dropna()
	    if timestamps.empty:
	        return "N/A"
	    most_recent = timestamps.max()
	    # Use a "now" with the same timezone-awareness as the data; naive and
	    # aware timestamps cannot be subtracted from each other.
	    now = pd.Timestamp.now(tz=most_recent.tzinfo)
	    # A timestamp slightly ahead of the local clock (clock skew, or naive
	    # values written in another timezone) gives a negative day count;
	    # clamp it so the label never reads "-1d ago".
	    days_ago = max((now - most_recent).days, 0)
	    date_str = most_recent.strftime("%Y-%m-%d")
	    ago_text = "Today" if days_ago == 0 else f"{days_ago}d ago"
	    return f'{date_str} <span style="color:#CFB991;">({ago_text})</span>'


	def load_newsletters() -> dict:
	    """Map a display label to the path of each newsletter draft.

	    Files matching ``newsletter_*.md`` in NEWSLETTER_DIR are taken in
	    descending path order, which puts the newest first when the part after
	    ``newsletter_`` sorts chronologically. Each label is ``"Week of "``
	    followed by the file stem with ``"newsletter_"`` removed.

	    Returns:
	        ``{label: Path}``, or ``{}`` when NEWSLETTER_DIR does not exist.
	    """
	    if not NEWSLETTER_DIR.exists():
	        return {}
	    drafts = list(NEWSLETTER_DIR.glob("newsletter_*.md"))
	    drafts.sort(reverse=True)
	    return {
	        "Week of " + draft.stem.replace("newsletter_", ""): draft
	        for draft in drafts
	    }