"""
utils/data_loader.py
--------------------
Shared data loading functions used across all pages.

Every loader is best-effort: a missing, unreadable, or malformed data file
yields an empty result instead of raising, so a page can still render.
"""

import json
import logging
import time
from pathlib import Path

import pandas as pd
import streamlit as st

logger = logging.getLogger(__name__)

DATA_DIR = Path("data")
BILLS_FILE = DATA_DIR / "known_bills_visualize.json"
SUMMARIES_FILE = DATA_DIR / "bill_summaries.json"
QUESTIONS_FILE = DATA_DIR / "bill_suggested_questions.json"
REPORTS_FILE = DATA_DIR / "bill_reports.json"
NEWSLETTER_DIR = DATA_DIR / "newsletter_drafts"
CHANGES_DIR = DATA_DIR / "weekly_changes"
CALENDAR_FILE = DATA_DIR / "bill_calendar.json"

# Columns of the bills DataFrame that are parsed into datetimes on load.
_DATE_COLUMNS = ("last_action_date", "lastUpdatedAt")

# Questions offered when a bill has no pre-generated suggestions.
_DEFAULT_QUESTIONS = (
    "What are the key definitions in this bill?",
    "What are the enforcement mechanisms?",
    "Who does this bill apply to?",
    "What are the compliance requirements?",
    "What penalties are specified?",
)


def _read_json(path: Path, default):
    """Return the parsed JSON content of *path*, or *default* on any failure.

    A missing file is not an error and silently yields *default*. Any other
    failure (I/O error, invalid JSON, bad encoding) is logged with its
    traceback and also yields *default*, so callers stay best-effort.
    """
    try:
        if path.exists():
            with path.open("r", encoding="utf-8") as f:
                return json.load(f)
    except Exception:
        # Deliberately broad: loaders must never crash a page, but the
        # failure should at least be visible in the logs.
        logger.warning("Could not load %s; using empty default", path, exc_info=True)
    return default


def _bill_key(bill_data: dict) -> str:
    """Build the ``<state>_<bill_number>`` key used by summaries and questions."""
    return f"{bill_data.get('state', '')}_{bill_data.get('bill_number', '')}"


@st.cache_data(show_spinner=False)
def load_bills() -> pd.DataFrame:
    """Load and process the main bills JSON into a DataFrame.

    Returns:
        One row per bill, with ``last_action_date`` and ``lastUpdatedAt``
        (when present) converted to datetimes; unparseable values become NaT.
        An empty DataFrame is returned if the file is missing or cannot be
        loaded (in the latter case an error is also shown via ``st.error``).
    """
    if not BILLS_FILE.exists():
        return pd.DataFrame()
    try:
        with BILLS_FILE.open("r", encoding="utf-8") as f:
            bills_data = json.load(f)
        df = pd.DataFrame(bills_data)
        for column in _DATE_COLUMNS:
            if column in df.columns:
                df[column] = pd.to_datetime(df[column], errors="coerce")
        return df
    except Exception as e:
        st.error(f"Error loading bills: {e}")
        return pd.DataFrame()


@st.cache_data(show_spinner=False)
def load_summaries() -> dict:
    """Load pre-generated bill summaries keyed by state_billnumber.

    Returns an empty dict if the file is missing or cannot be parsed.
    """
    return _read_json(SUMMARIES_FILE, {})


@st.cache_data(show_spinner=False)
def load_suggested_questions() -> dict:
    """Load pre-generated suggested questions keyed by state_billnumber.

    Returns an empty dict if the file is missing or cannot be parsed.
    """
    return _read_json(QUESTIONS_FILE, {})


@st.cache_data(show_spinner=False)
def load_reports() -> dict:
    """Load pre-generated bill reports keyed by bill_id.

    The file is read as a sequence of records, each providing ``bill_id``
    and ``report_markdown``. Keys keep whatever type the JSON stored.

    Returns an empty dict if the file is missing, cannot be parsed, or any
    record lacks one of the two fields.
    """
    data = _read_json(REPORTS_FILE, None)
    if data is None:
        return {}
    try:
        return {r["bill_id"]: r["report_markdown"] for r in data}
    except Exception:
        # Malformed records (missing keys, wrong container type, ...).
        logger.warning("Malformed records in %s; ignoring reports", REPORTS_FILE, exc_info=True)
        return {}


@st.cache_data(show_spinner=False)
def load_calendar() -> list:
    """Load pre-computed legislative calendar events.

    Returns an empty list if the file is missing or cannot be parsed.
    """
    return _read_json(CALENDAR_FILE, [])


def get_summary(bill_data: dict, summaries: dict) -> str:
    """Return the stored summary for a bill, or "" if none is usable.

    Args:
        bill_data: Bill record providing ``state`` and ``bill_number``.
        summaries: Mapping as returned by ``load_summaries``.

    A summary that is missing, empty, or starts with ``"ERROR:"`` is
    treated as absent.
    """
    entry = summaries.get(_bill_key(bill_data), {})
    summary = entry.get("summary", "") if isinstance(entry, dict) else ""
    if not summary or summary.startswith("ERROR:"):
        return ""
    return summary


def get_suggested_questions(bill_data: dict, questions: dict) -> list:
    """Return suggested questions for a bill, falling back to generic ones.

    Args:
        bill_data: Bill record providing ``state`` and ``bill_number``.
        questions: Mapping as returned by ``load_suggested_questions``.

    Returns:
        The bill's ``suggested_questions`` when present and non-empty,
        otherwise a fresh list of five generic questions.
    """
    entry = questions.get(_bill_key(bill_data), {})
    qs = entry.get("suggested_questions", []) if isinstance(entry, dict) else []
    if qs:
        return qs
    # Fresh copy so a caller mutating the result cannot alter the defaults.
    return list(_DEFAULT_QUESTIONS)


def get_report(bill_data: dict, reports: dict) -> str:
    """Return the stored report for a bill, or "" if none is usable.

    Args:
        bill_data: Bill record providing ``bill_id``.
        reports: Mapping as returned by ``load_reports``.

    The report is looked up by the string form of ``bill_id`` first and
    then by the raw value: ``load_reports`` keeps ids in the type the JSON
    stored, so numeric ids would never match a string-only lookup.
    A report that is missing, empty, or starts with ``"ERROR:"`` is
    treated as absent.
    """
    raw_id = bill_data.get("bill_id", "")
    report = reports.get(str(raw_id), "")
    if not report:
        try:
            report = reports.get(raw_id, "")
        except TypeError:
            # Unhashable id: it cannot be a key, so there is no report.
            report = ""
    if not report or str(report).startswith("ERROR:"):
        return ""
    return report


def get_last_updated(df: pd.DataFrame) -> str:
    """Describe how recently the bills data was updated.

    Args:
        df: Bills DataFrame; ``lastUpdatedAt`` is expected to hold datetimes
            as produced by ``load_bills``.

    Returns:
        ``"YYYY-MM-DD (Today)"`` or ``"YYYY-MM-DD (<n>d ago)"`` for the most
        recent ``lastUpdatedAt`` value, or ``"N/A"`` when the column is
        missing or holds no valid timestamps.
    """
    if "lastUpdatedAt" not in df.columns or df.empty:
        return "N/A"
    valid = df["lastUpdatedAt"].dropna()
    if valid.empty:
        return "N/A"
    most_recent = valid.max()
    # Compare in the timestamp's own timezone (naive if it has none) to
    # avoid mixing naive and aware datetimes.
    now = pd.Timestamp.now(tz=most_recent.tzinfo)
    # Clamp at zero: a timestamp slightly ahead of the local clock would
    # otherwise be reported as a negative age such as "-1d ago".
    days_ago = max((now - most_recent).days, 0)
    date_str = most_recent.strftime("%Y-%m-%d")
    ago_text = "Today" if days_ago == 0 else f"{days_ago}d ago"
    return f"{date_str} ({ago_text})"


def load_newsletters() -> dict:
    """Return {label: Path} for all newsletter drafts, newest first.

    Drafts are the ``newsletter_*.md`` files in ``NEWSLETTER_DIR``, ordered
    by descending path. Each label is ``"Week of <date part>"``, where the
    date part is the file stem without ``"newsletter_"``. Returns an empty
    dict if the directory does not exist.
    """
    if not NEWSLETTER_DIR.exists():
        return {}
    files = sorted(NEWSLETTER_DIR.glob("newsletter_*.md"), reverse=True)
    return {f"Week of {nf.stem.replace('newsletter_', '')}": nf for nf in files}