# Spaces: Running on CPU Upgrade
"""
utils/data_loader.py
--------------------
Shared data loading functions used across all pages.
"""
import json
import logging
import time
from pathlib import Path

import pandas as pd
import streamlit as st
# Root folder (relative to the working directory) that holds every data
# artefact read by this module.
DATA_DIR = Path("data")

# Paths of the individual artefacts, all located directly beneath DATA_DIR.
BILLS_FILE = DATA_DIR.joinpath("known_bills_visualize.json")
SUMMARIES_FILE = DATA_DIR.joinpath("bill_summaries.json")
QUESTIONS_FILE = DATA_DIR.joinpath("bill_suggested_questions.json")
REPORTS_FILE = DATA_DIR.joinpath("bill_reports.json")
CALENDAR_FILE = DATA_DIR.joinpath("bill_calendar.json")
NEWSLETTER_DIR = DATA_DIR.joinpath("newsletter_drafts")
CHANGES_DIR = DATA_DIR.joinpath("weekly_changes")
def load_bills() -> pd.DataFrame:
    """Read the main bills JSON file and return it as a DataFrame.

    The ``last_action_date`` and ``lastUpdatedAt`` columns, when present, are
    converted with ``pd.to_datetime(..., errors="coerce")`` so unparseable
    values become ``NaT``.

    Returns:
        The bills DataFrame, or an empty DataFrame when BILLS_FILE does not
        exist or when loading fails (the failure is shown via ``st.error``).
    """
    if not BILLS_FILE.exists():
        return pd.DataFrame()

    try:
        with BILLS_FILE.open("r", encoding="utf-8") as handle:
            raw_records = json.load(handle)
        frame = pd.DataFrame(raw_records)

        # Both timestamp columns get the same lenient datetime conversion.
        for column in ("last_action_date", "lastUpdatedAt"):
            if column in frame.columns:
                frame[column] = pd.to_datetime(frame[column], errors="coerce")
        return frame
    except Exception as e:
        # UI boundary: surface the problem on the page instead of crashing it.
        st.error(f"Error loading bills: {e}")
        return pd.DataFrame()
def load_summaries() -> dict:
    """Load pre-generated bill summaries keyed by state_billnumber.

    Loading is best-effort: any problem yields an empty dict so callers can
    continue without summaries.

    Returns:
        The top-level JSON object stored in SUMMARIES_FILE, or ``{}`` when the
        file is missing, cannot be read or parsed, or does not hold a JSON
        object.
    """
    try:
        with SUMMARIES_FILE.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A missing file simply means there are no summaries.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logging.getLogger(__name__).warning(
            "Could not load summaries from %s: %s", SUMMARIES_FILE, exc
        )
        return {}

    # Honour the declared return type: callers use dict methods on the result.
    return data if isinstance(data, dict) else {}
def load_suggested_questions() -> dict:
    """Load pre-generated suggested questions keyed by state_billnumber.

    Loading is best-effort: any problem yields an empty dict so callers can
    fall back to their default questions.

    Returns:
        The top-level JSON object stored in QUESTIONS_FILE, or ``{}`` when the
        file is missing, cannot be read or parsed, or does not hold a JSON
        object.
    """
    try:
        with QUESTIONS_FILE.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A missing file simply means there are no stored questions.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logging.getLogger(__name__).warning(
            "Could not load suggested questions from %s: %s", QUESTIONS_FILE, exc
        )
        return {}

    # Honour the declared return type: callers use dict methods on the result.
    return data if isinstance(data, dict) else {}
def load_reports() -> dict:
    """Load pre-generated bill reports keyed by bill_id.

    REPORTS_FILE is expected to contain a JSON list of objects, each with a
    ``bill_id`` and a ``report_markdown`` field. Loading is best-effort.
    Malformed individual records are skipped rather than discarding every
    report.

    Returns:
        A ``{bill_id: report_markdown}`` mapping (keys keep the type they have
        in the JSON), or ``{}`` when the file is missing, cannot be read or
        parsed, or does not hold a JSON list.
    """
    try:
        with REPORTS_FILE.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A missing file simply means there are no reports.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logging.getLogger(__name__).warning(
            "Could not load reports from %s: %s", REPORTS_FILE, exc
        )
        return {}

    if not isinstance(data, list):
        return {}

    reports = {}
    for record in data:
        if not isinstance(record, dict):
            continue
        try:
            reports[record["bill_id"]] = record["report_markdown"]
        except (KeyError, TypeError):
            # Missing field, or a bill_id that cannot be used as a dict key:
            # drop this record only and keep the remaining reports.
            continue
    return reports
def load_calendar() -> list:
    """Load pre-computed legislative calendar events.

    Loading is best-effort: any problem yields an empty list so callers can
    continue without calendar events.

    Returns:
        The top-level JSON list stored in CALENDAR_FILE, or ``[]`` when the
        file is missing, cannot be read or parsed, or does not hold a JSON
        list.
    """
    try:
        with CALENDAR_FILE.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A missing file simply means there are no calendar events.
        return []
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logging.getLogger(__name__).warning(
            "Could not load calendar from %s: %s", CALENDAR_FILE, exc
        )
        return []

    # Honour the declared return type.
    return data if isinstance(data, list) else []
def get_summary(bill_data: dict, summaries: dict) -> str:
    """Return the stored summary text for a bill, or ``""`` if none is usable.

    Args:
        bill_data: Bill record; its ``state`` and ``bill_number`` values are
            joined as ``"<state>_<bill_number>"`` to form the lookup key.
        summaries: Mapping from that key to an entry dict holding a
            ``summary`` field.

    Returns:
        The summary text. An empty string is returned when the entry is
        missing, is not a dict, has an empty summary, or the summary starts
        with ``"ERROR:"``.
    """
    state = bill_data.get("state", "")
    number = bill_data.get("bill_number", "")
    record = summaries.get(f"{state}_{number}", {})
    if not isinstance(record, dict):
        return ""

    text = record.get("summary", "")
    if text and not text.startswith("ERROR:"):
        return text
    return ""
def get_suggested_questions(bill_data: dict, questions: dict) -> list:
    """Return the suggested questions for a bill, or a generic default list.

    Args:
        bill_data: Bill record; its ``state`` and ``bill_number`` values are
            joined as ``"<state>_<bill_number>"`` to form the lookup key.
        questions: Mapping from that key to an entry dict holding a
            ``suggested_questions`` field.

    Returns:
        The stored questions when the entry is a dict with a non-empty
        ``suggested_questions`` value; otherwise five generic questions.
    """
    state = bill_data.get("state", "")
    number = bill_data.get("bill_number", "")
    record = questions.get(f"{state}_{number}", {})

    stored = []
    if isinstance(record, dict):
        stored = record.get("suggested_questions", [])

    # Fall back to the generic prompts whenever nothing usable is stored.
    return stored or [
        "What are the key definitions in this bill?",
        "What are the enforcement mechanisms?",
        "Who does this bill apply to?",
        "What are the compliance requirements?",
        "What penalties are specified?",
    ]
def get_report(bill_data: dict, reports: dict) -> str:
    """Return the stored report for a bill, or ``""`` if none is usable.

    Args:
        bill_data: Bill record; its ``bill_id`` value identifies the report.
        reports: Mapping from bill id to report text.

    Returns:
        The report. An empty string is returned when no report is stored, the
        report is empty, or its text starts with ``"ERROR:"``.
    """
    raw_id = bill_data.get("bill_id", "")
    report = reports.get(str(raw_id), "")

    if not report and not isinstance(raw_id, str):
        # The mapping may be keyed by the id exactly as it appears in the JSON
        # (e.g. an int), in which case the string form above never matches.
        try:
            report = reports.get(raw_id, "")
        except TypeError:
            # Unhashable id: it cannot be a key, so there is nothing to find.
            report = ""

    if not report or str(report).startswith("ERROR:"):
        return ""
    return report
def get_last_updated(df: pd.DataFrame) -> str:
    """Return an HTML snippet describing the newest ``lastUpdatedAt`` value.

    Args:
        df: Bills DataFrame. Its ``lastUpdatedAt`` column, when present, is
            expected to hold datetime values.

    Returns:
        ``"N/A"`` when the column is absent, the frame is empty, or every
        value is missing. Otherwise ``"YYYY-MM-DD <span ...>(AGE)</span>"``,
        where AGE is ``Today`` or ``<n>d ago`` and the span always uses the
        fixed colour ``#CFB991``.
    """
    if "lastUpdatedAt" not in df.columns or df.empty:
        return "N/A"

    valid = df["lastUpdatedAt"].dropna()
    if valid.empty:
        return "N/A"

    most_recent = valid.max()
    # "Now" must share the data's tz-awareness (tzinfo is None for naive
    # values), otherwise the subtraction below raises.
    now = pd.Timestamp.now(tz=most_recent.tzinfo)
    # A timestamp slightly in the future (clock skew, timezone mismatch) gives
    # a negative day count; clamp it so the label never reads "-1d ago".
    days_ago = max((now - most_recent).days, 0)

    date_str = most_recent.strftime("%Y-%m-%d")
    ago_text = "Today" if days_ago == 0 else f"{days_ago}d ago"
    return f'{date_str} <span style="color:#CFB991;">({ago_text})</span>'
def load_newsletters() -> dict:
    """Map a display label to the path of each newsletter draft.

    Files matching ``newsletter_*.md`` in NEWSLETTER_DIR are taken in
    descending path order. This is newest first provided the date part of the
    file name sorts chronologically (presumably an ISO date; confirm against
    the files).

    Returns:
        ``{"Week of <date_part>": Path}``, where ``<date_part>`` is the file
        stem with ``"newsletter_"`` removed. ``{}`` is returned when the
        directory does not exist.
    """
    if not NEWSLETTER_DIR.exists():
        return {}

    drafts = sorted(NEWSLETTER_DIR.glob("newsletter_*.md"), reverse=True)
    return {
        "Week of " + draft.stem.replace("newsletter_", ""): draft
        for draft in drafts
    }