Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
File size: 4,876 Bytes
"""
utils/data_loader.py
--------------------
Shared data loading functions used across all pages.
"""
import json
import time
from pathlib import Path
import pandas as pd
import streamlit as st
# Root directory for the app's data files. This is a relative path, so it is
# resolved against the process's current working directory.
DATA_DIR = Path("data")
# JSON inputs read by the load_* functions in this module.
BILLS_FILE = DATA_DIR / "known_bills_visualize.json"
SUMMARIES_FILE = DATA_DIR / "bill_summaries.json"
QUESTIONS_FILE = DATA_DIR / "bill_suggested_questions.json"
REPORTS_FILE = DATA_DIR / "bill_reports.json"
# Directory scanned by load_newsletters() for "newsletter_*.md" drafts.
NEWSLETTER_DIR = DATA_DIR / "newsletter_drafts"
# NOTE(review): CHANGES_DIR is not referenced in the visible part of this
# module; presumably other pages import it - confirm before removing.
CHANGES_DIR = DATA_DIR / "weekly_changes"
CALENDAR_FILE = DATA_DIR / "bill_calendar.json"
@st.cache_data(show_spinner=False)
def load_bills() -> pd.DataFrame:
    """Read BILLS_FILE and return its contents as a DataFrame.

    The ``last_action_date`` and ``lastUpdatedAt`` columns, when present,
    are converted with ``pd.to_datetime(..., errors="coerce")``.

    Returns:
        The bills DataFrame. An empty DataFrame is returned when the file
        does not exist, or when loading fails (the failure is reported
        through ``st.error``).
    """
    if not BILLS_FILE.exists():
        return pd.DataFrame()

    try:
        with BILLS_FILE.open("r", encoding="utf-8") as handle:
            records = json.load(handle)

        frame = pd.DataFrame(records)
        # Both timestamp columns are optional and get the same treatment.
        for date_column in ("last_action_date", "lastUpdatedAt"):
            if date_column in frame.columns:
                frame[date_column] = pd.to_datetime(
                    frame[date_column], errors="coerce"
                )
        return frame
    except Exception as e:
        st.error(f"Error loading bills: {e}")
        return pd.DataFrame()
@st.cache_data(show_spinner=False)
def load_summaries() -> dict:
    """Return the parsed contents of SUMMARIES_FILE.

    The data is expected to be keyed by ``<state>_<bill_number>``.

    Returns:
        The decoded JSON, or an empty dict when the file is missing or any
        error occurs while reading or parsing it.
    """
    try:
        if not SUMMARIES_FILE.exists():
            return {}
        with open(SUMMARIES_FILE, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except Exception:
        # Best-effort load: every failure is swallowed and reported to the
        # caller only as an empty result.
        return {}
    return parsed
@st.cache_data(show_spinner=False)
def load_suggested_questions() -> dict:
    """Return the parsed contents of QUESTIONS_FILE.

    The data is expected to be keyed by ``<state>_<bill_number>``.

    Returns:
        The decoded JSON, or an empty dict when the file is missing or any
        error occurs while reading or parsing it.
    """
    try:
        if not QUESTIONS_FILE.exists():
            return {}
        with open(QUESTIONS_FILE, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except Exception:
        # Best-effort load: every failure is swallowed and reported to the
        # caller only as an empty result.
        return {}
    return parsed
@st.cache_data(show_spinner=False)
def load_reports() -> dict:
    """Load pre-generated bill reports as a ``{bill_id: report_markdown}`` map.

    REPORTS_FILE is expected to hold a JSON list of objects, each carrying a
    ``bill_id`` and a ``report_markdown`` field. Entries that are not
    objects, lack either field, or have an unhashable ``bill_id`` are skipped
    individually, so a single malformed record does not discard every other
    report.

    Returns:
        The mapping, with keys kept exactly as they appear in the JSON. An
        empty dict is returned when the file is missing, cannot be read or
        parsed, or does not contain a list.
    """
    try:
        if not REPORTS_FILE.exists():
            return {}
        with open(REPORTS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        # Deliberate best-effort: any read/parse failure degrades to an
        # empty result rather than propagating to the caller.
        return {}

    if not isinstance(data, list):
        return {}

    reports = {}
    for record in data:
        if not isinstance(record, dict):
            continue
        try:
            reports[record["bill_id"]] = record["report_markdown"]
        except (KeyError, TypeError):
            # KeyError: a required field is missing.
            # TypeError: bill_id cannot be used as a dict key.
            continue
    return reports
@st.cache_data(show_spinner=False)
def load_calendar() -> list:
    """Return the parsed contents of CALENDAR_FILE (calendar events).

    Returns:
        The decoded JSON, or an empty list when the file is missing or any
        error occurs while reading or parsing it.
    """
    try:
        if not CALENDAR_FILE.exists():
            return []
        with open(CALENDAR_FILE, "r", encoding="utf-8") as handle:
            events = json.load(handle)
    except Exception:
        # Best-effort load: every failure is swallowed and reported to the
        # caller only as an empty result.
        return []
    return events
def get_summary(bill_data: dict, summaries: dict) -> str:
    """Look up the summary text for one bill.

    Args:
        bill_data: Bill record; its ``state`` and ``bill_number`` values
            (empty string when absent) form the key ``<state>_<bill_number>``.
        summaries: Mapping from that key to an entry dict with a
            ``summary`` field.

    Returns:
        The summary text, or ``""`` when there is no entry, the entry is not
        a dict, the summary is empty, or it starts with ``"ERROR:"``.
    """
    state = bill_data.get('state', '')
    number = bill_data.get('bill_number', '')
    record = summaries.get(f"{state}_{number}", {})

    if not isinstance(record, dict):
        return ""

    text = record.get("summary", "")
    if text and not text.startswith("ERROR:"):
        return text
    return ""
def get_suggested_questions(bill_data: dict, questions: dict) -> list:
    """Return the suggested questions for one bill, with a generic fallback.

    Args:
        bill_data: Bill record; its ``state`` and ``bill_number`` values
            (empty string when absent) form the key ``<state>_<bill_number>``.
        questions: Mapping from that key to an entry dict with a
            ``suggested_questions`` field.

    Returns:
        The stored questions when the entry is a dict holding a non-empty
        value; otherwise a fresh list of five generic questions.
    """
    state = bill_data.get('state', '')
    number = bill_data.get('bill_number', '')
    record = questions.get(f"{state}_{number}", {})

    stored = []
    if isinstance(record, dict):
        stored = record.get("suggested_questions", [])

    # An empty or missing stored value falls through to the generic defaults.
    return stored or [
        "What are the key definitions in this bill?",
        "What are the enforcement mechanisms?",
        "Who does this bill apply to?",
        "What are the compliance requirements?",
        "What penalties are specified?",
    ]
def get_report(bill_data: dict, reports: dict) -> str:
    """Look up the pre-generated report for one bill.

    The lookup first uses ``str(bill_id)``. If that finds nothing and the
    original ``bill_id`` is not already a string, the raw value is tried as
    well, so a reports mapping keyed by integer ids (as JSON numbers decode)
    still matches.

    Args:
        bill_data: Bill record; its ``bill_id`` value identifies the report.
        reports: Mapping from bill id to report markdown.

    Returns:
        The report, or ``""`` when none is found, it is empty, or its text
        starts with ``"ERROR:"``.
    """
    raw_id = bill_data.get("bill_id", "")
    report = reports.get(str(raw_id), "")

    if not report and not isinstance(raw_id, str):
        try:
            report = reports.get(raw_id, "")
        except TypeError:
            # An unhashable bill_id cannot be a dict key, so there is no match.
            report = ""

    if not report or str(report).startswith("ERROR:"):
        return ""
    return report
def get_last_updated(df: pd.DataFrame) -> str:
    """Describe how recently the bill data was updated.

    Uses the most recent non-null value of the ``lastUpdatedAt`` column.

    Args:
        df: Bills DataFrame. ``lastUpdatedAt`` is expected to hold pandas
            timestamps (for example after ``pd.to_datetime``).

    Returns:
        ``"N/A"`` when the frame is empty, the column is missing, or it has
        no non-null value. Otherwise an HTML fragment of the form
        ``YYYY-MM-DD <span style="color:#CFB991;">(Today)</span>`` or
        ``... (Nd ago)``.
    """
    if "lastUpdatedAt" not in df.columns or df.empty:
        return "N/A"

    valid = df["lastUpdatedAt"].dropna()
    if valid.empty:
        return "N/A"

    most_recent = valid.max()
    # "Now" must share the timestamp's timezone awareness; subtracting a
    # naive timestamp from an aware one (or vice versa) raises.
    now = pd.Timestamp.now(tz=most_recent.tzinfo)
    # A timestamp in the future would yield a negative day count and render
    # as "-1d ago", so clamp it and report such values as "Today".
    days_ago = max((now - most_recent).days, 0)

    date_str = most_recent.strftime("%Y-%m-%d")
    ago_text = "Today" if days_ago == 0 else f"{days_ago}d ago"
    # The span colour is fixed; no freshness-dependent colour is applied.
    return f'{date_str} <span style="color:#CFB991;">({ago_text})</span>'
def load_newsletters() -> dict:
    """Map a display label to the path of each newsletter draft.

    Drafts are the ``newsletter_*.md`` files in NEWSLETTER_DIR, ordered by
    path in descending order (newest first when the file names embed
    sortable dates). Each label is ``"Week of <stem without 'newsletter_'>"``.

    Returns:
        ``{label: Path}``, or an empty dict when NEWSLETTER_DIR does not
        exist.
    """
    if not NEWSLETTER_DIR.exists():
        return {}

    drafts = sorted(NEWSLETTER_DIR.glob("newsletter_*.md"), reverse=True)
    return {
        f"Week of {draft.stem.replace('newsletter_', '')}": draft
        for draft in drafts
    }
|