Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
File size: 1,392 Bytes
"""
sync_data.py
------------
Downloads the latest data files from the HuggingFace dataset repo
(VAILL/Legislation-Tracker-Data) into the local data/ directory.
Runs at container startup before Streamlit launches.
"""
import os
from pathlib import Path
from huggingface_hub import hf_hub_download
# Identifier of the Hugging Face repo the data files are pulled from
# (fetched with repo_type="dataset" by sync_data below).
REPO_ID = "VAILL/Legislation-Tracker-Data"

# Relative directory that receives the downloaded files; it is resolved
# against the process's current working directory.
DATA_DIR = Path("data")

# File names, relative to the repo root, that are fetched on every sync.
FILES_TO_SYNC = [
    "known_bills_visualize.json",
    "known_bills.json",
    "bill_summaries.json",
    "bill_suggested_questions.json",
    "bill_reports.json",
    "bill_cache.json",
    "users.json",
]
def sync_data():
    """Download every file in FILES_TO_SYNC from REPO_ID into DATA_DIR.

    Each file is fetched independently with ``hf_hub_download``. A failure
    for one file is logged as a warning and the remaining files are still
    attempted, so one missing or unreachable file does not abort the sync.
    Files that failed are listed in a summary line at the end.

    The access token is read from the ``HUGGINGFACE_HUB_TOKEN`` environment
    variable. An unset *or empty* variable is passed on as ``None``, which
    leaves credential resolution to huggingface_hub's own defaults.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    # os.getenv returns "" for a variable that is set but empty; forwarding
    # that would present an empty string as the token, so normalise to None.
    token = os.getenv("HUGGINGFACE_HUB_TOKEN") or None

    DATA_DIR.mkdir(parents=True, exist_ok=True)

    failed = []
    for filename in FILES_TO_SYNC:
        try:
            print(f"[sync_data] Downloading {filename} from {REPO_ID}...")
            local_path = hf_hub_download(
                repo_id=REPO_ID,
                filename=filename,
                repo_type="dataset",
                token=token,
                local_dir=str(DATA_DIR),
            )
            # Measure the file at the path the library reports instead of
            # re-deriving where it should have been written.
            size_mb = Path(local_path).stat().st_size / 1024 / 1024
            print(f"[sync_data] OK: {filename} ({size_mb:.1f} MB)")
        except Exception as e:
            # Deliberately broad: the sync is best-effort and must not stop
            # on the first file that cannot be fetched.
            failed.append(filename)
            print(f"[sync_data] WARN: Could not download {filename}: {e}")

    if failed:
        print(
            f"[sync_data] WARN: {len(failed)} of {len(FILES_TO_SYNC)} "
            f"file(s) were not synced: {', '.join(failed)}"
        )
    print("[sync_data] Data sync complete.")
# Script entry point: perform the sync only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    sync_data()
|