Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
"""
sync_data.py
------------
Downloads the latest data files from the HuggingFace dataset repo
(VAILL/Legislation-Tracker-Data) into the local data/ directory.
Runs at container startup before Streamlit launches.
"""
| import os | |
| from pathlib import Path | |
| from huggingface_hub import hf_hub_download | |
# Identifier of the Hugging Face dataset repository the files are pulled from.
REPO_ID = "VAILL/Legislation-Tracker-Data"

# Local directory that receives the downloaded files (relative path, so it is
# resolved against the process's current working directory).
DATA_DIR = Path("data")

# File names requested from REPO_ID, one download per entry, in this order.
FILES_TO_SYNC = [
    "known_bills_visualize.json",
    "known_bills.json",
    "bill_summaries.json",
    "bill_suggested_questions.json",
    "bill_reports.json",
    "bill_cache.json",
    "users.json",
]
def sync_data():
    """Download every file listed in FILES_TO_SYNC from REPO_ID into DATA_DIR.

    DATA_DIR is created first if it does not exist. The access token is read
    from the HUGGINGFACE_HUB_TOKEN environment variable and passed through to
    ``hf_hub_download`` (it is ``None`` when the variable is unset).

    The sync is best-effort per file: any ``Exception`` raised while
    downloading or sizing a file is reported as a ``WARN`` line on stdout and
    the loop continues with the next file. Progress and the final completion
    message are also written to stdout. Returns ``None``.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    hub_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
    bytes_per_mb = 1024 * 1024

    for name in FILES_TO_SYNC:
        try:
            print(f"[sync_data] Downloading {name} from {REPO_ID}...")
            hf_hub_download(
                repo_id=REPO_ID,
                repo_type="dataset",
                filename=name,
                local_dir=str(DATA_DIR),
                token=hub_token,
            )
            # Report the size of the file at its expected local location; a
            # missing file here raises and is reported by the handler below.
            local_file = DATA_DIR / name
            megabytes = local_file.stat().st_size / bytes_per_mb
            print(f"[sync_data] OK: {name} ({megabytes:.1f} MB)")
        except Exception as err:
            # Deliberately broad: one failed file must not abort the others.
            print(f"[sync_data] WARN: Could not download {name}: {err}")

    print("[sync_data] Data sync complete.")
# Run the sync only when this file is executed as a script, not on import.
if __name__ == "__main__":
    sync_data()