Spaces:
Running
Running
| import requests # type: ignore | |
| import zipfile # type: ignore | |
| import io | |
| import os # type: ignore | |
| import shutil # type: ignore | |
| from pathlib import Path # type: ignore | |
# --- Toronto Open Data (CKAN) endpoint and dataset identifier ---
# `package_show` returns the metadata (including resources) of one package.
CKAN_BASE_URL = "https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/package_show"
PACKAGE_ID = "merged-gtfs-ttc-routes-and-schedules"

# --- Local filesystem targets ---
# Both paths are anchored two levels above this file and kept as plain strings.
_PROJECT_ROOT = Path(__file__).parent.parent
STATIC_DIR = str(_PROJECT_ROOT.joinpath("static"))
DB_PATH = str(_PROJECT_ROOT.joinpath("src", "ttc_gtfs.duckdb"))
def get_latest_gtfs_url():
    """Query the CKAN API for the download URL of the current GTFS ZIP.

    Requests the package identified by ``PACKAGE_ID`` from ``CKAN_BASE_URL``
    and returns the URL of the first listed resource whose format is ZIP.

    Returns:
        str | None: The resource URL, or ``None`` when the package lists no
        ZIP resource with a usable URL.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            API answers with an HTTP error status.
    """
    params = {"id": PACKAGE_ID}
    # Bound the wait so a stalled connection cannot hang the sync forever.
    response = requests.get(CKAN_BASE_URL, params=params, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error payload.
    response.raise_for_status()
    data = response.json()

    # The first resource published in ZIP format is taken as the GTFS bundle.
    # "format" and "url" are read defensively in case a resource omits them
    # or carries a null value.
    for resource in data["result"]["resources"]:
        resource_format = (resource.get("format") or "").lower()
        if resource_format == "zip" and resource.get("url"):
            return resource["url"]
    return None
def run_full_sync():
    """Download the latest GTFS ZIP and replace the local static files.

    The archive is downloaded and extracted into a sibling staging directory
    first. The existing ``STATIC_DIR`` is only removed once extraction has
    succeeded, so a failed download or a corrupt archive leaves the previous
    data in place. Finally the DuckDB file at ``DB_PATH`` is deleted, if
    present, so that stale data is not reused.

    Returns:
        bool: ``True`` when the sync completed, ``False`` when no GTFS ZIP
        URL could be found via the API.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        zipfile.BadZipFile: If the downloaded payload is not a valid ZIP.
    """
    download_url = get_latest_gtfs_url()
    if not download_url:
        print("Could not find GTFS ZIP via API.")
        return False
    print(f"Found latest GTFS at: {download_url}")

    # 1. Download before touching anything on disk.
    print("Downloading and extracting...")
    r = requests.get(download_url, timeout=60)
    # An HTTP error page must not be mistaken for the archive.
    r.raise_for_status()
    print(f"Downloaded {len(r.content):,} bytes")

    # 2. Extract into a staging directory next to STATIC_DIR so the final
    #    swap is a rename within the same parent directory.
    staging_dir = f"{STATIC_DIR}.staging"
    shutil.rmtree(staging_dir, ignore_errors=True)  # leftover from a failed run
    os.makedirs(staging_dir)
    try:
        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            file_list = z.namelist()
            print(f"Extracting {len(file_list)} files to {STATIC_DIR}...")
            z.extractall(staging_dir)
    except Exception:
        # Do not leave a half-extracted staging directory behind.
        shutil.rmtree(staging_dir, ignore_errors=True)
        raise

    # 3. Only now drop the old files and move the fresh ones into place.
    if os.path.exists(STATIC_DIR):
        print(f"Clearing existing static directory: {STATIC_DIR}")
        shutil.rmtree(STATIC_DIR)
    os.replace(staging_dir, STATIC_DIR)
    print(f"✓ Extracted {len(os.listdir(STATIC_DIR))} files")

    # 4. Force DB rebuild by deleting the old DuckDB file.
    if os.path.exists(DB_PATH):
        print(f"Deleting old database: {DB_PATH}")
        os.remove(DB_PATH)

    print("✓ Sync complete. Database will rebuild on next API call.")
    return True
# Script entry point: run one full sync when this file is executed directly.
# The boolean result of run_full_sync() is not used here.
if __name__ == "__main__":
    run_full_sync()