"""Sync the static TTC GTFS files from the Toronto Open Data CKAN portal.

Looks up the current GTFS ZIP for ``PACKAGE_ID``, replaces the contents of
``STATIC_DIR`` with the extracted archive, and deletes the DuckDB file at
``DB_PATH``.
"""

import io
import os  # type: ignore
import shutil  # type: ignore
import zipfile  # type: ignore
from pathlib import Path  # type: ignore

import requests  # type: ignore

# Toronto Open Data CKAN API Constants
CKAN_BASE_URL = "https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/package_show"
PACKAGE_ID = "merged-gtfs-ttc-routes-and-schedules"
STATIC_DIR = str(Path(__file__).parent.parent / "static")
DB_PATH = str(Path(__file__).parent.parent / "src" / "ttc_gtfs.duckdb")

# Seconds to wait for the server to connect/respond. requests applies no
# timeout unless one is passed, so a stalled server would block forever.
REQUEST_TIMEOUT = 60


def get_latest_gtfs_url():
    """Query the CKAN API for the current download URL of the GTFS ZIP.

    Returns:
        The ``url`` of the first resource in the package whose ``format`` is
        "zip" (case-insensitive), or ``None`` when the response lists no such
        resource.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(
        CKAN_BASE_URL,
        params={"id": PACKAGE_ID},
        timeout=REQUEST_TIMEOUT,
    )
    # Fail on an HTTP error status instead of parsing an error page as data.
    response.raise_for_status()
    data = response.json()

    # Tolerate a payload without "result"/"resources" (e.g. an API error
    # body) by falling through to the "not found" result.
    resources = (data.get("result") or {}).get("resources") or []

    # Pick the first resource whose declared format is ZIP. The resource name
    # is not inspected.
    for resource in resources:
        resource_format = resource.get("format") or ""
        download_url = resource.get("url")
        if resource_format.lower() == "zip" and download_url:
            return download_url
    return None


def run_full_sync():
    """Download the latest GTFS ZIP and replace the local static files.

    The archive is downloaded and opened *before* anything on disk is
    removed, so a failed or corrupt download leaves the existing static
    directory and database untouched.

    Returns:
        ``True`` when the sync completed, ``False`` when no GTFS ZIP could be
        located through the API.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        zipfile.BadZipFile: if the downloaded payload is not a valid ZIP.
    """
    download_url = get_latest_gtfs_url()
    if not download_url:
        print("Could not find GTFS ZIP via API.")
        return False

    print(f"Found latest GTFS at: {download_url}")

    # 1. Download
    print("Downloading and extracting...")
    r = requests.get(download_url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    print(f"Downloaded {len(r.content):,} bytes")

    # Opening the archive validates it; this raises before any local data
    # has been deleted.
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        file_list = z.namelist()

        # 2. Clear old files (only now that a usable archive is in hand)
        if os.path.exists(STATIC_DIR):
            print(f"Clearing existing static directory: {STATIC_DIR}")
            shutil.rmtree(STATIC_DIR)
        os.makedirs(STATIC_DIR, exist_ok=True)

        # 3. Extract
        print(f"Extracting {len(file_list)} files to {STATIC_DIR}...")
        z.extractall(STATIC_DIR)

    print(f"✓ Extracted {len(os.listdir(STATIC_DIR))} files")

    # 4. Force DB rebuild by deleting the old DuckDB file
    if os.path.exists(DB_PATH):
        print(f"Deleting old database: {DB_PATH}")
        os.remove(DB_PATH)

    print("✓ Sync complete. Database will rebuild on next API call.")
    return True


if __name__ == "__main__":
    run_full_sync()