Spaces:
Running
Running
Upload update_static.py
Browse files- api/update_static.py +41 -52
api/update_static.py
CHANGED
|
@@ -1,59 +1,48 @@
|
|
| 1 |
import requests # type: ignore
|
| 2 |
-
import zipfile
|
| 3 |
import io
|
| 4 |
-
import os
|
| 5 |
-
import shutil
|
| 6 |
-
from pathlib import Path
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
-
def
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
def
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
print("Could not find GTFS ZIP via API.")
|
| 30 |
-
return False
|
| 31 |
-
|
| 32 |
-
print(f"Found latest GTFS at: {download_url}")
|
| 33 |
-
|
| 34 |
-
# 1. Clear old files
|
| 35 |
-
if os.path.exists(STATIC_DIR):
|
| 36 |
-
print(f"Clearing existing static directory: {STATIC_DIR}")
|
| 37 |
-
shutil.rmtree(STATIC_DIR)
|
| 38 |
-
os.makedirs(STATIC_DIR)
|
| 39 |
-
|
| 40 |
-
# 2. Download and Extract
|
| 41 |
-
print("Downloading and extracting...")
|
| 42 |
-
r = requests.get(download_url)
|
| 43 |
-
print(f"Downloaded {len(r.content):,} bytes")
|
| 44 |
-
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
|
| 45 |
-
file_list = z.namelist()
|
| 46 |
-
print(f"Extracting {len(file_list)} files to {STATIC_DIR}...")
|
| 47 |
-
z.extractall(STATIC_DIR)
|
| 48 |
-
print(f"✓ Extracted {len(os.listdir(STATIC_DIR))} files")
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
os.remove(DB_PATH)
|
| 54 |
-
|
| 55 |
-
print("✓ Sync complete. Database will rebuild on next API call.")
|
| 56 |
-
return True
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import requests # type: ignore
|
| 2 |
+
import zipfile
|
| 3 |
import io
|
| 4 |
+
import os
|
| 5 |
+
import shutil
|
| 6 |
+
from pathlib import Path
|
| 7 |
|
| 8 |
+
class GTFSSyncManager:
    """Keeps the local TTC GTFS static bundle in sync with Toronto's CKAN open-data portal.

    Workflow: `get_remote_metadata()` asks CKAN for the latest ZIP resource and its
    modification date; `perform_full_sync(url)` downloads that ZIP, replaces the
    extracted static files, and deletes the local DuckDB file so the next API call
    rebuilds it from the fresh data.
    """

    def __init__(self):
        # CKAN "package_show" endpoint for the City of Toronto open-data portal.
        self.CKAN_BASE_URL = "https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/package_show"
        # Dataset (package) id of the merged TTC routes-and-schedules GTFS bundle.
        self.PACKAGE_ID = "merged-gtfs-ttc-routes-and-schedules"
        # Extracted GTFS text files live here; cleared and repopulated on each sync.
        self.STATIC_DIR = Path(__file__).parent.parent / "static"
        # DuckDB built from the static files; deleted on sync to force a rebuild.
        self.DB_PATH = Path(__file__).parent.parent / "src" / "ttc_gtfs.duckdb"

    def get_remote_metadata(self):
        """Queries CKAN API for the latest ZIP URL and its modification date.

        Returns:
            dict: {"url": <str or None if no ZIP resource found>,
                   "updated_at": <CKAN metadata_modified timestamp string>},
            or None if the request or response parsing fails.
        """
        try:
            params = {"id": self.PACKAGE_ID}
            response = requests.get(self.CKAN_BASE_URL, params=params, timeout=10)
            # Fail fast on HTTP errors instead of JSON-decoding an error page.
            response.raise_for_status()
            data = response.json()

            # Extract the metadata_modified date and the resource URL
            last_modified = data["result"]["metadata_modified"]
            resources = data["result"]["resources"]
            # First resource whose declared format is "zip" (case-insensitive).
            download_url = next((r["url"] for r in resources if r["format"].lower() == "zip"), None)

            return {"url": download_url, "updated_at": last_modified}
        except Exception as e:
            # Boundary catch: callers treat None as "metadata unavailable".
            print(f"Metadata fetch failed: {e}")
            return None

    def perform_full_sync(self, download_url):
        """Downloads, extracts, and clears the old DB to force a rebuild.

        Args:
            download_url: URL of the GTFS ZIP bundle (from get_remote_metadata()).

        Raises:
            requests.RequestException: if the download fails or returns an HTTP error.
            zipfile.BadZipFile: if the downloaded payload is not a valid ZIP.
        """
        print(f"--- Downloading New GTFS Bundle from {download_url} ---")

        # Download and validate BEFORE deleting anything, so a failed request
        # cannot leave us with an empty static directory and no GTFS data.
        r = requests.get(download_url, timeout=300)
        r.raise_for_status()

        if self.STATIC_DIR.exists():
            shutil.rmtree(self.STATIC_DIR)
        self.STATIC_DIR.mkdir(parents=True, exist_ok=True)

        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            z.extractall(self.STATIC_DIR)
            print(f"✓ Extracted {len(z.namelist())} files to {self.STATIC_DIR}.")

        # Nuke the old DB so init_db triggers a fresh import
        if self.DB_PATH.exists():
            os.remove(self.DB_PATH)
            print("✓ Old database deleted for rebuild.")
|