File size: 20,667 Bytes
13c0c13 4700d47 30a787a 13c0c13 85e8b60 35cd242 13c0c13 f1b7e72 591dd1c 28f0042 921564c 85e8b60 28f0042 6fbb91a 12e4b6f 192a2f2 12e4b6f 192a2f2 13c0c13 6fbb91a 85e8b60 35cd242 f1b7e72 4700d47 f1b7e72 3f7d6f0 2b76608 5598a2e 13c0c13 35cd242 2b76608 13c0c13 85e8b60 35cd242 13c0c13 58ed042 2b76608 35cd242 2b76608 35cd242 13c0c13 35cd242 2b76608 28f0042 85e8b60 28f0042 13c0c13 6fbb91a 13c0c13 6fbb91a 13c0c13 6fbb91a 13c0c13 6fbb91a f111979 85e8b60 13c0c13 b7d0fd9 35cd242 2b76608 13c0c13 35cd242 85e8b60 35cd242 2b76608 35cd242 13c0c13 58ed042 13c0c13 58ed042 13c0c13 6fbb91a 13c0c13 6fbb91a 13c0c13 6fbb91a 13c0c13 6fbb91a 13c0c13 58ed042 13c0c13 1b2a824 13c0c13 1b2a824 13c0c13 1b2a824 13c0c13 1b2a824 13c0c13 1b2a824 2b76608 13c0c13 7892647 85e8b60 7892647 85e8b60 4f475ba 85e8b60 13c0c13 7892647 85e8b60 4f475ba f111979 7892647 85e8b60 7892647 35cd242 f111979 4f475ba f111979 4f475ba f111979 4f475ba f111979 4f475ba f111979 7892647 35cd242 85e8b60 7892647 85e8b60 7892647 35cd242 7892647 f111979 7892647 2b76608 35cd242 2b76608 85e8b60 13c0c13 2b76608 35cd242 2b76608 35cd242 85e8b60 2b76608 85e8b60 2b76608 35cd242 28f0042 2b76608 e52278d 13c0c13 e52278d 35cd242 e52278d 13c0c13 e52278d 4700d47 3f7d6f0 4700d47 f1b7e72 4700d47 f1b7e72 4700d47 f1b7e72 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 | """
Mile Zero Tours Image API + PDF Upload API (FastAPI)
Adds:
- /pdfupload/{tour} (admin-only) upload a PDF file (multipart)
- /pdfget/{tour}.pdf (public) fetch PDF (cached -> HF dataset -> 404 if none)
- /pdfdelete/{tour} (admin-only) delete cached PDF (and schedule HF commit)
Storage:
- JSON stays in: dataset_cache/{tour}.json -> committed to HF under data/
- PDFs go in: dataset_cache/pdfs/{tour}.pdf -> committed to HF under pdfs/
Notes:
- Keeps your existing image endpoints untouched (except minor imports cleanup).
- Uses your existing CommitScheduler to commit both JSON + PDFs (same scheduler).
- Enforces .pdf extension + content-type check + max size (configurable).
"""
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Body
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, Response
import os, json, base64, asyncio
import httpx
from pathlib import Path
from huggingface_hub import CommitScheduler, hf_hub_download
from urllib.parse import unquote
import re
from typing import Any
# ==================================================
# APP SETUP
# ==================================================
app = FastAPI()
# NOTE(review): CORS is wide open (any origin, method, header). That is fine
# for the public read endpoints; the admin routes rely solely on the
# admin_token form field for protection.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ==================================================
# CONFIG
# ==================================================
# Canonical tour names. Entries ending in a number ("Yukon 1", "Yukon 2", ...)
# are departures of the same tour family; the fallback/sharing helpers below
# (get_fallback_tours, get_numbered_family_members) rely on this
# "<base> <number>" naming convention.
ALL_TOURS = [
    "Benelux",
    "Epic Rail 1", "Epic Rail 2", "Epic Rail 3",
    "Haida Gwaii 1", "Haida Gwaii 2", "Haida Gwaii 3",
    "Haida Gwaii 4", "Haida Gwaii 5", "Haida Gwaii 6", "Haida Gwaii 7",
    "Harrison",
    "Ireland",
    "Island Hopping", "Island Hopping 2",
    "Kootenays",
    "Maritimes 1", "Maritimes 2",
    "New Zealand",
    "Newfoundland 1", "Newfoundland 2", "Newfoundland 3", "Newfoundland 4",
    "Okanagan",
    "Portugal",
    "Quebec",
    "Quebec Holiday",
    "Scotland",
    "Sea to Sky",
    "Skeena",
    "Tofino 1", "Tofino 2",
    "Van Isle",
    "Yukon 1", "Yukon 2", "Yukon 3", "Yukon 4",
    "Yukon Winter",
    "Epic + Atlantic 1",
    "Epic + Atlantic 2",
    "Epic + Maritimes 1",
    "Epic + Maritimes 2",
    "Atlantic 1",
    "Atlantic 2"
]
# Secrets come from the environment; missing values are warned about at startup.
ADMIN_TOKEN = os.environ.get("ADMIN_TOKEN")
GOOGLE_KEY = os.environ.get("GOOGLE_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")
# Spreadsheet backing the /sheets proxy route.
SHEET_ID = "1o0AUq13j-7LZWDhCwFYgq07niZtvOya5iE5bbRQMGWc"
# Google Form endpoints used by /form: the view page is scraped for hidden
# anti-abuse fields, the action URL receives the actual POST.
GOOGLE_FORM_VIEW_URL = "https://docs.google.com/forms/d/e/1FAIpQLSeg_6oh7Uh46zODsawnctwT_Bfu48ojtK3b8JLZeaZuaEQ1Ig/viewform?usp=publish-editor"
GOOGLE_FORM_ACTION_URL = "https://docs.google.com/forms/d/e/1FAIpQLSeg_6oh7Uh46zODsawnctwT_Bfu48ojtK3b8JLZeaZuaEQ1Ig/formResponse"
# The serialized JSON payload is written into each of these form entry ids.
GOOGLE_FORM_JSON_ENTRIES = ("entry.240290822", "entry.421446115")
TERMS_OF_TRAVEL_TEXT = """Please Carefully Read the Following Terms of Travel
By making a deposit for a tour with Mile Zero Tours Ltd. the guest agrees to all of the following terms and conditions of travel.
HOME PICK-UP & DROP OFF:
Tours departing from Vancouver Island - Mile Zero Tours is pleased to offer complimentary pick-up and drop-off at any location in Victoria at the beginning and end of a tour. Complimentary service is available within 35km driving distance of Victoria International Airport. Mile Zero Tours also offers pick-up and drop-off service from Nanaimo, Parksville and Qualicum. This is complimentary when available on the same day as the tour's departure and return. However, as some tours involve very early morning departures or late evening returns this service may be offered the day before or day after the tour and will require a pre or post tour night in Victoria. Guests will be advised at time of booking if pick-ups and returns are offered same day or if an additional night will be required. Mile Zero Tours can arrange hotel accommodations for guests requiring a pre or post night in Victoria at a minimal cost.
DEPARTURE TIMES:
Mile Zero Tours will advise guests of pick up times a few days prior to travel and guests need to be ready to depart at the specified time.
LUGGAGE:
The limit per person is one suitcase (not more than 50lbs) and a small carry-on bag.
ACCOMMODATIONS:
Special rooming requests should be provided at least one month prior to departure.
ITINERARY:
Mile Zero Tours reserves the right to alter the order of sightseeing activities for reasons beyond our control.
INTERNATIONAL TRAVEL:
For any travel outside of Canada, a valid passport is required and guests are responsible for admissibility and valid documentation.
DIETARY NEEDS:
Guests should advise Mile Zero Tours of dietary concerns at least one month before the tour departure date.
CANCELLATIONS:
The cancellation policy for each tour is described in detail on the tour itinerary and invoice.
PRIVACY:
Information collected by Mile Zero Tours is confidential and used only for tour operations and legal requirements.
Updated: November 2020"""
DATASET_REPO = "SalexAI/mztimgs"  # 👈 change if needed
# Local cache folder that CommitScheduler watches
DATASET_DIR = Path("dataset_cache")
DATASET_DIR.mkdir(parents=True, exist_ok=True)
# PDF cache folder (also inside DATASET_DIR so scheduler can commit it)
PDF_DIR = DATASET_DIR / "pdfs"
PDF_DIR.mkdir(parents=True, exist_ok=True)
# PDF constraints
MAX_PDF_BYTES = int(os.environ.get("MAX_PDF_BYTES", str(25 * 1024 * 1024)))  # default 25MB
# Startup diagnostics only — the app still boots with missing secrets, but
# admin routes (no ADMIN_TOKEN) and HF/Google calls will fail at runtime.
if not ADMIN_TOKEN:
    print("⚠️ WARNING: ADMIN_TOKEN not set")
if not GOOGLE_KEY:
    print("⚠️ WARNING: GOOGLE_KEY not set")
if not HF_TOKEN:
    print("⚠️ WARNING: HF_TOKEN not set (HF downloads/commits may fail)")
# ==================================================
# HF DATASET COMMIT SCHEDULER
# ==================================================
# Background scheduler: periodically commits everything under DATASET_DIR to
# the HF dataset repo. Routes below also use `scheduler.lock` to serialize
# local writes against an in-progress commit.
scheduler = CommitScheduler(
    repo_id=DATASET_REPO,
    repo_type="dataset",
    folder_path=DATASET_DIR,
    # Everything inside dataset_cache will be committed under this folder in the repo.
    # So: dataset_cache/Foo.json -> data/Foo.json
    # dataset_cache/pdfs/Foo.pdf -> data/pdfs/Foo.pdf
    path_in_repo="data",
    token=HF_TOKEN,
)
# ==================================================
# HELPERS
# ==================================================
def normalize_tour(tour: str) -> str:
    """Decode URL escapes and trim surrounding whitespace from a tour name."""
    decoded = unquote(tour)
    return decoded.strip()
def require_admin(token: str):
    """Abort with 403 unless *token* matches the configured ADMIN_TOKEN.

    A missing/empty ADMIN_TOKEN means admin access is disabled entirely.
    """
    token_is_valid = bool(ADMIN_TOKEN) and token == ADMIN_TOKEN
    if not token_is_valid:
        raise HTTPException(status_code=403, detail="Invalid admin token")
def has_images(data: dict) -> bool:
    """True when any of the banner/cover/carousel slots holds content."""
    imgs = data.get("images", {})
    return any(imgs.get(slot) for slot in ("banner", "cover", "carousel"))
def get_fallback_tours(requested: str) -> list[str]:
    """Return numbered siblings ("Base 1", "Base 2", ...) for a base tour name.

    A request that already ends in a number gets no fallbacks. Results are
    sorted by their trailing number, ascending.
    """
    base = requested.strip()
    if re.search(r"\s\d+$", base):
        return []

    def trailing_number(name: str) -> int:
        hit = re.search(r"(\d+)$", name)
        return int(hit.group(1)) if hit else 0

    prefix = base + " "
    candidates = [name for name in ALL_TOURS if name.startswith(prefix)]
    return sorted(candidates, key=trailing_number)
def get_numbered_family_members(tour: str) -> list[str]:
    """List every "<base> <N>" tour sharing the base name of a numbered tour.

    Returns [] when *tour* itself does not end in a number.
    """
    parsed = re.match(r"^(.*)\s(\d+)$", tour.strip())
    if parsed is None:
        return []
    base = parsed.group(1).strip()
    family = re.compile(rf"^{re.escape(base)}\s\d+$")
    return [name for name in ALL_TOURS if family.match(name)]
def empty_structure():
    """Fresh (unshared) default image payload for a tour."""
    images = {"banner": "", "cover": "", "carousel": []}
    return {"images": images}
def tour_path(tour: str) -> Path:
    """Local cache path for a tour's image JSON."""
    return DATASET_DIR / (tour + ".json")
def pdf_path(tour: str) -> Path:
    """Local cache path for a tour's itinerary PDF."""
    return PDF_DIR / (tour + ".pdf")
def load_json(path: Path) -> dict:
    """Read cached tour JSON, falling back to an empty structure when absent."""
    if path.exists():
        with path.open("r", encoding="utf-8") as fh:
            return json.load(fh)
    return empty_structure()
def save_json(path: Path, data: dict):
    """Write *data* to *path* as indented UTF-8 JSON."""
    text = json.dumps(data, indent=2)
    with path.open("w", encoding="utf-8") as fh:
        fh.write(text)
async def fetch_from_hf_json(tour: str) -> dict | None:
    """Download data/{tour}.json from the HF dataset repo.

    Runs the blocking hf_hub_download in a worker thread so the event loop
    stays responsive. Returns the parsed dict, or None on any failure
    (missing file, network error, bad JSON).
    """
    filename = f"{tour}.json"
    print("🔍 HF HUB DOWNLOAD TRY (json):", filename)
    try:
        path = await asyncio.to_thread(
            hf_hub_download,
            repo_id=DATASET_REPO,
            repo_type="dataset",
            # Fix: request the actual file (the previous literal placeholder
            # "data/(unknown)" could never resolve; `filename` was unused).
            filename=f"data/{filename}",
            token=HF_TOKEN,
        )
        print("⬇️ HF HUB DOWNLOADED (json):", path)
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        print("❌ HF HUB DOWNLOAD FAILED (json):", str(e))
        return None
async def fetch_from_hf_pdf_bytes(tour: str) -> bytes | None:
    """Download data/pdfs/{tour}.pdf from the HF dataset repo.

    Runs the blocking hf_hub_download in a worker thread. Returns the raw
    PDF bytes, or None on any failure (missing file, network error).
    """
    filename = f"{tour}.pdf"
    print("🔍 HF HUB DOWNLOAD TRY (pdf):", filename)
    try:
        path = await asyncio.to_thread(
            hf_hub_download,
            repo_id=DATASET_REPO,
            repo_type="dataset",
            # Fix: request the actual file (the previous literal placeholder
            # "data/pdfs/(unknown)" could never resolve; `filename` was unused).
            filename=f"data/pdfs/{filename}",
            token=HF_TOKEN,
        )
        print("⬇️ HF HUB DOWNLOADED (pdf):", path)
        with open(path, "rb") as f:
            return f.read()
    except Exception as e:
        print("❌ HF HUB DOWNLOAD FAILED (pdf):", str(e))
        return None
def sniff_pdf(raw: bytes) -> bool:
    """True when *raw* begins with the PDF magic header ("%PDF-")."""
    return raw.startswith(b"%PDF-")
# ==================================================
# ROUTES
# ==================================================
@app.get("/")
async def root_status():
    """Service health summary: per-tour image counts and PDF presence."""
    summaries = []
    for json_file in DATASET_DIR.glob("*.json"):
        name = json_file.stem
        try:
            with json_file.open("r", encoding="utf-8") as fh:
                payload = json.load(fh)
            imgs = payload.get("images", {})
            has_banner = bool(imgs.get("banner"))
            has_cover = bool(imgs.get("cover"))
            n_carousel = len(imgs.get("carousel", []))
            summaries.append({
                "tour": name,
                "banner": has_banner,
                "cover": has_cover,
                "carousel": n_carousel,
                "total_images": int(has_banner) + int(has_cover) + n_carousel,
                "has_pdf": pdf_path(name).exists(),
            })
        except Exception as exc:
            # A corrupt cache file should not take down the status page.
            summaries.append({"tour": name, "error": str(exc)})
    return {
        "status": "ok",
        "service": "Mile Zero Tours Image + PDF API",
        "cached_tours": len(summaries),
        "tours": sorted(summaries, key=lambda entry: entry.get("tour", "")),
    }
# ==================================================
# GET IMAGE JSON
# ==================================================
@app.get("/imageget/{tour}.json")
async def get_images(tour: str):
    """Serve a tour's image JSON: local cache, then HF, then numbered siblings.

    HF hits are written back to the local cache so later requests are served
    without a download. An empty-but-well-formed structure is returned when
    nothing is found anywhere.
    """
    name = normalize_tour(tour)
    cache_file = tour_path(name)

    # 1) Exact match from the local cache.
    if cache_file.exists():
        cached = load_json(cache_file)
        if has_images(cached):
            return cached

    # 2) Exact match from the HF dataset; persist a useful hit.
    remote = await fetch_from_hf_json(name)
    if remote and has_images(remote):
        save_json(cache_file, remote)
        return remote

    # 3) Numbered-sibling fallback ("Foo" -> "Foo 1", "Foo 2", ...).
    for candidate in get_fallback_tours(name):
        candidate_file = tour_path(candidate)
        if candidate_file.exists():
            local = load_json(candidate_file)
            if has_images(local):
                return local
        fetched = await fetch_from_hf_json(candidate)
        if fetched and has_images(fetched):
            save_json(candidate_file, fetched)
            return fetched

    return empty_structure()
# ==================================================
# UPLOAD IMAGE
# ==================================================
@app.post("/imageupload/{tour}")
async def upload_image(
    tour: str,
    admin_token: str = Form(...),
    slot: str = Form(...),  # banner | cover | carousel
    share_mode: str = Form("base_if_empty"),  # none | base_if_empty | share_all_from_1
    file: UploadFile = File(None),
    base64_data: str = Form(None),
):
    """Admin upload of one image into a tour's banner/cover/carousel slot.

    Accepts either a multipart *file* or a pre-encoded *base64_data* string;
    images are stored as base64 inside the tour's JSON. When the tour name
    ends in " 1", *share_mode* can propagate the image to numbered siblings
    ("Foo 2", "Foo 3", ...):
    - none: write only to this tour.
    - base_if_empty: also write to siblings that have no images yet.
    - share_all_from_1: consider every sibling a target (siblings' HF JSON is
      cached locally first so non-target fields survive the update).
    Raises 400 for an invalid slot/share_mode or a missing image, 403 for a
    bad admin token.
    """
    require_admin(admin_token)
    tour = normalize_tour(tour)
    if slot not in ("banner", "cover", "carousel"):
        raise HTTPException(status_code=400, detail="Invalid slot")
    if share_mode not in ("none", "base_if_empty", "share_all_from_1"):
        raise HTTPException(status_code=400, detail="Invalid share_mode")
    if not file and not base64_data:
        raise HTTPException(status_code=400, detail="No image provided")
    path = tour_path(tour)
    sibling_targets: list[str] = []
    shared_to: list[str] = []
    skipped_existing: list[str] = []
    if file:
        raw = await file.read()
        b64 = base64.b64encode(raw).decode("utf-8")
    else:
        b64 = base64_data.strip()
    # Determine sibling targets for optional share modes.
    if share_mode in ("base_if_empty", "share_all_from_1") and re.search(r"\s1$", tour):
        for sibling in get_numbered_family_members(tour):
            if sibling == tour:
                continue
            sib_path = tour_path(sibling)
            local_data = load_json(sib_path) if sib_path.exists() else None
            hf_data = None
            if local_data is None:
                # Only hit HF when there is no local cache for this sibling.
                hf_data = await fetch_from_hf_json(sibling)
            sibling_has_images = has_images(local_data) if local_data is not None else bool(hf_data and has_images(hf_data))
            # Base-if-empty mode: skip siblings that already have any images.
            if share_mode == "base_if_empty" and sibling_has_images:
                skipped_existing.append(sibling)
                if hf_data and not sib_path.exists():
                    # Still cache the HF copy locally even though we skip it.
                    with scheduler.lock:
                        save_json(sib_path, hf_data)
                continue
            # Share-all mode: include every sibling, but cache HF JSON first so we preserve
            # existing non-target fields when applying the slot update.
            if share_mode == "share_all_from_1" and hf_data and not sib_path.exists():
                with scheduler.lock:
                    save_json(sib_path, hf_data)
            sibling_targets.append(sibling)
    # scheduler.lock serializes local writes against in-progress HF commits.
    with scheduler.lock:
        data = load_json(path)
        if slot == "carousel":
            data["images"]["carousel"].append(b64)
        else:
            data["images"][slot] = b64
        save_json(path, data)
        for sibling in sibling_targets:
            sib_path = tour_path(sibling)
            sib_data = load_json(sib_path)
            # NOTE(review): this re-check also skips share_all_from_1 siblings
            # that already have images — presumably intentional; confirm.
            if has_images(sib_data):
                skipped_existing.append(sibling)
                continue
            if slot == "carousel":
                sib_data["images"]["carousel"].append(b64)
            else:
                sib_data["images"][slot] = b64
            save_json(sib_path, sib_data)
            shared_to.append(sibling)
    return {
        "ok": True,
        "tour": tour,
        "slot": slot,
        "carousel_len": len(data["images"]["carousel"]),
        "share_mode": share_mode,
        "shared_to": shared_to,
        "skipped_existing": skipped_existing,
    }
# ==================================================
# DELETE IMAGE
# ==================================================
@app.post("/imagedelete/{tour}")
async def delete_image(
    tour: str,
    admin_token: str = Form(...),
    slot: str = Form(...),
    index: int = Form(None),
):
    """Admin removal of one image.

    - slot == "carousel": removes the carousel entry at *index*.
    - slot in ("banner", "cover"): clears that slot to "".
    Raises 400 for an unknown slot or an out-of-range carousel index,
    403 for a bad admin token. The scheduler commits the change to HF.
    """
    require_admin(admin_token)
    tour = normalize_tour(tour)
    path = tour_path(tour)
    with scheduler.lock:
        data = load_json(path)
        if slot == "carousel":
            # Fix: also reject negative indices — list.pop(-1) would otherwise
            # silently delete from the END of the carousel.
            if index is None or not 0 <= index < len(data["images"]["carousel"]):
                raise HTTPException(status_code=400, detail="Invalid index")
            data["images"]["carousel"].pop(index)
        elif slot in ("banner", "cover"):
            data["images"][slot] = ""
        else:
            raise HTTPException(status_code=400, detail="Invalid slot")
        save_json(path, data)
    return {"ok": True}
# ==================================================
# PDF UPLOAD (NEW)
# ==================================================
@app.post("/pdfupload/{tour}")
async def upload_pdf(
    tour: str,
    admin_token: str = Form(...),
    file: UploadFile = File(...),
):
    """Admin upload of a tour PDF (cached locally, committed to HF by the scheduler)."""
    require_admin(admin_token)
    tour = normalize_tour(tour)

    # Cheap metadata checks first; the byte sniff below is the real gate,
    # since clients report filename/content-type inconsistently.
    lowered = (file.filename or "").lower()
    looks_like_pdf = lowered.endswith(".pdf") or file.content_type == "application/pdf"
    if not looks_like_pdf:
        raise HTTPException(status_code=400, detail="Only PDF files are allowed")

    raw = await file.read()
    size = len(raw)
    if size == 0:
        raise HTTPException(status_code=400, detail="Empty file")
    if size > MAX_PDF_BYTES:
        raise HTTPException(status_code=413, detail=f"PDF too large (max {MAX_PDF_BYTES} bytes)")
    if not sniff_pdf(raw):
        raise HTTPException(status_code=400, detail="File does not look like a valid PDF")

    target = pdf_path(tour)
    with scheduler.lock:
        target.write_bytes(raw)
    return {
        "ok": True,
        "tour": tour,
        "bytes": size,
        "pdf": f"/pdfget/{tour}.pdf",
    }
# ==================================================
# PDF GET (NEW)
# ==================================================
@app.get("/pdfget/{tour}.pdf")
async def get_pdf(tour: str):
    """Serve a tour PDF: local cache first, then the HF dataset, else 404."""
    tour = normalize_tour(tour)
    cache_file = pdf_path(tour)
    pdf_headers = {
        "Cache-Control": "public, max-age=300",
        "Content-Disposition": f'inline; filename="{tour}.pdf"',
    }

    # 1) Local cache hit.
    if cache_file.exists():
        return Response(
            content=cache_file.read_bytes(),
            media_type="application/pdf",
            headers=pdf_headers,
        )

    # 2) HF dataset fetch; re-cache sane payloads for the next request.
    raw = await fetch_from_hf_pdf_bytes(tour)
    if raw:
        # safety: avoid caching huge or non-pdf blobs
        if len(raw) <= MAX_PDF_BYTES and sniff_pdf(raw):
            with scheduler.lock:
                cache_file.write_bytes(raw)
        return Response(
            content=raw,
            media_type="application/pdf",
            headers=pdf_headers,
        )

    raise HTTPException(status_code=404, detail="PDF not found")
# ==================================================
# PDF DELETE (NEW)
# ==================================================
@app.post("/pdfdelete/{tour}")
async def delete_pdf(
    tour: str,
    admin_token: str = Form(...),
):
    """Admin removal of a cached tour PDF; the scheduler commits the deletion."""
    require_admin(admin_token)
    tour = normalize_tour(tour)
    target = pdf_path(tour)
    with scheduler.lock:
        target.unlink(missing_ok=True)
    return {"ok": True, "tour": tour, "deleted": True}
# ==================================================
# GOOGLE SHEETS PROXY (NO KEY LEAK)
# ==================================================
@app.get("/sheets/{range:path}")
async def proxy_google_sheets(range: str):
    """Proxy a Google Sheets `values` read so the API key never reaches clients.

    *range* is an A1-notation range (the :path converter lets it contain '/').
    Returns Google's JSON payload, or a generic error body with the upstream
    status code on failure. Raises 503 when no GOOGLE_KEY is configured.
    """
    if not GOOGLE_KEY:
        raise HTTPException(status_code=503, detail="Sheets proxy not configured")
    url = (
        f"https://sheets.googleapis.com/v4/spreadsheets/"
        f"{SHEET_ID}/values/{range}"
    )
    async with httpx.AsyncClient(timeout=15) as client:
        # Fix: pass the key as a query parameter so it is URL-encoded and
        # cannot be mangled by '?'/'&' characters in the client-supplied range
        # (previously the key was string-interpolated after untrusted input).
        r = await client.get(url, params={"key": GOOGLE_KEY})
        if r.status_code != 200:
            return JSONResponse(
                status_code=r.status_code,
                content={"error": "Google Sheets fetch failed"},
            )
        return r.json()
# ==================================================
# TERMS OF TRAVEL
# ==================================================
@app.get("/terms")
async def get_terms():
    """Return the static Terms of Travel text with its revision date."""
    payload = {
        "ok": True,
        "updated": "November 2020",
        "terms": TERMS_OF_TRAVEL_TEXT,
    }
    return payload
# ==================================================
# GOOGLE FORM JSON SUBMIT
# ==================================================
@app.post("/form")
async def submit_form(payload: Any = Body(...)):
    """Serialize the JSON request body and submit it into a Google Form.

    The payload is dumped once and written into every entry id listed in
    GOOGLE_FORM_JSON_ENTRIES. The form's view page is fetched first so the
    hidden fields (fvv / pageHistory / fbzx) can be scraped and replayed —
    Google may reject submissions that omit them. Raises 502 when Google
    responds with anything other than 200 or a 302 success redirect.
    """
    json_value = json.dumps(payload, ensure_ascii=False)
    async with httpx.AsyncClient(timeout=15, follow_redirects=False) as client:
        view = await client.get(GOOGLE_FORM_VIEW_URL)
        # Google may reject incomplete posts for some forms; pass hidden fields when available.
        hidden = {
            "fvv": re.search(r'name="fvv" value="([^"]+)"', view.text),
            "pageHistory": re.search(r'name="pageHistory" value="([^"]+)"', view.text),
            "fbzx": re.search(r'name="fbzx" value="([^"]+)"', view.text),
        }
        data = {}
        for key in GOOGLE_FORM_JSON_ENTRIES:
            data[key] = json_value
        for key, match in hidden.items():
            if match:
                data[key] = match.group(1)
        r = await client.post(GOOGLE_FORM_ACTION_URL, data=data)
        if r.status_code not in (200, 302):
            raise HTTPException(status_code=502, detail="Google Form submit failed")
    return {"ok": True, "submitted": True, "chars": len(json_value)}
|