"""HTTP API used by the PDF Trainer UI to persist a template configuration.

Exposes ``POST /api/trainer/save-config`` (guarded by a shared webhook
secret) plus two trivial liveness endpoints.
"""

from __future__ import annotations

import hmac
import json
import os
from pathlib import Path
from typing import Any, Dict, Optional, Union
from urllib.parse import urlparse

from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import AliasChoices, BaseModel, Field

from backend.worker.gmail_client import GmailClient
from backend.worker.hf_env_files import resolve_json_or_path
from backend.worker.template_store import save_trainer_template

app = FastAPI()

# PDFs at or above this size are mentioned in the email body instead of attached.
_MAX_PDF_ATTACHMENT_BYTES = 20 * 1024 * 1024


def _origin_from_url(url: str) -> Optional[str]:
    """Return the ``scheme://netloc`` origin of *url*.

    Returns ``None`` when *url* is empty/blank, cannot be parsed, or lacks
    either a scheme or a network location.
    """
    url = (url or "").strip()
    if not url:
        return None
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse signals malformed input (e.g. a broken IPv6 literal) with ValueError.
        return None
    if not parsed.scheme or not parsed.netloc:
        return None
    return f"{parsed.scheme}://{parsed.netloc}"


# Local dev front-ends are always allowed; the deployed trainer origin is added
# when PDF_TRAINER_BASE_URL is set to a parseable absolute URL.
_allowed_origins = {
    "http://localhost:5173",
    "http://localhost:3000",
}

trainer_origin = _origin_from_url(os.environ.get("PDF_TRAINER_BASE_URL", ""))
if trainer_origin:
    _allowed_origins.add(trainer_origin)

app.add_middleware(
    CORSMiddleware,
    # The set always holds the localhost entries, so no wildcard fallback is needed.
    allow_origins=sorted(_allowed_origins),
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)


def _require_webhook_secret(provided: Optional[str]) -> None:
    """Validate the caller-supplied webhook secret.

    Raises:
        HTTPException: 500 when ``PDF_PIPELINE_WEBHOOK_SECRET`` is unset or
            blank; 401 when *provided* (stripped) does not match it.
    """
    expected = (os.environ.get("PDF_PIPELINE_WEBHOOK_SECRET") or "").strip()
    if not expected:
        raise HTTPException(
            status_code=500,
            detail="Server missing PDF_PIPELINE_WEBHOOK_SECRET env var",
        )

    supplied = (provided or "").strip()
    # Constant-time comparison so response timing does not leak how much of the
    # secret matched. Compare bytes: compare_digest rejects non-ASCII str.
    if not hmac.compare_digest(
        supplied.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    ):
        raise HTTPException(status_code=401, detail="Invalid webhook secret")


def _alias_env(primary: str, fallback: str) -> None:
    """Copy env var *fallback* into *primary* when *primary* is unset or blank.

    Mutates ``os.environ``; does nothing if *primary* already has a non-blank
    value or *fallback* is unset/blank.
    """
    if (os.environ.get(primary) or "").strip():
        return
    fb = (os.environ.get(fallback) or "").strip()
    if fb:
        os.environ[primary] = fb


def _gmail_paths(repo_root: Path) -> tuple[Path, Path]:
    """Return ``(credentials_path, token_path)`` for the Gmail client.

    The ``PDF_PIPELINE_GMAIL_*`` env vars are first aliased onto the
    ``GMAIL_*`` names, then each value is resolved with
    ``resolve_json_or_path``.
    """
    _alias_env("GMAIL_CREDENTIALS_JSON", "PDF_PIPELINE_GMAIL_CREDENTIALS_JSON")
    _alias_env("GMAIL_TOKEN_JSON", "PDF_PIPELINE_GMAIL_TOKEN_JSON")

    backend_dir = repo_root / "backend"
    # NOTE(review): presumably the second argument is the default on-disk file and
    # the third is where inline JSON from the env var gets written - confirm
    # against resolve_json_or_path.
    creds = resolve_json_or_path(
        "GMAIL_CREDENTIALS_JSON",
        backend_dir / "credentials.json",
        Path("/tmp/credentials.json"),
    )
    token = resolve_json_or_path(
        "GMAIL_TOKEN_JSON",
        backend_dir / "token.json",
        Path("/tmp/token.json"),
    )
    return creds, token


def _uploads_dir(repo_root: Path) -> Path:
    """Return the worker uploads directory under *repo_root*."""
    return repo_root / "backend" / "worker" / "uploads"


def _upload_path(uploads: Path, filename: str) -> Optional[Path]:
    """Return ``uploads / filename`` only if it is a direct child of *uploads*.

    *filename* is built from request data, so anything that would step outside
    the uploads directory (path separators, ``..``, absolute paths) yields
    ``None`` instead of a path.
    """
    candidate = uploads / filename
    if candidate.parent != uploads:
        return None
    return candidate


def _parse_config(config: Union[Dict[str, Any], str]) -> Dict[str, Any]:
    """Normalize the request's ``config`` into a dict.

    A dict is returned as-is; a string is decoded as JSON and must decode to an
    object.

    Raises:
        HTTPException: 422 when the string is not valid JSON or is not a JSON
            object.
    """
    if not isinstance(config, str):
        return config
    try:
        parsed = json.loads(config)
    except (ValueError, RecursionError) as e:
        # RecursionError: pathologically nested JSON is still a client error.
        raise HTTPException(
            status_code=422, detail=f"config is not valid JSON: {e}"
        ) from e
    if not isinstance(parsed, dict):
        raise HTTPException(status_code=422, detail="config JSON must be an object")
    return parsed


class SaveConfigRequest(BaseModel):
    """Request body for ``POST /api/trainer/save-config``.

    Each field accepts the snake_case and camelCase spellings listed in its
    ``AliasChoices``.
    """

    pdf_id: str = Field(
        ...,
        validation_alias=AliasChoices("pdf_id", "pdfId"),
        description="The pdf_id used by the trainer link (?pdf_id=...).",
    )
    template_id: str = Field(
        ...,
        validation_alias=AliasChoices("template_id", "templateId"),
        description="The template_id being saved/updated.",
    )
    config: Union[Dict[str, Any], str] = Field(
        ...,
        validation_alias=AliasChoices("config", "configuration", "config_json", "configJson"),
        description="Trainer configuration JSON.",
    )
    notify_to: Optional[str] = Field(
        None,
        validation_alias=AliasChoices("notify_to", "notifyTo"),
        description="Override recipient email (optional).",
    )


@app.post("/api/trainer/save-config")
def save_config(
    req: SaveConfigRequest,
    x_webhook_secret: Optional[str] = Header(default=None, alias="X-Webhook-Secret"),
):
    """
    Called by the PDF Trainer UI when "Save configuration" is clicked.

    Behavior:
    - persists the config JSON through ``save_trainer_template`` (expected to
      land in backend/worker/trainer_templates/<template_id>.json; the exact
      location is decided by that helper, not here)
    - sends a confirmation email with the config JSON + the original PDF attached

    Raises:
        HTTPException: 401/500 from the webhook-secret check; 500 when the
            sender or recipient address is not configured, when saving the
            template fails, or when sending the email fails; 422 when
            ``config`` is a string that is not a JSON object.

    Returns ``{"ok": True}`` on success.
    """
    _require_webhook_secret(x_webhook_secret)

    repo_root = Path(__file__).resolve().parent

    notify_from = (os.environ.get("PDF_PIPELINE_NOTIFY_FROM") or "").strip()
    if not notify_from:
        raise HTTPException(status_code=500, detail="Missing PDF_PIPELINE_NOTIFY_FROM env var")

    # Where to send the confirmation. Strip the override *before* falling back so
    # a whitespace-only notify_to does not mask the configured default.
    default_to = (os.environ.get("PDF_PIPELINE_PIPELINE_NOTIFY_TO") or "").strip()
    to_email = (req.notify_to or "").strip() or default_to
    if not to_email:
        raise HTTPException(
            status_code=500,
            detail="Missing PDF_PIPELINE_PIPELINE_NOTIFY_TO env var (or provide notify_to in request)",
        )

    # Persist config JSON for local tracking/backups
    cfg_obj = _parse_config(req.config)
    try:
        saved_path = save_trainer_template(req.template_id, cfg_obj)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to save template config: {e}"
        ) from e

    # Load original PDF from uploads/ (best-effort). pdf_id is caller-controlled,
    # so only direct children of uploads/ are ever read; anything else is treated
    # the same as a missing file.
    uploads = _uploads_dir(repo_root)
    pdf_path = _upload_path(uploads, f"{req.pdf_id}.pdf")
    name_path = _upload_path(uploads, f"{req.pdf_id}.name.txt")

    pdf_name = f"{req.pdf_id}.pdf"
    if name_path is not None:
        try:
            pdf_name = (name_path.read_text(encoding="utf-8") or "").strip() or pdf_name
        except (OSError, ValueError):
            # Missing/unreadable/undecodable sidecar: keep the default name.
            pass

    pdf_bytes: Optional[bytes] = None
    if pdf_path is not None:
        try:
            pdf_bytes = pdf_path.read_bytes()
        except (OSError, ValueError):
            pdf_bytes = None

    cfg_bytes = json.dumps(cfg_obj, indent=2).encode("utf-8")
    cfg_filename = f"trainer_config_{req.pdf_id}__{req.template_id}.json"

    attachments: list[tuple[str, bytes]] = [(cfg_filename, cfg_bytes)]
    body_lines = [
        "Configuration has been updated.",
        "",
        f"template_id: {req.template_id}",
        f"pdf_id: {req.pdf_id}",
        f"saved: {saved_path}",
    ]

    if pdf_bytes and len(pdf_bytes) < _MAX_PDF_ATTACHMENT_BYTES:
        attachments.append((pdf_name, pdf_bytes))
    elif pdf_bytes:
        body_lines.append("")
        body_lines.append("Note: PDF was too large to attach.")
    else:
        body_lines.append("")
        body_lines.append("Note: Original PDF not found on worker; only config JSON is attached.")

    subject = f"PDF Trainer: configuration updated ({req.template_id})"
    body = "\n".join(body_lines) + "\n"

    creds_path, token_path = _gmail_paths(repo_root)
    try:
        gmail = GmailClient(creds_path, token_path)
        gmail.send_email(
            to_email=to_email,
            from_email=notify_from,
            subject=subject,
            body_text=body,
            attachments=attachments,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Failed to send confirmation email: {e}"
        ) from e

    return {"ok": True}


@app.get("/")
def root():
    """Liveness probe; always returns ``{"ok": True}``."""
    return {"ok": True}


@app.get("/health")
def health():
    """Health-check endpoint; always returns ``{"ok": True}``."""
    return {"ok": True}