# pdf-trainer-worker / health_api.py
# Avinashnalla7: Add save-config webhook + gmail_auth helper (20919fe)
from __future__ import annotations

import hmac
import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, Optional, Union
from urllib.parse import urlparse

from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import AliasChoices, BaseModel, Field

from backend.worker.gmail_client import GmailClient
from backend.worker.hf_env_files import resolve_json_or_path
from backend.worker.template_store import save_trainer_template
# FastAPI application instance; the CORS middleware and the route handlers in
# this module are registered on it.
app = FastAPI()
def _origin_from_url(url: str) -> Optional[str]:
    """Reduce *url* to its ``scheme://host[:port]`` origin.

    The result is meant to be compared against a browser ``Origin`` header,
    so any ``user:password@`` prefix is dropped and the host is lower-cased.

    Args:
        url: Absolute URL. ``None``, empty and whitespace-only values are
            tolerated.

    Returns:
        The origin string, or ``None`` when *url* is blank, cannot be
        parsed, or lacks a scheme or a host.
    """
    url = (url or "").strip()
    if not url:
        return None
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects malformed authorities (e.g. unbalanced IPv6 brackets).
        return None
    # An Origin header never carries credentials and its host is lower-case,
    # so normalise the same way; otherwise the CORS match could never succeed.
    host_port = parsed.netloc.rpartition("@")[2].lower()
    if not parsed.scheme or not host_port:
        return None
    return f"{parsed.scheme}://{host_port}"
# Origins allowed to call this API from a browser: two localhost origins,
# plus the origin derived from PDF_TRAINER_BASE_URL when that variable holds
# a usable absolute URL.
_allowed_origins = {
    "http://localhost:5173",
    "http://localhost:3000",
}
trainer_origin = _origin_from_url(os.environ.get("PDF_TRAINER_BASE_URL", ""))
if trainer_origin:
    _allowed_origins.add(trainer_origin)

app.add_middleware(
    CORSMiddleware,
    # The set always contains the localhost entries above, so it can never be
    # empty and no wildcard fallback is needed. Sorting only makes the order
    # deterministic.
    allow_origins=sorted(_allowed_origins),
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
def _require_webhook_secret(provided: Optional[str]) -> None:
    """Reject the request unless *provided* matches the configured secret.

    Both the configured and the supplied value are whitespace-stripped
    before they are compared.

    Args:
        provided: Secret supplied by the caller, or ``None`` when absent.

    Raises:
        HTTPException: 500 when ``PDF_PIPELINE_WEBHOOK_SECRET`` is unset or
            blank; 401 when *provided* does not match it.
    """
    expected = (os.environ.get("PDF_PIPELINE_WEBHOOK_SECRET") or "").strip()
    if not expected:
        raise HTTPException(
            status_code=500,
            detail="Server missing PDF_PIPELINE_WEBHOOK_SECRET env var",
        )
    candidate = (provided or "").strip()
    # Constant-time comparison, so response timing does not reveal how much
    # of a guess was correct. Bytes are compared because compare_digest
    # rejects non-ASCII str operands.
    if not hmac.compare_digest(candidate.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Invalid webhook secret")
def _alias_env(primary: str, fallback: str) -> None:
    """Copy *fallback*'s value into *primary* when *primary* is unset or blank.

    Nothing happens if *primary* already holds non-whitespace text, or if
    *fallback* is itself unset or blank. The copied value is stripped of
    surrounding whitespace.
    """
    env = os.environ
    primary_is_set = bool((env.get(primary) or "").strip())
    substitute = (env.get(fallback) or "").strip()
    if substitute and not primary_is_set:
        env[primary] = substitute
def _gmail_paths(repo_root: Path) -> tuple[Path, Path]:
    """Return the ``(credentials, token)`` pair produced by ``resolve_json_or_path``.

    Each ``GMAIL_*_JSON`` variable is first back-filled from its
    ``PDF_PIPELINE_``-prefixed counterpart. It is then passed to
    ``resolve_json_or_path`` along with the matching file under
    ``<repo_root>/backend`` and a ``/tmp`` path.
    """
    # NOTE(review): the /tmp argument is presumably where inline JSON from the
    # environment gets written - confirm in hf_env_files.
    for plain_name, prefixed_name in (
        ("GMAIL_CREDENTIALS_JSON", "PDF_PIPELINE_GMAIL_CREDENTIALS_JSON"),
        ("GMAIL_TOKEN_JSON", "PDF_PIPELINE_GMAIL_TOKEN_JSON"),
    ):
        _alias_env(plain_name, prefixed_name)

    backend_dir = repo_root / "backend"
    credentials_path = resolve_json_or_path(
        "GMAIL_CREDENTIALS_JSON",
        backend_dir / "credentials.json",
        Path("/tmp/credentials.json"),
    )
    token_path = resolve_json_or_path(
        "GMAIL_TOKEN_JSON",
        backend_dir / "token.json",
        Path("/tmp/token.json"),
    )
    return credentials_path, token_path
def _uploads_dir(repo_root: Path) -> Path:
    """Return ``<repo_root>/backend/worker/uploads``."""
    return repo_root.joinpath("backend", "worker", "uploads")
class SaveConfigRequest(BaseModel):
    """Request body of ``POST /api/trainer/save-config``.

    Every field is accepted under its snake_case name or any of the
    alternative spellings listed in its ``validation_alias``.
    """

    # Required.
    pdf_id: str = Field(
        default=...,
        description="The pdf_id used by the trainer link (?pdf_id=...).",
        validation_alias=AliasChoices("pdf_id", "pdfId"),
    )
    # Required.
    template_id: str = Field(
        default=...,
        description="The template_id being saved/updated.",
        validation_alias=AliasChoices("template_id", "templateId"),
    )
    # Required; either an object or a string (the handler decodes strings as JSON).
    config: Union[Dict[str, Any], str] = Field(
        default=...,
        description="Trainer configuration JSON.",
        validation_alias=AliasChoices("config", "configuration", "config_json", "configJson"),
    )
    # Optional.
    notify_to: Optional[str] = Field(
        default=None,
        description="Override recipient email (optional).",
        validation_alias=AliasChoices("notify_to", "notifyTo"),
    )
_logger = logging.getLogger(__name__)

# PDFs of this size or larger are left out of the confirmation email.
_MAX_PDF_ATTACHMENT_BYTES = 20 * 1024 * 1024


def _has_path_separator(value: str) -> bool:
    """Return True when *value* could change directory if used in a file name."""
    return any(ch in value for ch in ("/", "\\", "\x00"))


def _config_as_dict(config: Union[Dict[str, Any], str]) -> Dict[str, Any]:
    """Return *config* as a dict, decoding it first when it is a JSON string."""
    if not isinstance(config, str):
        return config
    try:
        parsed = json.loads(config)
    except (ValueError, RecursionError) as e:
        raise HTTPException(status_code=422, detail=f"config is not valid JSON: {e}") from e
    if not isinstance(parsed, dict):
        raise HTTPException(status_code=422, detail="config JSON must be an object")
    return parsed


def _read_original_pdf(uploads: Path, pdf_id: str) -> tuple[str, Optional[bytes]]:
    """Best-effort load of ``<pdf_id>.pdf`` and its display name from *uploads*.

    Returns ``(attachment_name, pdf_bytes)``. ``attachment_name`` is the
    stripped content of ``<pdf_id>.name.txt`` when that file is readable and
    non-blank, otherwise ``<pdf_id>.pdf``. ``pdf_bytes`` is ``None`` when the
    PDF is missing or unreadable.
    """
    pdf_name = f"{pdf_id}.pdf"
    try:
        stored_name = (uploads / f"{pdf_id}.name.txt").read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        stored_name = ""
    except (OSError, UnicodeDecodeError):
        # Still best-effort, but leave a trace instead of failing silently.
        _logger.warning("Could not read stored file name for pdf_id=%r", pdf_id, exc_info=True)
        stored_name = ""
    if stored_name:
        pdf_name = stored_name

    try:
        pdf_bytes: Optional[bytes] = (uploads / f"{pdf_id}.pdf").read_bytes()
    except FileNotFoundError:
        pdf_bytes = None
    except OSError:
        _logger.warning("Could not read uploaded PDF for pdf_id=%r", pdf_id, exc_info=True)
        pdf_bytes = None
    return pdf_name, pdf_bytes


@app.post("/api/trainer/save-config")
def save_config(
    req: SaveConfigRequest,
    x_webhook_secret: Optional[str] = Header(default=None, alias="X-Webhook-Secret"),
):
    """
    Called by the PDF Trainer UI when "Save configuration" is clicked.

    Behavior:
    - persists the config JSON via ``save_trainer_template``
      (backend/worker/trainer_templates/<template_id>.json)
    - sends a confirmation email with the config JSON + the original PDF attached

    Raises:
        HTTPException: 401 for a wrong ``X-Webhook-Secret``; 422 for a config
            that is not a JSON object or an id containing path separators;
            500 for missing server configuration, a failed save, or a failed
            email send.
    """
    _require_webhook_secret(x_webhook_secret)

    repo_root = Path(__file__).resolve().parent

    notify_from = (os.environ.get("PDF_PIPELINE_NOTIFY_FROM") or "").strip()
    if not notify_from:
        raise HTTPException(status_code=500, detail="Missing PDF_PIPELINE_NOTIFY_FROM env var")

    # Where to send the confirmation. A blank or whitespace-only notify_to
    # falls back to the configured default instead of masking it.
    # NOTE(review): the doubled "PIPELINE" in this variable name looks like a
    # typo, but deployments may already rely on it - confirm before renaming.
    default_to = (os.environ.get("PDF_PIPELINE_PIPELINE_NOTIFY_TO") or "").strip()
    to_email = (req.notify_to or "").strip() or default_to
    if not to_email:
        raise HTTPException(
            status_code=500,
            detail="Missing PDF_PIPELINE_PIPELINE_NOTIFY_TO env var (or provide notify_to in request)",
        )

    # Both ids end up inside file names. Refuse anything that could point
    # outside the intended directory, since the selected file is emailed out.
    for field_name, value in (("pdf_id", req.pdf_id), ("template_id", req.template_id)):
        if _has_path_separator(value):
            raise HTTPException(
                status_code=422,
                detail=f"{field_name} must not contain path separators",
            )

    # Persist config JSON for local tracking/backups.
    cfg_obj = _config_as_dict(req.config)
    try:
        saved_path = save_trainer_template(req.template_id, cfg_obj)
    except Exception as e:  # boundary: any storage failure becomes a 500
        raise HTTPException(status_code=500, detail=f"Failed to save template config: {e}") from e

    # Load original PDF from uploads/ (best-effort).
    pdf_name, pdf_bytes = _read_original_pdf(_uploads_dir(repo_root), req.pdf_id)

    cfg_filename = f"trainer_config_{req.pdf_id}__{req.template_id}.json"
    attachments = [(cfg_filename, json.dumps(cfg_obj, indent=2).encode("utf-8"))]

    body_lines = [
        "Configuration has been updated.",
        "",
        f"template_id: {req.template_id}",
        f"pdf_id: {req.pdf_id}",
        f"saved: {saved_path}",
    ]
    if not pdf_bytes:
        body_lines += ["", "Note: Original PDF not found on worker; only config JSON is attached."]
    elif len(pdf_bytes) < _MAX_PDF_ATTACHMENT_BYTES:
        attachments.append((pdf_name, pdf_bytes))
    else:
        body_lines += ["", "Note: PDF was too large to attach."]

    subject = f"PDF Trainer: configuration updated ({req.template_id})"
    body = "\n".join(body_lines) + "\n"

    creds_path, token_path = _gmail_paths(repo_root)
    try:
        gmail = GmailClient(creds_path, token_path)
        gmail.send_email(
            to_email=to_email,
            from_email=notify_from,
            subject=subject,
            body_text=body,
            attachments=attachments,
        )
    except Exception as e:  # boundary: any mail failure becomes a 500
        raise HTTPException(status_code=500, detail=f"Failed to send confirmation email: {e}") from e

    return {"ok": True}
@app.get("/")
def root():
    """Answer ``GET /`` with a constant ``{"ok": True}`` payload."""
    return dict(ok=True)
@app.get("/health")
def health():
    """Answer ``GET /health`` with a constant ``{"ok": True}`` payload."""
    return dict(ok=True)