Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
# Build identifier baked into this module (presumably a short git commit SHA).
BUILD_SHA = "8974c6e"
| from fastapi.responses import Response | |
| import os | |
# Flag that is True only when the SENDCFG_FAST_OK env var is exactly "1".
# NOTE(review): this is read at import time, before the load_dotenv(...) call
# further down this module, so a value that exists only in backend/.env would
# not be reflected here -- confirm that is intended.
SENDCFG_FAST_OK = os.environ.get("SENDCFG_FAST_OK", "0") == "1"
| from backend.sftp_store import store_to_sftp | |
| import json | |
| import os | |
| from pathlib import Path | |
| from typing import Any, Dict | |
| from dotenv import load_dotenv | |
| from fastapi import FastAPI, HTTPException, Request, UploadFile, File, Form, UploadFile, File, Form | |
def _env(*keys: str, default: str = "") -> str:
    """Return the first non-blank environment value found among ``keys``.

    Keys are looked up in ``os.environ`` in the order given.  Each value is
    whitespace-stripped; unset or blank values are skipped.  ``default`` is
    returned when no key yields a non-empty value.
    """
    import os

    stripped_values = ((os.environ.get(name) or "").strip() for name in keys)
    return next((value for value in stripped_values if value), default)
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import FileResponse, PlainTextResponse | |
# The FastAPI application object for this module.
app = FastAPI(title="PDF Trainer API", version="1.0")

# CORS: accept browser requests from the Vite dev server on either loopback
# name, with credentials and with any method or header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Filesystem layout used by the handlers in this module.
# REPO_ROOT is two levels above this file's resolved path.
REPO_ROOT = Path(__file__).resolve().parents[1]
BACKEND_DIR = REPO_ROOT.joinpath("backend")
# Upload root: PDF_TRAINER_UPLOADS_DIR when set and non-empty, else /data/uploads.
# NOTE(review): read at import time, before the load_dotenv(...) call later in
# this module, so a value present only in backend/.env is not picked up here.
UPLOADS_DIR = Path(os.getenv("PDF_TRAINER_UPLOADS_DIR") or "/data/uploads")
CONFIGS_DIR = UPLOADS_DIR.joinpath("configs")
def _maybe_bundle_to_sftp(pdf_id: str, template_id: str) -> dict:
    """Best-effort push of a stored config and its PDF to SFTP.

    Nothing is uploaded unless SFTP_HOST, SFTP_USER and SFTP_PASS are all set
    and both ``CONFIGS_DIR/<pdf_id>__<template_id>.json`` and
    ``UPLOADS_DIR/<pdf_id>.pdf`` exist locally.

    Never raises; the outcome is reported in the returned dict:
      - ``{"sftp": "skipped_not_configured"}`` -- an SFTP env var is missing
      - ``{"sftp": "skipped_no_config"}``      -- config file not found
      - ``{"sftp": "skipped_no_pdf"}``         -- PDF file not found
      - ``{"sftp": "ok", "remote_dir": ...}``  -- value returned by store_to_sftp
      - ``{"sftp": "error", "error": ...}``    -- any exception, stringified
    """
    try:
        required_env = ("SFTP_HOST", "SFTP_USER", "SFTP_PASS")
        if not all(os.environ.get(var) for var in required_env):
            return {"sftp": "skipped_not_configured"}

        config_file = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
        pdf_file = UPLOADS_DIR / f"{pdf_id}.pdf"
        label_file = UPLOADS_DIR / f"{pdf_id}.name.txt"

        if not config_file.exists():
            return {"sftp": "skipped_no_config"}
        if not pdf_file.exists():
            return {"sftp": "skipped_no_pdf"}

        config_payload = config_file.read_bytes()
        pdf_payload = pdf_file.read_bytes()

        # Prefer the stored display name; fall back to "<pdf_id>.pdf" when the
        # name file is absent or blank.
        fallback_name = f"{pdf_id}.pdf"
        display_name = ""
        if label_file.exists():
            display_name = label_file.read_text(encoding="utf-8").strip()
        display_name = display_name or fallback_name

        destination = store_to_sftp(
            pdf_id=pdf_id,
            template_id=template_id,
            cfg_json_bytes=config_payload,
            pdf_bytes=pdf_payload,
            pdf_name=display_name,
        )
    except Exception as exc:
        # Deliberately broad: SFTP problems must not propagate to the caller.
        return {"sftp": "error", "error": str(exc)}
    return {"sftp": "ok", "remote_dir": destination}
# Read backend/.env once at import time. override=True asks python-dotenv to
# let values from the file replace variables already set in the environment.
load_dotenv(dotenv_path=BACKEND_DIR / ".env", override=True)
def _get_env_required(key: str) -> str:
    """Return the whitespace-stripped value of environment variable ``key``.

    Raises:
        HTTPException: status 500 when the variable is unset or blank.
    """
    value = os.environ.get(key, "").strip()
    if value:
        return value
    raise HTTPException(status_code=500, detail=f"Server missing {key} env var")
def health():
    """Return the constant payload ``{"ok": True}``."""
    return dict(ok=True)
async def put_pdf(pdf_id: str, file: UploadFile = File(...), pdf_name: str = Form("")):
    """Save an uploaded PDF under ``/data/uploads/pdfs``.

    The upload body is written to ``<pdf_id>.pdf``; when ``pdf_name`` is
    non-empty it is written unchanged to ``<pdf_id>.name.txt`` next to it.
    Returns ``{"ok": True}``.

    NOTE(review): a second ``put_pdf`` defined later in this module rebinds
    this name and stores files directly under ``UPLOADS_DIR`` and not in
    the ``pdfs`` subdirectory -- confirm which definition is meant to be live.
    """
    target_dir = Path("/data/uploads") / "pdfs"
    target_dir.mkdir(parents=True, exist_ok=True)

    content = await file.read()
    (target_dir / f"{pdf_id}.pdf").write_bytes(content)
    if pdf_name:
        (target_dir / f"{pdf_id}.name.txt").write_text(pdf_name, encoding="utf-8")
    return {"ok": True}
def get_pdf(pdf_id: str):
    """Return the stored PDF for ``pdf_id`` as an ``application/pdf`` response.

    The file location comes from ``_pdf_path(pdf_id)``.
    NOTE(review): ``_pdf_path`` is not defined in the part of the module
    visible here -- confirm it exists and which directory it resolves to.

    Raises:
        HTTPException: status 404 (``pdf_not_found``) when the file is missing.
    """
    source = _pdf_path(pdf_id)
    try:
        content = source.read_bytes()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="pdf_not_found")
    else:
        return Response(content=content, media_type="application/pdf")
async def put_pdf(pdf_id: str, file: UploadFile = File(...), pdf_name: str = Form("")):
    """Store an uploaded PDF under ``UPLOADS_DIR`` and bundle existing configs.

    Writes the upload body to ``UPLOADS_DIR/<pdf_id>.pdf`` and, when
    ``pdf_name`` is non-empty, its stripped value to
    ``UPLOADS_DIR/<pdf_id>.name.txt``.  Afterwards every config file named
    ``<pdf_id>__<template_id>.json`` in ``CONFIGS_DIR`` is handed to
    ``_maybe_bundle_to_sftp``.  Bundling is best-effort: failures are logged
    and never change the response.

    Returns:
        ``{"ok": True}``.
    """
    UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
    content = await file.read()
    (UPLOADS_DIR / f"{pdf_id}.pdf").write_bytes(content)
    if pdf_name:
        (UPLOADS_DIR / f"{pdf_id}.name.txt").write_text(pdf_name.strip(), encoding="utf-8")

    # If any configs already exist for this pdf_id, bundle now.
    prefix = f"{pdf_id}__"
    try:
        for cfg in CONFIGS_DIR.glob(f"{prefix}*.json"):
            # Glob metacharacters inside pdf_id could match unrelated files;
            # only accept names that literally start with the prefix.
            if not cfg.name.startswith(prefix):
                continue
            # Strip the exact prefix instead of splitting on the first "__",
            # which picks the wrong template id when pdf_id contains "__".
            template_id = cfg.stem[len(prefix):]
            status = _maybe_bundle_to_sftp(pdf_id, template_id)
            if status.get("sftp") == "error":
                print(f"[SFTP] bundle failed for {cfg.name}: {status.get('error')}")
    except Exception as exc:
        # Best-effort: the upload already succeeded, so report and carry on.
        print(f"[SFTP] config scan failed for {pdf_id}: {exc}")
    return {"ok": True}
async def send_config(request: Request):
    """
    Store config JSON for later pipelines.

    Do NOT require PDF to exist.
    Do NOT send email from this API container (no Gmail creds in HF).

    Expects a JSON object body with string ``pdf_id`` and ``template_id`` and
    a ``config`` object.  The config is written to
    ``CONFIGS_DIR/<pdf_id>__<template_id>.json``; SFTP storage is then
    attempted on a best-effort basis.

    Returns:
        ``{"ok": True, "mode": "fast"}`` when SENDCFG_FAST_OK is set (nothing
        is read or stored); otherwise ``ok``, ``stored``, ``pdf_exists`` plus
        the status keys returned by ``_maybe_bundle_to_sftp``.

    Raises:
        HTTPException: status 400 for an unparseable or non-object body, a
            missing or non-string id, an id containing a path separator, or a
            missing ``config`` object.
    """
    if SENDCFG_FAST_OK:
        return {"ok": True, "mode": "fast"}

    try:
        payload = await request.json()
    except ValueError:
        # JSON decode errors are ValueError subclasses; report a client error
        # so a malformed body does not surface as a 500.
        raise HTTPException(status_code=400, detail="Invalid JSON body") from None
    if not isinstance(payload, dict):
        raise HTTPException(status_code=400, detail="Invalid JSON body")

    raw_pdf_id = payload.get("pdf_id")
    raw_template_id = payload.get("template_id")
    # Non-string ids are treated as missing so .strip() cannot crash on them.
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    template_id = raw_template_id.strip() if isinstance(raw_template_id, str) else ""
    config = payload.get("config")

    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")
    if not template_id:
        raise HTTPException(status_code=400, detail="Missing template_id")
    if not isinstance(config, dict):
        raise HTTPException(status_code=400, detail="Missing config object")

    # Both ids come from the request body and become part of a file name;
    # refuse anything that could step outside CONFIGS_DIR / UPLOADS_DIR.
    for label, value in (("pdf_id", pdf_id), ("template_id", template_id)):
        if any(bad in value for bad in ("/", "\\", "\x00")):
            raise HTTPException(status_code=400, detail=f"Invalid {label}")

    CONFIGS_DIR.mkdir(parents=True, exist_ok=True)
    out_path = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
    out_path.write_text(
        json.dumps({"pdf_id": pdf_id, "template_id": template_id, "config": config}, indent=2),
        encoding="utf-8",
    )

    # API stores uploaded PDFs at UPLOADS_DIR/{pdf_id}.pdf
    pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"

    # Store both config JSON + (if exists) PDF to SFTP for future pipelines.
    # SFTP errors must NOT break the API response.
    # NOTE(review): this call passes ``cfg_bytes=`` while _maybe_bundle_to_sftp
    # passes ``cfg_json_bytes=`` to the same function, and the helper below may
    # push the same bundle a second time.  Check the store_to_sftp signature
    # and drop whichever path is stale.
    try:
        cfg_bytes = out_path.read_bytes()
        pdf_bytes = None
        pdf_name = None
        if pdf_path.exists():
            pdf_bytes = pdf_path.read_bytes()
            # optional friendly name if present
            name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
            pdf_name = (
                name_path.read_text(encoding="utf-8").strip()
                if name_path.exists()
                else f"{pdf_id}.pdf"
            )
        store_to_sftp(
            pdf_id=pdf_id,
            template_id=template_id,
            cfg_bytes=cfg_bytes,
            pdf_bytes=pdf_bytes,
            pdf_name=pdf_name or f"{pdf_id}.pdf",
        )
    except Exception as e:
        print(f"[SFTP] store failed: {e}")

    sftp_status = _maybe_bundle_to_sftp(pdf_id, template_id)
    return {"ok": True, "stored": str(out_path), "pdf_exists": pdf_path.exists(), **sftp_status}
def get_config(pdf_id: str, template_id: str):
    """Serve the stored config ``CONFIGS_DIR/<pdf_id>__<template_id>.json``.

    Returns a ``FileResponse`` with media type ``application/json`` whose
    download filename is the config file's own name.

    Raises:
        HTTPException: status 404 (``Config not found``) when no such file exists.
    """
    config_file = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
    if config_file.exists():
        return FileResponse(config_file, media_type="application/json", filename=config_file.name)
    raise HTTPException(status_code=404, detail="Config not found")
async def notify_unknown(payload: Dict[str, Any]):
    """
    UNKNOWN TEMPLATE NOTIFICATION (rep email)

    REQUIRED payload:
      - pdf_id: str
    OPTIONAL:
      - reason: str

    Composes the message intended for the REP inbox:
      - recipient from PDF_PIPELINE_NOTIFY_TO, sender from PDF_PIPELINE_NOTIFY_FROM
      - includes trainer link with PDF pre-loaded
      - attaches the PDF read from UPLOADS_DIR/<pdf_id>.pdf
      - no JSON

    NOTE(review): this function builds the recipient, subject, body and
    attachment list but contains no call that actually dispatches the
    message -- confirm where sending is supposed to happen.

    Returns:
        ``{"ok": True}``.

    Raises:
        HTTPException: 400 when pdf_id is missing, not a string, or contains
            a path separator; 404 (``pdf_not_found``) when the PDF is not
            stored; 500 when a required env var is missing.
    """
    from urllib.parse import quote

    raw_pdf_id = payload.get("pdf_id")
    raw_reason = payload.get("reason")
    # Non-string values are treated as absent so .strip() cannot crash on them.
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    reason = raw_reason.strip() if isinstance(raw_reason, str) else ""
    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")
    # pdf_id is caller-supplied and becomes part of a file name below.
    if any(bad in pdf_id for bad in ("/", "\\", "\x00")):
        raise HTTPException(status_code=400, detail="Invalid pdf_id")

    rep_to = _get_env_required("PDF_PIPELINE_NOTIFY_TO")
    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()

    # The original referenced ``pdf_path`` without ever defining it, so every
    # valid request ended in NameError.  Resolve it next to the name file.
    pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
    try:
        pdf_bytes = pdf_path.read_bytes()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="pdf_not_found") from None

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    stored_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else ""
    # A blank name file must not produce an attachment with an empty name.
    pdf_name = stored_name or f"{pdf_id}.pdf"

    # Percent-encode the id so characters such as "&" or spaces keep the
    # query string intact.
    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={quote(pdf_id, safe='')}"

    subject = "Action required: Unknown PDF format (template not found)"
    body = (
        "Hi,\n\n"
        "We received a PDF that does not match any existing templates in the system.\n\n"
        + (f"Reason: {reason}\n\n" if reason else "")
        + "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
        f"{trainer_link}\n\n"
        "The original PDF is attached for reference.\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )
    attachments = [(pdf_name, pdf_bytes)]
    return {"ok": True}
def root():
    """Return the plain-text banner that points callers at ``/health``."""
    banner = "PDF Trainer API. Use /health"
    return banner