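# Hugging Face file-view residue (kept as comments so the module can parse):
#   uploader avatar: Avinashnalla7's picture
#   commit: ca668df -- "Add build sha to /health"
#   page links: raw / history / blame
#   file size: 8.61 kB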
from __future__ import annotations
# Build identifier string (looks like a short commit SHA).
BUILD_SHA = "8974c6e"
from fastapi.responses import Response
import os
# Switch read once at import time: True only when env var SENDCFG_FAST_OK is exactly "1".
# When True, the /api/send-config handler returns immediately without storing anything.
SENDCFG_FAST_OK = os.environ.get("SENDCFG_FAST_OK", "0") == "1"
from backend.sftp_store import store_to_sftp
import json
import os
from pathlib import Path
from typing import Any, Dict
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request, UploadFile, File, Form, UploadFile, File, Form
def _env(*keys: str, default: str = "") -> str:
    """Return the first non-blank value among the environment variables *keys*.

    Keys are tried in the order given. Each value is whitespace-stripped, and
    unset or blank variables are skipped.

    Args:
        *keys: Environment variable names to try, in priority order.
        default: Value returned when no key yields a non-blank value
            (including when no keys are passed at all).

    Returns:
        The stripped value of the first usable variable, or *default*.
    """
    # Uses the module-level ``os`` import; no need to re-import it on every call.
    for key in keys:
        value = (os.environ.get(key) or "").strip()
        if value:
            return value
    return default
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, PlainTextResponse
# FastAPI application object; the route decorators in this module register handlers on it.
app = FastAPI(title="PDF Trainer API", version="1.0")

# CORS: let the Vite dev server (port 5173 on localhost / 127.0.0.1) call this API
# with credentials, using any HTTP method and any request header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Repository root: the parent of the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[1]
BACKEND_DIR = REPO_ROOT / "backend"

# Load backend/.env BEFORE any path below is derived from os.environ; otherwise a
# PDF_TRAINER_UPLOADS_DIR set in that file would be ignored when UPLOADS_DIR is computed.
# Calling load_dotenv again elsewhere in the module is harmless.
load_dotenv(BACKEND_DIR / ".env", override=True)

# Root directory for uploaded files; falls back to /data/uploads when the env var is unset/empty.
UPLOADS_DIR = Path(os.environ.get("PDF_TRAINER_UPLOADS_DIR") or "/data/uploads")
# Stored template configs live here as "{pdf_id}__{template_id}.json".
CONFIGS_DIR = UPLOADS_DIR / "configs"
def _maybe_bundle_to_sftp(pdf_id: str, template_id: str) -> dict:
    """Best-effort push of a stored config + PDF pair to SFTP.

    Nothing is uploaded unless SFTP_HOST, SFTP_USER and SFTP_PASS are all set and both
    ``CONFIGS_DIR/{pdf_id}__{template_id}.json`` and ``UPLOADS_DIR/{pdf_id}.pdf`` exist.
    The PDF name passed along is the stripped content of ``UPLOADS_DIR/{pdf_id}.name.txt``
    when that file exists and is non-blank, otherwise ``"{pdf_id}.pdf"``.

    Returns:
        A small status dict: ``{"sftp": "skipped_not_configured" | "skipped_no_config" |
        "skipped_no_pdf"}``, ``{"sftp": "ok", "remote_dir": ...}`` on success, or
        ``{"sftp": "error", "error": str(exc)}`` when any ``Exception`` is raised.
    """
    try:
        # Only attempt the upload when the environment looks configured.
        env = os.environ
        configured = env.get("SFTP_HOST") and env.get("SFTP_USER") and env.get("SFTP_PASS")
        if not configured:
            return {"sftp": "skipped_not_configured"}

        config_file = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
        pdf_file = UPLOADS_DIR / f"{pdf_id}.pdf"
        name_file = UPLOADS_DIR / f"{pdf_id}.name.txt"

        if not config_file.exists():
            return {"sftp": "skipped_no_config"}
        if not pdf_file.exists():
            return {"sftp": "skipped_no_pdf"}

        config_blob = config_file.read_bytes()
        pdf_blob = pdf_file.read_bytes()

        # Friendly name is optional; a blank file falls back to the default too.
        fallback_name = f"{pdf_id}.pdf"
        if name_file.exists():
            display_name = name_file.read_text(encoding="utf-8").strip() or fallback_name
        else:
            display_name = fallback_name

        remote_dir = store_to_sftp(
            pdf_id=pdf_id,
            template_id=template_id,
            cfg_json_bytes=config_blob,
            pdf_bytes=pdf_blob,
            pdf_name=display_name,
        )
    except Exception as exc:
        return {"sftp": "error", "error": str(exc)}
    return {"sftp": "ok", "remote_dir": remote_dir}
# Load backend/.env explicitly for this process.
# override=True is passed so that values from the file take precedence over variables
# already present in the process environment.
load_dotenv(BACKEND_DIR / ".env", override=True)
def _get_env_required(key: str) -> str:
    """Return the whitespace-stripped value of environment variable *key*.

    Raises:
        HTTPException: status 500 ("Server missing <key> env var") when the
            variable is unset or blank.
    """
    value = os.environ.get(key, "").strip()
    if value:
        return value
    raise HTTPException(status_code=500, detail=f"Server missing {key} env var")
@app.get("/health")
def health():
    """Liveness probe.

    Returns:
        ``{"ok": True, "build_sha": BUILD_SHA}``. The ``build_sha`` field exposes the
        module-level BUILD_SHA constant so a caller can tell which build is answering.
    """
    return {"ok": True, "build_sha": BUILD_SHA}
@app.post("/api/pdf/{pdf_id}")
async def put_pdf(pdf_id: str, file: UploadFile = File(...), pdf_name: str = Form("")):
    """Store an uploaded PDF, and optionally its display name, under ``UPLOADS_DIR/pdfs``.

    Args:
        pdf_id: Identifier taken from the URL path; used as the file stem.
        file: The uploaded PDF (multipart form field).
        pdf_name: Optional display name (multipart form field). Surrounding whitespace is
            stripped; a blank name is not written.

    Returns:
        ``{"ok": True}`` once the file(s) are written.
    """
    # Use the configured uploads root instead of a hard-coded "/data/uploads" so that
    # PDF_TRAINER_UPLOADS_DIR is honored; with the variable unset the path is unchanged.
    # NOTE(review): other code in this module reads UPLOADS_DIR/{pdf_id}.pdf and
    # UPLOADS_DIR/{pdf_id}.name.txt (no "pdfs" sub-directory) -- confirm which layout is intended.
    pdf_dir = UPLOADS_DIR / "pdfs"
    pdf_dir.mkdir(parents=True, exist_ok=True)

    data = await file.read()
    (pdf_dir / f"{pdf_id}.pdf").write_bytes(data)

    display_name = pdf_name.strip()
    if display_name:
        (pdf_dir / f"{pdf_id}.name.txt").write_text(display_name, encoding="utf-8")
    return {"ok": True}
@app.get("/api/pdf/{pdf_id}")
def get_pdf(pdf_id: str):
    """Return the stored PDF for *pdf_id* as an ``application/pdf`` response.

    Raises:
        HTTPException: status 404 with detail "pdf_not_found" when the file does not exist.
    """
    # NOTE(review): _pdf_path is not defined in the visible part of this module --
    # confirm it exists and which directory it resolves to.
    source = _pdf_path(pdf_id)
    try:
        content = source.read_bytes()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="pdf_not_found")
    else:
        return Response(content=content, media_type="application/pdf")
async def put_pdf(pdf_id: str, file: UploadFile = File(...), pdf_name: str = Form("")):
    """Store an uploaded PDF at ``UPLOADS_DIR/{pdf_id}.pdf`` and bundle it with existing configs.

    After writing the PDF (and, when given, its stripped display name to
    ``UPLOADS_DIR/{pdf_id}.name.txt``), every config file matching
    ``CONFIGS_DIR/{pdf_id}__*.json`` is handed to ``_maybe_bundle_to_sftp``.
    Bundling is best-effort: failures are printed and never fail the upload.

    Returns:
        ``{"ok": True}``.
    """
    # NOTE(review): no route decorator is attached to this definition in the visible source,
    # and it rebinds the module-level name ``put_pdf`` -- confirm whether this is meant to be
    # the registered upload handler.
    UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
    data = await file.read()
    (UPLOADS_DIR / f"{pdf_id}.pdf").write_bytes(data)
    if pdf_name:
        (UPLOADS_DIR / f"{pdf_id}.name.txt").write_text(pdf_name.strip(), encoding="utf-8")

    # If any configs already exist for this pdf_id, bundle now.
    prefix = f"{pdf_id}__"
    try:
        for cfg in CONFIGS_DIR.glob(f"{prefix}*.json"):
            # Skip glob hits that do not literally start with the prefix (possible when
            # pdf_id contains glob metacharacters).
            if not cfg.name.startswith(prefix):
                continue
            # Strip the known prefix instead of splitting on the first "__", which would
            # mis-parse the template id whenever pdf_id itself contains "__".
            template_id = cfg.stem[len(prefix):]
            _maybe_bundle_to_sftp(pdf_id, template_id)
    except Exception as e:
        # Best-effort: report the problem instead of silently swallowing it.
        print(f"[SFTP] bundle after upload failed: {e}")
    return {"ok": True}
@app.post("/api/send-config")
async def send_config(request: Request):
    """
    Store config JSON for later pipelines.
    Do NOT require PDF to exist.
    Do NOT send email from this API container (no Gmail creds in HF).

    The request body must be a JSON object with string ``pdf_id`` and ``template_id``
    and an object ``config``. It is written to
    ``CONFIGS_DIR/{pdf_id}__{template_id}.json``; SFTP storage is then attempted on a
    best-effort basis and never fails the request.

    Returns:
        ``{"ok": True, "mode": "fast"}`` when SENDCFG_FAST_OK is set (nothing is stored);
        otherwise ``{"ok": True, "stored": <path>, "pdf_exists": <bool>}`` merged with the
        status dict returned by ``_maybe_bundle_to_sftp``.

    Raises:
        HTTPException: status 400 when the body is not a JSON object, when ``pdf_id`` /
            ``template_id`` is missing or contains a path separator, or when ``config``
            is not an object.
    """
    if SENDCFG_FAST_OK:
        return {"ok": True, "mode": "fast"}

    try:
        payload = await request.json()
    except ValueError:
        # Malformed JSON is a client error, not a server fault.
        raise HTTPException(status_code=400, detail="Invalid JSON body") from None
    if not isinstance(payload, dict):
        raise HTTPException(status_code=400, detail="JSON body must be an object")

    ids = {}
    for field in ("pdf_id", "template_id"):
        raw = payload.get(field)
        value = raw.strip() if isinstance(raw, str) else ""
        if not value:
            raise HTTPException(status_code=400, detail=f"Missing {field}")
        # Both ids become part of a filename; refuse anything that could leave CONFIGS_DIR.
        if any(ch in value for ch in ("/", "\\", "\x00")):
            raise HTTPException(status_code=400, detail=f"Invalid {field}")
        ids[field] = value
    pdf_id = ids["pdf_id"]
    template_id = ids["template_id"]

    config = payload.get("config")
    if not isinstance(config, dict):
        raise HTTPException(status_code=400, detail="Missing config object")

    CONFIGS_DIR.mkdir(parents=True, exist_ok=True)
    out_path = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
    out_path.write_text(
        json.dumps({"pdf_id": pdf_id, "template_id": template_id, "config": config}, indent=2),
        encoding="utf-8",
    )

    # API stores uploaded PDFs at UPLOADS_DIR/{pdf_id}.pdf
    pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"

    # Store both config JSON + (if exists) PDF to SFTP for future pipelines.
    # SFTP errors must NOT break the API response.
    # NOTE(review): this call passes ``cfg_bytes=`` while _maybe_bundle_to_sftp passes
    # ``cfg_json_bytes=`` to the same function, and both run on every request -- confirm
    # against store_to_sftp's signature which one is valid and drop the other.
    try:
        cfg_bytes = out_path.read_bytes()
        pdf_bytes = None
        pdf_name = None
        if pdf_path.exists():
            pdf_bytes = pdf_path.read_bytes()
            # optional friendly name if present
            name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
            pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else f"{pdf_id}.pdf"
        store_to_sftp(
            pdf_id=pdf_id,
            template_id=template_id,
            cfg_bytes=cfg_bytes,
            pdf_bytes=pdf_bytes,
            pdf_name=pdf_name or f"{pdf_id}.pdf",
        )
    except Exception as e:
        print(f"[SFTP] store failed: {e}")

    sftp_status = _maybe_bundle_to_sftp(pdf_id, template_id)
    return {"ok": True, "stored": str(out_path), "pdf_exists": pdf_path.exists(), **sftp_status}
@app.get("/api/config/{pdf_id}/{template_id}")
def get_config(pdf_id: str, template_id: str):
    """Serve the stored config file ``CONFIGS_DIR/{pdf_id}__{template_id}.json``.

    Returns:
        A ``FileResponse`` with media type ``application/json`` whose download filename
        is the config file's own name.

    Raises:
        HTTPException: status 404 ("Config not found") when the file does not exist.
    """
    config_file = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
    if config_file.exists():
        return FileResponse(config_file, media_type="application/json", filename=config_file.name)
    raise HTTPException(status_code=404, detail="Config not found")
@app.post("/api/notify-unknown")
async def notify_unknown(payload: Dict[str, Any]):
    """
    UNKNOWN TEMPLATE NOTIFICATION (rep email)
    REQUIRED payload:
    - pdf_id: str
    OPTIONAL:
    - reason: str
    Sends to REP inbox:
    - PDF_PIPELINE_NOTIFY_TO
    Requirements:
    - Includes trainer link with PDF pre-loaded
    - Attaches PDF
    - No JSON

    Returns:
        ``{"ok": True}`` once the message parts have been assembled.

    Raises:
        HTTPException: 400 when ``pdf_id`` is missing or contains a path separator;
            404 ("pdf_not_found") when ``UPLOADS_DIR/{pdf_id}.pdf`` does not exist;
            500 when PDF_PIPELINE_NOTIFY_TO / PDF_PIPELINE_NOTIFY_FROM is not set.
    """
    # NOTE(review): nothing in this handler delivers the assembled message -- it only
    # builds recipient/subject/body/attachments and returns. Confirm where sending happens.
    raw_pdf_id = payload.get("pdf_id")
    raw_reason = payload.get("reason")
    # Non-string values are treated as absent rather than crashing on .strip().
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    reason = raw_reason.strip() if isinstance(raw_reason, str) else ""
    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")
    # pdf_id becomes part of a file path; refuse anything that could leave UPLOADS_DIR.
    if any(ch in pdf_id for ch in ("/", "\\", "\x00")):
        raise HTTPException(status_code=400, detail="Invalid pdf_id")

    rep_to = _get_env_required("PDF_PIPELINE_NOTIFY_TO")
    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()

    # The PDF lives next to its optional ".name.txt" display-name file.
    pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else f"{pdf_id}.pdf"

    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
    subject = "Action required: Unknown PDF format (template not found)"
    body = (
        "Hi,\n\n"
        "We received a PDF that does not match any existing templates in the system.\n\n"
        + (f"Reason: {reason}\n\n" if reason else "")
        + "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
        f"{trainer_link}\n\n"
        "The original PDF is attached for reference.\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )

    # A missing PDF is reported as 404 instead of surfacing as an unhandled error.
    try:
        pdf_bytes = pdf_path.read_bytes()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="pdf_not_found") from None
    attachments = [(pdf_name, pdf_bytes)]
    return {"ok": True}
@app.get("/", response_class=PlainTextResponse)
def root():
    """Return a one-line plain-text banner pointing callers at /health."""
    banner = "PDF Trainer API. Use /health"
    return banner