Spaces:
Sleeping
Sleeping
Commit ·
2fce05d
1
Parent(s): 8974c0c
deploy: real pdf-trainer-demo API
Browse files- .gitattributes +0 -35
- .gitignore +0 -8
- Dockerfile +19 -7
- README.md +0 -10
- api.py +0 -1
- backend/.DS_Store +0 -0
- backend/{__init__.py → .env} +0 -0
- backend/__pycache__/api.cpython-311.pyc +0 -0
- backend/api.py +180 -106
- backend/scripts/__pycache__/apply_trainer_schemas.cpython-314.pyc +0 -0
- backend/scripts/__pycache__/generate_template_schema_skeletons.cpython-314.pyc +0 -0
- backend/scripts/__pycache__/migrate_hardcoded_templates.cpython-314.pyc +0 -0
- backend/sftp_store.py +0 -42
- backend/worker/__pycache__/__init__.cpython-311.pyc +0 -0
- backend/worker/__pycache__/__init__.cpython-314.pyc +0 -0
- backend/worker/__pycache__/gmail_client.cpython-311.pyc +0 -0
- backend/worker/__pycache__/gmail_client.cpython-314.pyc +0 -0
- backend/worker/__pycache__/openai_classifier.cpython-311.pyc +0 -0
- backend/worker/__pycache__/openai_classifier.cpython-314.pyc +0 -0
- backend/worker/__pycache__/pdf_render.cpython-311.pyc +0 -0
- backend/worker/__pycache__/pdf_render.cpython-314.pyc +0 -0
- backend/worker/__pycache__/prompts.cpython-311.pyc +0 -0
- backend/worker/__pycache__/worker.cpython-311.pyc +0 -0
- backend/worker/__pycache__/worker.cpython-314.pyc +0 -0
- requirements.txt +18 -4
.gitattributes
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
__pycache__/
|
| 2 |
-
*.pyc
|
| 3 |
-
.venv/
|
| 4 |
-
.env
|
| 5 |
-
.env.*
|
| 6 |
-
node_modules/
|
| 7 |
-
dist/
|
| 8 |
-
.DS_Store
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
CHANGED
|
@@ -1,13 +1,25 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
-
ENV PYTHONUNBUFFERED=1
|
| 5 |
-
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
|
| 6 |
|
| 7 |
-
COPY
|
| 8 |
-
RUN
|
| 9 |
|
| 10 |
COPY . .
|
|
|
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ---- Build stage ----
|
| 2 |
+
FROM node:20-alpine AS build
|
| 3 |
|
| 4 |
WORKDIR /app
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
COPY package.json package-lock.json ./
|
| 7 |
+
RUN npm ci
|
| 8 |
|
| 9 |
COPY . .
|
| 10 |
+
RUN npm run build
|
| 11 |
|
| 12 |
+
# ---- Runtime stage ----
|
| 13 |
+
FROM nginx:alpine
|
| 14 |
+
|
| 15 |
+
# Remove default nginx config
|
| 16 |
+
RUN rm /etc/nginx/conf.d/default.conf
|
| 17 |
+
|
| 18 |
+
# Custom nginx config
|
| 19 |
+
COPY nginx.conf /etc/nginx/conf.d/default.conf
|
| 20 |
+
|
| 21 |
+
# Copy built assets
|
| 22 |
+
COPY --from=build /app/dist /usr/share/nginx/html
|
| 23 |
+
|
| 24 |
+
EXPOSE 7860
|
| 25 |
+
CMD ["nginx", "-g", "daemon off;"]
|
README.md
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Pdf Trainer Api
|
| 3 |
-
emoji: 🌍
|
| 4 |
-
colorFrom: indigo
|
| 5 |
-
colorTo: pink
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
---
|
| 9 |
-
|
| 10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
from backend.api import app
|
|
|
|
|
|
backend/.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
backend/{__init__.py → .env}
RENAMED
|
File without changes
|
backend/__pycache__/api.cpython-311.pyc
ADDED
|
Binary file (6.45 kB). View file
|
|
|
backend/api.py
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
|
|
|
| 3 |
import os
|
| 4 |
-
import time
|
| 5 |
from pathlib import Path
|
| 6 |
-
from typing import
|
| 7 |
-
from uuid import uuid4
|
| 8 |
-
from collections import deque
|
| 9 |
-
from dataclasses import dataclass, asdict
|
| 10 |
|
| 11 |
-
from
|
|
|
|
| 12 |
from fastapi.middleware.cors import CORSMiddleware
|
| 13 |
-
from fastapi.responses import FileResponse,
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
app = FastAPI(title="PDF Trainer API", version="1.0")
|
| 17 |
|
|
|
|
| 18 |
app.add_middleware(
|
| 19 |
CORSMiddleware,
|
| 20 |
allow_origins=[
|
|
@@ -26,114 +26,188 @@ app.add_middleware(
|
|
| 26 |
allow_headers=["*"],
|
| 27 |
)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
def _require_worker_token(x_worker_token: Optional[str]):
|
| 38 |
-
if not WORKER_TOKEN:
|
| 39 |
-
raise HTTPException(status_code=500, detail="Server missing WORKER_TOKEN")
|
| 40 |
-
if not x_worker_token or x_worker_token != WORKER_TOKEN:
|
| 41 |
-
raise HTTPException(status_code=401, detail="Unauthorized worker")
|
| 42 |
|
| 43 |
@app.get("/health")
|
| 44 |
-
def health()
|
| 45 |
return {"ok": True}
|
| 46 |
|
| 47 |
-
@app.get("/")
|
| 48 |
-
def root() -> Dict[str, str]:
|
| 49 |
-
return {"service": "pdf-trainer-api", "status": "running"}
|
| 50 |
-
|
| 51 |
-
def _pdf_path(pdf_id: str) -> Path:
|
| 52 |
-
safe = pdf_id.strip().replace("/", "_")
|
| 53 |
-
if not safe:
|
| 54 |
-
raise HTTPException(status_code=400, detail="Missing pdf_id")
|
| 55 |
-
if not safe.lower().endswith(".pdf"):
|
| 56 |
-
safe = safe + ".pdf"
|
| 57 |
-
return PDF_DIR / safe
|
| 58 |
-
|
| 59 |
-
@app.put("/api/pdf/{pdf_id}")
|
| 60 |
-
async def put_pdf(
|
| 61 |
-
pdf_id: str,
|
| 62 |
-
request: Request,
|
| 63 |
-
x_worker_token: Optional[str] = Header(default=None),
|
| 64 |
-
):
|
| 65 |
-
_require_worker_token(x_worker_token)
|
| 66 |
-
body = await request.body()
|
| 67 |
-
if not body:
|
| 68 |
-
raise HTTPException(status_code=400, detail="Empty body")
|
| 69 |
-
p = _pdf_path(pdf_id)
|
| 70 |
-
p.write_bytes(body)
|
| 71 |
-
return {"ok": True, "pdf_id": p.stem, "bytes": len(body)}
|
| 72 |
|
| 73 |
@app.get("/api/pdf/{pdf_id}")
|
| 74 |
def get_pdf(pdf_id: str):
|
| 75 |
-
|
| 76 |
-
if not
|
| 77 |
raise HTTPException(status_code=404, detail="PDF not found")
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
@app.post("/api/send-config")
|
| 81 |
-
async def send_config(payload:
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
config = payload.get("config")
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
return {"ok": True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
import json
|
| 4 |
import os
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
+
from typing import Any, Dict
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
from fastapi import FastAPI, HTTPException
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
+
from fastapi.responses import FileResponse, PlainTextResponse
|
| 12 |
+
|
| 13 |
+
from backend.worker.gmail_client import GmailClient
|
| 14 |
|
| 15 |
app = FastAPI(title="PDF Trainer API", version="1.0")
|
| 16 |
|
| 17 |
+
# Allow Vite dev server
|
| 18 |
app.add_middleware(
|
| 19 |
CORSMiddleware,
|
| 20 |
allow_origins=[
|
|
|
|
| 26 |
allow_headers=["*"],
|
| 27 |
)
|
| 28 |
|
| 29 |
+
# Filesystem layout, derived from this file's own location.
REPO_ROOT = Path(__file__).resolve().parents[1]
BACKEND_DIR = REPO_ROOT.joinpath("backend")
UPLOADS_DIR = BACKEND_DIR.joinpath("worker", "uploads")

# Load backend/.env explicitly ONCE for this process. override=True means
# values from that file replace variables already present in the environment.
load_dotenv(BACKEND_DIR.joinpath(".env"), override=True)

# Paths handed to GmailClient: an environment variable wins when set,
# otherwise the files are looked up under BACKEND_DIR.
CREDENTIALS_JSON = Path(
    os.getenv("GMAIL_CREDENTIALS_JSON", str(BACKEND_DIR.joinpath("credentials.json")))
)
TOKEN_JSON = Path(
    os.getenv("GMAIL_TOKEN_JSON", str(BACKEND_DIR.joinpath("token.json")))
)
|
| 38 |
+
|
| 39 |
|
| 40 |
+
def _gmail() -> GmailClient:
    """Build a GmailClient from the configured credentials and token paths."""
    client = GmailClient(CREDENTIALS_JSON, TOKEN_JSON)
    return client
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _get_env_required(key: str) -> str:
    """Return the whitespace-stripped value of environment variable ``key``.

    Raises:
        HTTPException: status 500 when the variable is unset, empty, or
            contains only whitespace.
    """
    raw = os.environ.get(key)
    value = raw.strip() if raw else ""
    if value:
        return value
    raise HTTPException(status_code=500, detail=f"Server missing {key} env var")
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
@app.get("/health")
def health():
    # Liveness probe: always answers with a constant "ok" payload.
    # (Documented with comments rather than a docstring so the route's
    # generated OpenAPI description stays as it was.)
    status = {"ok": True}
    return status
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
@app.get("/api/pdf/{pdf_id}")
def get_pdf(pdf_id: str):
    # Serve the stored PDF for ``pdf_id`` from UPLOADS_DIR.
    #
    # The download name comes from the optional "<pdf_id>.name.txt" sidecar
    # file and is also exposed in the X-PDF-Name response header. Responds
    # with 404 when no such PDF file exists.
    # (Comments instead of a docstring so the route's OpenAPI description is
    # unchanged.)
    from urllib.parse import quote

    # An id containing a path separator or NUL cannot name a file directly
    # inside UPLOADS_DIR; treat it as "not found" instead of touching the
    # filesystem with it.
    if any(ch in pdf_id for ch in "/\\\x00"):
        raise HTTPException(status_code=404, detail="PDF not found")

    path = UPLOADS_DIR / f"{pdf_id}.pdf"
    # is_file() rather than exists(): a directory called "<id>.pdf" must be a
    # 404, not an error later when the response tries to read it.
    if not path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.is_file() else ""
    if not pdf_name:
        # No sidecar, or a blank one: fall back to the stored file name.
        pdf_name = f"{pdf_id}.pdf"

    resp = FileResponse(path, media_type="application/pdf", filename=pdf_name)

    # Header values are encoded as latin-1 when assigned; a name outside that
    # range used to raise here. Such names are percent-encoded (UTF-8), all
    # other names are sent exactly as before.
    try:
        pdf_name.encode("latin-1")
        header_name = pdf_name
    except UnicodeEncodeError:
        header_name = quote(pdf_name)
    resp.headers["X-PDF-Name"] = header_name
    return resp
|
| 68 |
+
|
| 69 |
|
| 70 |
@app.post("/api/send-config")
async def send_config(payload: Dict[str, Any]):
    """
    PIPELINE SUBMISSION EMAIL (after rep saves config)

    REQUIRED payload:
    - pdf_id: str
    - template_id: str
    - config: dict

    Sends to PIPELINE inbox:
    - PDF_PIPELINE_PIPELINE_NOTIFY_TO

    Requirements:
    - Subject includes template_id
    - Body includes pdf_id
    - Attachments: JSON + PDF

    Errors:
    - 400: pdf_id / template_id / config missing or malformed
    - 404: no stored PDF for pdf_id
    - 500: a required environment variable is not set
    """
    from urllib.parse import quote

    raw_pdf_id = payload.get("pdf_id")
    raw_template_id = payload.get("template_id")
    config = payload.get("config")

    # Non-string ids are reported as missing (400) instead of failing on
    # .strip() with an unhandled AttributeError.
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    template_id = raw_template_id.strip() if isinstance(raw_template_id, str) else ""

    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")
    if not template_id:
        raise HTTPException(status_code=400, detail="Missing template_id")
    if not isinstance(config, dict):
        raise HTTPException(status_code=400, detail="Missing config object")

    # pdf_id comes straight from the request body and is joined into a
    # filesystem path below: refuse path separators / NUL so it cannot point
    # outside UPLOADS_DIR. Line breaks are refused in both ids because they
    # end up in the email subject and attachment names.
    if any(ch in pdf_id for ch in "/\\\x00\r\n"):
        raise HTTPException(status_code=400, detail="Invalid pdf_id")
    if "\r" in template_id or "\n" in template_id:
        raise HTTPException(status_code=400, detail="Invalid template_id")

    pipeline_to = _get_env_required("PDF_PIPELINE_PIPELINE_NOTIFY_TO")
    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()

    pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
    # is_file() so that a directory with a matching name is also a 404.
    if not pdf_path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found for pdf_id")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.is_file() else ""
    if not pdf_name:
        # No sidecar, or a blank one: use the stored file name.
        pdf_name = f"{pdf_id}.pdf"

    # Percent-encode the id so characters such as '&', '#' or spaces cannot
    # break the query string; plain ids are left unchanged by quote().
    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={quote(pdf_id, safe='')}"

    config_filename = f"trainer_config_{pdf_id}_{template_id}.json"

    subject = f"PDF_TRAINER_CONFIG_SUBMITTED | template_id={template_id}"
    body = (
        "Hi,\n\n"
        "A PDF Trainer configuration was submitted.\n\n"
        f"template_id: {template_id}\n"
        f"pdf_id: {pdf_id}\n"
        f"trainer_link: {trainer_link}\n\n"
        "Attachments:\n"
        f"- {config_filename}\n"
        f"- {pdf_name}\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )

    cfg_bytes = json.dumps(
        {"pdf_id": pdf_id, "template_id": template_id, "config": config},
        indent=2,
    ).encode("utf-8")

    attachments = [
        (config_filename, cfg_bytes),
        (pdf_name, pdf_path.read_bytes()),
    ]

    # NOTE(review): send_email is called synchronously inside an async
    # handler; if it blocks on network I/O it will stall the event loop --
    # confirm against GmailClient and consider running it in a worker thread.
    gmail = _gmail()
    gmail.send_email(
        to_email=pipeline_to,
        from_email=notify_from,
        subject=subject,
        body_text=body,
        attachments=attachments,
    )

    return {"ok": True}
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
@app.post("/api/notify-unknown")
async def notify_unknown(payload: Dict[str, Any]):
    """
    UNKNOWN TEMPLATE NOTIFICATION (rep email)

    REQUIRED payload:
    - pdf_id: str
    OPTIONAL:
    - reason: str

    Sends to REP inbox:
    - PDF_PIPELINE_NOTIFY_TO

    Requirements:
    - Includes trainer link with PDF pre-loaded
    - Attaches PDF
    - No JSON

    Errors:
    - 400: pdf_id missing or malformed
    - 404: no stored PDF for pdf_id
    - 500: a required environment variable is not set
    """
    from urllib.parse import quote

    raw_pdf_id = payload.get("pdf_id")
    raw_reason = payload.get("reason")

    # A non-string pdf_id is reported as missing (400) instead of failing on
    # .strip(); a non-string reason is rendered with str() for the same reason.
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    reason = str(raw_reason).strip() if raw_reason else ""

    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")

    # pdf_id comes straight from the request body and is joined into a
    # filesystem path below: refuse path separators / NUL so it cannot point
    # outside UPLOADS_DIR, and line breaks because it is used in the
    # fallback attachment name.
    if any(ch in pdf_id for ch in "/\\\x00\r\n"):
        raise HTTPException(status_code=400, detail="Invalid pdf_id")

    rep_to = _get_env_required("PDF_PIPELINE_NOTIFY_TO")
    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()

    pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
    # is_file() so that a directory with a matching name is also a 404.
    if not pdf_path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found for pdf_id")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.is_file() else ""
    if not pdf_name:
        # No sidecar, or a blank one: use the stored file name.
        pdf_name = f"{pdf_id}.pdf"

    # Percent-encode the id so characters such as '&', '#' or spaces cannot
    # break the query string; plain ids are left unchanged by quote().
    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={quote(pdf_id, safe='')}"

    reason_paragraph = f"Reason: {reason}\n\n" if reason else ""

    subject = "Action required: Unknown PDF format (template not found)"
    body = (
        "Hi,\n\n"
        "We received a PDF that does not match any existing templates in the system.\n\n"
        + reason_paragraph
        + "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
        f"{trainer_link}\n\n"
        "The original PDF is attached for reference.\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )

    attachments = [(pdf_name, pdf_path.read_bytes())]

    # NOTE(review): send_email is called synchronously inside an async
    # handler; if it blocks on network I/O it will stall the event loop --
    # confirm against GmailClient and consider running it in a worker thread.
    gmail = _gmail()
    gmail.send_email(
        to_email=rep_to,
        from_email=notify_from,
        subject=subject,
        body_text=body,
        attachments=attachments,
    )

    return {"ok": True}
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
@app.get("/", response_class=PlainTextResponse)
def root():
    # Plain-text landing message that points callers at the health endpoint.
    # (Comments instead of a docstring so the route's OpenAPI description is
    # unchanged.)
    message = "PDF Trainer API. Use /health"
    return message
|
backend/scripts/__pycache__/apply_trainer_schemas.cpython-314.pyc
ADDED
|
Binary file (2.8 kB). View file
|
|
|
backend/scripts/__pycache__/generate_template_schema_skeletons.cpython-314.pyc
ADDED
|
Binary file (4.72 kB). View file
|
|
|
backend/scripts/__pycache__/migrate_hardcoded_templates.cpython-314.pyc
ADDED
|
Binary file (3.15 kB). View file
|
|
|
backend/sftp_store.py
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import socket
|
| 3 |
-
import posixpath
|
| 4 |
-
import paramiko
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
def _env(name: str) -> str:
    """Return the whitespace-stripped value of environment variable ``name``.

    Raises:
        RuntimeError: when the variable is unset, empty, or only whitespace.
    """
    raw = os.getenv(name)
    value = raw.strip() if raw else ""
    if value:
        return value
    raise RuntimeError(f"Missing env var: {name}")
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
def download_bytes(remote_path: str) -> bytes:
    """
    Downloads a file from SFTP_ROOT + remote_path.
    remote_path should be relative like 'pdfs/<id>.pdf' (no leading slash).

    Connection settings are read from the SFTP_HOST, SFTP_PORT, SFTP_USER,
    SFTP_PASS and SFTP_ROOT environment variables.

    Returns:
        The complete contents of the remote file.

    Raises:
        RuntimeError: a required environment variable is missing, or no SFTP
            session could be opened on the transport.
        Other exceptions from the socket / paramiko calls propagate unchanged.
    """
    host = _env("SFTP_HOST")
    port = int(_env("SFTP_PORT"))
    user = _env("SFTP_USER")
    pw = _env("SFTP_PASS")
    root = (_env("SFTP_ROOT").rstrip("/") or "/")

    # Leading slashes are dropped so the path is resolved relative to root.
    # NOTE(review): '..' segments are not rejected here -- confirm callers
    # never pass untrusted paths.
    rp = remote_path.lstrip("/")

    sock = socket.create_connection((host, port), timeout=10)
    try:
        transport = paramiko.Transport(sock)
    except Exception:
        # The transport never took ownership of the socket; close it here.
        sock.close()
        raise

    # Everything after the transport exists runs under this try so that a
    # failed handshake/authentication no longer leaks the connection.
    try:
        transport.banner_timeout = 10
        transport.auth_timeout = 10
        transport.connect(username=user, password=pw)

        sftp = paramiko.SFTPClient.from_transport(transport)
        if sftp is None:
            raise RuntimeError("Could not open SFTP session")

        try:
            sftp.chdir(root)
            full = posixpath.join(".", rp)
            with sftp.open(full, "rb") as f:
                return f.read()
        finally:
            sftp.close()
    finally:
        transport.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/worker/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (221 Bytes). View file
|
|
|
backend/worker/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (211 Bytes). View file
|
|
|
backend/worker/__pycache__/gmail_client.cpython-311.pyc
ADDED
|
Binary file (11 kB). View file
|
|
|
backend/worker/__pycache__/gmail_client.cpython-314.pyc
ADDED
|
Binary file (11.2 kB). View file
|
|
|
backend/worker/__pycache__/openai_classifier.cpython-311.pyc
ADDED
|
Binary file (7.51 kB). View file
|
|
|
backend/worker/__pycache__/openai_classifier.cpython-314.pyc
ADDED
|
Binary file (12.5 kB). View file
|
|
|
backend/worker/__pycache__/pdf_render.cpython-311.pyc
ADDED
|
Binary file (2.38 kB). View file
|
|
|
backend/worker/__pycache__/pdf_render.cpython-314.pyc
ADDED
|
Binary file (2.28 kB). View file
|
|
|
backend/worker/__pycache__/prompts.cpython-311.pyc
ADDED
|
Binary file (2.85 kB). View file
|
|
|
backend/worker/__pycache__/worker.cpython-311.pyc
ADDED
|
Binary file (9.98 kB). View file
|
|
|
backend/worker/__pycache__/worker.cpython-314.pyc
ADDED
|
Binary file (14.5 kB). View file
|
|
|
requirements.txt
CHANGED
|
@@ -1,4 +1,18 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Google / Gmail
|
| 2 |
+
google-api-python-client==2.111.0
|
| 3 |
+
google-auth==2.27.0
|
| 4 |
+
google-auth-oauthlib==1.2.0
|
| 5 |
+
|
| 6 |
+
# OpenAI
|
| 7 |
+
openai==1.12.0
|
| 8 |
+
|
| 9 |
+
# PDF -> image
|
| 10 |
+
PyMuPDF==1.23.26
|
| 11 |
+
Pillow==10.2.0
|
| 12 |
+
|
| 13 |
+
# Utilities
|
| 14 |
+
python-dotenv==1.0.1
|
| 15 |
+
requests==2.31.0
|
| 16 |
+
|
| 17 |
+
fastapi==0.115.6
|
| 18 |
+
uvicorn==0.30.6
|