File size: 7,053 Bytes
20919fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2aa6bc
 
 
20919fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2aa6bc
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import Any, Dict, Optional, Union
from urllib.parse import urlparse

from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import AliasChoices, BaseModel, Field

from backend.worker.gmail_client import GmailClient
from backend.worker.hf_env_files import resolve_json_or_path
from backend.worker.template_store import save_trainer_template

# Application object for this module; the CORS middleware and the route
# handlers defined in this file are registered on it.
app = FastAPI()


def _origin_from_url(url: str) -> Optional[str]:
    """Reduce a URL to its origin (``scheme://host[:port]``) for CORS matching.

    Args:
        url: Absolute URL such as ``https://example.com/app?x=1``. ``None``
            and empty or whitespace-only strings are tolerated.

    Returns:
        The origin with path, query and fragment dropped, or ``None`` when the
        input is empty, cannot be parsed, or has no scheme or host. Any
        ``user:password@`` prefix is removed and the host is lower-cased.
    """
    url = (url or "").strip()
    if not url:
        return None
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects malformed authorities (e.g. an unclosed IPv6 bracket).
        return None
    # Browsers send the Origin header without userinfo and with a lower-case
    # host, and the allow-list is matched as exact strings, so normalize here.
    authority = parsed.netloc.rpartition("@")[2].lower()
    if not parsed.scheme or not authority:
        return None
    return f"{parsed.scheme}://{authority}"


# Browser origins permitted by CORS. The two localhost entries are always
# present (presumably local front-end dev servers — confirm); the trainer UI
# origin is added when PDF_TRAINER_BASE_URL holds a parseable absolute URL.
_allowed_origins = {
    "http://localhost:5173",
    "http://localhost:3000",
}
trainer_origin = _origin_from_url(os.environ.get("PDF_TRAINER_BASE_URL", ""))
if trainer_origin:
    _allowed_origins.add(trainer_origin)

app.add_middleware(
    CORSMiddleware,
    # The set always contains the localhost defaults, so it can never be empty
    # and no wildcard fallback is needed. Sorting keeps the order deterministic.
    allow_origins=sorted(_allowed_origins),
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)


def _require_webhook_secret(provided: Optional[str]) -> None:
    """Reject the request unless it carries the configured webhook secret.

    Args:
        provided: Secret supplied by the caller (``None`` is treated as empty).
            Surrounding whitespace is ignored on both sides of the comparison.

    Raises:
        HTTPException: 500 when ``PDF_PIPELINE_WEBHOOK_SECRET`` is unset or
            blank on the server; 401 when the supplied secret does not match.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    expected = (os.environ.get("PDF_PIPELINE_WEBHOOK_SECRET") or "").strip()
    if not expected:
        raise HTTPException(
            status_code=500,
            detail="Server missing PDF_PIPELINE_WEBHOOK_SECRET env var",
        )
    supplied = (provided or "").strip()
    # Constant-time comparison so response timing does not leak how much of the
    # secret matched. compare_digest only accepts ASCII-only str, so compare
    # bytes; "surrogatepass" keeps undecodable environment bytes encodable.
    if not hmac.compare_digest(
        supplied.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    ):
        raise HTTPException(status_code=401, detail="Invalid webhook secret")


def _alias_env(primary: str, fallback: str) -> None:
    """Populate env var *primary* from *fallback* when *primary* is unset or blank.

    Args:
        primary: Name of the environment variable that should end up set.
        fallback: Name of the variable whose stripped value is copied over.

    Nothing is written when *primary* already has a non-blank value, or when
    *fallback* is itself unset or blank.
    """
    environ = os.environ
    current = (environ.get(primary) or "").strip()
    # Only consult the fallback when the primary has nothing usable.
    substitute = "" if current else (environ.get(fallback) or "").strip()
    if substitute:
        environ[primary] = substitute


def _gmail_paths(repo_root: Path) -> tuple[Path, Path]:
    """Return the ``(credentials, token)`` paths used to build the Gmail client.

    The ``PDF_PIPELINE_``-prefixed env vars are first copied onto the
    unprefixed ``GMAIL_*`` names when those are unset or blank. Each path is
    then produced by ``resolve_json_or_path`` from the env var name, the file
    under ``<repo_root>/backend`` and a ``/tmp`` location.
    NOTE(review): presumably the env var may hold inline JSON or a path, with
    the ``/tmp`` file as the materialization target — confirm against the helper.

    Args:
        repo_root: Directory that contains the ``backend`` folder.
    """
    for canonical, prefixed in (
        ("GMAIL_CREDENTIALS_JSON", "PDF_PIPELINE_GMAIL_CREDENTIALS_JSON"),
        ("GMAIL_TOKEN_JSON", "PDF_PIPELINE_GMAIL_TOKEN_JSON"),
    ):
        _alias_env(canonical, prefixed)

    backend = repo_root / "backend"
    credentials_path = resolve_json_or_path(
        "GMAIL_CREDENTIALS_JSON",
        backend / "credentials.json",
        Path("/tmp/credentials.json"),
    )
    token_path = resolve_json_or_path(
        "GMAIL_TOKEN_JSON",
        backend / "token.json",
        Path("/tmp/token.json"),
    )
    return credentials_path, token_path


def _uploads_dir(repo_root: Path) -> Path:
    """Return ``<repo_root>/backend/worker/uploads`` (the path is not created or checked)."""
    return repo_root.joinpath("backend", "worker", "uploads")


class SaveConfigRequest(BaseModel):
    """Request body accepted by ``POST /api/trainer/save-config``.

    Every field accepts both its snake_case name and a camelCase alias (see the
    ``AliasChoices`` on each field), so clients may use either spelling.
    """

    # Required. Identifies the uploaded PDF; the handler uses it to locate
    # "<pdf_id>.pdf" and "<pdf_id>.name.txt" in the uploads directory.
    pdf_id: str = Field(
        ...,
        validation_alias=AliasChoices("pdf_id", "pdfId"),
        description="The pdf_id used by the trainer link (?pdf_id=...).",
    )
    # Required. Passed to save_trainer_template as the key the config is stored under.
    template_id: str = Field(
        ...,
        validation_alias=AliasChoices("template_id", "templateId"),
        description="The template_id being saved/updated.",
    )
    # Required. Either an already-decoded JSON object or a string containing
    # JSON; the handler decodes the string form and requires an object.
    config: Union[Dict[str, Any], str] = Field(
        ...,
        validation_alias=AliasChoices("config", "configuration", "config_json", "configJson"),
        description="Trainer configuration JSON.",
    )
    # Optional. When provided and non-empty it takes precedence over the
    # server's default recipient taken from the environment.
    notify_to: Optional[str] = Field(
        None,
        validation_alias=AliasChoices("notify_to", "notifyTo"),
        description="Override recipient email (optional).",
    )


# PDFs of this size (in bytes) or larger are not attached to the confirmation email.
_MAX_PDF_ATTACHMENT_BYTES = 20 * 1024 * 1024


def _is_plain_file_stem(value: str) -> bool:
    """Return True when *value* has no path separator or NUL, so it cannot leave its directory."""
    return not any(ch in value for ch in ("/", "\\", "\x00"))


def _parse_trainer_config(config: Union[Dict[str, Any], str]) -> Dict[str, Any]:
    """Return *config* as a dict, decoding it first when it arrives as a JSON string.

    Raises:
        HTTPException: 422 when the string is not valid JSON or does not decode to an object.
    """
    if not isinstance(config, str):
        return config
    try:
        parsed = json.loads(config)
    except (ValueError, RecursionError) as e:
        # RecursionError covers pathologically nested documents; still a client error.
        raise HTTPException(status_code=422, detail=f"config is not valid JSON: {e}") from e
    if not isinstance(parsed, dict):
        raise HTTPException(status_code=422, detail="config JSON must be an object")
    return parsed


def _load_original_pdf(uploads: Path, pdf_id: str) -> tuple[str, Optional[bytes], bool]:
    """Best-effort load of the uploaded PDF for *pdf_id*.

    Returns:
        ``(attachment_name, pdf_bytes, too_large)``. ``pdf_bytes`` is ``None``
        when the file is missing, unreadable, empty, or too large to attach;
        ``too_large`` is True only in the last case.
    """
    pdf_path = uploads / f"{pdf_id}.pdf"
    name_path = uploads / f"{pdf_id}.name.txt"

    # A sidecar "<pdf_id>.name.txt" may carry the original file name.
    pdf_name = f"{pdf_id}.pdf"
    if name_path.exists():
        try:
            pdf_name = name_path.read_text(encoding="utf-8").strip() or pdf_name
        except (OSError, ValueError):
            # Unreadable or undecodable sidecar: keep the generated name.
            pass

    try:
        if not pdf_path.exists():
            return pdf_name, None, False
        # Check the size on disk first so an oversized file is never loaded.
        if pdf_path.stat().st_size >= _MAX_PDF_ATTACHMENT_BYTES:
            return pdf_name, None, True
        data = pdf_path.read_bytes()
    except OSError:
        return pdf_name, None, False
    if len(data) >= _MAX_PDF_ATTACHMENT_BYTES:
        # The file grew between stat() and read(); still honour the limit.
        return pdf_name, None, True
    return pdf_name, data or None, False


@app.post("/api/trainer/save-config")
def save_config(
    req: SaveConfigRequest,
    x_webhook_secret: Optional[str] = Header(default=None, alias="X-Webhook-Secret"),
):
    """
    Called by the PDF Trainer UI when "Save configuration" is clicked.
    Behavior:
      - persists the config JSON via save_trainer_template under <template_id>
      - sends a confirmation email with the config JSON attached, plus the
        original PDF when it is found in uploads/ and is under 20 MiB
    Errors:
      - 401 when the X-Webhook-Secret header does not match
      - 422 when pdf_id contains a path separator or config is not a JSON object
      - 500 when required env vars are missing, or saving/sending fails
    Returns {"ok": True} on success.
    """
    _require_webhook_secret(x_webhook_secret)

    repo_root = Path(__file__).resolve().parent

    notify_from = (os.environ.get("PDF_PIPELINE_NOTIFY_FROM") or "").strip()
    if not notify_from:
        raise HTTPException(status_code=500, detail="Missing PDF_PIPELINE_NOTIFY_FROM env var")

    # Where to send the confirmation.
    # NOTE(review): the doubled "PIPELINE" in this variable name looks like a
    # typo, but it is the name deployments currently set — confirm before renaming.
    default_to = (os.environ.get("PDF_PIPELINE_PIPELINE_NOTIFY_TO") or "").strip()
    to_email = (req.notify_to or default_to).strip()
    if not to_email:
        raise HTTPException(
            status_code=500,
            detail="Missing PDF_PIPELINE_PIPELINE_NOTIFY_TO env var (or provide notify_to in request)",
        )

    # pdf_id is interpolated into file names under uploads/; refuse anything
    # that could point outside that directory before any state is written.
    # NOTE(review): template_id is handed to save_trainer_template unchecked —
    # confirm that helper sanitizes it.
    if not _is_plain_file_stem(req.pdf_id):
        raise HTTPException(status_code=422, detail="pdf_id must not contain path separators")

    # Persist config JSON for local tracking/backups
    cfg_obj = _parse_trainer_config(req.config)
    try:
        saved_path = save_trainer_template(req.template_id, cfg_obj)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to save template config: {e}") from e

    # Load original PDF from uploads/ (best-effort)
    pdf_name, pdf_bytes, pdf_too_large = _load_original_pdf(_uploads_dir(repo_root), req.pdf_id)

    cfg_bytes = json.dumps(cfg_obj, indent=2).encode("utf-8")
    cfg_filename = f"trainer_config_{req.pdf_id}__{req.template_id}.json"

    attachments = [(cfg_filename, cfg_bytes)]
    body_lines = [
        "Configuration has been updated.",
        "",
        f"template_id: {req.template_id}",
        f"pdf_id: {req.pdf_id}",
        f"saved: {saved_path}",
    ]

    if pdf_bytes is not None:
        attachments.append((pdf_name, pdf_bytes))
    elif pdf_too_large:
        body_lines.append("")
        body_lines.append("Note: PDF was too large to attach.")
    else:
        body_lines.append("")
        body_lines.append("Note: Original PDF not found on worker; only config JSON is attached.")

    subject = f"PDF Trainer: configuration updated ({req.template_id})"
    body = "\n".join(body_lines) + "\n"

    creds_path, token_path = _gmail_paths(repo_root)
    try:
        gmail = GmailClient(creds_path, token_path)
        gmail.send_email(
            to_email=to_email,
            from_email=notify_from,
            subject=subject,
            body_text=body,
            attachments=attachments,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to send confirmation email: {e}") from e

    return {"ok": True}

@app.get("/")
def root():
    """Answer requests to the bare root path with a static OK payload."""
    payload = {"ok": True}
    return payload

@app.get("/health")
def health():
    """Return a static OK payload; no dependencies are checked."""
    status = {"ok": True}
    return status