Avinashnalla7 committed on
Commit
20919fe
·
1 Parent(s): 9d11a64

Add save-config webhook + gmail_auth helper

Browse files
Files changed (4) hide show
  1. .dockerignore +20 -0
  2. README.md +29 -0
  3. backend/worker/gmail_auth.py +98 -0
  4. health_api.py +205 -1
.dockerignore ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # secrets
2
+ .env
3
+ _local_secrets/
4
+
5
+ # oauth files (should be provided via env/volume, not baked into images)
6
+ backend/credentials.json
7
+ backend/token.json
8
+ **/credentials.json
9
+ **/token.json
10
+
11
+ # runtime artifacts
12
+ backend/worker/tmp/
13
+ backend/worker/uploads/
14
+ __pycache__/
15
+ *.pyc
16
+ .DS_Store
17
+
18
+ # local tooling
19
+ .venv/
20
+ .git/
README.md CHANGED
@@ -5,3 +5,32 @@ app_port: 7860
5
  ---
6
 
7
  Worker + health API.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ---
6
 
7
  Worker + health API.
8
+
9
+ ## Local run
10
+
11
+ ```sh
12
+ python3 -m venv .venv
13
+ source .venv/bin/activate
14
+ pip install -r requirements.txt
15
+
16
+ set -a; source .env; set +a
17
+ ./start.sh
18
+ ```
19
+
20
+ ## Trainer “Save configuration” webhook
21
+
22
+ The health API exposes a webhook that the Trainer UI can call when a rep clicks **Save configuration**.
23
+
24
+ - Endpoint: `POST /api/trainer/save-config`
25
+ - Header: `X-Webhook-Secret: $PDF_PIPELINE_WEBHOOK_SECRET`
26
+ - Body JSON:
27
+ - `pdf_id` (string)
28
+ - `template_id` (string)
29
+ - `config` (object)
30
+ - `notify_to` (string, optional override)
31
+
32
+ Required env vars for the webhook to send the confirmation email:
33
+
34
+ - `PDF_PIPELINE_WEBHOOK_SECRET`
35
+ - `PDF_PIPELINE_NOTIFY_FROM` (must be the same Gmail account you authorized)
36
+ - `PDF_PIPELINE_PIPELINE_NOTIFY_TO` (recipient for the confirmation email)
backend/worker/gmail_auth.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from dotenv import load_dotenv
8
+ from google_auth_oauthlib.flow import InstalledAppFlow
9
+
10
+ try:
11
+ from backend.worker.hf_env_files import resolve_json_or_path
12
+ except ModuleNotFoundError:
13
+ import sys
14
+
15
+ sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
16
+ from backend.worker.hf_env_files import resolve_json_or_path
17
+
18
+
19
+ SCOPES = [
20
+ "https://www.googleapis.com/auth/gmail.modify",
21
+ "https://www.googleapis.com/auth/gmail.send",
22
+ ]
23
+
24
+
25
def _repo_root() -> Path:
    """Return the directory two levels above the directory containing this file."""
    this_file = Path(__file__).resolve()
    # parents[0] is the containing directory; parents[2] is two levels further up.
    return this_file.parents[2]
27
+
28
+
29
def _alias_env(primary: str, fallback: str) -> None:
    """Copy the stripped value of ``fallback`` into ``primary`` when ``primary`` is unset or blank.

    Nothing is written when ``primary`` already holds a non-blank value, or
    when ``fallback`` is itself unset or blank.
    """
    current = os.environ.get(primary) or ""
    if not current.strip():
        legacy_value = (os.environ.get(fallback) or "").strip()
        if legacy_value:
            os.environ[primary] = legacy_value
35
+
36
+
37
def main() -> None:
    """Run the interactive Gmail OAuth flow and write the resulting token JSON.

    Command-line flags:
        --credentials  Value stored into GMAIL_CREDENTIALS_JSON (overrides env).
        --token        Value stored into GMAIL_TOKEN_JSON (overrides env).
        --console      Authorize without opening a browser automatically.

    Raises:
        FileNotFoundError: if no OAuth client JSON exists at the resolved
            path nor at backend/credentials.json.
    """
    parser = argparse.ArgumentParser(description="Interactive Gmail OAuth token generator.")
    parser.add_argument(
        "--credentials",
        help="Path to OAuth client credentials JSON (overrides env).",
        default="",
    )
    parser.add_argument(
        "--token",
        help="Path to write token JSON (overrides env).",
        default="",
    )
    parser.add_argument(
        "--console",
        action="store_true",
        help="Use console-based auth (no local server).",
    )
    args = parser.parse_args()

    repo_root = _repo_root()

    # Load local env files if present (helps local dev; HF will use injected env vars).
    # override=False keeps variables that are already set in the process environment.
    load_dotenv(repo_root / ".env", override=False)
    load_dotenv(repo_root / "backend" / ".env", override=False)

    # Explicit CLI flags win over anything loaded from the environment.
    if args.credentials:
        os.environ["GMAIL_CREDENTIALS_JSON"] = args.credentials
    if args.token:
        os.environ["GMAIL_TOKEN_JSON"] = args.token

    # Back-compat with older env var names.
    _alias_env("GMAIL_CREDENTIALS_JSON", "PDF_PIPELINE_GMAIL_CREDENTIALS_JSON")
    _alias_env("GMAIL_TOKEN_JSON", "PDF_PIPELINE_GMAIL_TOKEN_JSON")

    backend_dir = repo_root / "backend"
    default_creds = backend_dir / "credentials.json"
    default_token = backend_dir / "token.json"

    # NOTE(review): resolve_json_or_path is defined in hf_env_files (not shown here);
    # presumably it maps the env var to a usable file path - confirm its contract.
    creds_path = resolve_json_or_path("GMAIL_CREDENTIALS_JSON", default_creds, Path("/tmp/credentials.json"))
    token_path = resolve_json_or_path("GMAIL_TOKEN_JSON", default_token, Path("/tmp/token.json"))

    if not creds_path.exists() and default_creds.exists():
        print(f"[gmail_auth] WARN: {creds_path} not found; using {default_creds}")
        creds_path = default_creds

    if not creds_path.exists():
        raise FileNotFoundError(f"Missing OAuth client json: {creds_path}")

    flow = InstalledAppFlow.from_client_secrets_file(str(creds_path), SCOPES)

    # InstalledAppFlow.run_console() was removed in google-auth-oauthlib 1.0, so
    # calling it unconditionally raises AttributeError on current releases.
    # Use it only when the installed library still provides it; otherwise run the
    # local-server flow and just skip launching a browser.
    run_console = getattr(flow, "run_console", None)
    if args.console and callable(run_console):
        creds = run_console()
    else:
        if args.console:
            print(
                "[gmail_auth] WARN: run_console() is not available in this "
                "google-auth-oauthlib version; using the local server flow "
                "without opening a browser"
            )
        creds = flow.run_local_server(
            port=0,
            open_browser=not args.console,
            access_type="offline",
            prompt="consent",
        )

    token_path.parent.mkdir(parents=True, exist_ok=True)
    token_path.write_text(creds.to_json(), encoding="utf-8")
    print(f"[gmail_auth] Wrote token: {token_path}")
95
+
96
+
97
+ if __name__ == "__main__":
98
+ main()
health_api.py CHANGED
@@ -1,7 +1,211 @@
1
- from fastapi import FastAPI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  app = FastAPI()
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  @app.get("/")
6
  def root():
7
  return {"ok": True}
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Dict, Optional, Union
7
+ from urllib.parse import urlparse
8
+
9
+ from fastapi import FastAPI, Header, HTTPException
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from pydantic import AliasChoices, BaseModel, Field
12
+
13
+ from backend.worker.gmail_client import GmailClient
14
+ from backend.worker.hf_env_files import resolve_json_or_path
15
+ from backend.worker.template_store import save_trainer_template
16
 
17
  app = FastAPI()
18
 
19
+
20
def _origin_from_url(url: str) -> Optional[str]:
    """Reduce a URL to its ``scheme://netloc`` origin.

    Returns None when the input is empty/blank, cannot be parsed, or lacks
    either a scheme or a network location.
    """
    cleaned = (url or "").strip()
    if cleaned:
        try:
            parts = urlparse(cleaned)
        except Exception:
            return None
        if parts.scheme and parts.netloc:
            return f"{parts.scheme}://{parts.netloc}"
    return None
31
+
32
+
33
+ _allowed_origins = {
34
+ "http://localhost:5173",
35
+ "http://localhost:3000",
36
+ }
37
+ trainer_origin = _origin_from_url(os.environ.get("PDF_TRAINER_BASE_URL", ""))
38
+ if trainer_origin:
39
+ _allowed_origins.add(trainer_origin)
40
+
41
+ app.add_middleware(
42
+ CORSMiddleware,
43
+ allow_origins=sorted(_allowed_origins) if _allowed_origins else ["*"],
44
+ allow_credentials=False,
45
+ allow_methods=["*"],
46
+ allow_headers=["*"],
47
+ )
48
+
49
+
50
def _require_webhook_secret(provided: Optional[str]) -> None:
    """Validate the caller-supplied webhook secret against the configured one.

    Args:
        provided: Secret sent by the caller (may be None when the header is absent).

    Raises:
        HTTPException: 500 if PDF_PIPELINE_WEBHOOK_SECRET is unset or blank,
            401 if the provided secret does not match it.
    """
    # Local import keeps this block self-contained; hmac is in the standard library.
    import hmac

    expected = (os.environ.get("PDF_PIPELINE_WEBHOOK_SECRET") or "").strip()
    if not expected:
        raise HTTPException(
            status_code=500,
            detail="Server missing PDF_PIPELINE_WEBHOOK_SECRET env var",
        )

    candidate = (provided or "").strip()
    # Compare in constant time so response timing does not leak how much of the
    # secret matched. Bytes are used because compare_digest only accepts str
    # arguments that are pure ASCII.
    matches = hmac.compare_digest(
        candidate.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    )
    if not matches:
        raise HTTPException(status_code=401, detail="Invalid webhook secret")
59
+
60
+
61
def _alias_env(primary: str, fallback: str) -> None:
    """Populate ``primary`` from ``fallback`` when ``primary`` is unset or blank.

    The fallback value is stripped before being stored; a blank fallback
    leaves the environment untouched.
    """
    has_primary = bool((os.environ.get(primary) or "").strip())
    if has_primary:
        return
    replacement = (os.environ.get(fallback) or "").strip()
    if not replacement:
        return
    os.environ[primary] = replacement
67
+
68
+
69
def _gmail_paths(repo_root: Path) -> tuple[Path, Path]:
    """Return the (credentials, token) paths used for the Gmail client.

    Older ``PDF_PIPELINE_GMAIL_*`` env var names are first aliased onto the
    current ``GMAIL_*`` names, then each path is obtained from
    ``resolve_json_or_path`` with a default under ``<repo_root>/backend``.
    """
    legacy_names = (
        ("GMAIL_CREDENTIALS_JSON", "PDF_PIPELINE_GMAIL_CREDENTIALS_JSON"),
        ("GMAIL_TOKEN_JSON", "PDF_PIPELINE_GMAIL_TOKEN_JSON"),
    )
    for current_name, old_name in legacy_names:
        _alias_env(current_name, old_name)

    backend = repo_root / "backend"
    # NOTE(review): resolve_json_or_path lives in hf_env_files (not shown here);
    # the third argument is presumably where inline JSON gets written - confirm.
    creds_path = resolve_json_or_path(
        "GMAIL_CREDENTIALS_JSON",
        backend / "credentials.json",
        Path("/tmp/credentials.json"),
    )
    token_path = resolve_json_or_path(
        "GMAIL_TOKEN_JSON",
        backend / "token.json",
        Path("/tmp/token.json"),
    )
    return creds_path, token_path
77
+
78
+
79
def _uploads_dir(repo_root: Path) -> Path:
    """Return ``<repo_root>/backend/worker/uploads``."""
    return repo_root.joinpath("backend", "worker", "uploads")
81
+
82
+
83
# Request body for POST /api/trainer/save-config.
# Every field accepts both its snake_case name and the camelCase alias(es)
# listed in its AliasChoices.
class SaveConfigRequest(BaseModel):
    # Required: identifies the uploaded PDF.
    pdf_id: str = Field(
        default=...,
        validation_alias=AliasChoices("pdf_id", "pdfId"),
        description="The pdf_id used by the trainer link (?pdf_id=...).",
    )

    # Required: identifies the template whose configuration is being stored.
    template_id: str = Field(
        default=...,
        validation_alias=AliasChoices("template_id", "templateId"),
        description="The template_id being saved/updated.",
    )

    # Required: either an already-decoded JSON object or a JSON string.
    config: Union[Dict[str, Any], str] = Field(
        default=...,
        validation_alias=AliasChoices("config", "configuration", "config_json", "configJson"),
        description="Trainer configuration JSON.",
    )

    # Optional: per-request recipient override.
    notify_to: Optional[str] = Field(
        default=None,
        validation_alias=AliasChoices("notify_to", "notifyTo"),
        description="Override recipient email (optional).",
    )
104
+
105
+
106
# PDFs at or above this size are mentioned in the email body instead of attached.
_MAX_PDF_ATTACHMENT_BYTES = 20 * 1024 * 1024


def _path_within(base: Path, relative: str) -> Optional[Path]:
    """Return ``base / relative`` fully resolved, or None if it escapes ``base``."""
    try:
        root = base.resolve()
        candidate = (base / relative).resolve()
    except (OSError, ValueError, RuntimeError):
        # e.g. embedded NUL bytes or a symlink loop in the requested name.
        return None
    if root not in candidate.parents:
        return None
    return candidate


@app.post("/api/trainer/save-config")
def save_config(
    req: SaveConfigRequest,
    x_webhook_secret: Optional[str] = Header(default=None, alias="X-Webhook-Secret"),
):
    """
    Called by the PDF Trainer UI when "Save configuration" is clicked.
    Behavior:
      - persists the config JSON to backend/worker/trainer_templates/<template_id>.json
      - sends a confirmation email with the config JSON + the original PDF attached

    Responses:
      - 401 if the X-Webhook-Secret header does not match the configured secret
      - 422 if config is not a JSON object, or pdf_id points outside the uploads directory
      - 500 if required env vars are missing, or saving / emailing fails
      - {"ok": True} on success
    """
    _require_webhook_secret(x_webhook_secret)

    repo_root = Path(__file__).resolve().parent

    notify_from = (os.environ.get("PDF_PIPELINE_NOTIFY_FROM") or "").strip()
    if not notify_from:
        raise HTTPException(status_code=500, detail="Missing PDF_PIPELINE_NOTIFY_FROM env var")

    # Where to send the confirmation
    default_to = (os.environ.get("PDF_PIPELINE_PIPELINE_NOTIFY_TO") or "").strip()
    to_email = (req.notify_to or default_to).strip()
    if not to_email:
        raise HTTPException(
            status_code=500,
            detail="Missing PDF_PIPELINE_PIPELINE_NOTIFY_TO env var (or provide notify_to in request)",
        )

    # pdf_id comes from the request and is joined into a filesystem path below.
    # Reject ids such as "../../secret" before anything is written or read, so the
    # endpoint cannot be used to attach files from outside the uploads directory.
    uploads = _uploads_dir(repo_root)
    pdf_path = _path_within(uploads, f"{req.pdf_id}.pdf")
    name_path = _path_within(uploads, f"{req.pdf_id}.name.txt")
    if pdf_path is None or name_path is None:
        raise HTTPException(
            status_code=422,
            detail="pdf_id must refer to a file inside the uploads directory",
        )

    # Persist config JSON for local tracking/backups
    cfg_obj: Dict[str, Any]
    if isinstance(req.config, str):
        try:
            parsed = json.loads(req.config)
        except (ValueError, RecursionError) as e:
            raise HTTPException(status_code=422, detail=f"config is not valid JSON: {e}") from e
        if not isinstance(parsed, dict):
            raise HTTPException(status_code=422, detail="config JSON must be an object")
        cfg_obj = parsed
    else:
        cfg_obj = req.config

    try:
        saved_path = save_trainer_template(req.template_id, cfg_obj)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to save template config: {e}") from e

    # Load original PDF from uploads/ (best-effort): a missing or unreadable file
    # only downgrades the email, it never fails the request.
    pdf_name = f"{req.pdf_id}.pdf"
    try:
        pdf_name = (name_path.read_text(encoding="utf-8") or "").strip() or pdf_name
    except (OSError, UnicodeDecodeError):
        pass  # no usable display name recorded; keep the default

    pdf_bytes: Optional[bytes]
    try:
        pdf_bytes = pdf_path.read_bytes()
    except OSError:
        pdf_bytes = None

    cfg_bytes = json.dumps(cfg_obj, indent=2).encode("utf-8")
    cfg_filename = f"trainer_config_{req.pdf_id}__{req.template_id}.json"

    attachments = [(cfg_filename, cfg_bytes)]
    body_lines = [
        "Configuration has been updated.",
        "",
        f"template_id: {req.template_id}",
        f"pdf_id: {req.pdf_id}",
        f"saved: {saved_path}",
    ]

    if pdf_bytes and len(pdf_bytes) < _MAX_PDF_ATTACHMENT_BYTES:
        attachments.append((pdf_name, pdf_bytes))
    elif pdf_bytes:
        body_lines.append("")
        body_lines.append("Note: PDF was too large to attach.")
    else:
        body_lines.append("")
        body_lines.append("Note: Original PDF not found on worker; only config JSON is attached.")

    subject = f"PDF Trainer: configuration updated ({req.template_id})"
    body = "\n".join(body_lines) + "\n"

    creds_path, token_path = _gmail_paths(repo_root)
    try:
        gmail = GmailClient(creds_path, token_path)
        gmail.send_email(
            to_email=to_email,
            from_email=notify_from,
            subject=subject,
            body_text=body,
            attachments=attachments,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to send confirmation email: {e}") from e

    return {"ok": True}
208
+
209
  @app.get("/")
210
  def root():
211
  return {"ok": True}