Avinashnalla7 commited on
Commit
8974c0c
·
1 Parent(s): 323986b

demo(A): local pdf storage + PUT/GET /api/pdf

Browse files
Files changed (2) hide show
  1. backend/api.py +50 -214
  2. requirements.txt +2 -2
backend/api.py CHANGED
@@ -1,32 +1,20 @@
1
  from __future__ import annotations
2
 
3
- import json
4
- import re
5
  import os
6
- import socket
7
- import paramiko
8
- import concurrent.futures
9
-
10
  from pathlib import Path
11
- from typing import Any, Dict
 
 
 
12
 
13
- from dotenv import load_dotenv
14
- from fastapi import FastAPI, HTTPException, Response, Header, Request
15
  from fastapi.middleware.cors import CORSMiddleware
16
- from fastapi.responses import FileResponse
17
-
18
-
19
- # ------------------------------------------------------------------
20
- # App
21
- # ------------------------------------------------------------------
22
 
23
  app = FastAPI(title="PDF Trainer API", version="1.0")
24
 
25
-
26
- # ------------------------------------------------------------------
27
- # CORS
28
- # ------------------------------------------------------------------
29
-
30
  app.add_middleware(
31
  CORSMiddleware,
32
  allow_origins=[
@@ -38,171 +26,76 @@ app.add_middleware(
38
  allow_headers=["*"],
39
  )
40
 
 
 
 
 
41
 
42
- # ------------------------------------------------------------------
43
- # Paths & env
44
- # ------------------------------------------------------------------
45
-
46
- REPO_ROOT = Path(__file__).resolve().parents[1]
47
- BACKEND_DIR = REPO_ROOT / "backend"
48
- UPLOADS_DIR = BACKEND_DIR / "worker" / "uploads"
49
-
50
- UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
51
-
52
- load_dotenv(BACKEND_DIR / ".env", override=True)
53
-
54
-
55
- # ------------------------------------------------------------------
56
- # Helpers
57
- # ------------------------------------------------------------------
58
-
59
- def _require_env(key: str) -> str:
60
- val = (os.environ.get(key) or "").strip()
61
- if not val:
62
- raise HTTPException(
63
- status_code=500,
64
- detail=f"Server misconfigured: missing env var {key}",
65
- )
66
- return val
67
-
68
 
69
- def _gmail_client():
70
- # LAZY IMPORT — do not move to top
71
- from backend.worker.gmail_client import GmailClient
72
-
73
- credentials_json = Path(
74
- os.environ.get(
75
- "GMAIL_CREDENTIALS_JSON",
76
- BACKEND_DIR / "credentials.json",
77
- )
78
- )
79
-
80
- token_json = Path(
81
- os.environ.get(
82
- "GMAIL_TOKEN_JSON",
83
- BACKEND_DIR / "token.json",
84
- )
85
- )
86
-
87
- return GmailClient(credentials_json, token_json)
88
-
89
-
90
- # ------------------------------------------------------------------
91
- # Health / root
92
- # ------------------------------------------------------------------
93
 
94
  @app.get("/health")
95
  def health() -> Dict[str, bool]:
96
  return {"ok": True}
97
 
98
-
99
  @app.get("/")
100
  def root() -> Dict[str, str]:
101
  return {"service": "pdf-trainer-api", "status": "running"}
102
 
103
-
104
- # ------------------------------------------------------------------
105
- # API endpoints
106
- # ------------------------------------------------------------------
107
-
108
- # ------------------------------------------------------------------
109
- # PDF storage (local to API container)
110
- # Worker uploads PDFs here. UI fetches from here.
111
- # ------------------------------------------------------------------
112
-
113
- PDF_STORE_DIR = Path(os.getenv("PDF_STORE_DIR", "/app/data/pdfs")).resolve()
114
- PDF_STORE_DIR.mkdir(parents=True, exist_ok=True)
115
-
116
- def _require_worker_token(x_worker_token: Optional[str]):
117
- # reuse global WORKER_TOKEN if you already have it later in the file
118
- token = os.getenv("WORKER_TOKEN", "").strip()
119
- if not token:
120
- raise HTTPException(status_code=500, detail="Server missing WORKER_TOKEN")
121
- if not x_worker_token or x_worker_token != token:
122
- raise HTTPException(status_code=401, detail="Unauthorized worker")
123
-
124
- def _safe_id(s: str) -> str:
125
- # keep it simple: allow only safe filename chars
126
- return re.sub(r"[^a-zA-Z0-9_.-]+", "_", s)
127
-
128
- @app.post("/api/pdf/{pdf_id}")
129
- async def put_pdf(pdf_id: str, request: Request, x_worker_token: Optional[str] = Header(default=None)):
130
  _require_worker_token(x_worker_token)
131
- pid = _safe_id(pdf_id)
132
- data = await request.body()
133
- if not data:
134
  raise HTTPException(status_code=400, detail="Empty body")
135
- if len(data) > 50 * 1024 * 1024:
136
- raise HTTPException(status_code=413, detail="PDF too large")
137
- out = PDF_STORE_DIR / (pid if pid.lower().endswith(".pdf") else pid + ".pdf")
138
- out.write_bytes(data)
139
- return {"ok": True, "pdf_id": pid, "bytes": len(data)}
140
 
141
  @app.get("/api/pdf/{pdf_id}")
142
  def get_pdf(pdf_id: str):
143
- pid = _safe_id(pdf_id)
144
- f = PDF_STORE_DIR / (pid if pid.lower().endswith(".pdf") else pid + ".pdf")
145
- if not f.exists():
146
  raise HTTPException(status_code=404, detail="PDF not found")
147
- data = f.read_bytes()
148
- return Response(content=data, media_type="application/pdf")
149
 
150
  @app.post("/api/send-config")
151
- def send_config(payload: Dict[str, Any]):
152
- # Expect payload to include at least: pdf_id, template_id, config
153
  pdf_id = str(payload.get("pdf_id", "")).strip()
154
  template_id = str(payload.get("template_id", "")).strip()
155
  config = payload.get("config")
156
-
157
  if not pdf_id or not template_id or config is None:
158
  raise HTTPException(status_code=400, detail="Missing pdf_id/template_id/config")
159
-
160
- store = SFTPStore()
161
- remote_json = f"configs/{pdf_id}_{template_id}.json"
162
- store.put_bytes(remote_json, json.dumps(payload, indent=2).encode("utf-8"))
163
-
164
- return {"ok": True, "stored": remote_json}
165
-
166
-
167
- @app.post("/api/notify-unknown")
168
- def notify_unknown(payload: Dict[str, Any]):
169
- client = _gmail_client()
170
-
171
- client.send_email(
172
- to=_require_env("ALERT_EMAIL"),
173
- subject="PDF Trainer: Unknown document",
174
- body=json.dumps(payload, indent=2),
175
- )
176
-
177
- return {"ok": True}
178
-
179
- # ------------------------------------------------------------------
180
- # Worker job queue (in-memory, Phase 2)
181
- # ------------------------------------------------------------------
182
-
183
- from collections import deque
184
- from dataclasses import dataclass, asdict
185
- from typing import Optional
186
- from uuid import uuid4
187
- import time
188
- from fastapi import Header
189
- from fastapi.responses import JSONResponse
190
- from pydantic import BaseModel
191
-
192
- WORKER_TOKEN = os.getenv("WORKER_TOKEN", "")
193
-
194
- def _require_worker_token(x_worker_token: Optional[str]):
195
- if not WORKER_TOKEN:
196
- raise HTTPException(status_code=500, detail="Server missing WORKER_TOKEN")
197
- if not x_worker_token or x_worker_token != WORKER_TOKEN:
198
- raise HTTPException(status_code=401, detail="Unauthorized worker")
199
 
200
  @dataclass
201
  class Job:
202
  id: str
203
  payload: dict
204
  created_at: float
205
- status: str = "queued" # queued|running|done|failed
206
  message: str = ""
207
 
208
  JOBQ: deque[Job] = deque()
@@ -227,10 +120,10 @@ def jobs_next(x_worker_token: Optional[str] = Header(default=None)):
227
  if job.status == "queued":
228
  job.status = "running"
229
  return asdict(job)
230
- return Response(status_code=204)
231
 
232
  class StatusReq(BaseModel):
233
- status: str # running|done|failed
234
  message: str = ""
235
 
236
  @app.post("/api/jobs/{job_id}/status")
@@ -244,60 +137,3 @@ def jobs_status(job_id: str, req: StatusReq, x_worker_token: Optional[str] = Hea
244
  job.status = req.status
245
  job.message = req.message
246
  return {"ok": True, "job_id": job_id, "status": job.status}
247
-
248
- @app.get("/api/sftp-test")
249
- def api_sftp_test():
250
- host = (os.getenv("SFTP_HOST") or "").strip()
251
- port = int((os.getenv("SFTP_PORT") or "2222").strip())
252
- user = (os.getenv("SFTP_USER") or "").strip()
253
- pw = (os.getenv("SFTP_PASS") or "").strip()
254
- root = (os.getenv("SFTP_ROOT") or ".").strip() or "."
255
- timeout = float((os.getenv("SFTP_TIMEOUT_SECONDS") or "8").strip())
256
-
257
- if not host or not user or not pw:
258
- raise HTTPException(status_code=500, detail="Missing SFTP secrets: SFTP_HOST/SFTP_USER/SFTP_PASS")
259
-
260
- t0 = time.time()
261
- # HARD socket timeout
262
- try:
263
- sock = socket.create_connection((host, port), timeout=timeout)
264
- except Exception as e:
265
- return JSONResponse({"ok": False, "stage": "connect", "error": str(e), "host": host, "port": port, "timeout": timeout}, status_code=502)
266
- t_sock = time.time()
267
-
268
- tr = paramiko.Transport(sock)
269
- tr.banner_timeout = timeout
270
- tr.auth_timeout = timeout
271
-
272
- tr.connect(username=user, password=pw)
273
- t_auth = time.time()
274
-
275
- sftp = paramiko.SFTPClient.from_transport(tr)
276
-
277
- out = {
278
- "ok": False,
279
- "host": host,
280
- "port": port,
281
- "user": user,
282
- "root": root,
283
- "timeout": timeout,
284
- "t_sock_ms": int((t_sock - t0) * 1000),
285
- "t_auth_ms": int((t_auth - t_sock) * 1000),
286
- }
287
-
288
- try:
289
- sftp.chdir(root)
290
- out["listdir_root"] = sftp.listdir(".")[:50]
291
- try:
292
- st = sftp.stat("pdfs/demo_test_003.pdf")
293
- out["stat_demo_test_003"] = {"size": st.st_size}
294
- except Exception as e:
295
- out["stat_demo_test_003_error"] = str(e)
296
- out["ok"] = True
297
- return JSONResponse(out)
298
- finally:
299
- try:
300
- sftp.close()
301
- finally:
302
- tr.close()
303
-
 
1
  from __future__ import annotations
2
 
 
 
3
  import os
4
+ import time
 
 
 
5
  from pathlib import Path
6
+ from typing import Dict, Optional
7
+ from uuid import uuid4
8
+ from collections import deque
9
+ from dataclasses import dataclass, asdict
10
 
11
+ from fastapi import FastAPI, HTTPException, Header, Request
 
12
  from fastapi.middleware.cors import CORSMiddleware
13
+ from fastapi.responses import FileResponse, JSONResponse
14
+ from pydantic import BaseModel
 
 
 
 
15
 
16
  app = FastAPI(title="PDF Trainer API", version="1.0")
17
 
 
 
 
 
 
18
  app.add_middleware(
19
  CORSMiddleware,
20
  allow_origins=[
 
26
  allow_headers=["*"],
27
  )
28
 
29
+ WORKER_TOKEN = (os.getenv("WORKER_TOKEN") or "").strip()
30
+ DATA_DIR = Path(os.getenv("DATA_DIR", "/app/data")).resolve()
31
+ PDF_DIR = DATA_DIR / "pdfs"
32
+ CFG_DIR = DATA_DIR / "configs"
33
 
34
+ PDF_DIR.mkdir(parents=True, exist_ok=True)
35
+ CFG_DIR.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ def _require_worker_token(x_worker_token: Optional[str]):
38
+ if not WORKER_TOKEN:
39
+ raise HTTPException(status_code=500, detail="Server missing WORKER_TOKEN")
40
+ if not x_worker_token or x_worker_token != WORKER_TOKEN:
41
+ raise HTTPException(status_code=401, detail="Unauthorized worker")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  @app.get("/health")
44
  def health() -> Dict[str, bool]:
45
  return {"ok": True}
46
 
 
47
  @app.get("/")
48
  def root() -> Dict[str, str]:
49
  return {"service": "pdf-trainer-api", "status": "running"}
50
 
51
+ def _pdf_path(pdf_id: str) -> Path:
52
+ safe = pdf_id.strip().replace("/", "_")
53
+ if not safe:
54
+ raise HTTPException(status_code=400, detail="Missing pdf_id")
55
+ if not safe.lower().endswith(".pdf"):
56
+ safe = safe + ".pdf"
57
+ return PDF_DIR / safe
58
+
59
+ @app.put("/api/pdf/{pdf_id}")
60
+ async def put_pdf(
61
+ pdf_id: str,
62
+ request: Request,
63
+ x_worker_token: Optional[str] = Header(default=None),
64
+ ):
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  _require_worker_token(x_worker_token)
66
+ body = await request.body()
67
+ if not body:
 
68
  raise HTTPException(status_code=400, detail="Empty body")
69
+ p = _pdf_path(pdf_id)
70
+ p.write_bytes(body)
71
+ return {"ok": True, "pdf_id": p.stem, "bytes": len(body)}
 
 
72
 
73
  @app.get("/api/pdf/{pdf_id}")
74
  def get_pdf(pdf_id: str):
75
+ p = _pdf_path(pdf_id)
76
+ if not p.exists():
 
77
  raise HTTPException(status_code=404, detail="PDF not found")
78
+ return FileResponse(str(p), media_type="application/pdf", filename=p.name)
 
79
 
80
  @app.post("/api/send-config")
81
+ async def send_config(payload: dict):
 
82
  pdf_id = str(payload.get("pdf_id", "")).strip()
83
  template_id = str(payload.get("template_id", "")).strip()
84
  config = payload.get("config")
 
85
  if not pdf_id or not template_id or config is None:
86
  raise HTTPException(status_code=400, detail="Missing pdf_id/template_id/config")
87
+ cfg_name = f"{pdf_id}_{template_id}_{uuid4().hex}.json"
88
+ out = CFG_DIR / cfg_name
89
+ import json
90
+ out.write_text(json.dumps(payload, indent=2), encoding="utf-8")
91
+ return {"ok": True, "saved": cfg_name}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  @dataclass
94
  class Job:
95
  id: str
96
  payload: dict
97
  created_at: float
98
+ status: str = "queued"
99
  message: str = ""
100
 
101
  JOBQ: deque[Job] = deque()
 
120
  if job.status == "queued":
121
  job.status = "running"
122
  return asdict(job)
123
+ return JSONResponse(status_code=204, content=None)
124
 
125
  class StatusReq(BaseModel):
126
+ status: str
127
  message: str = ""
128
 
129
  @app.post("/api/jobs/{job_id}/status")
 
137
  job.status = req.status
138
  job.message = req.message
139
  return {"ok": True, "job_id": job_id, "status": job.status}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  fastapi
2
- uvicorn
3
  python-dotenv
4
- paramiko
 
1
  fastapi
2
+ uvicorn[standard]
3
  python-dotenv
4
+ pydantic