Avinashnalla7 committed on
Commit
8fcd819
·
1 Parent(s): ca668df

Fix PDF storage path; eliminate GET 500

Browse files
Files changed (1) hide show
  1. backend/api.py +32 -242
backend/api.py CHANGED
@@ -1,260 +1,50 @@
1
  from __future__ import annotations
2
 
3
-
4
- BUILD_SHA="8974c6e"
5
- from fastapi.responses import Response
6
-
7
-
8
- import os
9
- SENDCFG_FAST_OK=(os.getenv("SENDCFG_FAST_OK","0")=="1")
10
-
11
-
12
- from backend.sftp_store import store_to_sftp
13
- import json
14
- import os
15
  from pathlib import Path
16
- from typing import Any, Dict
17
-
18
- from dotenv import load_dotenv
19
- from fastapi import FastAPI, HTTPException, Request, UploadFile, File, Form, UploadFile, File, Form
20
-
21
- def _env(*keys: str, default: str = "") -> str:
22
- import os
23
- for k in keys:
24
- v = (os.environ.get(k) or "").strip()
25
- if v:
26
- return v
27
- return default
28
-
29
- from fastapi.middleware.cors import CORSMiddleware
30
- from fastapi.responses import FileResponse, PlainTextResponse
31
- app = FastAPI(title="PDF Trainer API", version="1.0")
32
-
33
- # Allow Vite dev server
34
- app.add_middleware(
35
- CORSMiddleware,
36
- allow_origins=[
37
- "http://localhost:5173",
38
- "http://127.0.0.1:5173",
39
- ],
40
- allow_credentials=True,
41
- allow_methods=["*"],
42
- allow_headers=["*"],
43
- )
44
-
45
- REPO_ROOT = Path(__file__).resolve().parents[1]
46
- BACKEND_DIR = REPO_ROOT / "backend"
47
- UPLOADS_DIR = Path(os.environ.get("PDF_TRAINER_UPLOADS_DIR") or "/data/uploads")
48
- CONFIGS_DIR = UPLOADS_DIR / "configs"
49
-
50
- def _maybe_bundle_to_sftp(pdf_id: str, template_id: str) -> dict:
51
- """
52
- If both config + PDF exist locally in this API container AND SFTP env vars exist,
53
- push JSON + PDF to SFTP. Otherwise no-op.
54
- Returns small status dict.
55
- """
56
- try:
57
- # only attempt if env looks configured
58
- if not (os.environ.get("SFTP_HOST") and os.environ.get("SFTP_USER") and os.environ.get("SFTP_PASS")):
59
- return {"sftp": "skipped_not_configured"}
60
-
61
- cfg_path = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
62
- pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
63
- name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
64
 
65
- if not cfg_path.exists():
66
- return {"sftp": "skipped_no_config"}
67
- if not pdf_path.exists():
68
- return {"sftp": "skipped_no_pdf"}
69
-
70
- cfg_bytes = cfg_path.read_bytes()
71
- pdf_bytes = pdf_path.read_bytes()
72
- pdf_name = (name_path.read_text(encoding="utf-8").strip() if name_path.exists() else f"{pdf_id}.pdf") or f"{pdf_id}.pdf"
73
-
74
- remote_dir = store_to_sftp(
75
- pdf_id=pdf_id,
76
- template_id=template_id,
77
- cfg_json_bytes=cfg_bytes,
78
- pdf_bytes=pdf_bytes,
79
- pdf_name=pdf_name,
80
- )
81
- return {"sftp": "ok", "remote_dir": remote_dir}
82
- except Exception as e:
83
- return {"sftp": "error", "error": str(e)}
84
-
85
-
86
- # Load backend/.env explicitly ONCE for this process
87
- load_dotenv(BACKEND_DIR / ".env", override=True)
88
-
89
-
90
- def _get_env_required(key: str) -> str:
91
- v = (os.environ.get(key) or "").strip()
92
- if not v:
93
- raise HTTPException(status_code=500, detail=f"Server missing {key} env var")
94
- return v
95
 
 
 
96
 
97
  @app.get("/health")
98
- def health():
99
  return {"ok": True}
100
 
101
  @app.post("/api/pdf/{pdf_id}")
102
- async def put_pdf(pdf_id: str, file: UploadFile = File(...), pdf_name: str = Form("")):
103
- base = Path("/data/uploads")
104
- pdf_dir = base / "pdfs"
105
- pdf_dir.mkdir(parents=True, exist_ok=True)
106
-
107
- pdf_path = pdf_dir / f"{pdf_id}.pdf"
108
- name_path = pdf_dir / f"{pdf_id}.name.txt"
109
 
110
  data = await file.read()
111
  pdf_path.write_bytes(data)
112
- if pdf_name:
113
- name_path.write_text(pdf_name, encoding="utf-8")
114
 
115
  return {"ok": True}
116
 
117
  @app.get("/api/pdf/{pdf_id}")
118
- def get_pdf(pdf_id: str):
119
- pdf_path = _pdf_path(pdf_id)
120
- try:
121
- b = pdf_path.read_bytes()
122
- except FileNotFoundError:
123
- raise HTTPException(status_code=404, detail="pdf_not_found")
124
- return Response(content=b, media_type="application/pdf")
125
-
126
- async def put_pdf(pdf_id: str, file: UploadFile = File(...), pdf_name: str = Form("")):
127
- UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
128
- data = await file.read()
129
- (UPLOADS_DIR / f"{pdf_id}.pdf").write_bytes(data)
130
- if pdf_name:
131
- (UPLOADS_DIR / f"{pdf_id}.name.txt").write_text(pdf_name.strip(), encoding="utf-8")
132
- # If any configs already exist for this pdf_id, bundle now.
133
- sftp_results = []
134
- try:
135
- for cfg in CONFIGS_DIR.glob(f"{pdf_id}__*.json"):
136
- tid = cfg.name.split("__", 1)[1].rsplit(".json", 1)[0]
137
- sftp_results.append(_maybe_bundle_to_sftp(pdf_id, tid))
138
- except Exception:
139
- pass
140
-
141
- return {"ok": True}
142
-
143
- @app.post("/api/send-config")
144
- async def send_config(request: Request):
145
- if SENDCFG_FAST_OK:
146
- return {"ok": True, "mode": "fast"}
147
-
148
- """
149
- Store config JSON for later pipelines.
150
- Do NOT require PDF to exist.
151
- Do NOT send email from this API container (no Gmail creds in HF).
152
- """
153
- payload = await request.json()
154
-
155
- pdf_id = (payload.get("pdf_id") or "").strip()
156
- template_id = (payload.get("template_id") or "").strip()
157
- config = payload.get("config")
158
-
159
- if not pdf_id:
160
- raise HTTPException(status_code=400, detail="Missing pdf_id")
161
- if not template_id:
162
- raise HTTPException(status_code=400, detail="Missing template_id")
163
- if not isinstance(config, dict):
164
- raise HTTPException(status_code=400, detail="Missing config object")
165
-
166
- CONFIGS_DIR.mkdir(parents=True, exist_ok=True)
167
- out_path = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
168
- out_path.write_text(
169
- json.dumps({"pdf_id": pdf_id, "template_id": template_id, "config": config}, indent=2),
170
- encoding="utf-8",
171
  )
172
-
173
-
174
- # Store both config JSON + (if exists) PDF to SFTP for future pipelines.
175
- # SFTP errors must NOT break the API response.
176
- try:
177
- cfg_bytes = out_path.read_bytes()
178
-
179
- pdf_bytes = None
180
- pdf_name = None
181
-
182
- # API stores uploaded PDFs at UPLOADS_DIR/{pdf_id}.pdf
183
- pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
184
- if pdf_path.exists():
185
- pdf_bytes = pdf_path.read_bytes()
186
-
187
- # optional friendly name if present
188
- name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
189
- pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else f"{pdf_id}.pdf"
190
-
191
- store_to_sftp(
192
- pdf_id=pdf_id,
193
- template_id=template_id,
194
- cfg_bytes=cfg_bytes,
195
- pdf_bytes=pdf_bytes,
196
- pdf_name=pdf_name or f"{pdf_id}.pdf",
197
- )
198
- except Exception as e:
199
- print(f"[SFTP] store failed: {e}")
200
- pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
201
- sftp_status = _maybe_bundle_to_sftp(pdf_id, template_id)
202
- return {"ok": True, "stored": str(out_path), "pdf_exists": pdf_path.exists(), **sftp_status}
203
- @app.get("/api/config/{pdf_id}/{template_id}")
204
- def get_config(pdf_id: str, template_id: str):
205
- path = CONFIGS_DIR / f"{pdf_id}__{template_id}.json"
206
- if not path.exists():
207
- raise HTTPException(status_code=404, detail="Config not found")
208
- return FileResponse(path, media_type="application/json", filename=path.name)
209
-
210
- @app.post("/api/notify-unknown")
211
- async def notify_unknown(payload: Dict[str, Any]):
212
- """
213
- UNKNOWN TEMPLATE NOTIFICATION (rep email)
214
-
215
- REQUIRED payload:
216
- - pdf_id: str
217
- OPTIONAL:
218
- - reason: str
219
-
220
- Sends to REP inbox:
221
- - PDF_PIPELINE_NOTIFY_TO
222
-
223
- Requirements:
224
- - Includes trainer link with PDF pre-loaded
225
- - Attaches PDF
226
- - No JSON
227
- """
228
- pdf_id = (payload.get("pdf_id") or "").strip()
229
- reason = (payload.get("reason") or "").strip()
230
-
231
- if not pdf_id:
232
- raise HTTPException(status_code=400, detail="Missing pdf_id")
233
-
234
- rep_to = _get_env_required("PDF_PIPELINE_NOTIFY_TO")
235
- notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
236
- trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()
237
- name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
238
- pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else f"{pdf_id}.pdf"
239
-
240
- trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
241
-
242
- subject = "Action required: Unknown PDF format (template not found)"
243
- body = (
244
- "Hi,\n\n"
245
- "We received a PDF that does not match any existing templates in the system.\n\n"
246
- + (f"Reason: {reason}\n\n" if reason else "")
247
- + "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
248
- f"{trainer_link}\n\n"
249
- "The original PDF is attached for reference.\n\n"
250
- "Thank you,\n"
251
- "Inserio Automation\n"
252
- )
253
-
254
- attachments = [(pdf_name, pdf_path.read_bytes())]
255
- return {"ok": True}
256
-
257
-
258
- @app.get("/", response_class=PlainTextResponse)
259
- def root():
260
- return "PDF Trainer API. Use /health"
 
1
  from __future__ import annotations
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from pathlib import Path
4
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
5
+ from fastapi.responses import FileResponse
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ PDF_DIR = Path("/data/uploads/pdfs")
10
+ PDF_DIR.mkdir(parents=True, exist_ok=True)
11
 
12
  @app.get("/health")
13
+ async def health():
14
  return {"ok": True}
15
 
16
  @app.post("/api/pdf/{pdf_id}")
17
+ async def put_pdf(
18
+ pdf_id: str,
19
+ file: UploadFile = File(...),
20
+ pdf_name: str = Form("")
21
+ ):
22
+ pdf_path = PDF_DIR / f"{pdf_id}.pdf"
23
+ name_path = PDF_DIR / f"{pdf_id}.name.txt"
24
 
25
  data = await file.read()
26
  pdf_path.write_bytes(data)
27
+ name_path.write_text(pdf_name or file.filename or f"{pdf_id}.pdf", encoding="utf-8")
 
28
 
29
  return {"ok": True}
30
 
31
  @app.get("/api/pdf/{pdf_id}")
32
+ async def get_pdf(pdf_id: str):
33
+ pdf_path = PDF_DIR / f"{pdf_id}.pdf"
34
+ name_path = PDF_DIR / f"{pdf_id}.name.txt"
35
+
36
+ if not pdf_path.exists():
37
+ raise HTTPException(status_code=404, detail="pdf not found")
38
+
39
+ filename = f"{pdf_id}.pdf"
40
+ if name_path.exists():
41
+ try:
42
+ filename = name_path.read_text(encoding="utf-8").strip() or filename
43
+ except Exception:
44
+ pass
45
+
46
+ return FileResponse(
47
+ path=str(pdf_path),
48
+ media_type="application/pdf",
49
+ filename=filename,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  )