Avinashnalla7 committed on
Commit
6a3f561
·
1 Parent(s): a0910c0

Worker: upload PDF to API after write

Browse files
Files changed (1) hide show
  1. backend/worker/worker.py +19 -0
backend/worker/worker.py CHANGED
@@ -3,6 +3,24 @@ from __future__ import annotations
3
  import os
4
  import requests
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  PDF_TRAINER_API_BASE = (os.environ.get('PDF_TRAINER_API_BASE') or '').strip()
7
  import time
8
  import uuid
@@ -130,6 +148,7 @@ def _process_train_label(gmail: GmailClient, s: Settings, root: Path) -> None:
130
  pdf_bytes = gmail.download_attachment(m.msg_id, att_id)
131
 
132
  pdf_id, stored_pdf_path = _write_pipeline_pdf(root, filename, pdf_bytes)
 
133
  trainer_link = f"{s.trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
134
 
135
  gmail.move_message(m.msg_id, add_labels=[], remove_labels=[], mark_read=True)
 
3
  import os
4
  import requests
5
 
6
+ def _upload_pdf_to_api(pdf_id, pdf_path, pdf_name):
7
+ base = (os.environ.get("PDF_TRAINER_API_BASE") or "").strip()
8
+ if not base:
9
+ print("[worker] PDF_TRAINER_API_BASE not set - skipping upload")
10
+ return
11
+ url = base.rstrip("/") + "/api/pdf/" + str(pdf_id)
12
+ print(f"[worker] uploading pdf_id={pdf_id} to {url}")
13
+ with open(pdf_path, "rb") as f:
14
+ r = requests.post(
15
+ url,
16
+ files={"file": (f"{pdf_id}.pdf", f, "application/pdf")},
17
+ data={"pdf_name": pdf_name},
18
+ timeout=30,
19
+ )
20
+ print(f"[worker] upload status={r.status_code}")
21
+ r.raise_for_status()
22
+
23
+
24
  PDF_TRAINER_API_BASE = (os.environ.get('PDF_TRAINER_API_BASE') or '').strip()
25
  import time
26
  import uuid
 
148
  pdf_bytes = gmail.download_attachment(m.msg_id, att_id)
149
 
150
  pdf_id, stored_pdf_path = _write_pipeline_pdf(root, filename, pdf_bytes)
151
+ _upload_pdf_to_api(pdf_id, stored_pdf_path, filename)
152
  trainer_link = f"{s.trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
153
 
154
  gmail.move_message(m.msg_id, add_labels=[], remove_labels=[], mark_read=True)