Avinash committed on
Commit
4a5269c
·
1 Parent(s): 7dac948

integrate real backend api

Browse files
Files changed (35) hide show
  1. .gitignore +8 -0
  2. api.py +1 -7
  3. backend/__init__.py +0 -0
  4. backend/api.py +213 -0
  5. backend/oauth_bootstrap.py +51 -0
  6. backend/scripts/apply_trainer_schemas.py +48 -0
  7. backend/scripts/generate_template_schema_skeletons.py +137 -0
  8. backend/scripts/migrate_hardcoded_templates.py +99 -0
  9. backend/templates/T1_IFACTOR_DELIVERED_ORDER.json +206 -0
  10. backend/templates/T2_SEASPINE_DELIVERED_GOODS_FORM.json +200 -0
  11. backend/templates/T3_ASTURA_SALES_ORDER_FORM.json +203 -0
  12. backend/templates/T4_MEDICAL_ESTIMATION_OF_CHARGES.json +167 -0
  13. backend/templates/T5_CLINICAL_PROGRESS_NOTE_POSTOP.json +118 -0
  14. backend/templates/T6_CUSTOMER_CHARGE_SHEET_SPINE.json +204 -0
  15. backend/templates/T7_SALES_ORDER_ZIMMER.json +174 -0
  16. backend/trainer_schemas/T1_IFACTOR_DELIVERED_ORDER.schema.json +70 -0
  17. backend/trainer_schemas/T2_SEASPINE_DELIVERED_GOODS_FORM.schema.json +70 -0
  18. backend/trainer_schemas/T3_ASTURA_SALES_ORDER_FORM.schema.json +70 -0
  19. backend/trainer_schemas/T4_MEDICAL_ESTIMATION_OF_CHARGES.schema.json +49 -0
  20. backend/trainer_schemas/T5_CLINICAL_PROGRESS_NOTE_POSTOP.schema.json +35 -0
  21. backend/trainer_schemas/T6_CUSTOMER_CHARGE_SHEET_SPINE.schema.json +70 -0
  22. backend/trainer_schemas/T7_SALES_ORDER_ZIMMER.schema.json +70 -0
  23. backend/worker/__init__.py +0 -0
  24. backend/worker/config.py +89 -0
  25. backend/worker/gmail_client.py +149 -0
  26. backend/worker/openai_classifier.py +312 -0
  27. backend/worker/out/.keep +0 -0
  28. backend/worker/pdf_render.py +41 -0
  29. backend/worker/prompts.py +87 -0
  30. backend/worker/template_registry_snapshot.py +0 -0
  31. backend/worker/template_store.py +36 -0
  32. backend/worker/tmp/.keep +0 -0
  33. backend/worker/uploads/.keep +0 -0
  34. backend/worker/worker.py +286 -0
  35. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ .env
5
+ .env.*
6
+ node_modules/
7
+ dist/
8
+ .DS_Store
api.py CHANGED
@@ -1,7 +1 @@
1
- from fastapi import FastAPI
2
-
3
- app = FastAPI()
4
-
5
- @app.get("/health")
6
- def health():
7
- return {"ok": True}
 
1
+ from backend.api import app
 
 
 
 
 
 
backend/__init__.py ADDED
File without changes
backend/api.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Dict
7
+
8
+ from dotenv import load_dotenv
9
+ from fastapi import FastAPI, HTTPException
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from fastapi.responses import FileResponse, PlainTextResponse
12
+
13
+ from backend.worker.gmail_client import GmailClient
14
+
15
# ASGI application object; the repo-root api.py re-exports it.
app = FastAPI(title="PDF Trainer API", version="1.0")

# Allow Vite dev server
# Only the two local dev origins are allowed to make cross-origin requests;
# any other origin needs to be added to this list.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:5173",
        "http://127.0.0.1:5173",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# This file lives at <repo>/backend/api.py, so parents[1] is the repo root.
REPO_ROOT = Path(__file__).resolve().parents[1]
BACKEND_DIR = REPO_ROOT / "backend"
# Directory the endpoints below read "<pdf_id>.pdf" / "<pdf_id>.name.txt" from.
UPLOADS_DIR = BACKEND_DIR / "worker" / "uploads"

# Load backend/.env explicitly ONCE for this process
# override=True: values from backend/.env replace variables already present
# in the process environment. This must run before the os.environ reads below.
load_dotenv(BACKEND_DIR / ".env", override=True)

# Gmail OAuth client-secrets and token files; both default to files inside
# backend/ and can be redirected through the environment.
CREDENTIALS_JSON = Path(os.environ.get("GMAIL_CREDENTIALS_JSON", str(BACKEND_DIR / "credentials.json")))
TOKEN_JSON = Path(os.environ.get("GMAIL_TOKEN_JSON", str(BACKEND_DIR / "token.json")))
38
+
39
+
40
def _gmail() -> GmailClient:
    """Build a GmailClient from the configured credentials and token paths."""
    # A new client is constructed on every call; nothing is cached here.
    return GmailClient(CREDENTIALS_JSON, TOKEN_JSON)
42
+
43
+
44
+ def _get_env_required(key: str) -> str:
45
+ v = (os.environ.get(key) or "").strip()
46
+ if not v:
47
+ raise HTTPException(status_code=500, detail=f"Server missing {key} env var")
48
+ return v
49
+
50
+
51
@app.get("/health")
def health():
    """Liveness probe: always answers ``{"ok": true}``."""
    return {"ok": True}
54
+
55
+
56
@app.get("/api/pdf/{pdf_id}")
def get_pdf(pdf_id: str):
    """Serve the uploaded PDF stored as ``<pdf_id>.pdf`` in the uploads directory.

    The download filename (and the ``X-PDF-Name`` response header) comes from
    the sidecar file ``<pdf_id>.name.txt`` when it exists and is non-empty,
    otherwise it falls back to the stored file name.

    Raises:
        HTTPException: 404 when no regular file exists for ``pdf_id`` inside
            the uploads directory.
    """
    uploads_root = UPLOADS_DIR.resolve()
    try:
        path = (uploads_root / f"{pdf_id}.pdf").resolve()
    except (OSError, ValueError):
        # e.g. embedded NUL byte or an unresolvable path: treat as not found.
        path = None

    # Defense in depth: pdf_id is client-controlled, so refuse anything that
    # resolves outside the uploads directory (absolute paths, ".." segments).
    # is_file() also rejects a directory that happens to be named "*.pdf".
    if path is None or uploads_root not in path.parents or not path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    stored_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else ""
    # An empty sidecar file must not produce an empty download filename.
    pdf_name = stored_name or path.name

    resp = FileResponse(path, media_type="application/pdf", filename=pdf_name)
    # NOTE(review): HTTP header values are typically limited to latin-1; a
    # display name with other characters may fail here — confirm and encode
    # if such names are expected.
    resp.headers["X-PDF-Name"] = pdf_name
    return resp
68
+
69
+
70
@app.post("/api/send-config")
async def send_config(payload: Dict[str, Any]):
    """
    PIPELINE SUBMISSION EMAIL (after rep saves config)

    REQUIRED payload:
    - pdf_id: str
    - template_id: str
    - config: dict

    Sends to PIPELINE inbox:
    - PDF_PIPELINE_PIPELINE_NOTIFY_TO

    Requirements:
    - Subject includes template_id
    - Body includes pdf_id
    - Attachments: JSON + PDF

    Raises:
    - HTTPException 400: pdf_id / template_id / config missing or of the wrong type
    - HTTPException 404: no uploaded PDF for pdf_id inside the uploads directory
    - HTTPException 500: a required env var is not set
    """
    from urllib.parse import quote

    raw_pdf_id = payload.get("pdf_id")
    raw_template_id = payload.get("template_id")
    # Non-string values (numbers, lists, ...) are treated as missing instead of
    # crashing on .strip() with an unhandled AttributeError.
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    template_id = raw_template_id.strip() if isinstance(raw_template_id, str) else ""
    config = payload.get("config")

    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")
    if not template_id:
        raise HTTPException(status_code=400, detail="Missing template_id")
    if not isinstance(config, dict):
        raise HTTPException(status_code=400, detail="Missing config object")

    pipeline_to = _get_env_required("PDF_PIPELINE_PIPELINE_NOTIFY_TO")
    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()

    uploads_root = UPLOADS_DIR.resolve()
    try:
        pdf_path = (uploads_root / f"{pdf_id}.pdf").resolve()
    except (OSError, ValueError):
        # e.g. embedded NUL byte or an unresolvable path: treat as not found.
        pdf_path = None

    # pdf_id comes straight from the request body: refuse anything that
    # resolves outside the uploads directory so an arbitrary *.pdf on disk
    # cannot be e-mailed out via "../" or an absolute path.
    if pdf_path is None or uploads_root not in pdf_path.parents or not pdf_path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found for pdf_id")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    stored_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else ""
    # An empty sidecar file must not produce an unnamed attachment.
    pdf_name = stored_name or pdf_path.name

    # Percent-encode the id so it cannot break or extend the query string.
    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={quote(pdf_id, safe='')}"

    config_attachment_name = f"trainer_config_{pdf_id}_{template_id}.json"

    subject = f"PDF_TRAINER_CONFIG_SUBMITTED | template_id={template_id}"
    body = (
        "Hi,\n\n"
        "A PDF Trainer configuration was submitted.\n\n"
        f"template_id: {template_id}\n"
        f"pdf_id: {pdf_id}\n"
        f"trainer_link: {trainer_link}\n\n"
        "Attachments:\n"
        f"- {config_attachment_name}\n"
        f"- {pdf_name}\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )

    cfg_bytes = json.dumps(
        {"pdf_id": pdf_id, "template_id": template_id, "config": config},
        indent=2,
    ).encode("utf-8")

    attachments = [
        (config_attachment_name, cfg_bytes),
        (pdf_name, pdf_path.read_bytes()),
    ]

    # NOTE(review): send_email is called synchronously inside an async handler;
    # if it blocks on network I/O it will stall the event loop — confirm.
    gmail = _gmail()
    gmail.send_email(
        to_email=pipeline_to,
        from_email=notify_from,
        subject=subject,
        body_text=body,
        attachments=attachments,
    )

    return {"ok": True}
146
+
147
+
148
@app.post("/api/notify-unknown")
async def notify_unknown(payload: Dict[str, Any]):
    """
    UNKNOWN TEMPLATE NOTIFICATION (rep email)

    REQUIRED payload:
    - pdf_id: str
    OPTIONAL:
    - reason: str

    Sends to REP inbox:
    - PDF_PIPELINE_NOTIFY_TO

    Requirements:
    - Includes trainer link with PDF pre-loaded
    - Attaches PDF
    - No JSON

    Raises:
    - HTTPException 400: pdf_id missing or not a string
    - HTTPException 404: no uploaded PDF for pdf_id inside the uploads directory
    - HTTPException 500: a required env var is not set
    """
    from urllib.parse import quote

    raw_pdf_id = payload.get("pdf_id")
    raw_reason = payload.get("reason")
    # Non-string values are treated as absent instead of crashing on .strip()
    # with an unhandled AttributeError.
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    reason = raw_reason.strip() if isinstance(raw_reason, str) else ""

    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")

    rep_to = _get_env_required("PDF_PIPELINE_NOTIFY_TO")
    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()

    uploads_root = UPLOADS_DIR.resolve()
    try:
        pdf_path = (uploads_root / f"{pdf_id}.pdf").resolve()
    except (OSError, ValueError):
        # e.g. embedded NUL byte or an unresolvable path: treat as not found.
        pdf_path = None

    # pdf_id comes straight from the request body: refuse anything that
    # resolves outside the uploads directory so an arbitrary *.pdf on disk
    # cannot be e-mailed out via "../" or an absolute path.
    if pdf_path is None or uploads_root not in pdf_path.parents or not pdf_path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found for pdf_id")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    stored_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else ""
    # An empty sidecar file must not produce an unnamed attachment.
    pdf_name = stored_name or pdf_path.name

    # Percent-encode the id so it cannot break or extend the query string.
    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={quote(pdf_id, safe='')}"

    reason_paragraph = f"Reason: {reason}\n\n" if reason else ""

    subject = "Action required: Unknown PDF format (template not found)"
    body = (
        "Hi,\n\n"
        "We received a PDF that does not match any existing templates in the system.\n\n"
        + reason_paragraph
        + "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
        f"{trainer_link}\n\n"
        "The original PDF is attached for reference.\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )

    attachments = [(pdf_name, pdf_path.read_bytes())]

    # NOTE(review): send_email is called synchronously inside an async handler;
    # if it blocks on network I/O it will stall the event loop — confirm.
    gmail = _gmail()
    gmail.send_email(
        to_email=rep_to,
        from_email=notify_from,
        subject=subject,
        body_text=body,
        attachments=attachments,
    )

    return {"ok": True}
209
+
210
+
211
@app.get("/", response_class=PlainTextResponse)
def root():
    """Plain-text landing response that points callers at ``/health``."""
    return "PDF Trainer API. Use /health"
backend/oauth_bootstrap.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+ from google_auth_oauthlib.flow import InstalledAppFlow
7
+ from google.auth.transport.requests import Request
8
+ from google.oauth2.credentials import Credentials
9
+
10
# REQUIRED scopes based on your plan:
# - read messages, move labels, mark read => modify
# - send mail => send
SCOPES = [
    "https://www.googleapis.com/auth/gmail.modify",
    "https://www.googleapis.com/auth/gmail.send",
]

# Anchor both files to the directory that contains this script so the bootstrap
# behaves the same from any working directory. Previously the paths were
# relative to the current directory and ROOT was never used, which contradicted
# the "in this folder" hint in the missing-credentials error.
ROOT = Path(__file__).resolve().parent
CREDS_PATH = ROOT / "credentials.json"
TOKEN_PATH = ROOT / "token.json"
21
+
22
def main() -> None:
    """Obtain Gmail OAuth credentials and save them to ``TOKEN_PATH``.

    Reuses the saved token when it is still valid, refreshes it when it has
    expired and carries a refresh token, and otherwise runs the interactive
    local-server OAuth flow with the client secrets in ``CREDS_PATH``.

    Raises:
        FileNotFoundError: when the OAuth client secrets file is missing.
    """
    if not CREDS_PATH.exists():
        raise FileNotFoundError(
            f"Missing {CREDS_PATH}. Download OAuth client JSON from Google Cloud and save as credentials.json in this folder."
        )

    creds: Credentials | None = None

    # Load existing token if present
    if TOKEN_PATH.exists():
        creds = Credentials.from_authorized_user_file(str(TOKEN_PATH), SCOPES)

    # Refresh or re-authenticate
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(str(CREDS_PATH), SCOPES)
            # Local loopback server OAuth (Desktop app)
            creds = flow.run_local_server(port=0)

    # Save token
    TOKEN_PATH.write_text(creds.to_json(), encoding="utf-8")
    # The token file is a credential for the mailbox, so keep it owner-only.
    # Best effort: some platforms/filesystems do not support POSIX modes.
    try:
        TOKEN_PATH.chmod(0o600)
    except OSError as exc:
        print(f"Warning: could not restrict permissions on {TOKEN_PATH}: {exc}")

    print("✅ OAuth complete.")
    print(f"Saved token: {TOKEN_PATH}")
    print("Scopes granted:", creds.scopes)
49
+
50
+ if __name__ == "__main__":
51
+ main()
backend/scripts/apply_trainer_schemas.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/scripts/apply_trainer_schemas.py
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from pathlib import Path
6
+
7
+ REPO_ROOT = Path(__file__).resolve().parents[2]
8
+ TEMPLATES_DIR = REPO_ROOT / "backend" / "templates"
9
+ SCHEMAS_DIR = REPO_ROOT / "backend" / "trainer_schemas"
10
+
11
def main() -> None:
    """Copy every ``<template_id>.schema.json`` into its template file.

    For each schema file in ``SCHEMAS_DIR`` the matching
    ``TEMPLATES_DIR/<template_id>.json`` is loaded, its ``"schema"`` key is
    replaced with the schema file's content, and the template is written back.
    Schema files without a matching template are reported and skipped.

    Raises:
        RuntimeError: when the schemas directory is missing or empty, when a
            schema is not a JSON object with a ``fields`` list, or when a
            template file is not a JSON object.
    """
    if not SCHEMAS_DIR.exists():
        raise RuntimeError(f"Missing schemas dir: {SCHEMAS_DIR}")

    schema_files = sorted(SCHEMAS_DIR.glob("*.schema.json"))
    if not schema_files:
        raise RuntimeError(f"No schema files found in: {SCHEMAS_DIR}")

    schema_suffix = ".schema.json"
    applied = 0

    for sf in schema_files:
        # Strip only the trailing suffix (guaranteed by the glob above);
        # str.replace would also remove an occurrence inside the name.
        template_id = sf.name[: -len(schema_suffix)]
        template_path = TEMPLATES_DIR / f"{template_id}.json"

        if not template_path.exists():
            print(f"⚠️ skip (no template file): {template_path}")
            continue

        new_schema = json.loads(sf.read_text(encoding="utf-8"))
        if not isinstance(new_schema, dict):
            raise RuntimeError(f"Invalid schema json (not object): {sf}")
        if not isinstance(new_schema.get("fields"), list):
            raise RuntimeError(f"Invalid schema json (missing fields[]): {sf}")

        t = json.loads(template_path.read_text(encoding="utf-8"))
        # Fail with a clear message instead of a TypeError on item assignment.
        if not isinstance(t, dict):
            raise RuntimeError(f"Invalid template json (not object): {template_path}")
        t["schema"] = new_schema

        # Optional: bump template version when schema changes
        # t["version"] = int(t.get("version") or 0) + 1

        # ensure_ascii=False matches generate_template_schema_skeletons.py so
        # both scripts serialize the same template identically.
        template_path.write_text(
            json.dumps(t, indent=2, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )
        print(f"✅ updated {template_path} fields={len(new_schema['fields'])}")
        applied += 1

    print(f"done. applied={applied}")
46
+
47
+ if __name__ == "__main__":
48
+ main()
backend/scripts/generate_template_schema_skeletons.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List
6
+
7
+
8
+ TEMPLATES_DIR = Path(__file__).resolve().parents[1] / "templates"
9
+
10
+
11
# Field specs in output order. Every entry except the "table" one is a scalar
# field located through an anchor box plus a value offset.
FIELDS: List[Dict[str, Any]] = [
    {"field_id": "facility_organization", "label": "Facility / Organization", "type": "entity"},
    {"field_id": "case_location", "label": "Case Location / Address", "type": "text"},
    {"field_id": "vendor", "label": "Vendor", "type": "entity"},
    {"field_id": "physician_name", "label": "Physician Name", "type": "person"},
    {"field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date"},
    {"field_id": "items", "label": "Items / Line Items", "type": "table"},
]

# Header texts emitted as the table field's "table_anchors".
TABLE_ANCHORS = [
    {"key": "item_number", "expected_text": "Item Number"},
    {"key": "description", "expected_text": "Description"},
    {"key": "qty", "expected_text": "Qty"},
]

# Columns emitted as the table field's "columns".
TABLE_COLUMNS = [
    {"key": "item_number", "label": "Item Number"},
    {"key": "lot_number", "label": "Lot Number"},
    {"key": "description", "label": "Description"},
    {"key": "qty", "label": "Qty"},
    {"key": "price", "label": "Price"},
]


def _scalar_field_skeleton(spec: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new scalar field entry for *spec* with all geometry unset."""
    return {
        **spec,
        "anchor_bbox_norm": None,
        "value_bbox_norm": None,
        "value_offset_norm": None,
    }


def _table_field_skeleton(spec: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new table field entry for *spec* with all geometry unset."""
    return {
        **spec,
        "table_bbox_norm": None,
        "header_bbox_norm": None,
        "row_height_hint_norm": None,
        "columns": [{**column, "bbox_rel_norm": None} for column in TABLE_COLUMNS],
        "table_anchors": [{**anchor, "bbox_norm": None} for anchor in TABLE_ANCHORS],
        "notes": "Anchors are used at runtime to localize table/header/columns under drift.",
    }


def schema_skeleton(form_id: str) -> Dict[str, Any]:
    """Build an empty trainer schema for *form_id*.

    The field list is derived from ``FIELDS``, ``TABLE_COLUMNS`` and
    ``TABLE_ANCHORS`` rather than repeated by hand, so those constants are the
    single source of truth. Every bounding box / offset is ``None``.

    Returns:
        A freshly built dict on each call; mutating the result does not affect
        the module constants or later calls.
    """
    fields = [
        _table_field_skeleton(spec) if spec["type"] == "table" else _scalar_field_skeleton(spec)
        for spec in FIELDS
    ]
    return {
        "form_id": form_id,
        "version": 3,
        "page": 1,
        "scalar_value_region_mode": "offset_from_anchor_v1",
        "fields": fields,
        "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items.",
    }
108
+
109
+
110
def main() -> None:
    """Replace the ``schema`` of every template JSON file with a blank skeleton.

    Each ``*.json`` file in ``TEMPLATES_DIR`` is loaded, its ``"schema"`` key is
    set unconditionally to ``schema_skeleton(...)`` (any existing schema is
    discarded), and the file is rewritten in place. The form id is
    ``template_<template_id>``, using the file stem when the template has no
    ``template_id``.

    Raises:
        SystemExit: when the templates directory is missing or holds no JSON files.
    """
    if not TEMPLATES_DIR.exists():
        raise SystemExit(f"templates dir not found: {TEMPLATES_DIR}")

    template_files = sorted(TEMPLATES_DIR.glob("*.json"))
    if len(template_files) == 0:
        raise SystemExit(f"No template json files found in: {TEMPLATES_DIR}")

    updated = 0
    for template_file in template_files:
        template = json.loads(template_file.read_text(encoding="utf-8"))

        declared_id = template.get("template_id")
        template_id = (declared_id if declared_id else template_file.stem).strip()

        # Overwrite or create the schema skeleton for this template.
        template["schema"] = schema_skeleton(form_id=f"template_{template_id}")

        serialized = json.dumps(template, indent=2, ensure_ascii=False)
        template_file.write_text(serialized + "\n", encoding="utf-8")
        print(f"updated schema skeleton: {template_file}")
        updated += 1

    print(f"done. updated {updated} template files.")
134
+
135
+
136
+ if __name__ == "__main__":
137
+ main()
backend/scripts/migrate_hardcoded_templates.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # backend/scripts/migrate_hardcoded_templates.py
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List
8
+
9
+ TEMPLATES_DIR = Path(__file__).resolve().parents[1] / "templates"
10
+ TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
11
+
12
def _known_template(
    template_id: str,
    name: str,
    keywords_all: List[str],
    keywords_any: List[str],
) -> Dict[str, Any]:
    """Build one seed template record: active, version 1, empty schema."""
    return {
        "template_id": template_id,
        "name": name,
        "status": "active",
        "version": 1,
        "match": {
            "keywords_all": keywords_all,
            "keywords_any": keywords_any,
        },
        "schema": {},
    }


# Seed definitions for the seven known templates, in file-writing order.
KNOWN_TEMPLATES: List[Dict[str, Any]] = [
    _known_template(
        "T1_IFACTOR_DELIVERED_ORDER",
        "I-FACTOR Delivered Order Form",
        ["delivered order form"],
        ["i-factor", "cerapedics", "product information", "stickers", "bill to", "delivered to"],
    ),
    _known_template(
        "T2_SEASPINE_DELIVERED_GOODS_FORM",
        "SeaSpine Delivered Goods Form",
        ["delivered goods form"],
        ["seaspine", "isotis", "handling fee", "sales order", "invoice"],
    ),
    _known_template(
        "T3_ASTURA_SALES_ORDER_FORM",
        "Astura Sales Order Form",
        [],
        ["astura", "dc141", "ca200", "cbba", "sales order"],
    ),
    _known_template(
        "T4_MEDICAL_ESTIMATION_OF_CHARGES",
        "Medical Estimation of Charges",
        [],
        ["estimation of charges", "good faith estimate", "patient responsibility", "insurance"],
    ),
    _known_template(
        "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
        "Clinical Progress Note Postop",
        [],
        ["clinical progress note", "progress note", "post-op", "assessment", "plan"],
    ),
    _known_template(
        "T6_CUSTOMER_CHARGE_SHEET_SPINE",
        "Customer Charge Sheet Spine",
        [],
        ["customer charge sheet", "charge sheet", "spine", "qty", "unit price", "total"],
    ),
    _known_template(
        "T7_SALES_ORDER_ZIMMER",
        "Zimmer Sales Order",
        [],
        ["zimmer", "zimmer biomet", "biomet", "sales order", "purchase order", "po number"],
    ),
]
91
+
92
def main(*, overwrite: bool = False) -> None:
    """Write each entry of ``KNOWN_TEMPLATES`` to ``TEMPLATES_DIR/<template_id>.json``.

    Existing template files are left untouched by default: the seed records
    carry an empty ``schema`` and ``version`` 1, so rewriting a file that has
    since been given a schema would discard that work.

    Args:
        overwrite: When True, replace existing files with the seed record.
    """
    for t in KNOWN_TEMPLATES:
        out_path = TEMPLATES_DIR / f"{t['template_id']}.json"
        if out_path.exists() and not overwrite:
            print(f"skip (already exists): {out_path}")
            continue
        # Trailing newline and ensure_ascii=False match what the other
        # template scripts write, so a rerun does not produce spurious diffs.
        out_path.write_text(
            json.dumps(t, indent=2, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )
        print(f"wrote {out_path}")
97
+
98
+ if __name__ == "__main__":
99
+ main()
backend/templates/T1_IFACTOR_DELIVERED_ORDER.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_id": "T1_IFACTOR_DELIVERED_ORDER",
3
+ "name": "I-FACTOR Delivered Order Form",
4
+ "status": "active",
5
+ "version": 2,
6
+ "match": {
7
+ "keywords_all": [
8
+ "delivered order form"
9
+ ],
10
+ "keywords_any": [
11
+ "i-factor",
12
+ "cerapedics",
13
+ "product information",
14
+ "stickers",
15
+ "bill to",
16
+ "delivered to"
17
+ ]
18
+ },
19
+ "schema": {
20
+ "form_id": "trainer_2f7cdbc443f040c79723c74490f6282f",
21
+ "version": 3,
22
+ "page": 1,
23
+ "scalar_value_region_mode": "offset_from_anchor_v1",
24
+ "fields": [
25
+ {
26
+ "field_id": "facility_organization",
27
+ "label": "Facility / Organization",
28
+ "type": "entity",
29
+ "anchor_bbox_norm": {
30
+ "x": 0.138889,
31
+ "y": 0.328283,
32
+ "w": 0.047386,
33
+ "h": 0.027778
34
+ },
35
+ "value_bbox_norm": null,
36
+ "value_offset_norm": {
37
+ "dx": 0.052288,
38
+ "dy": -0.001263,
39
+ "w": 0.294118,
40
+ "h": 0.045455
41
+ }
42
+ },
43
+ {
44
+ "field_id": "case_location",
45
+ "label": "Case Location / Address",
46
+ "type": "text",
47
+ "anchor_bbox_norm": {
48
+ "x": 0.140523,
49
+ "y": 0.353535,
50
+ "w": 0.055556,
51
+ "h": 0.02399
52
+ },
53
+ "value_bbox_norm": null,
54
+ "value_offset_norm": {
55
+ "dx": 0.062092,
56
+ "dy": 0.005051,
57
+ "w": 0.292484,
58
+ "h": 0.056818
59
+ }
60
+ },
61
+ {
62
+ "field_id": "vendor",
63
+ "label": "Vendor",
64
+ "type": "entity",
65
+ "anchor_bbox_norm": {
66
+ "x": 0.215686,
67
+ "y": 0.170455,
68
+ "w": 0.205882,
69
+ "h": 0.059343
70
+ },
71
+ "value_bbox_norm": null,
72
+ "value_offset_norm": null
73
+ },
74
+ {
75
+ "field_id": "physician_name",
76
+ "label": "Physician Name",
77
+ "type": "person",
78
+ "anchor_bbox_norm": {
79
+ "x": 0.522876,
80
+ "y": 0.497475,
81
+ "w": 0.062092,
82
+ "h": 0.020202
83
+ },
84
+ "value_bbox_norm": null,
85
+ "value_offset_norm": {
86
+ "dx": 0.060458,
87
+ "dy": -0.005051,
88
+ "w": 0.214052,
89
+ "h": 0.025253
90
+ }
91
+ },
92
+ {
93
+ "field_id": "date_of_surgery",
94
+ "label": "Date of Surgery",
95
+ "type": "date",
96
+ "anchor_bbox_norm": {
97
+ "x": 0.138889,
98
+ "y": 0.57197,
99
+ "w": 0.160131,
100
+ "h": 0.026515
101
+ },
102
+ "value_bbox_norm": null,
103
+ "value_offset_norm": {
104
+ "dx": 0.165033,
105
+ "dy": -0.002525,
106
+ "w": 0.205882,
107
+ "h": 0.02399
108
+ }
109
+ },
110
+ {
111
+ "field_id": "items",
112
+ "label": "Items / Line Items",
113
+ "type": "table",
114
+ "table_bbox_norm": {
115
+ "x": 0.138889,
116
+ "y": 0.632576,
117
+ "w": 0.732026,
118
+ "h": 0.122475
119
+ },
120
+ "header_bbox_norm": {
121
+ "x": 0.142157,
122
+ "y": 0.632576,
123
+ "w": 0.727124,
124
+ "h": 0.034091
125
+ },
126
+ "row_height_hint_norm": null,
127
+ "columns": [
128
+ {
129
+ "key": "item_number",
130
+ "label": "Item Number",
131
+ "bbox_rel_norm": {
132
+ "x": 0.004464,
133
+ "y": 0.28866,
134
+ "w": 0.196429,
135
+ "h": 0.701031
136
+ }
137
+ },
138
+ {
139
+ "key": "lot_number",
140
+ "label": "Lot Number",
141
+ "bbox_rel_norm": null
142
+ },
143
+ {
144
+ "key": "description",
145
+ "label": "Description",
146
+ "bbox_rel_norm": {
147
+ "x": 0.209821,
148
+ "y": 0.278351,
149
+ "w": 0.241071,
150
+ "h": 0.639175
151
+ }
152
+ },
153
+ {
154
+ "key": "qty",
155
+ "label": "Qty",
156
+ "bbox_rel_norm": {
157
+ "x": 0.647321,
158
+ "y": 0.247423,
159
+ "w": 0.058036,
160
+ "h": 0.71134
161
+ }
162
+ },
163
+ {
164
+ "key": "price",
165
+ "label": "Price",
166
+ "bbox_rel_norm": null
167
+ }
168
+ ],
169
+ "table_anchors": [
170
+ {
171
+ "key": "item_number",
172
+ "expected_text": "Item Number",
173
+ "bbox_norm": {
174
+ "x": 0.140523,
175
+ "y": 0.652778,
176
+ "w": 0.145425,
177
+ "h": 0.016414
178
+ }
179
+ },
180
+ {
181
+ "key": "description",
182
+ "expected_text": "Description",
183
+ "bbox_norm": {
184
+ "x": 0.287582,
185
+ "y": 0.650253,
186
+ "w": 0.181373,
187
+ "h": 0.018939
188
+ }
189
+ },
190
+ {
191
+ "key": "qty",
192
+ "expected_text": "Qty",
193
+ "bbox_norm": {
194
+ "x": 0.614379,
195
+ "y": 0.647727,
196
+ "w": 0.047386,
197
+ "h": 0.016414
198
+ }
199
+ }
200
+ ],
201
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
202
+ }
203
+ ],
204
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
205
+ }
206
+ }
backend/templates/T2_SEASPINE_DELIVERED_GOODS_FORM.json ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_id": "T2_SEASPINE_DELIVERED_GOODS_FORM",
3
+ "name": "SeaSpine Delivered Goods Form",
4
+ "status": "active",
5
+ "version": 2,
6
+ "match": {
7
+ "keywords_all": [
8
+ "delivered goods form"
9
+ ],
10
+ "keywords_any": [
11
+ "seaspine",
12
+ "isotis",
13
+ "handling fee",
14
+ "sales order",
15
+ "invoice"
16
+ ]
17
+ },
18
+ "schema": {
19
+ "form_id": "trainer_245e70e31b1f4eb1b26fad626365e9ad",
20
+ "version": 3,
21
+ "page": 1,
22
+ "scalar_value_region_mode": "offset_from_anchor_v1",
23
+ "fields": [
24
+ {
25
+ "field_id": "facility_organization",
26
+ "label": "Facility / Organization",
27
+ "type": "entity",
28
+ "anchor_bbox_norm": {
29
+ "x": 0.179739,
30
+ "y": 0.284091,
31
+ "w": 0.04085,
32
+ "h": 0.020202
33
+ },
34
+ "value_bbox_norm": null,
35
+ "value_offset_norm": {
36
+ "dx": 0.044118,
37
+ "dy": -0.002525,
38
+ "w": 0.246732,
39
+ "h": 0.021465
40
+ }
41
+ },
42
+ {
43
+ "field_id": "case_location",
44
+ "label": "Case Location / Address",
45
+ "type": "text",
46
+ "anchor_bbox_norm": {
47
+ "x": 0.181373,
48
+ "y": 0.310606,
49
+ "w": 0.135621,
50
+ "h": 0.016414
51
+ },
52
+ "value_bbox_norm": null,
53
+ "value_offset_norm": {
54
+ "dx": 0.001634,
55
+ "dy": 0.013889,
56
+ "w": 0.295752,
57
+ "h": 0.027778
58
+ }
59
+ },
60
+ {
61
+ "field_id": "vendor",
62
+ "label": "Vendor",
63
+ "type": "entity",
64
+ "anchor_bbox_norm": {
65
+ "x": 0.606209,
66
+ "y": 0.152778,
67
+ "w": 0.173203,
68
+ "h": 0.068182
69
+ },
70
+ "value_bbox_norm": null,
71
+ "value_offset_norm": null
72
+ },
73
+ {
74
+ "field_id": "physician_name",
75
+ "label": "Physician Name",
76
+ "type": "person",
77
+ "anchor_bbox_norm": {
78
+ "x": 0.179739,
79
+ "y": 0.508838,
80
+ "w": 0.104575,
81
+ "h": 0.016414
82
+ },
83
+ "value_bbox_norm": null,
84
+ "value_offset_norm": {
85
+ "dx": 0.106209,
86
+ "dy": -0.001263,
87
+ "w": 0.372549,
88
+ "h": 0.015152
89
+ }
90
+ },
91
+ {
92
+ "field_id": "date_of_surgery",
93
+ "label": "Date of Surgery",
94
+ "type": "date",
95
+ "anchor_bbox_norm": {
96
+ "x": 0.179739,
97
+ "y": 0.521465,
98
+ "w": 0.081699,
99
+ "h": 0.021465
100
+ },
101
+ "value_bbox_norm": null,
102
+ "value_offset_norm": {
103
+ "dx": 0.083333,
104
+ "dy": 0.005051,
105
+ "w": 0.068627,
106
+ "h": 0.015152
107
+ }
108
+ },
109
+ {
110
+ "field_id": "items",
111
+ "label": "Items / Line Items",
112
+ "type": "table",
113
+ "table_bbox_norm": {
114
+ "x": 0.178105,
115
+ "y": 0.388889,
116
+ "w": 0.609477,
117
+ "h": 0.118687
118
+ },
119
+ "header_bbox_norm": {
120
+ "x": 0.178105,
121
+ "y": 0.390152,
122
+ "w": 0.609477,
123
+ "h": 0.02399
124
+ },
125
+ "row_height_hint_norm": null,
126
+ "columns": [
127
+ {
128
+ "key": "item_number",
129
+ "label": "Item Number",
130
+ "bbox_rel_norm": {
131
+ "x": 0.718499,
132
+ "y": 0.170213,
133
+ "w": 0.072386,
134
+ "h": 0.797872
135
+ }
136
+ },
137
+ {
138
+ "key": "lot_number",
139
+ "label": "Lot Number",
140
+ "bbox_rel_norm": {
141
+ "x": 0.168901,
142
+ "y": 0.223404,
143
+ "w": 0.171582,
144
+ "h": 0.776596
145
+ }
146
+ },
147
+ {
148
+ "key": "description",
149
+ "label": "Description",
150
+ "bbox_rel_norm": null
151
+ },
152
+ {
153
+ "key": "qty",
154
+ "label": "Qty",
155
+ "bbox_rel_norm": null
156
+ },
157
+ {
158
+ "key": "price",
159
+ "label": "Price",
160
+ "bbox_rel_norm": null
161
+ }
162
+ ],
163
+ "table_anchors": [
164
+ {
165
+ "key": "item_number",
166
+ "expected_text": "Item Number",
167
+ "bbox_norm": {
168
+ "x": 0.178105,
169
+ "y": 0.388889,
170
+ "w": 0.101307,
171
+ "h": 0.02399
172
+ }
173
+ },
174
+ {
175
+ "key": "description",
176
+ "expected_text": "Description",
177
+ "bbox_norm": {
178
+ "x": 0.488562,
179
+ "y": 0.388889,
180
+ "w": 0.129085,
181
+ "h": 0.025253
182
+ }
183
+ },
184
+ {
185
+ "key": "qty",
186
+ "expected_text": "Qty",
187
+ "bbox_norm": {
188
+ "x": 0.617647,
189
+ "y": 0.388889,
190
+ "w": 0.045752,
191
+ "h": 0.02399
192
+ }
193
+ }
194
+ ],
195
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
196
+ }
197
+ ],
198
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
199
+ }
200
+ }
backend/templates/T3_ASTURA_SALES_ORDER_FORM.json ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_id": "T3_ASTURA_SALES_ORDER_FORM",
3
+ "name": "Astura Sales Order Form",
4
+ "status": "active",
5
+ "version": 2,
6
+ "match": {
7
+ "keywords_all": [],
8
+ "keywords_any": [
9
+ "astura",
10
+ "dc141",
11
+ "ca200",
12
+ "cbba",
13
+ "sales order"
14
+ ]
15
+ },
16
+ "schema": {
17
+ "form_id": "trainer_b931186e13eb45d2a9a1ded8ff8641bb",
18
+ "version": 3,
19
+ "page": 1,
20
+ "scalar_value_region_mode": "offset_from_anchor_v1",
21
+ "fields": [
22
+ {
23
+ "field_id": "facility_organization",
24
+ "label": "Facility / Organization",
25
+ "type": "entity",
26
+ "anchor_bbox_norm": {
27
+ "x": 0.156863,
28
+ "y": 0.194444,
29
+ "w": 0.053922,
30
+ "h": 0.012626
31
+ },
32
+ "value_bbox_norm": null,
33
+ "value_offset_norm": {
34
+ "dx": 0.076797,
35
+ "dy": -0.002525,
36
+ "w": 0.205882,
37
+ "h": 0.021465
38
+ }
39
+ },
40
+ {
41
+ "field_id": "case_location",
42
+ "label": "Case Location / Address",
43
+ "type": "text",
44
+ "anchor_bbox_norm": {
45
+ "x": 0.155229,
46
+ "y": 0.224747,
47
+ "w": 0.05719,
48
+ "h": 0.016414
49
+ },
50
+ "value_bbox_norm": null,
51
+ "value_offset_norm": {
52
+ "dx": 0.075163,
53
+ "dy": 0,
54
+ "w": 0.212418,
55
+ "h": 0.034091
56
+ }
57
+ },
58
+ {
59
+ "field_id": "vendor",
60
+ "label": "Vendor",
61
+ "type": "entity",
62
+ "anchor_bbox_norm": {
63
+ "x": 0.160131,
64
+ "y": 0.117424,
65
+ "w": 0.098039,
66
+ "h": 0.064394
67
+ },
68
+ "value_bbox_norm": null,
69
+ "value_offset_norm": null
70
+ },
71
+ {
72
+ "field_id": "physician_name",
73
+ "label": "Physician Name",
74
+ "type": "person",
75
+ "anchor_bbox_norm": {
76
+ "x": 0.158497,
77
+ "y": 0.289141,
78
+ "w": 0.062092,
79
+ "h": 0.013889
80
+ },
81
+ "value_bbox_norm": null,
82
+ "value_offset_norm": {
83
+ "dx": 0.068627,
84
+ "dy": -0.002525,
85
+ "w": 0.212418,
86
+ "h": 0.022727
87
+ }
88
+ },
89
+ {
90
+ "field_id": "date_of_surgery",
91
+ "label": "Date of Surgery",
92
+ "type": "date",
93
+ "anchor_bbox_norm": {
94
+ "x": 0.160131,
95
+ "y": 0.256313,
96
+ "w": 0.053922,
97
+ "h": 0.016414
98
+ },
99
+ "value_bbox_norm": null,
100
+ "value_offset_norm": {
101
+ "dx": 0.071895,
102
+ "dy": 0,
103
+ "w": 0.124183,
104
+ "h": 0.018939
105
+ }
106
+ },
107
+ {
108
+ "field_id": "items",
109
+ "label": "Items / Line Items",
110
+ "type": "table",
111
+ "table_bbox_norm": {
112
+ "x": 0.153595,
113
+ "y": 0.339646,
114
+ "w": 0.620915,
115
+ "h": 0.180556
116
+ },
117
+ "header_bbox_norm": {
118
+ "x": 0.156863,
119
+ "y": 0.339646,
120
+ "w": 0.617647,
121
+ "h": 0.018939
122
+ },
123
+ "row_height_hint_norm": null,
124
+ "columns": [
125
+ {
126
+ "key": "item_number",
127
+ "label": "Item Number",
128
+ "bbox_rel_norm": {
129
+ "x": 0,
130
+ "y": 0.104895,
131
+ "w": 0.171053,
132
+ "h": 0.895105
133
+ }
134
+ },
135
+ {
136
+ "key": "lot_number",
137
+ "label": "Lot Number",
138
+ "bbox_rel_norm": null
139
+ },
140
+ {
141
+ "key": "description",
142
+ "label": "Description",
143
+ "bbox_rel_norm": {
144
+ "x": 0.171053,
145
+ "y": 0.111888,
146
+ "w": 0.323684,
147
+ "h": 0.888112
148
+ }
149
+ },
150
+ {
151
+ "key": "qty",
152
+ "label": "Qty",
153
+ "bbox_rel_norm": {
154
+ "x": 0.644737,
155
+ "y": 0.104895,
156
+ "w": 0.047368,
157
+ "h": 0.895105
158
+ }
159
+ },
160
+ {
161
+ "key": "price",
162
+ "label": "Price",
163
+ "bbox_rel_norm": null
164
+ }
165
+ ],
166
+ "table_anchors": [
167
+ {
168
+ "key": "item_number",
169
+ "expected_text": "Item Number",
170
+ "bbox_norm": {
171
+ "x": 0.153595,
172
+ "y": 0.342172,
173
+ "w": 0.104575,
174
+ "h": 0.016414
175
+ }
176
+ },
177
+ {
178
+ "key": "description",
179
+ "expected_text": "Description",
180
+ "bbox_norm": {
181
+ "x": 0.259804,
182
+ "y": 0.339646,
183
+ "w": 0.202614,
184
+ "h": 0.021465
185
+ }
186
+ },
187
+ {
188
+ "key": "qty",
189
+ "expected_text": "Qty",
190
+ "bbox_norm": {
191
+ "x": 0.555556,
192
+ "y": 0.342172,
193
+ "w": 0.034314,
194
+ "h": 0.015152
195
+ }
196
+ }
197
+ ],
198
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
199
+ }
200
+ ],
201
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
202
+ }
203
+ }
backend/templates/T4_MEDICAL_ESTIMATION_OF_CHARGES.json ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_id": "T4_MEDICAL_ESTIMATION_OF_CHARGES",
3
+ "name": "Medical Estimation of Charges",
4
+ "status": "active",
5
+ "version": 2,
6
+ "match": {
7
+ "keywords_all": [],
8
+ "keywords_any": [
9
+ "estimation of charges",
10
+ "good faith estimate",
11
+ "patient responsibility",
12
+ "insurance"
13
+ ]
14
+ },
15
+ "schema": {
16
+ "form_id": "trainer_20c968bf41ac4b1c8ee12a9bb15b2bfb",
17
+ "version": 3,
18
+ "page": 1,
19
+ "scalar_value_region_mode": "offset_from_anchor_v1",
20
+ "fields": [
21
+ {
22
+ "field_id": "facility_organization",
23
+ "label": "Facility / Organization",
24
+ "type": "entity",
25
+ "anchor_bbox_norm": {
26
+ "x": 0.142157,
27
+ "y": 0.25,
28
+ "w": 0.042484,
29
+ "h": 0.015152
30
+ },
31
+ "value_bbox_norm": null,
32
+ "value_offset_norm": {
33
+ "dx": 0.068627,
34
+ "dy": -0.003788,
35
+ "w": 0.117647,
36
+ "h": 0.018939
37
+ }
38
+ },
39
+ {
40
+ "field_id": "case_location",
41
+ "label": "Case Location / Address",
42
+ "type": "text",
43
+ "anchor_bbox_norm": {
44
+ "x": 0.143791,
45
+ "y": 0.271465,
46
+ "w": 0.047386,
47
+ "h": 0.017677
48
+ },
49
+ "value_bbox_norm": null,
50
+ "value_offset_norm": {
51
+ "dx": 0.071895,
52
+ "dy": -0.001263,
53
+ "w": 0.127451,
54
+ "h": 0.039141
55
+ }
56
+ },
57
+ {
58
+ "field_id": "vendor",
59
+ "label": "Vendor",
60
+ "type": "entity",
61
+ "anchor_bbox_norm": null,
62
+ "value_bbox_norm": null,
63
+ "value_offset_norm": null
64
+ },
65
+ {
66
+ "field_id": "physician_name",
67
+ "label": "Physician Name",
68
+ "type": "person",
69
+ "anchor_bbox_norm": null,
70
+ "value_bbox_norm": null,
71
+ "value_offset_norm": null
72
+ },
73
+ {
74
+ "field_id": "date_of_surgery",
75
+ "label": "Date of Surgery",
76
+ "type": "date",
77
+ "anchor_bbox_norm": null,
78
+ "value_bbox_norm": null,
79
+ "value_offset_norm": null
80
+ },
81
+ {
82
+ "field_id": "items",
83
+ "label": "Items / Line Items",
84
+ "type": "table",
85
+ "table_bbox_norm": {
86
+ "x": 0.143791,
87
+ "y": 0.409091,
88
+ "w": 0.676471,
89
+ "h": 0.132576
90
+ },
91
+ "header_bbox_norm": {
92
+ "x": 0.143791,
93
+ "y": 0.409091,
94
+ "w": 0.676471,
95
+ "h": 0.018939
96
+ },
97
+ "row_height_hint_norm": null,
98
+ "columns": [
99
+ {
100
+ "key": "item_number",
101
+ "label": "Item Number",
102
+ "bbox_rel_norm": {
103
+ "x": 0.717391,
104
+ "y": 0.114286,
105
+ "w": 0.089372,
106
+ "h": 0.857143
107
+ }
108
+ },
109
+ {
110
+ "key": "lot_number",
111
+ "label": "Lot Number",
112
+ "bbox_rel_norm": null
113
+ },
114
+ {
115
+ "key": "description",
116
+ "label": "Description",
117
+ "bbox_rel_norm": {
118
+ "x": 0.2657,
119
+ "y": 0.114286,
120
+ "w": 0.376812,
121
+ "h": 0.87619
122
+ }
123
+ },
124
+ {
125
+ "key": "qty",
126
+ "label": "Qty",
127
+ "bbox_rel_norm": null
128
+ },
129
+ {
130
+ "key": "price",
131
+ "label": "Price",
132
+ "bbox_rel_norm": null
133
+ }
134
+ ],
135
+ "table_anchors": [
136
+ {
137
+ "key": "item_number",
138
+ "expected_text": "Item Number",
139
+ "bbox_norm": {
140
+ "x": 0.632353,
141
+ "y": 0.409091,
142
+ "w": 0.045752,
143
+ "h": 0.017677
144
+ }
145
+ },
146
+ {
147
+ "key": "description",
148
+ "expected_text": "Description",
149
+ "bbox_norm": {
150
+ "x": 0.325163,
151
+ "y": 0.409091,
152
+ "w": 0.248366,
153
+ "h": 0.017677
154
+ }
155
+ },
156
+ {
157
+ "key": "qty",
158
+ "expected_text": "Qty",
159
+ "bbox_norm": null
160
+ }
161
+ ],
162
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
163
+ }
164
+ ],
165
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
166
+ }
167
+ }
backend/templates/T5_CLINICAL_PROGRESS_NOTE_POSTOP.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_id": "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
3
+ "name": "Clinical Progress Note Postop",
4
+ "status": "active",
5
+ "version": 2,
6
+ "match": {
7
+ "keywords_all": [],
8
+ "keywords_any": [
9
+ "clinical progress note",
10
+ "progress note",
11
+ "post-op",
12
+ "assessment",
13
+ "plan"
14
+ ]
15
+ },
16
+ "schema": {
17
+ "form_id": "trainer_e75eb5b93bb54c28934f43cacc406cc8",
18
+ "version": 3,
19
+ "page": 1,
20
+ "scalar_value_region_mode": "offset_from_anchor_v1",
21
+ "fields": [
22
+ {
23
+ "field_id": "facility_organization",
24
+ "label": "Facility / Organization",
25
+ "type": "entity",
26
+ "anchor_bbox_norm": null,
27
+ "value_bbox_norm": null,
28
+ "value_offset_norm": null
29
+ },
30
+ {
31
+ "field_id": "case_location",
32
+ "label": "Case Location / Address",
33
+ "type": "text",
34
+ "anchor_bbox_norm": null,
35
+ "value_bbox_norm": null,
36
+ "value_offset_norm": null
37
+ },
38
+ {
39
+ "field_id": "vendor",
40
+ "label": "Vendor",
41
+ "type": "entity",
42
+ "anchor_bbox_norm": null,
43
+ "value_bbox_norm": null,
44
+ "value_offset_norm": null
45
+ },
46
+ {
47
+ "field_id": "physician_name",
48
+ "label": "Physician Name",
49
+ "type": "person",
50
+ "anchor_bbox_norm": null,
51
+ "value_bbox_norm": null,
52
+ "value_offset_norm": null
53
+ },
54
+ {
55
+ "field_id": "date_of_surgery",
56
+ "label": "Date of Surgery",
57
+ "type": "date",
58
+ "anchor_bbox_norm": null,
59
+ "value_bbox_norm": null,
60
+ "value_offset_norm": null
61
+ },
62
+ {
63
+ "field_id": "items",
64
+ "label": "Items / Line Items",
65
+ "type": "table",
66
+ "table_bbox_norm": null,
67
+ "header_bbox_norm": null,
68
+ "row_height_hint_norm": null,
69
+ "columns": [
70
+ {
71
+ "key": "item_number",
72
+ "label": "Item Number",
73
+ "bbox_rel_norm": null
74
+ },
75
+ {
76
+ "key": "lot_number",
77
+ "label": "Lot Number",
78
+ "bbox_rel_norm": null
79
+ },
80
+ {
81
+ "key": "description",
82
+ "label": "Description",
83
+ "bbox_rel_norm": null
84
+ },
85
+ {
86
+ "key": "qty",
87
+ "label": "Qty",
88
+ "bbox_rel_norm": null
89
+ },
90
+ {
91
+ "key": "price",
92
+ "label": "Price",
93
+ "bbox_rel_norm": null
94
+ }
95
+ ],
96
+ "table_anchors": [
97
+ {
98
+ "key": "item_number",
99
+ "expected_text": "Item Number",
100
+ "bbox_norm": null
101
+ },
102
+ {
103
+ "key": "description",
104
+ "expected_text": "Description",
105
+ "bbox_norm": null
106
+ },
107
+ {
108
+ "key": "qty",
109
+ "expected_text": "Qty",
110
+ "bbox_norm": null
111
+ }
112
+ ],
113
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
114
+ }
115
+ ],
116
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
117
+ }
118
+ }
backend/templates/T6_CUSTOMER_CHARGE_SHEET_SPINE.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_id": "T6_CUSTOMER_CHARGE_SHEET_SPINE",
3
+ "name": "Customer Charge Sheet Spine",
4
+ "status": "active",
5
+ "version": 2,
6
+ "match": {
7
+ "keywords_all": [],
8
+ "keywords_any": [
9
+ "customer charge sheet",
10
+ "charge sheet",
11
+ "spine",
12
+ "qty",
13
+ "unit price",
14
+ "total"
15
+ ]
16
+ },
17
+ "schema": {
18
+ "form_id": "trainer_6b04e85b60a9470588be4f7541029d71",
19
+ "version": 3,
20
+ "page": 1,
21
+ "scalar_value_region_mode": "offset_from_anchor_v1",
22
+ "fields": [
23
+ {
24
+ "field_id": "facility_organization",
25
+ "label": "Facility / Organization",
26
+ "type": "entity",
27
+ "anchor_bbox_norm": {
28
+ "x": 0.388386,
29
+ "y": 0.27195,
30
+ "w": 0.096782,
31
+ "h": 0.013598
32
+ },
33
+ "value_bbox_norm": null,
34
+ "value_offset_norm": {
35
+ "dx": 0,
36
+ "dy": 0.011655,
37
+ "w": 0.096782,
38
+ "h": 0.01554
39
+ }
40
+ },
41
+ {
42
+ "field_id": "case_location",
43
+ "label": "Case Location / Address",
44
+ "type": "text",
45
+ "anchor_bbox_norm": {
46
+ "x": 0.483912,
47
+ "y": 0.297203,
48
+ "w": 0.13826,
49
+ "h": 0.011655
50
+ },
51
+ "value_bbox_norm": null,
52
+ "value_offset_norm": {
53
+ "dx": 0.005028,
54
+ "dy": 0.00777,
55
+ "w": 0.124434,
56
+ "h": 0.035936
57
+ }
58
+ },
59
+ {
60
+ "field_id": "vendor",
61
+ "label": "Vendor",
62
+ "type": "entity",
63
+ "anchor_bbox_norm": {
64
+ "x": 0.618401,
65
+ "y": 0.190365,
66
+ "w": 0.137004,
67
+ "h": 0.047591
68
+ },
69
+ "value_bbox_norm": null,
70
+ "value_offset_norm": null
71
+ },
72
+ {
73
+ "field_id": "physician_name",
74
+ "label": "Physician Name",
75
+ "type": "person",
76
+ "anchor_bbox_norm": {
77
+ "x": 0.218703,
78
+ "y": 0.296232,
79
+ "w": 0.042735,
80
+ "h": 0.019425
81
+ },
82
+ "value_bbox_norm": null,
83
+ "value_offset_norm": {
84
+ "dx": 0.042735,
85
+ "dy": 0,
86
+ "w": 0.124434,
87
+ "h": 0.020396
88
+ }
89
+ },
90
+ {
91
+ "field_id": "date_of_surgery",
92
+ "label": "Date of Surgery",
93
+ "type": "date",
94
+ "anchor_bbox_norm": {
95
+ "x": 0.221217,
96
+ "y": 0.308858,
97
+ "w": 0.081699,
98
+ "h": 0.018454
99
+ },
100
+ "value_bbox_norm": null,
101
+ "value_offset_norm": {
102
+ "dx": 0.084213,
103
+ "dy": 0.001943,
104
+ "w": 0.08547,
105
+ "h": 0.018454
106
+ }
107
+ },
108
+ {
109
+ "field_id": "items",
110
+ "label": "Items / Line Items",
111
+ "type": "table",
112
+ "table_bbox_norm": {
113
+ "x": 0.224987,
114
+ "y": 0.373932,
115
+ "w": 0.549271,
116
+ "h": 0.305944
117
+ },
118
+ "header_bbox_norm": {
119
+ "x": 0.226244,
120
+ "y": 0.373932,
121
+ "w": 0.548014,
122
+ "h": 0.012626
123
+ },
124
+ "row_height_hint_norm": null,
125
+ "columns": [
126
+ {
127
+ "key": "item_number",
128
+ "label": "Item Number",
129
+ "bbox_rel_norm": {
130
+ "x": 0,
131
+ "y": 0.050794,
132
+ "w": 0.144165,
133
+ "h": 0.949206
134
+ }
135
+ },
136
+ {
137
+ "key": "lot_number",
138
+ "label": "Lot Number",
139
+ "bbox_rel_norm": null
140
+ },
141
+ {
142
+ "key": "description",
143
+ "label": "Description",
144
+ "bbox_rel_norm": {
145
+ "x": 0.15103,
146
+ "y": 0.057143,
147
+ "w": 0.157895,
148
+ "h": 0.942857
149
+ }
150
+ },
151
+ {
152
+ "key": "qty",
153
+ "label": "Qty",
154
+ "bbox_rel_norm": {
155
+ "x": 0.414188,
156
+ "y": 0.044444,
157
+ "w": 0.059497,
158
+ "h": 0.952381
159
+ }
160
+ },
161
+ {
162
+ "key": "price",
163
+ "label": "Price",
164
+ "bbox_rel_norm": null
165
+ }
166
+ ],
167
+ "table_anchors": [
168
+ {
169
+ "key": "item_number",
170
+ "expected_text": "Item Number",
171
+ "bbox_norm": {
172
+ "x": 0.224987,
173
+ "y": 0.373932,
174
+ "w": 0.080442,
175
+ "h": 0.016511
176
+ }
177
+ },
178
+ {
179
+ "key": "description",
180
+ "expected_text": "Description",
181
+ "bbox_norm": {
182
+ "x": 0.306687,
183
+ "y": 0.373932,
184
+ "w": 0.081699,
185
+ "h": 0.019425
186
+ }
187
+ },
188
+ {
189
+ "key": "qty",
190
+ "expected_text": "Qty",
191
+ "bbox_norm": {
192
+ "x": 0.453746,
193
+ "y": 0.376845,
194
+ "w": 0.030166,
195
+ "h": 0.013598
196
+ }
197
+ }
198
+ ],
199
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
200
+ }
201
+ ],
202
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
203
+ }
204
+ }
backend/templates/T7_SALES_ORDER_ZIMMER.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_id": "T7_SALES_ORDER_ZIMMER",
3
+ "name": "Zimmer Sales Order",
4
+ "status": "active",
5
+ "version": 2,
6
+ "match": {
7
+ "keywords_all": [],
8
+ "keywords_any": [
9
+ "zimmer",
10
+ "zimmer biomet",
11
+ "biomet",
12
+ "sales order",
13
+ "purchase order",
14
+ "po number"
15
+ ]
16
+ },
17
+ "schema": {
18
+ "form_id": "trainer_2a12b374e66646689308af1beea88933",
19
+ "version": 3,
20
+ "page": 1,
21
+ "scalar_value_region_mode": "offset_from_anchor_v1",
22
+ "fields": [
23
+ {
24
+ "field_id": "facility_organization",
25
+ "label": "Facility / Organization",
26
+ "type": "entity",
27
+ "anchor_bbox_norm": {
28
+ "x": 0.292484,
29
+ "y": 0.183081,
30
+ "w": 0.01634,
31
+ "h": 0.045455
32
+ },
33
+ "value_bbox_norm": null,
34
+ "value_offset_norm": {
35
+ "dx": -0.003268,
36
+ "dy": 0.045455,
37
+ "w": 0.017974,
38
+ "h": 0.162879
39
+ }
40
+ },
41
+ {
42
+ "field_id": "case_location",
43
+ "label": "Case Location / Address",
44
+ "type": "text",
45
+ "anchor_bbox_norm": {
46
+ "x": 0.271242,
47
+ "y": 0.14899,
48
+ "w": 0.013072,
49
+ "h": 0.080808
50
+ },
51
+ "value_bbox_norm": null,
52
+ "value_offset_norm": {
53
+ "dx": 0,
54
+ "dy": 0.079545,
55
+ "w": 0.017974,
56
+ "h": 0.165404
57
+ }
58
+ },
59
+ {
60
+ "field_id": "vendor",
61
+ "label": "Vendor",
62
+ "type": "entity",
63
+ "anchor_bbox_norm": {
64
+ "x": 0.785948,
65
+ "y": 0.147727,
66
+ "w": 0.027778,
67
+ "h": 0.151515
68
+ },
69
+ "value_bbox_norm": null,
70
+ "value_offset_norm": null
71
+ },
72
+ {
73
+ "field_id": "physician_name",
74
+ "label": "Physician Name",
75
+ "type": "person",
76
+ "anchor_bbox_norm": {
77
+ "x": 0.248366,
78
+ "y": 0.145202,
79
+ "w": 0.022876,
80
+ "h": 0.084596
81
+ },
82
+ "value_bbox_norm": null,
83
+ "value_offset_norm": {
84
+ "dx": 0.003268,
85
+ "dy": 0.084596,
86
+ "w": 0.02451,
87
+ "h": 0.165404
88
+ }
89
+ },
90
+ {
91
+ "field_id": "date_of_surgery",
92
+ "label": "Date of Surgery",
93
+ "type": "date",
94
+ "anchor_bbox_norm": {
95
+ "x": 0.21732,
96
+ "y": 0.156566,
97
+ "w": 0.013072,
98
+ "h": 0.074495
99
+ },
100
+ "value_bbox_norm": null,
101
+ "value_offset_norm": {
102
+ "dx": -0.006536,
103
+ "dy": 0.073232,
104
+ "w": 0.027778,
105
+ "h": 0.167929
106
+ }
107
+ },
108
+ {
109
+ "field_id": "items",
110
+ "label": "Items / Line Items",
111
+ "type": "table",
112
+ "table_bbox_norm": {
113
+ "x": 0.473856,
114
+ "y": 0.109848,
115
+ "w": 0.256536,
116
+ "h": 0.707071
117
+ },
118
+ "header_bbox_norm": {
119
+ "x": 0.707516,
120
+ "y": 0.109848,
121
+ "w": 0.021242,
122
+ "h": 0.707071
123
+ },
124
+ "row_height_hint_norm": null,
125
+ "columns": [
126
+ {
127
+ "key": "item_number",
128
+ "label": "Item Number",
129
+ "bbox_rel_norm": null
130
+ },
131
+ {
132
+ "key": "lot_number",
133
+ "label": "Lot Number",
134
+ "bbox_rel_norm": null
135
+ },
136
+ {
137
+ "key": "description",
138
+ "label": "Description",
139
+ "bbox_rel_norm": null
140
+ },
141
+ {
142
+ "key": "qty",
143
+ "label": "Qty",
144
+ "bbox_rel_norm": null
145
+ },
146
+ {
147
+ "key": "price",
148
+ "label": "Price",
149
+ "bbox_rel_norm": null
150
+ }
151
+ ],
152
+ "table_anchors": [
153
+ {
154
+ "key": "item_number",
155
+ "expected_text": "Item Number",
156
+ "bbox_norm": null
157
+ },
158
+ {
159
+ "key": "description",
160
+ "expected_text": "Description",
161
+ "bbox_norm": null
162
+ },
163
+ {
164
+ "key": "qty",
165
+ "expected_text": "Qty",
166
+ "bbox_norm": null
167
+ }
168
+ ],
169
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
170
+ }
171
+ ],
172
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
173
+ }
174
+ }
backend/trainer_schemas/T1_IFACTOR_DELIVERED_ORDER.schema.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "form_id": "trainer_2f7cdbc443f040c79723c74490f6282f",
3
+ "version": 3,
4
+ "page": 1,
5
+ "scalar_value_region_mode": "offset_from_anchor_v1",
6
+ "fields": [
7
+ {
8
+ "field_id": "facility_organization",
9
+ "label": "Facility / Organization",
10
+ "type": "entity",
11
+ "anchor_bbox_norm": { "x": 0.138889, "y": 0.328283, "w": 0.047386, "h": 0.027778 },
12
+ "value_bbox_norm": null,
13
+ "value_offset_norm": { "dx": 0.052288, "dy": -0.001263, "w": 0.294118, "h": 0.045455 }
14
+ },
15
+ {
16
+ "field_id": "case_location",
17
+ "label": "Case Location / Address",
18
+ "type": "text",
19
+ "anchor_bbox_norm": { "x": 0.140523, "y": 0.353535, "w": 0.055556, "h": 0.02399 },
20
+ "value_bbox_norm": null,
21
+ "value_offset_norm": { "dx": 0.062092, "dy": 0.005051, "w": 0.292484, "h": 0.056818 }
22
+ },
23
+ {
24
+ "field_id": "vendor",
25
+ "label": "Vendor",
26
+ "type": "entity",
27
+ "anchor_bbox_norm": { "x": 0.215686, "y": 0.170455, "w": 0.205882, "h": 0.059343 },
28
+ "value_bbox_norm": null,
29
+ "value_offset_norm": null
30
+ },
31
+ {
32
+ "field_id": "physician_name",
33
+ "label": "Physician Name",
34
+ "type": "person",
35
+ "anchor_bbox_norm": { "x": 0.522876, "y": 0.497475, "w": 0.062092, "h": 0.020202 },
36
+ "value_bbox_norm": null,
37
+ "value_offset_norm": { "dx": 0.060458, "dy": -0.005051, "w": 0.214052, "h": 0.025253 }
38
+ },
39
+ {
40
+ "field_id": "date_of_surgery",
41
+ "label": "Date of Surgery",
42
+ "type": "date",
43
+ "anchor_bbox_norm": { "x": 0.138889, "y": 0.57197, "w": 0.160131, "h": 0.026515 },
44
+ "value_bbox_norm": null,
45
+ "value_offset_norm": { "dx": 0.165033, "dy": -0.002525, "w": 0.205882, "h": 0.02399 }
46
+ },
47
+ {
48
+ "field_id": "items",
49
+ "label": "Items / Line Items",
50
+ "type": "table",
51
+ "table_bbox_norm": { "x": 0.138889, "y": 0.632576, "w": 0.732026, "h": 0.122475 },
52
+ "header_bbox_norm": { "x": 0.142157, "y": 0.632576, "w": 0.727124, "h": 0.034091 },
53
+ "row_height_hint_norm": null,
54
+ "columns": [
55
+ { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.004464, "y": 0.28866, "w": 0.196429, "h": 0.701031 } },
56
+ { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
57
+ { "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.209821, "y": 0.278351, "w": 0.241071, "h": 0.639175 } },
58
+ { "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.647321, "y": 0.247423, "w": 0.058036, "h": 0.71134 } },
59
+ { "key": "price", "label": "Price", "bbox_rel_norm": null }
60
+ ],
61
+ "table_anchors": [
62
+ { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.140523, "y": 0.652778, "w": 0.145425, "h": 0.016414 } },
63
+ { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.287582, "y": 0.650253, "w": 0.181373, "h": 0.018939 } },
64
+ { "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.614379, "y": 0.647727, "w": 0.047386, "h": 0.016414 } }
65
+ ],
66
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
67
+ }
68
+ ],
69
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
70
+ }
backend/trainer_schemas/T2_SEASPINE_DELIVERED_GOODS_FORM.schema.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "form_id": "trainer_245e70e31b1f4eb1b26fad626365e9ad",
3
+ "version": 3,
4
+ "page": 1,
5
+ "scalar_value_region_mode": "offset_from_anchor_v1",
6
+ "fields": [
7
+ {
8
+ "field_id": "facility_organization",
9
+ "label": "Facility / Organization",
10
+ "type": "entity",
11
+ "anchor_bbox_norm": { "x": 0.179739, "y": 0.284091, "w": 0.04085, "h": 0.020202 },
12
+ "value_bbox_norm": null,
13
+ "value_offset_norm": { "dx": 0.044118, "dy": -0.002525, "w": 0.246732, "h": 0.021465 }
14
+ },
15
+ {
16
+ "field_id": "case_location",
17
+ "label": "Case Location / Address",
18
+ "type": "text",
19
+ "anchor_bbox_norm": { "x": 0.181373, "y": 0.310606, "w": 0.135621, "h": 0.016414 },
20
+ "value_bbox_norm": null,
21
+ "value_offset_norm": { "dx": 0.001634, "dy": 0.013889, "w": 0.295752, "h": 0.027778 }
22
+ },
23
+ {
24
+ "field_id": "vendor",
25
+ "label": "Vendor",
26
+ "type": "entity",
27
+ "anchor_bbox_norm": { "x": 0.606209, "y": 0.152778, "w": 0.173203, "h": 0.068182 },
28
+ "value_bbox_norm": null,
29
+ "value_offset_norm": null
30
+ },
31
+ {
32
+ "field_id": "physician_name",
33
+ "label": "Physician Name",
34
+ "type": "person",
35
+ "anchor_bbox_norm": { "x": 0.179739, "y": 0.508838, "w": 0.104575, "h": 0.016414 },
36
+ "value_bbox_norm": null,
37
+ "value_offset_norm": { "dx": 0.106209, "dy": -0.001263, "w": 0.372549, "h": 0.015152 }
38
+ },
39
+ {
40
+ "field_id": "date_of_surgery",
41
+ "label": "Date of Surgery",
42
+ "type": "date",
43
+ "anchor_bbox_norm": { "x": 0.179739, "y": 0.521465, "w": 0.081699, "h": 0.021465 },
44
+ "value_bbox_norm": null,
45
+ "value_offset_norm": { "dx": 0.083333, "dy": 0.005051, "w": 0.068627, "h": 0.015152 }
46
+ },
47
+ {
48
+ "field_id": "items",
49
+ "label": "Items / Line Items",
50
+ "type": "table",
51
+ "table_bbox_norm": { "x": 0.178105, "y": 0.388889, "w": 0.609477, "h": 0.118687 },
52
+ "header_bbox_norm": { "x": 0.178105, "y": 0.390152, "w": 0.609477, "h": 0.02399 },
53
+ "row_height_hint_norm": null,
54
+ "columns": [
55
+ { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.718499, "y": 0.170213, "w": 0.072386, "h": 0.797872 } },
56
+ { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": { "x": 0.168901, "y": 0.223404, "w": 0.171582, "h": 0.776596 } },
57
+ { "key": "description", "label": "Description", "bbox_rel_norm": null },
58
+ { "key": "qty", "label": "Qty", "bbox_rel_norm": null },
59
+ { "key": "price", "label": "Price", "bbox_rel_norm": null }
60
+ ],
61
+ "table_anchors": [
62
+ { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.178105, "y": 0.388889, "w": 0.101307, "h": 0.02399 } },
63
+ { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.488562, "y": 0.388889, "w": 0.129085, "h": 0.025253 } },
64
+ { "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.617647, "y": 0.388889, "w": 0.045752, "h": 0.02399 } }
65
+ ],
66
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
67
+ }
68
+ ],
69
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
70
+ }
backend/trainer_schemas/T3_ASTURA_SALES_ORDER_FORM.schema.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "form_id": "trainer_b931186e13eb45d2a9a1ded8ff8641bb",
3
+ "version": 3,
4
+ "page": 1,
5
+ "scalar_value_region_mode": "offset_from_anchor_v1",
6
+ "fields": [
7
+ {
8
+ "field_id": "facility_organization",
9
+ "label": "Facility / Organization",
10
+ "type": "entity",
11
+ "anchor_bbox_norm": { "x": 0.156863, "y": 0.194444, "w": 0.053922, "h": 0.012626 },
12
+ "value_bbox_norm": null,
13
+ "value_offset_norm": { "dx": 0.076797, "dy": -0.002525, "w": 0.205882, "h": 0.021465 }
14
+ },
15
+ {
16
+ "field_id": "case_location",
17
+ "label": "Case Location / Address",
18
+ "type": "text",
19
+ "anchor_bbox_norm": { "x": 0.155229, "y": 0.224747, "w": 0.05719, "h": 0.016414 },
20
+ "value_bbox_norm": null,
21
+ "value_offset_norm": { "dx": 0.075163, "dy": 0, "w": 0.212418, "h": 0.034091 }
22
+ },
23
+ {
24
+ "field_id": "vendor",
25
+ "label": "Vendor",
26
+ "type": "entity",
27
+ "anchor_bbox_norm": { "x": 0.160131, "y": 0.117424, "w": 0.098039, "h": 0.064394 },
28
+ "value_bbox_norm": null,
29
+ "value_offset_norm": null
30
+ },
31
+ {
32
+ "field_id": "physician_name",
33
+ "label": "Physician Name",
34
+ "type": "person",
35
+ "anchor_bbox_norm": { "x": 0.158497, "y": 0.289141, "w": 0.062092, "h": 0.013889 },
36
+ "value_bbox_norm": null,
37
+ "value_offset_norm": { "dx": 0.068627, "dy": -0.002525, "w": 0.212418, "h": 0.022727 }
38
+ },
39
+ {
40
+ "field_id": "date_of_surgery",
41
+ "label": "Date of Surgery",
42
+ "type": "date",
43
+ "anchor_bbox_norm": { "x": 0.160131, "y": 0.256313, "w": 0.053922, "h": 0.016414 },
44
+ "value_bbox_norm": null,
45
+ "value_offset_norm": { "dx": 0.071895, "dy": 0, "w": 0.124183, "h": 0.018939 }
46
+ },
47
+ {
48
+ "field_id": "items",
49
+ "label": "Items / Line Items",
50
+ "type": "table",
51
+ "table_bbox_norm": { "x": 0.153595, "y": 0.339646, "w": 0.620915, "h": 0.180556 },
52
+ "header_bbox_norm": { "x": 0.156863, "y": 0.339646, "w": 0.617647, "h": 0.018939 },
53
+ "row_height_hint_norm": null,
54
+ "columns": [
55
+ { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0, "y": 0.104895, "w": 0.171053, "h": 0.895105 } },
56
+ { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
57
+ { "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.171053, "y": 0.111888, "w": 0.323684, "h": 0.888112 } },
58
+ { "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.644737, "y": 0.104895, "w": 0.047368, "h": 0.895105 } },
59
+ { "key": "price", "label": "Price", "bbox_rel_norm": null }
60
+ ],
61
+ "table_anchors": [
62
+ { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.153595, "y": 0.342172, "w": 0.104575, "h": 0.016414 } },
63
+ { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.259804, "y": 0.339646, "w": 0.202614, "h": 0.021465 } },
64
+ { "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.555556, "y": 0.342172, "w": 0.034314, "h": 0.015152 } }
65
+ ],
66
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
67
+ }
68
+ ],
69
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
70
+ }
backend/trainer_schemas/T4_MEDICAL_ESTIMATION_OF_CHARGES.schema.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "form_id": "trainer_20c968bf41ac4b1c8ee12a9bb15b2bfb",
3
+ "version": 3,
4
+ "page": 1,
5
+ "scalar_value_region_mode": "offset_from_anchor_v1",
6
+ "fields": [
7
+ {
8
+ "field_id": "facility_organization",
9
+ "label": "Facility / Organization",
10
+ "type": "entity",
11
+ "anchor_bbox_norm": { "x": 0.142157, "y": 0.25, "w": 0.042484, "h": 0.015152 },
12
+ "value_bbox_norm": null,
13
+ "value_offset_norm": { "dx": 0.068627, "dy": -0.003788, "w": 0.117647, "h": 0.018939 }
14
+ },
15
+ {
16
+ "field_id": "case_location",
17
+ "label": "Case Location / Address",
18
+ "type": "text",
19
+ "anchor_bbox_norm": { "x": 0.143791, "y": 0.271465, "w": 0.047386, "h": 0.017677 },
20
+ "value_bbox_norm": null,
21
+ "value_offset_norm": { "dx": 0.071895, "dy": -0.001263, "w": 0.127451, "h": 0.039141 }
22
+ },
23
+ { "field_id": "vendor", "label": "Vendor", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
24
+ { "field_id": "physician_name", "label": "Physician Name", "type": "person", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
25
+ { "field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
26
+ {
27
+ "field_id": "items",
28
+ "label": "Items / Line Items",
29
+ "type": "table",
30
+ "table_bbox_norm": { "x": 0.143791, "y": 0.409091, "w": 0.676471, "h": 0.132576 },
31
+ "header_bbox_norm": { "x": 0.143791, "y": 0.409091, "w": 0.676471, "h": 0.018939 },
32
+ "row_height_hint_norm": null,
33
+ "columns": [
34
+ { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.717391, "y": 0.114286, "w": 0.089372, "h": 0.857143 } },
35
+ { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
36
+ { "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.2657, "y": 0.114286, "w": 0.376812, "h": 0.87619 } },
37
+ { "key": "qty", "label": "Qty", "bbox_rel_norm": null },
38
+ { "key": "price", "label": "Price", "bbox_rel_norm": null }
39
+ ],
40
+ "table_anchors": [
41
+ { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.632353, "y": 0.409091, "w": 0.045752, "h": 0.017677 } },
42
+ { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.325163, "y": 0.409091, "w": 0.248366, "h": 0.017677 } },
43
+ { "key": "qty", "expected_text": "Qty", "bbox_norm": null }
44
+ ],
45
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
46
+ }
47
+ ],
48
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
49
+ }
backend/trainer_schemas/T5_CLINICAL_PROGRESS_NOTE_POSTOP.schema.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "form_id": "trainer_e75eb5b93bb54c28934f43cacc406cc8",
3
+ "version": 3,
4
+ "page": 1,
5
+ "scalar_value_region_mode": "offset_from_anchor_v1",
6
+ "fields": [
7
+ { "field_id": "facility_organization", "label": "Facility / Organization", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
8
+ { "field_id": "case_location", "label": "Case Location / Address", "type": "text", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
9
+ { "field_id": "vendor", "label": "Vendor", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
10
+ { "field_id": "physician_name", "label": "Physician Name", "type": "person", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
11
+ { "field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
12
+ {
13
+ "field_id": "items",
14
+ "label": "Items / Line Items",
15
+ "type": "table",
16
+ "table_bbox_norm": null,
17
+ "header_bbox_norm": null,
18
+ "row_height_hint_norm": null,
19
+ "columns": [
20
+ { "key": "item_number", "label": "Item Number", "bbox_rel_norm": null },
21
+ { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
22
+ { "key": "description", "label": "Description", "bbox_rel_norm": null },
23
+ { "key": "qty", "label": "Qty", "bbox_rel_norm": null },
24
+ { "key": "price", "label": "Price", "bbox_rel_norm": null }
25
+ ],
26
+ "table_anchors": [
27
+ { "key": "item_number", "expected_text": "Item Number", "bbox_norm": null },
28
+ { "key": "description", "expected_text": "Description", "bbox_norm": null },
29
+ { "key": "qty", "expected_text": "Qty", "bbox_norm": null }
30
+ ],
31
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
32
+ }
33
+ ],
34
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
35
+ }
backend/trainer_schemas/T6_CUSTOMER_CHARGE_SHEET_SPINE.schema.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "form_id": "trainer_6b04e85b60a9470588be4f7541029d71",
3
+ "version": 3,
4
+ "page": 1,
5
+ "scalar_value_region_mode": "offset_from_anchor_v1",
6
+ "fields": [
7
+ {
8
+ "field_id": "facility_organization",
9
+ "label": "Facility / Organization",
10
+ "type": "entity",
11
+ "anchor_bbox_norm": { "x": 0.388386, "y": 0.27195, "w": 0.096782, "h": 0.013598 },
12
+ "value_bbox_norm": null,
13
+ "value_offset_norm": { "dx": 0, "dy": 0.011655, "w": 0.096782, "h": 0.01554 }
14
+ },
15
+ {
16
+ "field_id": "case_location",
17
+ "label": "Case Location / Address",
18
+ "type": "text",
19
+ "anchor_bbox_norm": { "x": 0.483912, "y": 0.297203, "w": 0.13826, "h": 0.011655 },
20
+ "value_bbox_norm": null,
21
+ "value_offset_norm": { "dx": 0.005028, "dy": 0.00777, "w": 0.124434, "h": 0.035936 }
22
+ },
23
+ {
24
+ "field_id": "vendor",
25
+ "label": "Vendor",
26
+ "type": "entity",
27
+ "anchor_bbox_norm": { "x": 0.618401, "y": 0.190365, "w": 0.137004, "h": 0.047591 },
28
+ "value_bbox_norm": null,
29
+ "value_offset_norm": null
30
+ },
31
+ {
32
+ "field_id": "physician_name",
33
+ "label": "Physician Name",
34
+ "type": "person",
35
+ "anchor_bbox_norm": { "x": 0.218703, "y": 0.296232, "w": 0.042735, "h": 0.019425 },
36
+ "value_bbox_norm": null,
37
+ "value_offset_norm": { "dx": 0.042735, "dy": 0, "w": 0.124434, "h": 0.020396 }
38
+ },
39
+ {
40
+ "field_id": "date_of_surgery",
41
+ "label": "Date of Surgery",
42
+ "type": "date",
43
+ "anchor_bbox_norm": { "x": 0.221217, "y": 0.308858, "w": 0.081699, "h": 0.018454 },
44
+ "value_bbox_norm": null,
45
+ "value_offset_norm": { "dx": 0.084213, "dy": 0.001943, "w": 0.08547, "h": 0.018454 }
46
+ },
47
+ {
48
+ "field_id": "items",
49
+ "label": "Items / Line Items",
50
+ "type": "table",
51
+ "table_bbox_norm": { "x": 0.224987, "y": 0.373932, "w": 0.549271, "h": 0.305944 },
52
+ "header_bbox_norm": { "x": 0.226244, "y": 0.373932, "w": 0.548014, "h": 0.012626 },
53
+ "row_height_hint_norm": null,
54
+ "columns": [
55
+ { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0, "y": 0.050794, "w": 0.144165, "h": 0.949206 } },
56
+ { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
57
+ { "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.15103, "y": 0.057143, "w": 0.157895, "h": 0.942857 } },
58
+ { "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.414188, "y": 0.044444, "w": 0.059497, "h": 0.952381 } },
59
+ { "key": "price", "label": "Price", "bbox_rel_norm": null }
60
+ ],
61
+ "table_anchors": [
62
+ { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.224987, "y": 0.373932, "w": 0.080442, "h": 0.016511 } },
63
+ { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.306687, "y": 0.373932, "w": 0.081699, "h": 0.019425 } },
64
+ { "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.453746, "y": 0.376845, "w": 0.030166, "h": 0.013598 } }
65
+ ],
66
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
67
+ }
68
+ ],
69
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
70
+ }
backend/trainer_schemas/T7_SALES_ORDER_ZIMMER.schema.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "form_id": "trainer_2a12b374e66646689308af1beea88933",
3
+ "version": 3,
4
+ "page": 1,
5
+ "scalar_value_region_mode": "offset_from_anchor_v1",
6
+ "fields": [
7
+ {
8
+ "field_id": "facility_organization",
9
+ "label": "Facility / Organization",
10
+ "type": "entity",
11
+ "anchor_bbox_norm": { "x": 0.292484, "y": 0.183081, "w": 0.01634, "h": 0.045455 },
12
+ "value_bbox_norm": null,
13
+ "value_offset_norm": { "dx": -0.003268, "dy": 0.045455, "w": 0.017974, "h": 0.162879 }
14
+ },
15
+ {
16
+ "field_id": "case_location",
17
+ "label": "Case Location / Address",
18
+ "type": "text",
19
+ "anchor_bbox_norm": { "x": 0.271242, "y": 0.14899, "w": 0.013072, "h": 0.080808 },
20
+ "value_bbox_norm": null,
21
+ "value_offset_norm": { "dx": 0, "dy": 0.079545, "w": 0.017974, "h": 0.165404 }
22
+ },
23
+ {
24
+ "field_id": "vendor",
25
+ "label": "Vendor",
26
+ "type": "entity",
27
+ "anchor_bbox_norm": { "x": 0.785948, "y": 0.147727, "w": 0.027778, "h": 0.151515 },
28
+ "value_bbox_norm": null,
29
+ "value_offset_norm": null
30
+ },
31
+ {
32
+ "field_id": "physician_name",
33
+ "label": "Physician Name",
34
+ "type": "person",
35
+ "anchor_bbox_norm": { "x": 0.248366, "y": 0.145202, "w": 0.022876, "h": 0.084596 },
36
+ "value_bbox_norm": null,
37
+ "value_offset_norm": { "dx": 0.003268, "dy": 0.084596, "w": 0.02451, "h": 0.165404 }
38
+ },
39
+ {
40
+ "field_id": "date_of_surgery",
41
+ "label": "Date of Surgery",
42
+ "type": "date",
43
+ "anchor_bbox_norm": { "x": 0.21732, "y": 0.156566, "w": 0.013072, "h": 0.074495 },
44
+ "value_bbox_norm": null,
45
+ "value_offset_norm": { "dx": -0.006536, "dy": 0.073232, "w": 0.027778, "h": 0.167929 }
46
+ },
47
+ {
48
+ "field_id": "items",
49
+ "label": "Items / Line Items",
50
+ "type": "table",
51
+ "table_bbox_norm": { "x": 0.473856, "y": 0.109848, "w": 0.256536, "h": 0.707071 },
52
+ "header_bbox_norm": { "x": 0.707516, "y": 0.109848, "w": 0.021242, "h": 0.707071 },
53
+ "row_height_hint_norm": null,
54
+ "columns": [
55
+ { "key": "item_number", "label": "Item Number", "bbox_rel_norm": null },
56
+ { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
57
+ { "key": "description", "label": "Description", "bbox_rel_norm": null },
58
+ { "key": "qty", "label": "Qty", "bbox_rel_norm": null },
59
+ { "key": "price", "label": "Price", "bbox_rel_norm": null }
60
+ ],
61
+ "table_anchors": [
62
+ { "key": "item_number", "expected_text": "Item Number", "bbox_norm": null },
63
+ { "key": "description", "expected_text": "Description", "bbox_norm": null },
64
+ { "key": "qty", "expected_text": "Qty", "bbox_norm": null }
65
+ ],
66
+ "notes": "Anchors are used at runtime to localize table/header/columns under drift."
67
+ }
68
+ ],
69
+ "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
70
+ }
backend/worker/__init__.py ADDED
File without changes
backend/worker/config.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+
8
@dataclass(frozen=True)
class Settings:
    """Immutable bundle of configuration values for the PDF pipeline worker.

    Instances are frozen: attributes cannot be reassigned after construction.
    Field order is part of the interface (positional construction).
    """

    # --- Filesystem layout ---
    repo_root: Path
    backend_dir: Path
    worker_dir: Path

    # --- Gmail OAuth files ---
    credentials_path: Path
    token_path: Path

    # --- Gmail label names ---
    label_incoming: str
    label_known: str
    label_unknown: str
    label_train: str

    # --- Notification e-mail addresses ---
    notify_to_email: str
    notify_from_email: str

    # --- Trainer UI ---
    trainer_base_url: str

    # --- OpenAI access ---
    openai_api_key: str
    openai_model: str

    # --- Worker tuning knobs ---
    poll_seconds: int
    max_messages_per_poll: int
    render_pages: int
    render_dpi: int
40
+
41
+
42
def load_settings(repo_root: Path) -> Settings:
    """Build a ``Settings`` instance from process environment variables.

    This function only reads ``os.environ``; it does not load any ``.env``
    file itself, so the variables must already be present in the environment.

    Args:
        repo_root: Repository root; ``backend/`` and ``backend/worker/`` are
            derived from it.

    Returns:
        A fully populated, frozen ``Settings`` object.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY_TEST``, ``PDF_PIPELINE_NOTIFY_TO`` or
            ``PDF_PIPELINE_NOTIFY_FROM`` is missing/blank, or if
            ``PDF_TRAINER_BASE_URL`` is set to a blank value.
        ValueError: If one of the integer tuning variables is not a valid
            integer (the message names the offending variable).
    """

    def _int_env(name: str, default: str) -> int:
        # Parse an integer env var, naming the variable when it is malformed.
        raw = os.environ.get(name, default)
        try:
            return int(raw)
        except ValueError as exc:
            raise ValueError(f"Env var {name} must be an integer, got {raw!r}") from exc

    backend_dir = repo_root / "backend"
    worker_dir = backend_dir / "worker"

    # The API key is read from OPENAI_API_KEY_TEST (not OPENAI_API_KEY).
    openai_api_key = os.environ.get("OPENAI_API_KEY_TEST", "").strip()
    if not openai_api_key:
        raise RuntimeError("Missing OPENAI_API_KEY_TEST env var in backend/.env")

    notify_to = os.environ.get("PDF_PIPELINE_NOTIFY_TO", "").strip()
    if not notify_to:
        raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_TO env var")

    notify_from = os.environ.get("PDF_PIPELINE_NOTIFY_FROM", "").strip()
    if not notify_from:
        raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_FROM env var")

    # Has a default, so this only fails when the variable is explicitly blank.
    trainer_base_url = os.environ.get("PDF_TRAINER_BASE_URL", "http://localhost:5173").strip()
    if not trainer_base_url:
        raise RuntimeError("Missing PDF_TRAINER_BASE_URL env var")

    return Settings(
        repo_root=repo_root,
        backend_dir=backend_dir,
        worker_dir=worker_dir,
        credentials_path=Path(os.environ.get("GMAIL_CREDENTIALS_JSON", str(backend_dir / "credentials.json"))),
        token_path=Path(os.environ.get("GMAIL_TOKEN_JSON", str(backend_dir / "token.json"))),
        label_incoming=os.environ.get("PDF_PIPELINE_LABEL_INCOMING", "PDF_PIPELINE/INCOMING"),
        label_known=os.environ.get("PDF_PIPELINE_LABEL_KNOWN", "PDF_PIPELINE/KNOWN"),
        label_unknown=os.environ.get("PDF_PIPELINE_LABEL_UNKNOWN", "PDF_PIPELINE/UNKNOWN"),
        label_train=os.environ.get("PDF_PIPELINE_LABEL_TRAIN", "PDF_PIPELINE/TRAIN"),
        notify_to_email=notify_to,
        notify_from_email=notify_from,
        trainer_base_url=trainer_base_url,
        openai_api_key=openai_api_key,
        openai_model=os.environ.get("OPENAI_MODEL", "gpt-4.1-mini"),
        poll_seconds=_int_env("PDF_PIPELINE_POLL_SECONDS", "20"),
        max_messages_per_poll=_int_env("PDF_PIPELINE_MAX_PER_POLL", "5"),
        render_pages=_int_env("PDF_PIPELINE_RENDER_PAGES", "2"),
        render_dpi=_int_env("PDF_PIPELINE_RENDER_DPI", "200"),
    )
backend/worker/gmail_client.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import base64
3
+ import os
4
+ from dataclasses import dataclass
5
+ from email.message import EmailMessage
6
+ from pathlib import Path
7
+ from typing import List, Optional, Tuple
8
+
9
+ from google.oauth2.credentials import Credentials
10
+ from googleapiclient.discovery import build
11
+
12
+
13
# OAuth scopes handed to Credentials.from_authorized_user_file by GmailClient.
SCOPES = [
    "https://www.googleapis.com/auth/gmail.modify",  # label changes / read state
    "https://www.googleapis.com/auth/gmail.send",  # outgoing notifications
]
17
+
18
+
19
@dataclass
class GmailMessage:
    """Identifier pair for a single Gmail message returned by a search."""

    # Gmail message id (the "id" key of a messages.list entry).
    msg_id: str
    # Thread id of the message; empty string when the entry had no "threadId".
    thread_id: str
23
+
24
+
25
+ class GmailClient:
26
+ def __init__(self, credentials_path: Path, token_path: Path):
27
+ if not credentials_path.exists():
28
+ raise FileNotFoundError(f"Missing OAuth client json: {credentials_path}")
29
+ if not token_path.exists():
30
+ raise FileNotFoundError(f"Missing token json: {token_path}")
31
+
32
+ creds = Credentials.from_authorized_user_file(str(token_path), SCOPES)
33
+ self.service = build("gmail", "v1", credentials=creds, cache_discovery=False)
34
+
35
+ def list_labels(self) -> List[dict]:
36
+ resp = self.service.users().labels().list(userId="me").execute()
37
+ return resp.get("labels", [])
38
+
39
+ def get_label_id(self, name: str) -> Optional[str]:
40
+ for lbl in self.list_labels():
41
+ if lbl.get("name") == name:
42
+ return lbl.get("id")
43
+ return None
44
+
45
+ def ensure_label(self, name: str) -> str:
46
+ existing = self.get_label_id(name)
47
+ if existing:
48
+ return existing
49
+
50
+ body = {
51
+ "name": name,
52
+ "labelListVisibility": "labelShow",
53
+ "messageListVisibility": "show",
54
+ }
55
+ created = self.service.users().labels().create(userId="me", body=body).execute()
56
+ return created["id"]
57
+
58
+ def search_unread_pdf_messages(self, label_name: str, max_results: int = 10) -> List[GmailMessage]:
59
+ # Gmail search query: label + unread + pdf attachments
60
+ query = f'label:"{label_name}" is:unread has:attachment filename:pdf'
61
+ resp = self.service.users().messages().list(userId="me", q=query, maxResults=max_results).execute()
62
+ msgs = resp.get("messages", []) or []
63
+ out: List[GmailMessage] = []
64
+ for m in msgs:
65
+ out.append(GmailMessage(msg_id=m["id"], thread_id=m.get("threadId", "")))
66
+ return out
67
+
68
+ def get_message_full(self, msg_id: str) -> dict:
69
+ return self.service.users().messages().get(userId="me", id=msg_id, format="full").execute()
70
+
71
+ def _walk_parts(self, payload: dict) -> List[dict]:
72
+ parts = []
73
+ stack = [payload]
74
+ while stack:
75
+ node = stack.pop()
76
+ if not isinstance(node, dict):
77
+ continue
78
+ if node.get("parts"):
79
+ stack.extend(node["parts"])
80
+ parts.append(node)
81
+ return parts
82
+
83
+ def list_pdf_attachments(self, msg_full: dict) -> List[Tuple[str, str]]:
84
+ """
85
+ Returns [(filename, attachmentId), ...] for application/pdf parts.
86
+ """
87
+ payload = msg_full.get("payload", {}) or {}
88
+ parts = self._walk_parts(payload)
89
+
90
+ out: List[Tuple[str, str]] = []
91
+ for p in parts:
92
+ filename = (p.get("filename") or "").strip()
93
+ body = p.get("body") or {}
94
+ att_id = body.get("attachmentId")
95
+ mime = (p.get("mimeType") or "").lower()
96
+
97
+ if filename.lower().endswith(".pdf") or mime == "application/pdf":
98
+ if filename and att_id:
99
+ out.append((filename, att_id))
100
+ return out
101
+
102
+ def download_attachment(self, msg_id: str, attachment_id: str) -> bytes:
103
+ att = (
104
+ self.service.users()
105
+ .messages()
106
+ .attachments()
107
+ .get(userId="me", messageId=msg_id, id=attachment_id)
108
+ .execute()
109
+ )
110
+ data = att.get("data", "")
111
+ return base64.urlsafe_b64decode(data.encode("utf-8"))
112
+
113
+ def move_message(
114
+ self,
115
+ msg_id: str,
116
+ add_labels: List[str],
117
+ remove_labels: List[str],
118
+ mark_read: bool = True,
119
+ ) -> None:
120
+ add_ids = [self.ensure_label(n) for n in add_labels]
121
+ remove_ids = [self.ensure_label(n) for n in remove_labels]
122
+
123
+ if mark_read:
124
+ remove_ids.append("UNREAD")
125
+
126
+ body = {"addLabelIds": add_ids, "removeLabelIds": remove_ids}
127
+ self.service.users().messages().modify(userId="me", id=msg_id, body=body).execute()
128
+
129
+ def send_email(self, to_email: str, subject: str, body_text: str, from_email: Optional[str] = None, attachments: Optional[List[Tuple[str, bytes]]] = None) -> None:
130
+ msg = EmailMessage()
131
+ msg["To"] = to_email
132
+ msg["Subject"] = subject
133
+ if from_email:
134
+ msg["From"] = from_email
135
+ msg.set_content(body_text)
136
+
137
+ attachments = attachments or []
138
+ for filename, data in attachments:
139
+ # basic content type guess for pdf/json
140
+ if filename.lower().endswith(".pdf"):
141
+ maintype, subtype = "application", "pdf"
142
+ elif filename.lower().endswith(".json"):
143
+ maintype, subtype = "application", "json"
144
+ else:
145
+ maintype, subtype = "application", "octet-stream"
146
+ msg.add_attachment(data, maintype=maintype, subtype=subtype, filename=filename)
147
+
148
+ raw = base64.urlsafe_b64encode(msg.as_bytes()).decode("utf-8")
149
+ self.service.users().messages().send(userId="me", body={"raw": raw}).execute()
backend/worker/openai_classifier.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import json
5
+ import re
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+
9
+ from openai import OpenAI
10
+
11
+
12
+ # ----------------------------
13
+ # Known templates (mirror your main system)
14
+ # ----------------------------
15
# Each entry carries a template_id, a human-readable name and two keyword
# lists. The whole list is serialised into the classifier prompt, and the ids
# and names are used to normalise the model's answer.
KNOWN_TEMPLATES: List[Dict[str, Any]] = [
    # T1 - I-FACTOR
    {
        "template_id": "T1_IFACTOR_DELIVERED_ORDER",
        "name": "I-FACTOR Delivered Order Form",
        "keywords_all": ["delivered order form"],
        "keywords_any": ["i-factor", "cerapedics", "product information", "stickers", "bill to", "delivered to"],
    },
    # T2 - SeaSpine
    {
        "template_id": "T2_SEASPINE_DELIVERED_GOODS_FORM",
        "name": "SeaSpine Delivered Goods Form",
        "keywords_all": ["delivered goods form"],
        "keywords_any": ["seaspine", "isotis", "handling fee", "sales order", "invoice"],
    },
    # T3 - Astura
    {
        "template_id": "T3_ASTURA_SALES_ORDER_FORM",
        "name": "Astura Sales Order Form",
        "keywords_all": [],
        "keywords_any": ["astura", "dc141", "ca200", "cbba", "sales order"],
    },
    # T4 - estimation of charges
    {
        "template_id": "T4_MEDICAL_ESTIMATION_OF_CHARGES",
        "name": "Medical Estimation of Charges",
        "keywords_all": [],
        "keywords_any": ["estimation of charges", "good faith estimate", "patient responsibility", "insurance"],
    },
    # T5 - clinical progress note
    {
        "template_id": "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
        "name": "Clinical Progress Note Postop",
        "keywords_all": [],
        "keywords_any": ["clinical progress note", "progress note", "post-op", "assessment", "plan"],
    },
    # T6 - customer charge sheet
    {
        "template_id": "T6_CUSTOMER_CHARGE_SHEET_SPINE",
        "name": "Customer Charge Sheet Spine",
        "keywords_all": [],
        "keywords_any": ["customer charge sheet", "charge sheet", "spine", "qty", "unit price", "total"],
    },
    # T7 - Zimmer
    {
        "template_id": "T7_SALES_ORDER_ZIMMER",
        "name": "Zimmer Sales Order",
        "keywords_all": [],
        "keywords_any": ["zimmer", "zimmer biomet", "biomet", "sales order", "purchase order", "po number"],
    },
]
59
+
60
+
61
+ # ----------------------------
62
+ # Public API (EXPLICIT key/model)
63
+ # ----------------------------
64
def classify_with_openai(
    image_paths: List[str],
    *,
    api_key: str,
    model: str,
    max_pages: int = 2,
) -> Dict[str, Any]:
    """Classify rendered page images against ``KNOWN_TEMPLATES`` via OpenAI.

    Args:
        image_paths: Paths of PNG page renders; only the first ``max_pages``
            are sent (a non-positive ``max_pages`` is treated as 1).
        api_key: OpenAI API key, passed explicitly (no environment lookup).
        model: Chat-completions model name.
        max_pages: Upper bound on the number of pages sent.

    Returns:
        A dict with ``template_id`` (a known id or ``"UNKNOWN"``),
        ``confidence`` (clamped to 0..1, and to at most 0.6 for ``UNKNOWN``),
        ``reason`` (truncated to 500 characters) and an empty
        ``trainer_schema`` placeholder.

    Raises:
        RuntimeError: If ``api_key`` or ``model`` is empty after stripping.
    """
    key = (api_key or "").strip()
    model_name = (model or "").strip()

    if not key:
        raise RuntimeError("classify_with_openai: api_key is empty")
    if not model_name:
        raise RuntimeError("classify_with_openai: model is empty")

    # Nothing to look at: answer without contacting the API.
    if not image_paths:
        return {
            "template_id": "UNKNOWN",
            "confidence": 0.0,
            "reason": "No rendered images provided.",
            "trainer_schema": {},
        }

    # Only the leading pages are encoded, to keep the request small.
    page_limit = max_pages if max_pages > 0 else 1
    encoded_pages: List[str] = [_png_file_to_b64(Path(p)) for p in image_paths[:page_limit]]

    client = OpenAI(api_key=key)

    system = (
        "You are a strict document template classifier.\n"
        "You will be shown PNG images of PDF pages (scanned forms).\n"
        "Your job is to decide which known template matches.\n\n"
        "Hard rules:\n"
        "1) Output VALID JSON only. No markdown. No extra text.\n"
        "2) Choose ONE template_id from the provided list OR return template_id='UNKNOWN'.\n"
        "3) If uncertain, return UNKNOWN.\n"
        "4) Use printed headers, vendor branding, and distinctive layout cues.\n"
        "5) confidence must be 0..1.\n"
    )

    prompt_payload = {
        "known_templates": KNOWN_TEMPLATES,
        "output_schema": {
            "template_id": "string (one of known template_ids) OR 'UNKNOWN'",
            "confidence": "number 0..1",
            "reason": "short string",
        },
    }

    user_text = (
        "Classify the attached document images against known_templates.\n"
        "Return JSON matching output_schema.\n\n"
    ) + json.dumps(prompt_payload, indent=2)

    # One text part followed by one image part per encoded page.
    image_parts: List[Dict[str, Any]] = [
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{page}"}}
        for page in encoded_pages
    ]
    content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}] + image_parts

    resp = client.chat.completions.create(
        model=model_name,
        temperature=0.0,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": content},
        ],
    )

    answer = _parse_json_object((resp.choices[0].message.content or "").strip())

    # Anything that is not a known id (or exact template name) becomes UNKNOWN.
    template_id = _normalize_template_id(str(answer.get("template_id") or "").strip())
    confidence = max(0.0, min(1.0, _to_float(answer.get("confidence"), default=0.0)))
    reason = str(answer.get("reason") or "").strip()

    # An UNKNOWN verdict is never reported with more than 0.6 confidence.
    if template_id == "UNKNOWN":
        confidence = min(confidence, 0.6)

    return {
        "template_id": template_id,
        "confidence": confidence,
        "reason": reason[:500],
        "trainer_schema": {},
    }
176
+
177
+
178
+ # ----------------------------
179
+ # Legacy wrapper (ENV-based) - keep only if you want
180
+ # ----------------------------
181
def classify_with_openai_from_env(image_paths: List[str]) -> Dict[str, Any]:
    """Environment-driven convenience wrapper around ``classify_with_openai``.

    The key is taken from ``OPENAI_API_KEY_TEST`` (falling back to
    ``OPENAI_API_KEY``) and the model from ``OPENAI_MODEL`` (default
    ``gpt-4o-mini``). Kept for callers that have not switched to the explicit
    keyword-argument API yet.

    Raises:
        RuntimeError: If no API key is configured.
    """
    import os

    env = os.environ
    configured_key = env.get("OPENAI_API_KEY_TEST") or env.get("OPENAI_API_KEY") or ""
    api_key = configured_key.strip()
    if not api_key:
        raise RuntimeError("Missing OPENAI_API_KEY_TEST (or OPENAI_API_KEY)")

    model = (env.get("OPENAI_MODEL") or "gpt-4o-mini").strip()

    # Delegate to the single real implementation.
    return classify_with_openai(image_paths, api_key=api_key, model=model)
202
+
203
+
204
+ # ----------------------------
205
+ # Helpers
206
+ # ----------------------------
207
+ def _normalize_template_id(template_id: str) -> str:
208
+ tid = (template_id or "").strip()
209
+ if not tid:
210
+ return "UNKNOWN"
211
+
212
+ known_ids = {t["template_id"] for t in KNOWN_TEMPLATES}
213
+ if tid in known_ids:
214
+ return tid
215
+
216
+ # common garbage patterns (model returns name instead of id, etc.)
217
+ low = tid.lower()
218
+ for t in KNOWN_TEMPLATES:
219
+ if t["name"].lower() == low:
220
+ return t["template_id"]
221
+
222
+ return "UNKNOWN"
223
+
224
+
225
+ def _png_file_to_b64(path: Path) -> str:
226
+ data = path.read_bytes()
227
+ return base64.b64encode(data).decode("utf-8")
228
+
229
+
230
+ _JSON_BLOCK_RE = re.compile(r"\{.*\}", re.DOTALL)
231
+
232
+
233
+ def _parse_json_object(text: str) -> Dict[str, Any]:
234
+ """
235
+ Extract and parse the first {...} JSON object from model output.
236
+ Handles:
237
+ - pure JSON
238
+ - JSON embedded in text
239
+ - fenced code blocks (we strip fences)
240
+ """
241
+ if not text:
242
+ return {}
243
+
244
+ s = text.strip()
245
+
246
+ # Strip ```json fences if present
247
+ s = _strip_code_fences(s)
248
+
249
+ # Fast path: starts with "{"
250
+ if s.startswith("{"):
251
+ try:
252
+ return json.loads(s)
253
+ except Exception:
254
+ pass
255
+
256
+ # Try to find a JSON-looking block
257
+ m = _JSON_BLOCK_RE.search(s)
258
+ if not m:
259
+ return {}
260
+
261
+ chunk = m.group(0)
262
+ try:
263
+ return json.loads(chunk)
264
+ except Exception:
265
+ # last attempt: remove trailing commas (common model mistake)
266
+ cleaned = _remove_trailing_commas(chunk)
267
+ try:
268
+ return json.loads(cleaned)
269
+ except Exception:
270
+ return {}
271
+
272
+
273
+ def _strip_code_fences(s: str) -> str:
274
+ # remove leading ```json / ``` and trailing ```
275
+ if s.startswith("```"):
276
+ s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
277
+ s = re.sub(r"\s*```$", "", s)
278
+ return s.strip()
279
+
280
+
281
+ def _remove_trailing_commas(s: str) -> str:
282
+ # naive but effective: remove ",}" and ",]" patterns repeatedly
283
+ prev = None
284
+ cur = s
285
+ while prev != cur:
286
+ prev = cur
287
+ cur = re.sub(r",\s*}", "}", cur)
288
+ cur = re.sub(r",\s*]", "]", cur)
289
+ return cur
290
+
291
+
292
+ def _to_float(x: Any, default: float = 0.0) -> float:
293
+ try:
294
+ return float(x)
295
+ except Exception:
296
+ return default
297
+
298
+
299
+ # ----------------------------
300
+ # Optional: quick self-check (manual)
301
+ # ----------------------------
302
+ def _debug_summarize_result(res: Dict[str, Any]) -> str:
303
+ return f"template_id={res.get('template_id')} conf={res.get('confidence')} reason={str(res.get('reason') or '')[:80]}"
304
+
305
+
306
def _validate_known_templates() -> Tuple[bool, str]:
    """Sanity-check ``KNOWN_TEMPLATES`` and return ``(ok, message)``.

    Fails when any entry lacks a ``template_id`` or when two entries share
    the same id; the missing-id check takes precedence.
    """
    template_ids = []
    for tpl in KNOWN_TEMPLATES:
        template_ids.append(tpl.get("template_id"))

    if not all(template_ids):
        return False, "One or more templates missing template_id"
    if len(template_ids) > len(set(template_ids)):
        return False, "Duplicate template_id in KNOWN_TEMPLATES"
    return True, "ok"
backend/worker/out/.keep ADDED
File without changes
backend/worker/pdf_render.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import List
6
+
7
+ import fitz # PyMuPDF
8
+ from PIL import Image
9
+
10
+
11
@dataclass
class RenderedImage:
    """One rendered PDF page written to disk."""

    # Location of the PNG file.
    path: Path
    # Zero-based index of the page within the source PDF.
    page_index: int
15
+
16
+
17
def render_pdf_to_pngs(pdf_path: Path, out_dir: Path, pages: int = 2, dpi: int = 200) -> List[RenderedImage]:
    """Render the first pages of a PDF to RGB PNG files.

    Args:
        pdf_path: PDF to render.
        out_dir: Destination directory; created if missing.
        pages: Maximum number of leading pages to render. A value of zero or
            less renders nothing.
        dpi: Render resolution; converted to a zoom factor relative to 72 dpi.

    Returns:
        One ``RenderedImage`` per rendered page, in page order. Files are
        named ``<pdf stem>_p<1-based page number>.png``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)

    rendered: List[RenderedImage] = []
    doc = fitz.open(pdf_path)
    try:
        n = min(pages, doc.page_count)
        for i in range(n):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat, alpha=False)

            img_path = out_dir / f"{pdf_path.stem}_p{i+1}.png"
            pix.save(str(img_path))

            # Normalize to RGB with PIL. The source handle is closed before the
            # same path is rewritten; convert() returns an independent image.
            with Image.open(img_path) as im:
                rgb = im.convert("RGB")
            rgb.save(img_path)

            rendered.append(RenderedImage(path=img_path, page_index=i))
    finally:
        # Release the document even when a page fails to render.
        doc.close()
    return rendered
backend/worker/prompts.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Ids of the known templates. This list is interpolated verbatim into
# SYSTEM_PROMPT, so it must stay a plain list of strings.
TEMPLATE_IDS = [
    "T1_IFACTOR_DELIVERED_ORDER",
    "T2_SEASPINE_DELIVERED_GOODS_FORM",
    "T3_ASTURA_SALES_ORDER_FORM",
    "T4_MEDICAL_ESTIMATION_OF_CHARGES",
    "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
    "T6_CUSTOMER_CHARGE_SHEET_SPINE",
    "T7_SALES_ORDER_ZIMMER",
]
10
+
11
+ SYSTEM_PROMPT = f"""
12
+ You are classifying a medical/healthcare sales/order PDF form into one of the known templates,
13
+ and extracting a "trainer schema" for onboarding.
14
+
15
+ Known template_ids:
16
+ {TEMPLATE_IDS}
17
+
18
+ Rules:
19
+ - You MUST return JSON only (no markdown, no extra text).
20
+ - If none match confidently, return template_id "UNKNOWN".
21
+ - Always produce a schema object (even for UNKNOWN) so onboarding can proceed.
22
+
23
+ Output JSON shape (strict):
24
+ {{
25
+ "template_id": "<one of known template_ids or UNKNOWN>",
26
+ "confidence": 0.0,
27
+ "reason": "<short reason>",
28
+ "trainer_schema": {{
29
+ "form_id": "<suggested id>",
30
+ "version": 1,
31
+ "page": 1,
32
+ "scalar_value_region_mode": "offset_from_anchor_v1",
33
+ "fields": [
34
+ {{
35
+ "field_id": "facility_organization",
36
+ "label": "Facility / Organization",
37
+ "type": "entity",
38
+ "anchor_hint": "<printed label text or None>",
39
+ "value_hint": "<what to extract>"
40
+ }},
41
+ {{
42
+ "field_id": "case_location_address",
43
+ "label": "Case Location / Address",
44
+ "type": "entity",
45
+ "anchor_hint": "<printed label text or None>",
46
+ "value_hint": "<what to extract>"
47
+ }},
48
+ {{
49
+ "field_id": "vendor",
50
+ "label": "Vendor",
51
+ "type": "entity",
52
+ "anchor_hint": "<printed label text or None>",
53
+ "value_hint": "<what to extract>"
54
+ }},
55
+ {{
56
+ "field_id": "physician_name",
57
+ "label": "Physician Name",
58
+ "type": "person",
59
+ "anchor_hint": "<printed label text or None>",
60
+ "value_hint": "<what to extract>"
61
+ }},
62
+ {{
63
+ "field_id": "date_of_surgery",
64
+ "label": "Date of Surgery",
65
+ "type": "date",
66
+ "anchor_hint": "<printed label text or None>",
67
+ "value_hint": "<what to extract>"
68
+ }},
69
+ {{
70
+ "field_id": "items",
71
+ "label": "Items / Line Items",
72
+ "type": "table",
73
+ "table_hint": {{
74
+ "expected_columns": ["item_number","description","qty","lot_number","price","extended_price"],
75
+ "where_on_page": "<short description>",
76
+ "header_text_examples": ["Item Number","Description","Qty"]
77
+ }}
78
+ }}
79
+ ]
80
+ }}
81
+ }}
82
+ """
83
+
84
+ USER_PROMPT = """
85
+ Classify the form template and generate trainer_schema based on the provided page images.
86
+ Focus on printed structure, titles, logos, and table headers.
87
+ """
backend/worker/template_registry_snapshot.py ADDED
File without changes
backend/worker/template_store.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from pathlib import Path
4
+ from typing import Any, Dict, List
5
+
6
+ TEMPLATE_DIR = Path(__file__).resolve().parent / "trainer_templates"
7
+
8
def list_trainer_templates() -> List[Dict[str, Any]]:
    """
    Summarize every trainer template stored as ``*.json`` in TEMPLATE_DIR.

    The directory is created when missing and files are visited in sorted
    path order. A file is skipped (never fatal) when it cannot be read, is
    not valid JSON, or holds a top-level JSON value that is not an object.

    Returns:
        One dict per usable file with the keys ``template_id``, ``name`` and
        ``has_config`` (always True). The config body itself is left out.
    """
    TEMPLATE_DIR.mkdir(parents=True, exist_ok=True)
    out: List[Dict[str, Any]] = []

    for p in sorted(TEMPLATE_DIR.glob("*.json")):
        try:
            cfg = json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            continue

        if not isinstance(cfg, dict):
            # Valid JSON that is not an object (e.g. a list) has no .get();
            # skip it instead of letting one stray file break the listing.
            continue

        # Fall back from the explicit id, to the form id, to the file name.
        template_id = cfg.get("template_id") or cfg.get("form_id") or p.stem
        name = cfg.get("name") or cfg.get("form_id") or template_id

        out.append({
            "template_id": template_id,
            "name": name,
            # The trainer config is deliberately omitted: it may be large.
            "has_config": True,
        })

    return out
29
+
30
def save_trainer_template(template_id: str, cfg: Dict[str, Any]) -> Path:
    """
    Write a trainer config to ``TEMPLATE_DIR/<template_id>.json``.

    The caller's ``cfg`` is not modified: a shallow copy is serialized, with
    its ``template_id`` key forced to the given ``template_id``. The directory
    is created when missing and the file is written as UTF-8 JSON with a
    two-space indent.

    Returns:
        The path of the file that was written.
    """
    TEMPLATE_DIR.mkdir(parents=True, exist_ok=True)

    # Copy-and-override in one step; the argument always wins over cfg's own id.
    payload = {**cfg, "template_id": template_id}

    target = TEMPLATE_DIR / f"{template_id}.json"
    serialized = json.dumps(payload, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return target
backend/worker/tmp/.keep ADDED
File without changes
backend/worker/uploads/.keep ADDED
File without changes
backend/worker/worker.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import time
5
+ import uuid
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import List, Tuple
9
+
10
+ from dotenv import load_dotenv
11
+
12
+ from .gmail_client import GmailClient
13
+ from .openai_classifier import classify_with_openai
14
+ from .pdf_render import render_pdf_to_pngs
15
+
16
+ # Force load repo_root/backend/.env (single source of truth)
17
+ REPO_ROOT = Path(__file__).resolve().parents[2]
18
+ load_dotenv(REPO_ROOT / "backend" / ".env", override=True)
19
+
20
+
21
+ @dataclass
22
+ class Settings:
23
+ creds_path: Path
24
+ token_path: Path
25
+
26
+ label_incoming: str
27
+ label_known: str
28
+ label_unknown: str
29
+ label_train: str
30
+
31
+ # Rep email for UNKNOWN detection
32
+ rep_notify_to: str
33
+ notify_from: str
34
+
35
+ # OpenAI
36
+ openai_api_key: str
37
+ openai_model: str
38
+
39
+ poll_seconds: int
40
+ max_messages_per_poll: int
41
+
42
+ render_pages: int
43
+ render_dpi: int
44
+
45
+ trainer_base_url: str
46
+
47
+
48
+ def load_settings() -> Settings:
49
+ base = Path(__file__).resolve().parents[1] # backend/
50
+ creds = Path(os.environ.get("GMAIL_CREDENTIALS_JSON", str(base / "credentials.json")))
51
+ token = Path(os.environ.get("GMAIL_TOKEN_JSON", str(base / "token.json")))
52
+
53
+ openai_api_key = (os.environ.get("OPENAI_API_KEY_TEST") or os.environ.get("OPENAI_API_KEY") or "").strip()
54
+ openai_model = (os.environ.get("OPENAI_MODEL") or "gpt-4o-mini").strip()
55
+
56
+ return Settings(
57
+ creds_path=creds,
58
+ token_path=token,
59
+
60
+ label_incoming=os.environ.get("PDF_PIPELINE_LABEL_INCOMING", "PDF_PIPELINE/INCOMING"),
61
+ label_known=os.environ.get("PDF_PIPELINE_LABEL_KNOWN", "PDF_PIPELINE/KNOWN"),
62
+ label_unknown=os.environ.get("PDF_PIPELINE_LABEL_UNKNOWN", "PDF_PIPELINE/UNKNOWN"),
63
+ label_train=os.environ.get("PDF_PIPELINE_LABEL_TRAIN", "PDF_PIPELINE/TRAIN"),
64
+
65
+ notify_from=(os.environ.get("PDF_PIPELINE_NOTIFY_FROM") or "").strip(),
66
+ rep_notify_to=(os.environ.get("PDF_PIPELINE_NOTIFY_TO") or "").strip(),
67
+
68
+ openai_api_key=openai_api_key,
69
+ openai_model=openai_model,
70
+
71
+ poll_seconds=int(os.environ.get("PDF_PIPELINE_POLL_SECONDS", "20")),
72
+ max_messages_per_poll=int(os.environ.get("PDF_PIPELINE_MAX_PER_POLL", "5")),
73
+
74
+ render_pages=int(os.environ.get("PDF_PIPELINE_RENDER_PAGES", "2")),
75
+ render_dpi=int(os.environ.get("PDF_PIPELINE_RENDER_DPI", "200")),
76
+
77
+ trainer_base_url=(os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip(),
78
+ )
79
+
80
+
81
+ def _safe_name(s: str) -> str:
82
+ return "".join(c if c.isalnum() or c in ("-", "_", ".", " ") else "_" for c in s).strip()
83
+
84
+
85
+ def _write_pipeline_pdf(root_worker_dir: Path, filename: str, pdf_bytes: bytes) -> Tuple[str, Path]:
86
+ """
87
+ Persist PDF for the trainer to fetch later.
88
+ Returns (pdf_id, pdf_path_on_disk).
89
+ """
90
+ uploads_dir = root_worker_dir / "uploads"
91
+ uploads_dir.mkdir(parents=True, exist_ok=True)
92
+
93
+ pdf_id = uuid.uuid4().hex
94
+ pdf_path = uploads_dir / f"{pdf_id}.pdf"
95
+ name_path = uploads_dir / f"{pdf_id}.name.txt"
96
+
97
+ pdf_path.write_bytes(pdf_bytes)
98
+ name_path.write_text(filename, encoding="utf-8")
99
+
100
+ return pdf_id, pdf_path
101
+
102
+
103
+ def _process_train_label(gmail: GmailClient, s: Settings, root: Path) -> None:
104
+ """
105
+ TRAIN behavior:
106
+ - Pull unread PDFs from TRAIN label
107
+ - Store into uploads/ and print trainer link
108
+ - Mark read
109
+ - Do NOT classify
110
+ - Do NOT move labels
111
+ """
112
+ msgs = gmail.search_unread_pdf_messages(s.label_train, max_results=s.max_messages_per_poll)
113
+ if not msgs:
114
+ return
115
+
116
+ for m in msgs:
117
+ msg_full = gmail.get_message_full(m.msg_id)
118
+ pdf_atts = gmail.list_pdf_attachments(msg_full)
119
+
120
+ if not pdf_atts:
121
+ gmail.move_message(m.msg_id, add_labels=[], remove_labels=[], mark_read=True)
122
+ continue
123
+
124
+ for (filename, att_id) in pdf_atts:
125
+ filename = _safe_name(filename or "attachment.pdf")
126
+ pdf_bytes = gmail.download_attachment(m.msg_id, att_id)
127
+
128
+ pdf_id, stored_pdf_path = _write_pipeline_pdf(root, filename, pdf_bytes)
129
+ trainer_link = f"{s.trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
130
+
131
+ gmail.move_message(m.msg_id, add_labels=[], remove_labels=[], mark_read=True)
132
+
133
+ print(
134
+ f"[worker][TRAIN] stored PDF msg={m.msg_id} file={filename} "
135
+ f"pdf_id={pdf_id} stored={stored_pdf_path}"
136
+ )
137
+ print(f"[worker][TRAIN] open: {trainer_link}")
138
+
139
+
140
+ def main():
141
+ s = load_settings()
142
+
143
+ # Validate settings
144
+ if not s.rep_notify_to:
145
+ raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_TO (rep email for UNKNOWN detection)")
146
+ if not s.notify_from:
147
+ raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_FROM (OAuth Gmail account email)")
148
+ if not s.trainer_base_url:
149
+ raise RuntimeError("Missing PDF_TRAINER_BASE_URL (base URL for trainer link)")
150
+ if not s.openai_api_key:
151
+ raise RuntimeError("Missing OPENAI_API_KEY_TEST (or OPENAI_API_KEY) in backend/.env")
152
+
153
+ gmail = GmailClient(s.creds_path, s.token_path)
154
+
155
+ # Ensure labels exist
156
+ gmail.ensure_label(s.label_incoming)
157
+ gmail.ensure_label(s.label_known)
158
+ gmail.ensure_label(s.label_unknown)
159
+ gmail.ensure_label(s.label_train)
160
+
161
+ root = Path(__file__).resolve().parents[0] # backend/worker
162
+ tmp_dir = root / "tmp"
163
+ tmp_dir.mkdir(parents=True, exist_ok=True)
164
+
165
+ print(f"[worker] Watching label: {s.label_incoming}")
166
+ print(f"[worker] Known label: {s.label_known}")
167
+ print(f"[worker] Unknown label: {s.label_unknown}")
168
+ print(f"[worker] Train label: {s.label_train}")
169
+ print(f"[worker] Rep notify to: {s.rep_notify_to}")
170
+ print(f"[worker] OpenAI model: {s.openai_model}")
171
+
172
+ while True:
173
+ try:
174
+ # 1) TRAIN lane
175
+ _process_train_label(gmail, s, root)
176
+
177
+ # 2) Main pipeline (INCOMING -> KNOWN/UNKNOWN)
178
+ msgs = gmail.search_unread_pdf_messages(s.label_incoming, max_results=s.max_messages_per_poll)
179
+ if not msgs:
180
+ time.sleep(s.poll_seconds)
181
+ continue
182
+
183
+ for m in msgs:
184
+ msg_full = gmail.get_message_full(m.msg_id)
185
+ pdf_atts = gmail.list_pdf_attachments(msg_full)
186
+
187
+ if not pdf_atts:
188
+ # Remove INCOMING + mark read so it doesn't loop forever
189
+ gmail.move_message(m.msg_id, add_labels=[], remove_labels=[s.label_incoming], mark_read=True)
190
+ continue
191
+
192
+ any_unknown = False
193
+ unknown_payloads: List[Tuple[str, bytes]] = []
194
+
195
+ # Classify all PDF attachments for this message
196
+ for (filename, att_id) in pdf_atts:
197
+ filename = _safe_name(filename or "attachment.pdf")
198
+ pdf_bytes = gmail.download_attachment(m.msg_id, att_id)
199
+
200
+ stamp = str(int(time.time()))
201
+ pdf_path = tmp_dir / f"{stamp}_{m.msg_id}_{filename}"
202
+ pdf_path.write_bytes(pdf_bytes)
203
+
204
+ img_dir = tmp_dir / f"{stamp}_{m.msg_id}_{pdf_path.stem}"
205
+ rendered = render_pdf_to_pngs(pdf_path, img_dir, pages=s.render_pages, dpi=s.render_dpi)
206
+ image_paths = [str(r.path) for r in rendered]
207
+
208
+ result = classify_with_openai(
209
+ image_paths,
210
+ api_key=s.openai_api_key,
211
+ model=s.openai_model,
212
+ )
213
+
214
+ template_id = (result.get("template_id") or "UNKNOWN").strip()
215
+ conf = float(result.get("confidence") or 0.0)
216
+
217
+ if template_id == "UNKNOWN":
218
+ any_unknown = True
219
+ unknown_payloads.append((filename, pdf_bytes))
220
+ print(f"[worker] UNKNOWN attachment conf={conf:.3f} msg={m.msg_id} file={filename}")
221
+ else:
222
+ print(
223
+ f"[worker] KNOWN attachment template={template_id} conf={conf:.3f} "
224
+ f"msg={m.msg_id} file={filename}"
225
+ )
226
+
227
+ # Apply Gmail label ONCE per message
228
+ if any_unknown:
229
+ gmail.move_message(
230
+ m.msg_id,
231
+ add_labels=[s.label_unknown],
232
+ remove_labels=[s.label_incoming],
233
+ mark_read=True,
234
+ )
235
+ else:
236
+ gmail.move_message(
237
+ m.msg_id,
238
+ add_labels=[s.label_known],
239
+ remove_labels=[s.label_incoming],
240
+ mark_read=True,
241
+ )
242
+
243
+ # Notify rep for each unknown PDF attachment
244
+ if any_unknown:
245
+ for (filename, pdf_bytes) in unknown_payloads:
246
+ pdf_id, stored_pdf_path = _write_pipeline_pdf(root, filename, pdf_bytes)
247
+ trainer_link = f"{s.trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
248
+
249
+ subject = "Action required: Unknown PDF format (template not found)"
250
+ body = (
251
+ "Hi,\n\n"
252
+ "We received a PDF that does not match any existing templates in the system.\n\n"
253
+ "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
254
+ f"{trainer_link}\n\n"
255
+ "The original PDF is attached for reference.\n\n"
256
+ "Thank you,\n"
257
+ "Inserio Automation\n"
258
+ )
259
+
260
+ attachments: List[Tuple[str, bytes]] = []
261
+ if len(pdf_bytes) < 20 * 1024 * 1024:
262
+ attachments.append((filename, pdf_bytes))
263
+ else:
264
+ body += "\nNote: The PDF was too large to attach.\n"
265
+
266
+ gmail.send_email(
267
+ to_email=s.rep_notify_to,
268
+ from_email=s.notify_from,
269
+ subject=subject,
270
+ body_text=body,
271
+ attachments=attachments,
272
+ )
273
+
274
+ print(
275
+ f"[worker] UNKNOWN: emailed rep {s.rep_notify_to} msg={m.msg_id} file={filename} "
276
+ f"pdf_id={pdf_id} stored={stored_pdf_path}"
277
+ )
278
+
279
+ except Exception as e:
280
+ print(f"[worker] ERROR: {e}")
281
+
282
+ time.sleep(s.poll_seconds)
283
+
284
+
285
+ if __name__ == "__main__":
286
+ main()
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  fastapi
2
  uvicorn[standard]
 
 
1
  fastapi
2
  uvicorn[standard]
3
+ python-dotenv