Spaces:
Sleeping
Sleeping
Avinash committed on
Commit ·
4a5269c
1
Parent(s): 7dac948
integrate real backend api
Browse files- .gitignore +8 -0
- api.py +1 -7
- backend/__init__.py +0 -0
- backend/api.py +213 -0
- backend/oauth_bootstrap.py +51 -0
- backend/scripts/apply_trainer_schemas.py +48 -0
- backend/scripts/generate_template_schema_skeletons.py +137 -0
- backend/scripts/migrate_hardcoded_templates.py +99 -0
- backend/templates/T1_IFACTOR_DELIVERED_ORDER.json +206 -0
- backend/templates/T2_SEASPINE_DELIVERED_GOODS_FORM.json +200 -0
- backend/templates/T3_ASTURA_SALES_ORDER_FORM.json +203 -0
- backend/templates/T4_MEDICAL_ESTIMATION_OF_CHARGES.json +167 -0
- backend/templates/T5_CLINICAL_PROGRESS_NOTE_POSTOP.json +118 -0
- backend/templates/T6_CUSTOMER_CHARGE_SHEET_SPINE.json +204 -0
- backend/templates/T7_SALES_ORDER_ZIMMER.json +174 -0
- backend/trainer_schemas/T1_IFACTOR_DELIVERED_ORDER.schema.json +70 -0
- backend/trainer_schemas/T2_SEASPINE_DELIVERED_GOODS_FORM.schema.json +70 -0
- backend/trainer_schemas/T3_ASTURA_SALES_ORDER_FORM.schema.json +70 -0
- backend/trainer_schemas/T4_MEDICAL_ESTIMATION_OF_CHARGES.schema.json +49 -0
- backend/trainer_schemas/T5_CLINICAL_PROGRESS_NOTE_POSTOP.schema.json +35 -0
- backend/trainer_schemas/T6_CUSTOMER_CHARGE_SHEET_SPINE.schema.json +70 -0
- backend/trainer_schemas/T7_SALES_ORDER_ZIMMER.schema.json +70 -0
- backend/worker/__init__.py +0 -0
- backend/worker/config.py +89 -0
- backend/worker/gmail_client.py +149 -0
- backend/worker/openai_classifier.py +312 -0
- backend/worker/out/.keep +0 -0
- backend/worker/pdf_render.py +41 -0
- backend/worker/prompts.py +87 -0
- backend/worker/template_registry_snapshot.py +0 -0
- backend/worker/template_store.py +36 -0
- backend/worker/tmp/.keep +0 -0
- backend/worker/uploads/.keep +0 -0
- backend/worker/worker.py +286 -0
- requirements.txt +1 -0
.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
.venv/
|
| 4 |
+
.env
|
| 5 |
+
.env.*
|
| 6 |
+
node_modules/
|
| 7 |
+
dist/
|
| 8 |
+
.DS_Store
|
api.py
CHANGED
|
@@ -1,7 +1 @@
|
|
| 1 |
-
from
|
| 2 |
-
|
| 3 |
-
app = FastAPI()
|
| 4 |
-
|
| 5 |
-
@app.get("/health")
|
| 6 |
-
def health():
|
| 7 |
-
return {"ok": True}
|
|
|
|
| 1 |
+
from backend.api import app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/__init__.py
ADDED
|
File without changes
|
backend/api.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any, Dict
|
| 7 |
+
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
from fastapi import FastAPI, HTTPException
|
| 10 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
+
from fastapi.responses import FileResponse, PlainTextResponse
|
| 12 |
+
|
| 13 |
+
from backend.worker.gmail_client import GmailClient
|
| 14 |
+
|
| 15 |
+
app = FastAPI(title="PDF Trainer API", version="1.0")
|
| 16 |
+
|
| 17 |
+
# Allow Vite dev server
|
| 18 |
+
app.add_middleware(
|
| 19 |
+
CORSMiddleware,
|
| 20 |
+
allow_origins=[
|
| 21 |
+
"http://localhost:5173",
|
| 22 |
+
"http://127.0.0.1:5173",
|
| 23 |
+
],
|
| 24 |
+
allow_credentials=True,
|
| 25 |
+
allow_methods=["*"],
|
| 26 |
+
allow_headers=["*"],
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
| 30 |
+
BACKEND_DIR = REPO_ROOT / "backend"
|
| 31 |
+
UPLOADS_DIR = BACKEND_DIR / "worker" / "uploads"
|
| 32 |
+
|
| 33 |
+
# Load backend/.env explicitly ONCE for this process
|
| 34 |
+
load_dotenv(BACKEND_DIR / ".env", override=True)
|
| 35 |
+
|
| 36 |
+
CREDENTIALS_JSON = Path(os.environ.get("GMAIL_CREDENTIALS_JSON", str(BACKEND_DIR / "credentials.json")))
|
| 37 |
+
TOKEN_JSON = Path(os.environ.get("GMAIL_TOKEN_JSON", str(BACKEND_DIR / "token.json")))
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _gmail() -> GmailClient:
    """Build a Gmail client from the configured credentials and token paths."""
    client = GmailClient(CREDENTIALS_JSON, TOKEN_JSON)
    return client
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _get_env_required(key: str) -> str:
    """Return the stripped value of environment variable ``key``.

    Raises:
        HTTPException: status 500 when the variable is unset, empty, or
            contains only whitespace.
    """
    raw = os.environ.get(key)
    value = raw.strip() if raw else ""
    if value:
        return value
    raise HTTPException(status_code=500, detail=f"Server missing {key} env var")
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@app.get("/health")
def health():
    """Liveness probe: always reports ``{"ok": True}``."""
    status = {"ok": True}
    return status
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@app.get("/api/pdf/{pdf_id}")
def get_pdf(pdf_id: str):
    """Serve the uploaded PDF stored as ``<pdf_id>.pdf`` in the uploads directory.

    The download name is read from the optional ``<pdf_id>.name.txt`` sidecar
    file and falls back to ``<pdf_id>.pdf``. The same name is also exposed in
    the ``X-PDF-Name`` response header.

    Raises:
        HTTPException: status 404 when ``pdf_id`` is not a plain file name or
            no such PDF file exists.
    """
    filename = f"{pdf_id}.pdf"
    # pdf_id comes from the URL: refuse anything with a path component so the
    # lookup can never leave UPLOADS_DIR.
    if Path(filename).name != filename:
        raise HTTPException(status_code=404, detail="PDF not found")

    path = UPLOADS_DIR / filename
    # is_file() (rather than exists()) also rejects a directory named "*.pdf".
    if not path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else filename

    resp = FileResponse(path, media_type="application/pdf", filename=pdf_name)
    # NOTE(review): a non-Latin-1 pdf_name may not be encodable as a raw header
    # value -- confirm against the Starlette version in use.
    resp.headers["X-PDF-Name"] = pdf_name
    return resp
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
@app.post("/api/send-config")
async def send_config(payload: Dict[str, Any]):
    """
    Email a submitted trainer configuration to the pipeline inbox.

    Required payload keys:
      - pdf_id: str
      - template_id: str
      - config: dict

    The message goes to the address in PDF_PIPELINE_PIPELINE_NOTIFY_TO, sent
    from PDF_PIPELINE_NOTIFY_FROM. The subject includes template_id, the body
    includes pdf_id and a trainer link, and two files are attached: the
    configuration as JSON and the original PDF.

    Raises:
        HTTPException: 400 for a missing or invalid payload field, 404 when
            the PDF does not exist, 500 when a required env var is unset.
    """
    from urllib.parse import quote

    # Non-string values are treated as missing instead of crashing on .strip().
    raw_pdf_id = payload.get("pdf_id")
    raw_template_id = payload.get("template_id")
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    template_id = raw_template_id.strip() if isinstance(raw_template_id, str) else ""
    config = payload.get("config")

    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")
    if not template_id:
        raise HTTPException(status_code=400, detail="Missing template_id")
    if not isinstance(config, dict):
        raise HTTPException(status_code=400, detail="Missing config object")

    pdf_filename = f"{pdf_id}.pdf"
    # pdf_id is client-supplied: refuse path components so the attachment can
    # only ever be read from UPLOADS_DIR.
    if Path(pdf_filename).name != pdf_filename:
        raise HTTPException(status_code=400, detail="Invalid pdf_id")
    # template_id is placed in the email subject; line breaks there could be
    # used to inject extra headers, depending on how the client builds the
    # message.
    if "\r" in template_id or "\n" in template_id:
        raise HTTPException(status_code=400, detail="Invalid template_id")

    pipeline_to = _get_env_required("PDF_PIPELINE_PIPELINE_NOTIFY_TO")
    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()

    pdf_path = UPLOADS_DIR / pdf_filename
    if not pdf_path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found for pdf_id")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else pdf_filename

    # Percent-encode the id so characters such as "&" or spaces cannot break
    # the query string; ids made of unreserved characters are unchanged.
    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={quote(pdf_id, safe='')}"

    config_attachment_name = f"trainer_config_{pdf_id}_{template_id}.json"

    subject = f"PDF_TRAINER_CONFIG_SUBMITTED | template_id={template_id}"
    body = (
        "Hi,\n\n"
        "A PDF Trainer configuration was submitted.\n\n"
        f"template_id: {template_id}\n"
        f"pdf_id: {pdf_id}\n"
        f"trainer_link: {trainer_link}\n\n"
        "Attachments:\n"
        f"- {config_attachment_name}\n"
        f"- {pdf_name}\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )

    cfg_bytes = json.dumps(
        {"pdf_id": pdf_id, "template_id": template_id, "config": config},
        indent=2,
    ).encode("utf-8")

    attachments = [
        (config_attachment_name, cfg_bytes),
        (pdf_name, pdf_path.read_bytes()),
    ]

    # NOTE(review): the file reads and send_email() appear to be blocking calls
    # inside an async handler -- confirm whether this should run in a threadpool.
    gmail = _gmail()
    gmail.send_email(
        to_email=pipeline_to,
        from_email=notify_from,
        subject=subject,
        body_text=body,
        attachments=attachments,
    )

    return {"ok": True}
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
@app.post("/api/notify-unknown")
async def notify_unknown(payload: Dict[str, Any]):
    """
    Email the rep inbox about a PDF that matched no known template.

    Required payload keys:
      - pdf_id: str
    Optional:
      - reason: str

    The message goes to the address in PDF_PIPELINE_NOTIFY_TO, sent from
    PDF_PIPELINE_NOTIFY_FROM. It contains a trainer link carrying the pdf_id
    and attaches the original PDF only (no JSON).

    Raises:
        HTTPException: 400 for a missing or invalid pdf_id, 404 when the PDF
            does not exist, 500 when a required env var is unset.
    """
    from urllib.parse import quote

    # Non-string values are treated as absent instead of crashing on .strip().
    raw_pdf_id = payload.get("pdf_id")
    raw_reason = payload.get("reason")
    pdf_id = raw_pdf_id.strip() if isinstance(raw_pdf_id, str) else ""
    reason = raw_reason.strip() if isinstance(raw_reason, str) else ""

    if not pdf_id:
        raise HTTPException(status_code=400, detail="Missing pdf_id")

    pdf_filename = f"{pdf_id}.pdf"
    # pdf_id is client-supplied: refuse path components so the attachment can
    # only ever be read from UPLOADS_DIR.
    if Path(pdf_filename).name != pdf_filename:
        raise HTTPException(status_code=400, detail="Invalid pdf_id")

    rep_to = _get_env_required("PDF_PIPELINE_NOTIFY_TO")
    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()

    pdf_path = UPLOADS_DIR / pdf_filename
    if not pdf_path.is_file():
        raise HTTPException(status_code=404, detail="PDF not found for pdf_id")

    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else pdf_filename

    # Percent-encode the id so characters such as "&" or spaces cannot break
    # the query string; ids made of unreserved characters are unchanged.
    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={quote(pdf_id, safe='')}"

    subject = "Action required: Unknown PDF format (template not found)"
    reason_paragraph = f"Reason: {reason}\n\n" if reason else ""
    body = (
        "Hi,\n\n"
        "We received a PDF that does not match any existing templates in the system.\n\n"
        + reason_paragraph
        + "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
        f"{trainer_link}\n\n"
        "The original PDF is attached for reference.\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )

    attachments = [(pdf_name, pdf_path.read_bytes())]

    # NOTE(review): the file read and send_email() appear to be blocking calls
    # inside an async handler -- confirm whether this should run in a threadpool.
    gmail = _gmail()
    gmail.send_email(
        to_email=rep_to,
        from_email=notify_from,
        subject=subject,
        body_text=body,
        attachments=attachments,
    )

    return {"ok": True}
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
@app.get("/", response_class=PlainTextResponse)
def root():
    """Landing route: return a short plain-text pointer to the health endpoint."""
    banner = "PDF Trainer API. Use /health"
    return banner
|
backend/oauth_bootstrap.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
| 7 |
+
from google.auth.transport.requests import Request
|
| 8 |
+
from google.oauth2.credentials import Credentials
|
| 9 |
+
|
| 10 |
+
# REQUIRED scopes based on your plan:
|
| 11 |
+
# - read messages, move labels, mark read => modify
|
| 12 |
+
# - send mail => send
|
| 13 |
+
SCOPES = [
|
| 14 |
+
"https://www.googleapis.com/auth/gmail.modify",
|
| 15 |
+
"https://www.googleapis.com/auth/gmail.send",
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
ROOT = Path(__file__).resolve().parent
# Anchor both files to this script's directory (backend/) rather than the
# current working directory, so the script behaves the same wherever it is
# launched from. From the repo root these resolve to the same files as the
# previous "backend/..." relative paths.
CREDS_PATH = ROOT / "credentials.json"
TOKEN_PATH = ROOT / "token.json"


def main() -> None:
    """Obtain Gmail OAuth credentials and save them to ``TOKEN_PATH``.

    An existing token file is loaded first. If the loaded credentials are
    expired and carry a refresh token they are refreshed; otherwise the
    interactive local-server OAuth flow is run. The resulting credentials are
    written to ``TOKEN_PATH`` as JSON.

    Raises:
        FileNotFoundError: when the OAuth client file ``CREDS_PATH`` is missing.
    """
    if not CREDS_PATH.exists():
        raise FileNotFoundError(
            f"Missing {CREDS_PATH}. Download OAuth client JSON from Google Cloud and save as credentials.json in this folder."
        )

    creds: Credentials | None = None

    # Load existing token if present
    if TOKEN_PATH.exists():
        creds = Credentials.from_authorized_user_file(str(TOKEN_PATH), SCOPES)

    # Refresh or re-authenticate
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(str(CREDS_PATH), SCOPES)
            # Local loopback server OAuth (Desktop app); port=0 lets the OS
            # pick a free port.
            creds = flow.run_local_server(port=0)

        # Save token
        TOKEN_PATH.write_text(creds.to_json(), encoding="utf-8")

    print("✅ OAuth complete.")
    print(f"Saved token: {TOKEN_PATH}")
    print("Scopes granted:", creds.scopes)


if __name__ == "__main__":
    main()
|
backend/scripts/apply_trainer_schemas.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/scripts/apply_trainer_schemas.py
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
REPO_ROOT = Path(__file__).resolve().parents[2]
|
| 8 |
+
TEMPLATES_DIR = REPO_ROOT / "backend" / "templates"
|
| 9 |
+
SCHEMAS_DIR = REPO_ROOT / "backend" / "trainer_schemas"
|
| 10 |
+
|
| 11 |
+
def main() -> None:
    """Copy each trainer schema into the ``schema`` key of its template file.

    For every ``<template_id>.schema.json`` in ``SCHEMAS_DIR`` the matching
    ``<template_id>.json`` in ``TEMPLATES_DIR`` is rewritten with the schema
    embedded. Schemas without a matching template file are skipped with a
    warning.

    Raises:
        RuntimeError: when the schemas directory is missing or empty, a schema
            is not a JSON object with a ``fields`` list, or a template file is
            not a JSON object.
    """
    if not SCHEMAS_DIR.exists():
        raise RuntimeError(f"Missing schemas dir: {SCHEMAS_DIR}")

    schema_files = sorted(SCHEMAS_DIR.glob("*.schema.json"))
    if not schema_files:
        raise RuntimeError(f"No schema files found in: {SCHEMAS_DIR}")

    suffix = ".schema.json"
    applied = 0

    for sf in schema_files:
        # Strip only the trailing suffix; str.replace would also remove an
        # occurrence in the middle of the file name.
        template_id = sf.name[: -len(suffix)]
        template_path = TEMPLATES_DIR / f"{template_id}.json"

        if not template_path.exists():
            print(f"⚠️ skip (no template file): {template_path}")
            continue

        new_schema = json.loads(sf.read_text(encoding="utf-8"))
        if not isinstance(new_schema, dict):
            raise RuntimeError(f"Invalid schema json (not object): {sf}")
        if not isinstance(new_schema.get("fields"), list):
            raise RuntimeError(f"Invalid schema json (missing fields[]): {sf}")

        t = json.loads(template_path.read_text(encoding="utf-8"))
        if not isinstance(t, dict):
            raise RuntimeError(f"Invalid template json (not object): {template_path}")
        t["schema"] = new_schema

        # NOTE: the template "version" is intentionally left unchanged here;
        # bump it separately if schema changes should be versioned.

        # ensure_ascii=False matches the sibling generator script, so
        # non-ASCII text is not rewritten as \u escapes on every run.
        template_path.write_text(
            json.dumps(t, indent=2, ensure_ascii=False) + "\n", encoding="utf-8"
        )
        print(f"✅ updated {template_path} fields={len(new_schema['fields'])}")
        applied += 1

    print(f"done. applied={applied}")


if __name__ == "__main__":
    main()
|
backend/scripts/generate_template_schema_skeletons.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any, Dict, List
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
TEMPLATES_DIR = Path(__file__).resolve().parents[1] / "templates"
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Canonical field list for every template schema, in output order.
FIELDS: List[Dict[str, Any]] = [
    {"field_id": "facility_organization", "label": "Facility / Organization", "type": "entity"},
    {"field_id": "case_location", "label": "Case Location / Address", "type": "text"},
    {"field_id": "vendor", "label": "Vendor", "type": "entity"},
    {"field_id": "physician_name", "label": "Physician Name", "type": "person"},
    {"field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date"},
    {"field_id": "items", "label": "Items / Line Items", "type": "table"},
]

# Header texts used as anchors for the line-items table.
TABLE_ANCHORS = [
    {"key": "item_number", "expected_text": "Item Number"},
    {"key": "description", "expected_text": "Description"},
    {"key": "qty", "expected_text": "Qty"},
]

# Columns of the line-items table, in output order.
TABLE_COLUMNS = [
    {"key": "item_number", "label": "Item Number"},
    {"key": "lot_number", "label": "Lot Number"},
    {"key": "description", "label": "Description"},
    {"key": "qty", "label": "Qty"},
    {"key": "price", "label": "Price"},
]


def _scalar_field_skeleton(spec: Dict[str, Any]) -> Dict[str, Any]:
    """Return a fresh scalar-field entry with all geometry left unset."""
    return {
        **spec,
        "anchor_bbox_norm": None,
        "value_bbox_norm": None,
        "value_offset_norm": None,
    }


def _table_field_skeleton(spec: Dict[str, Any]) -> Dict[str, Any]:
    """Return a fresh table-field entry with columns/anchors and unset geometry."""
    return {
        **spec,
        "table_bbox_norm": None,
        "header_bbox_norm": None,
        "row_height_hint_norm": None,
        "columns": [{**column, "bbox_rel_norm": None} for column in TABLE_COLUMNS],
        "table_anchors": [{**anchor, "bbox_norm": None} for anchor in TABLE_ANCHORS],
        "notes": "Anchors are used at runtime to localize table/header/columns under drift.",
    }


def schema_skeleton(form_id: str) -> Dict[str, Any]:
    """Build an empty trainer schema for ``form_id``.

    The field list is derived from ``FIELDS``, ``TABLE_COLUMNS`` and
    ``TABLE_ANCHORS`` so the skeleton cannot drift from those definitions.
    Every bounding box / offset is ``None``. Each call returns newly built
    dicts and lists, so callers may mutate the result freely.

    Args:
        form_id: Value stored under the schema's ``form_id`` key.

    Returns:
        The schema dict, with keys in the order they are serialized.
    """
    fields = [
        _table_field_skeleton(spec) if spec["type"] == "table" else _scalar_field_skeleton(spec)
        for spec in FIELDS
    ]
    return {
        "form_id": form_id,
        "version": 3,
        "page": 1,
        "scalar_value_region_mode": "offset_from_anchor_v1",
        "fields": fields,
        "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items.",
    }
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def main() -> None:
    """Replace the ``schema`` of every template JSON file with an empty skeleton.

    Each ``*.json`` file in ``TEMPLATES_DIR`` is loaded, its ``schema`` key is
    overwritten (or created) with ``schema_skeleton(...)``, and the file is
    written back. Exits via ``SystemExit`` when the directory is missing or
    holds no template files.
    """
    if not TEMPLATES_DIR.exists():
        raise SystemExit(f"templates dir not found: {TEMPLATES_DIR}")

    template_files = sorted(TEMPLATES_DIR.glob("*.json"))
    if not template_files:
        raise SystemExit(f"No template json files found in: {TEMPLATES_DIR}")

    # The list is non-empty here, so the counter is always bound after the loop.
    for updated, template_file in enumerate(template_files, start=1):
        template = json.loads(template_file.read_text(encoding="utf-8"))

        # Prefer the id stored in the file; fall back to the file name.
        declared_id = template.get("template_id")
        template_id = (declared_id if declared_id else template_file.stem).strip()

        # Overwrite or create schema skeleton
        template["schema"] = schema_skeleton(form_id=f"template_{template_id}")

        serialized = json.dumps(template, indent=2, ensure_ascii=False)
        template_file.write_text(serialized + "\n", encoding="utf-8")
        print(f"updated schema skeleton: {template_file}")

    print(f"done. updated {updated} template files.")


if __name__ == "__main__":
    main()
|
backend/scripts/migrate_hardcoded_templates.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backend/scripts/migrate_hardcoded_templates.py
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Any, Dict, List
|
| 8 |
+
|
| 9 |
+
TEMPLATES_DIR = Path(__file__).resolve().parents[1] / "templates"
|
| 10 |
+
TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
|
| 11 |
+
|
| 12 |
+
KNOWN_TEMPLATES: List[Dict[str, Any]] = [
|
| 13 |
+
{
|
| 14 |
+
"template_id": "T1_IFACTOR_DELIVERED_ORDER",
|
| 15 |
+
"name": "I-FACTOR Delivered Order Form",
|
| 16 |
+
"status": "active",
|
| 17 |
+
"version": 1,
|
| 18 |
+
"match": {
|
| 19 |
+
"keywords_all": ["delivered order form"],
|
| 20 |
+
"keywords_any": ["i-factor", "cerapedics", "product information", "stickers", "bill to", "delivered to"],
|
| 21 |
+
},
|
| 22 |
+
"schema": {},
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"template_id": "T2_SEASPINE_DELIVERED_GOODS_FORM",
|
| 26 |
+
"name": "SeaSpine Delivered Goods Form",
|
| 27 |
+
"status": "active",
|
| 28 |
+
"version": 1,
|
| 29 |
+
"match": {
|
| 30 |
+
"keywords_all": ["delivered goods form"],
|
| 31 |
+
"keywords_any": ["seaspine", "isotis", "handling fee", "sales order", "invoice"],
|
| 32 |
+
},
|
| 33 |
+
"schema": {},
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"template_id": "T3_ASTURA_SALES_ORDER_FORM",
|
| 37 |
+
"name": "Astura Sales Order Form",
|
| 38 |
+
"status": "active",
|
| 39 |
+
"version": 1,
|
| 40 |
+
"match": {
|
| 41 |
+
"keywords_all": [],
|
| 42 |
+
"keywords_any": ["astura", "dc141", "ca200", "cbba", "sales order"],
|
| 43 |
+
},
|
| 44 |
+
"schema": {},
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"template_id": "T4_MEDICAL_ESTIMATION_OF_CHARGES",
|
| 48 |
+
"name": "Medical Estimation of Charges",
|
| 49 |
+
"status": "active",
|
| 50 |
+
"version": 1,
|
| 51 |
+
"match": {
|
| 52 |
+
"keywords_all": [],
|
| 53 |
+
"keywords_any": ["estimation of charges", "good faith estimate", "patient responsibility", "insurance"],
|
| 54 |
+
},
|
| 55 |
+
"schema": {},
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"template_id": "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
|
| 59 |
+
"name": "Clinical Progress Note Postop",
|
| 60 |
+
"status": "active",
|
| 61 |
+
"version": 1,
|
| 62 |
+
"match": {
|
| 63 |
+
"keywords_all": [],
|
| 64 |
+
"keywords_any": ["clinical progress note", "progress note", "post-op", "assessment", "plan"],
|
| 65 |
+
},
|
| 66 |
+
"schema": {},
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"template_id": "T6_CUSTOMER_CHARGE_SHEET_SPINE",
|
| 70 |
+
"name": "Customer Charge Sheet Spine",
|
| 71 |
+
"status": "active",
|
| 72 |
+
"version": 1,
|
| 73 |
+
"match": {
|
| 74 |
+
"keywords_all": [],
|
| 75 |
+
"keywords_any": ["customer charge sheet", "charge sheet", "spine", "qty", "unit price", "total"],
|
| 76 |
+
},
|
| 77 |
+
"schema": {},
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"template_id": "T7_SALES_ORDER_ZIMMER",
|
| 81 |
+
"name": "Zimmer Sales Order",
|
| 82 |
+
"status": "active",
|
| 83 |
+
"version": 1,
|
| 84 |
+
"match": {
|
| 85 |
+
"keywords_all": [],
|
| 86 |
+
"keywords_any": ["zimmer", "zimmer biomet", "biomet", "sales order", "purchase order", "po number"],
|
| 87 |
+
},
|
| 88 |
+
"schema": {},
|
| 89 |
+
},
|
| 90 |
+
]
|
| 91 |
+
|
| 92 |
+
def main(*, overwrite: bool = False) -> None:
    """Write each entry of ``KNOWN_TEMPLATES`` to ``<template_id>.json``.

    Args:
        overwrite: When False (the default), an existing template file is left
            untouched. The hard-coded entries carry an empty ``schema``, so
            rewriting an existing file would discard any schema saved into it
            since the first migration. Pass True to force a rewrite.
    """
    for t in KNOWN_TEMPLATES:
        out_path = TEMPLATES_DIR / f"{t['template_id']}.json"
        if out_path.exists() and not overwrite:
            print(f"skip (already exists): {out_path}")
            continue
        out_path.write_text(json.dumps(t, indent=2), encoding="utf-8")
        print(f"wrote {out_path}")


if __name__ == "__main__":
    main()
|
backend/templates/T1_IFACTOR_DELIVERED_ORDER.json
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"template_id": "T1_IFACTOR_DELIVERED_ORDER",
|
| 3 |
+
"name": "I-FACTOR Delivered Order Form",
|
| 4 |
+
"status": "active",
|
| 5 |
+
"version": 2,
|
| 6 |
+
"match": {
|
| 7 |
+
"keywords_all": [
|
| 8 |
+
"delivered order form"
|
| 9 |
+
],
|
| 10 |
+
"keywords_any": [
|
| 11 |
+
"i-factor",
|
| 12 |
+
"cerapedics",
|
| 13 |
+
"product information",
|
| 14 |
+
"stickers",
|
| 15 |
+
"bill to",
|
| 16 |
+
"delivered to"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
"schema": {
|
| 20 |
+
"form_id": "trainer_2f7cdbc443f040c79723c74490f6282f",
|
| 21 |
+
"version": 3,
|
| 22 |
+
"page": 1,
|
| 23 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 24 |
+
"fields": [
|
| 25 |
+
{
|
| 26 |
+
"field_id": "facility_organization",
|
| 27 |
+
"label": "Facility / Organization",
|
| 28 |
+
"type": "entity",
|
| 29 |
+
"anchor_bbox_norm": {
|
| 30 |
+
"x": 0.138889,
|
| 31 |
+
"y": 0.328283,
|
| 32 |
+
"w": 0.047386,
|
| 33 |
+
"h": 0.027778
|
| 34 |
+
},
|
| 35 |
+
"value_bbox_norm": null,
|
| 36 |
+
"value_offset_norm": {
|
| 37 |
+
"dx": 0.052288,
|
| 38 |
+
"dy": -0.001263,
|
| 39 |
+
"w": 0.294118,
|
| 40 |
+
"h": 0.045455
|
| 41 |
+
}
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"field_id": "case_location",
|
| 45 |
+
"label": "Case Location / Address",
|
| 46 |
+
"type": "text",
|
| 47 |
+
"anchor_bbox_norm": {
|
| 48 |
+
"x": 0.140523,
|
| 49 |
+
"y": 0.353535,
|
| 50 |
+
"w": 0.055556,
|
| 51 |
+
"h": 0.02399
|
| 52 |
+
},
|
| 53 |
+
"value_bbox_norm": null,
|
| 54 |
+
"value_offset_norm": {
|
| 55 |
+
"dx": 0.062092,
|
| 56 |
+
"dy": 0.005051,
|
| 57 |
+
"w": 0.292484,
|
| 58 |
+
"h": 0.056818
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"field_id": "vendor",
|
| 63 |
+
"label": "Vendor",
|
| 64 |
+
"type": "entity",
|
| 65 |
+
"anchor_bbox_norm": {
|
| 66 |
+
"x": 0.215686,
|
| 67 |
+
"y": 0.170455,
|
| 68 |
+
"w": 0.205882,
|
| 69 |
+
"h": 0.059343
|
| 70 |
+
},
|
| 71 |
+
"value_bbox_norm": null,
|
| 72 |
+
"value_offset_norm": null
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"field_id": "physician_name",
|
| 76 |
+
"label": "Physician Name",
|
| 77 |
+
"type": "person",
|
| 78 |
+
"anchor_bbox_norm": {
|
| 79 |
+
"x": 0.522876,
|
| 80 |
+
"y": 0.497475,
|
| 81 |
+
"w": 0.062092,
|
| 82 |
+
"h": 0.020202
|
| 83 |
+
},
|
| 84 |
+
"value_bbox_norm": null,
|
| 85 |
+
"value_offset_norm": {
|
| 86 |
+
"dx": 0.060458,
|
| 87 |
+
"dy": -0.005051,
|
| 88 |
+
"w": 0.214052,
|
| 89 |
+
"h": 0.025253
|
| 90 |
+
}
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"field_id": "date_of_surgery",
|
| 94 |
+
"label": "Date of Surgery",
|
| 95 |
+
"type": "date",
|
| 96 |
+
"anchor_bbox_norm": {
|
| 97 |
+
"x": 0.138889,
|
| 98 |
+
"y": 0.57197,
|
| 99 |
+
"w": 0.160131,
|
| 100 |
+
"h": 0.026515
|
| 101 |
+
},
|
| 102 |
+
"value_bbox_norm": null,
|
| 103 |
+
"value_offset_norm": {
|
| 104 |
+
"dx": 0.165033,
|
| 105 |
+
"dy": -0.002525,
|
| 106 |
+
"w": 0.205882,
|
| 107 |
+
"h": 0.02399
|
| 108 |
+
}
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"field_id": "items",
|
| 112 |
+
"label": "Items / Line Items",
|
| 113 |
+
"type": "table",
|
| 114 |
+
"table_bbox_norm": {
|
| 115 |
+
"x": 0.138889,
|
| 116 |
+
"y": 0.632576,
|
| 117 |
+
"w": 0.732026,
|
| 118 |
+
"h": 0.122475
|
| 119 |
+
},
|
| 120 |
+
"header_bbox_norm": {
|
| 121 |
+
"x": 0.142157,
|
| 122 |
+
"y": 0.632576,
|
| 123 |
+
"w": 0.727124,
|
| 124 |
+
"h": 0.034091
|
| 125 |
+
},
|
| 126 |
+
"row_height_hint_norm": null,
|
| 127 |
+
"columns": [
|
| 128 |
+
{
|
| 129 |
+
"key": "item_number",
|
| 130 |
+
"label": "Item Number",
|
| 131 |
+
"bbox_rel_norm": {
|
| 132 |
+
"x": 0.004464,
|
| 133 |
+
"y": 0.28866,
|
| 134 |
+
"w": 0.196429,
|
| 135 |
+
"h": 0.701031
|
| 136 |
+
}
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"key": "lot_number",
|
| 140 |
+
"label": "Lot Number",
|
| 141 |
+
"bbox_rel_norm": null
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"key": "description",
|
| 145 |
+
"label": "Description",
|
| 146 |
+
"bbox_rel_norm": {
|
| 147 |
+
"x": 0.209821,
|
| 148 |
+
"y": 0.278351,
|
| 149 |
+
"w": 0.241071,
|
| 150 |
+
"h": 0.639175
|
| 151 |
+
}
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"key": "qty",
|
| 155 |
+
"label": "Qty",
|
| 156 |
+
"bbox_rel_norm": {
|
| 157 |
+
"x": 0.647321,
|
| 158 |
+
"y": 0.247423,
|
| 159 |
+
"w": 0.058036,
|
| 160 |
+
"h": 0.71134
|
| 161 |
+
}
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"key": "price",
|
| 165 |
+
"label": "Price",
|
| 166 |
+
"bbox_rel_norm": null
|
| 167 |
+
}
|
| 168 |
+
],
|
| 169 |
+
"table_anchors": [
|
| 170 |
+
{
|
| 171 |
+
"key": "item_number",
|
| 172 |
+
"expected_text": "Item Number",
|
| 173 |
+
"bbox_norm": {
|
| 174 |
+
"x": 0.140523,
|
| 175 |
+
"y": 0.652778,
|
| 176 |
+
"w": 0.145425,
|
| 177 |
+
"h": 0.016414
|
| 178 |
+
}
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"key": "description",
|
| 182 |
+
"expected_text": "Description",
|
| 183 |
+
"bbox_norm": {
|
| 184 |
+
"x": 0.287582,
|
| 185 |
+
"y": 0.650253,
|
| 186 |
+
"w": 0.181373,
|
| 187 |
+
"h": 0.018939
|
| 188 |
+
}
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"key": "qty",
|
| 192 |
+
"expected_text": "Qty",
|
| 193 |
+
"bbox_norm": {
|
| 194 |
+
"x": 0.614379,
|
| 195 |
+
"y": 0.647727,
|
| 196 |
+
"w": 0.047386,
|
| 197 |
+
"h": 0.016414
|
| 198 |
+
}
|
| 199 |
+
}
|
| 200 |
+
],
|
| 201 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 202 |
+
}
|
| 203 |
+
],
|
| 204 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 205 |
+
}
|
| 206 |
+
}
|
backend/templates/T2_SEASPINE_DELIVERED_GOODS_FORM.json
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"template_id": "T2_SEASPINE_DELIVERED_GOODS_FORM",
|
| 3 |
+
"name": "SeaSpine Delivered Goods Form",
|
| 4 |
+
"status": "active",
|
| 5 |
+
"version": 2,
|
| 6 |
+
"match": {
|
| 7 |
+
"keywords_all": [
|
| 8 |
+
"delivered goods form"
|
| 9 |
+
],
|
| 10 |
+
"keywords_any": [
|
| 11 |
+
"seaspine",
|
| 12 |
+
"isotis",
|
| 13 |
+
"handling fee",
|
| 14 |
+
"sales order",
|
| 15 |
+
"invoice"
|
| 16 |
+
]
|
| 17 |
+
},
|
| 18 |
+
"schema": {
|
| 19 |
+
"form_id": "trainer_245e70e31b1f4eb1b26fad626365e9ad",
|
| 20 |
+
"version": 3,
|
| 21 |
+
"page": 1,
|
| 22 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 23 |
+
"fields": [
|
| 24 |
+
{
|
| 25 |
+
"field_id": "facility_organization",
|
| 26 |
+
"label": "Facility / Organization",
|
| 27 |
+
"type": "entity",
|
| 28 |
+
"anchor_bbox_norm": {
|
| 29 |
+
"x": 0.179739,
|
| 30 |
+
"y": 0.284091,
|
| 31 |
+
"w": 0.04085,
|
| 32 |
+
"h": 0.020202
|
| 33 |
+
},
|
| 34 |
+
"value_bbox_norm": null,
|
| 35 |
+
"value_offset_norm": {
|
| 36 |
+
"dx": 0.044118,
|
| 37 |
+
"dy": -0.002525,
|
| 38 |
+
"w": 0.246732,
|
| 39 |
+
"h": 0.021465
|
| 40 |
+
}
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"field_id": "case_location",
|
| 44 |
+
"label": "Case Location / Address",
|
| 45 |
+
"type": "text",
|
| 46 |
+
"anchor_bbox_norm": {
|
| 47 |
+
"x": 0.181373,
|
| 48 |
+
"y": 0.310606,
|
| 49 |
+
"w": 0.135621,
|
| 50 |
+
"h": 0.016414
|
| 51 |
+
},
|
| 52 |
+
"value_bbox_norm": null,
|
| 53 |
+
"value_offset_norm": {
|
| 54 |
+
"dx": 0.001634,
|
| 55 |
+
"dy": 0.013889,
|
| 56 |
+
"w": 0.295752,
|
| 57 |
+
"h": 0.027778
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"field_id": "vendor",
|
| 62 |
+
"label": "Vendor",
|
| 63 |
+
"type": "entity",
|
| 64 |
+
"anchor_bbox_norm": {
|
| 65 |
+
"x": 0.606209,
|
| 66 |
+
"y": 0.152778,
|
| 67 |
+
"w": 0.173203,
|
| 68 |
+
"h": 0.068182
|
| 69 |
+
},
|
| 70 |
+
"value_bbox_norm": null,
|
| 71 |
+
"value_offset_norm": null
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"field_id": "physician_name",
|
| 75 |
+
"label": "Physician Name",
|
| 76 |
+
"type": "person",
|
| 77 |
+
"anchor_bbox_norm": {
|
| 78 |
+
"x": 0.179739,
|
| 79 |
+
"y": 0.508838,
|
| 80 |
+
"w": 0.104575,
|
| 81 |
+
"h": 0.016414
|
| 82 |
+
},
|
| 83 |
+
"value_bbox_norm": null,
|
| 84 |
+
"value_offset_norm": {
|
| 85 |
+
"dx": 0.106209,
|
| 86 |
+
"dy": -0.001263,
|
| 87 |
+
"w": 0.372549,
|
| 88 |
+
"h": 0.015152
|
| 89 |
+
}
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"field_id": "date_of_surgery",
|
| 93 |
+
"label": "Date of Surgery",
|
| 94 |
+
"type": "date",
|
| 95 |
+
"anchor_bbox_norm": {
|
| 96 |
+
"x": 0.179739,
|
| 97 |
+
"y": 0.521465,
|
| 98 |
+
"w": 0.081699,
|
| 99 |
+
"h": 0.021465
|
| 100 |
+
},
|
| 101 |
+
"value_bbox_norm": null,
|
| 102 |
+
"value_offset_norm": {
|
| 103 |
+
"dx": 0.083333,
|
| 104 |
+
"dy": 0.005051,
|
| 105 |
+
"w": 0.068627,
|
| 106 |
+
"h": 0.015152
|
| 107 |
+
}
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"field_id": "items",
|
| 111 |
+
"label": "Items / Line Items",
|
| 112 |
+
"type": "table",
|
| 113 |
+
"table_bbox_norm": {
|
| 114 |
+
"x": 0.178105,
|
| 115 |
+
"y": 0.388889,
|
| 116 |
+
"w": 0.609477,
|
| 117 |
+
"h": 0.118687
|
| 118 |
+
},
|
| 119 |
+
"header_bbox_norm": {
|
| 120 |
+
"x": 0.178105,
|
| 121 |
+
"y": 0.390152,
|
| 122 |
+
"w": 0.609477,
|
| 123 |
+
"h": 0.02399
|
| 124 |
+
},
|
| 125 |
+
"row_height_hint_norm": null,
|
| 126 |
+
"columns": [
|
| 127 |
+
{
|
| 128 |
+
"key": "item_number",
|
| 129 |
+
"label": "Item Number",
|
| 130 |
+
"bbox_rel_norm": {
|
| 131 |
+
"x": 0.718499,
|
| 132 |
+
"y": 0.170213,
|
| 133 |
+
"w": 0.072386,
|
| 134 |
+
"h": 0.797872
|
| 135 |
+
}
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"key": "lot_number",
|
| 139 |
+
"label": "Lot Number",
|
| 140 |
+
"bbox_rel_norm": {
|
| 141 |
+
"x": 0.168901,
|
| 142 |
+
"y": 0.223404,
|
| 143 |
+
"w": 0.171582,
|
| 144 |
+
"h": 0.776596
|
| 145 |
+
}
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"key": "description",
|
| 149 |
+
"label": "Description",
|
| 150 |
+
"bbox_rel_norm": null
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"key": "qty",
|
| 154 |
+
"label": "Qty",
|
| 155 |
+
"bbox_rel_norm": null
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"key": "price",
|
| 159 |
+
"label": "Price",
|
| 160 |
+
"bbox_rel_norm": null
|
| 161 |
+
}
|
| 162 |
+
],
|
| 163 |
+
"table_anchors": [
|
| 164 |
+
{
|
| 165 |
+
"key": "item_number",
|
| 166 |
+
"expected_text": "Item Number",
|
| 167 |
+
"bbox_norm": {
|
| 168 |
+
"x": 0.178105,
|
| 169 |
+
"y": 0.388889,
|
| 170 |
+
"w": 0.101307,
|
| 171 |
+
"h": 0.02399
|
| 172 |
+
}
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"key": "description",
|
| 176 |
+
"expected_text": "Description",
|
| 177 |
+
"bbox_norm": {
|
| 178 |
+
"x": 0.488562,
|
| 179 |
+
"y": 0.388889,
|
| 180 |
+
"w": 0.129085,
|
| 181 |
+
"h": 0.025253
|
| 182 |
+
}
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"key": "qty",
|
| 186 |
+
"expected_text": "Qty",
|
| 187 |
+
"bbox_norm": {
|
| 188 |
+
"x": 0.617647,
|
| 189 |
+
"y": 0.388889,
|
| 190 |
+
"w": 0.045752,
|
| 191 |
+
"h": 0.02399
|
| 192 |
+
}
|
| 193 |
+
}
|
| 194 |
+
],
|
| 195 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 196 |
+
}
|
| 197 |
+
],
|
| 198 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 199 |
+
}
|
| 200 |
+
}
|
backend/templates/T3_ASTURA_SALES_ORDER_FORM.json
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"template_id": "T3_ASTURA_SALES_ORDER_FORM",
|
| 3 |
+
"name": "Astura Sales Order Form",
|
| 4 |
+
"status": "active",
|
| 5 |
+
"version": 2,
|
| 6 |
+
"match": {
|
| 7 |
+
"keywords_all": [],
|
| 8 |
+
"keywords_any": [
|
| 9 |
+
"astura",
|
| 10 |
+
"dc141",
|
| 11 |
+
"ca200",
|
| 12 |
+
"cbba",
|
| 13 |
+
"sales order"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
"schema": {
|
| 17 |
+
"form_id": "trainer_b931186e13eb45d2a9a1ded8ff8641bb",
|
| 18 |
+
"version": 3,
|
| 19 |
+
"page": 1,
|
| 20 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 21 |
+
"fields": [
|
| 22 |
+
{
|
| 23 |
+
"field_id": "facility_organization",
|
| 24 |
+
"label": "Facility / Organization",
|
| 25 |
+
"type": "entity",
|
| 26 |
+
"anchor_bbox_norm": {
|
| 27 |
+
"x": 0.156863,
|
| 28 |
+
"y": 0.194444,
|
| 29 |
+
"w": 0.053922,
|
| 30 |
+
"h": 0.012626
|
| 31 |
+
},
|
| 32 |
+
"value_bbox_norm": null,
|
| 33 |
+
"value_offset_norm": {
|
| 34 |
+
"dx": 0.076797,
|
| 35 |
+
"dy": -0.002525,
|
| 36 |
+
"w": 0.205882,
|
| 37 |
+
"h": 0.021465
|
| 38 |
+
}
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"field_id": "case_location",
|
| 42 |
+
"label": "Case Location / Address",
|
| 43 |
+
"type": "text",
|
| 44 |
+
"anchor_bbox_norm": {
|
| 45 |
+
"x": 0.155229,
|
| 46 |
+
"y": 0.224747,
|
| 47 |
+
"w": 0.05719,
|
| 48 |
+
"h": 0.016414
|
| 49 |
+
},
|
| 50 |
+
"value_bbox_norm": null,
|
| 51 |
+
"value_offset_norm": {
|
| 52 |
+
"dx": 0.075163,
|
| 53 |
+
"dy": 0,
|
| 54 |
+
"w": 0.212418,
|
| 55 |
+
"h": 0.034091
|
| 56 |
+
}
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"field_id": "vendor",
|
| 60 |
+
"label": "Vendor",
|
| 61 |
+
"type": "entity",
|
| 62 |
+
"anchor_bbox_norm": {
|
| 63 |
+
"x": 0.160131,
|
| 64 |
+
"y": 0.117424,
|
| 65 |
+
"w": 0.098039,
|
| 66 |
+
"h": 0.064394
|
| 67 |
+
},
|
| 68 |
+
"value_bbox_norm": null,
|
| 69 |
+
"value_offset_norm": null
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"field_id": "physician_name",
|
| 73 |
+
"label": "Physician Name",
|
| 74 |
+
"type": "person",
|
| 75 |
+
"anchor_bbox_norm": {
|
| 76 |
+
"x": 0.158497,
|
| 77 |
+
"y": 0.289141,
|
| 78 |
+
"w": 0.062092,
|
| 79 |
+
"h": 0.013889
|
| 80 |
+
},
|
| 81 |
+
"value_bbox_norm": null,
|
| 82 |
+
"value_offset_norm": {
|
| 83 |
+
"dx": 0.068627,
|
| 84 |
+
"dy": -0.002525,
|
| 85 |
+
"w": 0.212418,
|
| 86 |
+
"h": 0.022727
|
| 87 |
+
}
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"field_id": "date_of_surgery",
|
| 91 |
+
"label": "Date of Surgery",
|
| 92 |
+
"type": "date",
|
| 93 |
+
"anchor_bbox_norm": {
|
| 94 |
+
"x": 0.160131,
|
| 95 |
+
"y": 0.256313,
|
| 96 |
+
"w": 0.053922,
|
| 97 |
+
"h": 0.016414
|
| 98 |
+
},
|
| 99 |
+
"value_bbox_norm": null,
|
| 100 |
+
"value_offset_norm": {
|
| 101 |
+
"dx": 0.071895,
|
| 102 |
+
"dy": 0,
|
| 103 |
+
"w": 0.124183,
|
| 104 |
+
"h": 0.018939
|
| 105 |
+
}
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"field_id": "items",
|
| 109 |
+
"label": "Items / Line Items",
|
| 110 |
+
"type": "table",
|
| 111 |
+
"table_bbox_norm": {
|
| 112 |
+
"x": 0.153595,
|
| 113 |
+
"y": 0.339646,
|
| 114 |
+
"w": 0.620915,
|
| 115 |
+
"h": 0.180556
|
| 116 |
+
},
|
| 117 |
+
"header_bbox_norm": {
|
| 118 |
+
"x": 0.156863,
|
| 119 |
+
"y": 0.339646,
|
| 120 |
+
"w": 0.617647,
|
| 121 |
+
"h": 0.018939
|
| 122 |
+
},
|
| 123 |
+
"row_height_hint_norm": null,
|
| 124 |
+
"columns": [
|
| 125 |
+
{
|
| 126 |
+
"key": "item_number",
|
| 127 |
+
"label": "Item Number",
|
| 128 |
+
"bbox_rel_norm": {
|
| 129 |
+
"x": 0,
|
| 130 |
+
"y": 0.104895,
|
| 131 |
+
"w": 0.171053,
|
| 132 |
+
"h": 0.895105
|
| 133 |
+
}
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"key": "lot_number",
|
| 137 |
+
"label": "Lot Number",
|
| 138 |
+
"bbox_rel_norm": null
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"key": "description",
|
| 142 |
+
"label": "Description",
|
| 143 |
+
"bbox_rel_norm": {
|
| 144 |
+
"x": 0.171053,
|
| 145 |
+
"y": 0.111888,
|
| 146 |
+
"w": 0.323684,
|
| 147 |
+
"h": 0.888112
|
| 148 |
+
}
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"key": "qty",
|
| 152 |
+
"label": "Qty",
|
| 153 |
+
"bbox_rel_norm": {
|
| 154 |
+
"x": 0.644737,
|
| 155 |
+
"y": 0.104895,
|
| 156 |
+
"w": 0.047368,
|
| 157 |
+
"h": 0.895105
|
| 158 |
+
}
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"key": "price",
|
| 162 |
+
"label": "Price",
|
| 163 |
+
"bbox_rel_norm": null
|
| 164 |
+
}
|
| 165 |
+
],
|
| 166 |
+
"table_anchors": [
|
| 167 |
+
{
|
| 168 |
+
"key": "item_number",
|
| 169 |
+
"expected_text": "Item Number",
|
| 170 |
+
"bbox_norm": {
|
| 171 |
+
"x": 0.153595,
|
| 172 |
+
"y": 0.342172,
|
| 173 |
+
"w": 0.104575,
|
| 174 |
+
"h": 0.016414
|
| 175 |
+
}
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"key": "description",
|
| 179 |
+
"expected_text": "Description",
|
| 180 |
+
"bbox_norm": {
|
| 181 |
+
"x": 0.259804,
|
| 182 |
+
"y": 0.339646,
|
| 183 |
+
"w": 0.202614,
|
| 184 |
+
"h": 0.021465
|
| 185 |
+
}
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"key": "qty",
|
| 189 |
+
"expected_text": "Qty",
|
| 190 |
+
"bbox_norm": {
|
| 191 |
+
"x": 0.555556,
|
| 192 |
+
"y": 0.342172,
|
| 193 |
+
"w": 0.034314,
|
| 194 |
+
"h": 0.015152
|
| 195 |
+
}
|
| 196 |
+
}
|
| 197 |
+
],
|
| 198 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 199 |
+
}
|
| 200 |
+
],
|
| 201 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 202 |
+
}
|
| 203 |
+
}
|
backend/templates/T4_MEDICAL_ESTIMATION_OF_CHARGES.json
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"template_id": "T4_MEDICAL_ESTIMATION_OF_CHARGES",
|
| 3 |
+
"name": "Medical Estimation of Charges",
|
| 4 |
+
"status": "active",
|
| 5 |
+
"version": 2,
|
| 6 |
+
"match": {
|
| 7 |
+
"keywords_all": [],
|
| 8 |
+
"keywords_any": [
|
| 9 |
+
"estimation of charges",
|
| 10 |
+
"good faith estimate",
|
| 11 |
+
"patient responsibility",
|
| 12 |
+
"insurance"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
"schema": {
|
| 16 |
+
"form_id": "trainer_20c968bf41ac4b1c8ee12a9bb15b2bfb",
|
| 17 |
+
"version": 3,
|
| 18 |
+
"page": 1,
|
| 19 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 20 |
+
"fields": [
|
| 21 |
+
{
|
| 22 |
+
"field_id": "facility_organization",
|
| 23 |
+
"label": "Facility / Organization",
|
| 24 |
+
"type": "entity",
|
| 25 |
+
"anchor_bbox_norm": {
|
| 26 |
+
"x": 0.142157,
|
| 27 |
+
"y": 0.25,
|
| 28 |
+
"w": 0.042484,
|
| 29 |
+
"h": 0.015152
|
| 30 |
+
},
|
| 31 |
+
"value_bbox_norm": null,
|
| 32 |
+
"value_offset_norm": {
|
| 33 |
+
"dx": 0.068627,
|
| 34 |
+
"dy": -0.003788,
|
| 35 |
+
"w": 0.117647,
|
| 36 |
+
"h": 0.018939
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"field_id": "case_location",
|
| 41 |
+
"label": "Case Location / Address",
|
| 42 |
+
"type": "text",
|
| 43 |
+
"anchor_bbox_norm": {
|
| 44 |
+
"x": 0.143791,
|
| 45 |
+
"y": 0.271465,
|
| 46 |
+
"w": 0.047386,
|
| 47 |
+
"h": 0.017677
|
| 48 |
+
},
|
| 49 |
+
"value_bbox_norm": null,
|
| 50 |
+
"value_offset_norm": {
|
| 51 |
+
"dx": 0.071895,
|
| 52 |
+
"dy": -0.001263,
|
| 53 |
+
"w": 0.127451,
|
| 54 |
+
"h": 0.039141
|
| 55 |
+
}
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"field_id": "vendor",
|
| 59 |
+
"label": "Vendor",
|
| 60 |
+
"type": "entity",
|
| 61 |
+
"anchor_bbox_norm": null,
|
| 62 |
+
"value_bbox_norm": null,
|
| 63 |
+
"value_offset_norm": null
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"field_id": "physician_name",
|
| 67 |
+
"label": "Physician Name",
|
| 68 |
+
"type": "person",
|
| 69 |
+
"anchor_bbox_norm": null,
|
| 70 |
+
"value_bbox_norm": null,
|
| 71 |
+
"value_offset_norm": null
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"field_id": "date_of_surgery",
|
| 75 |
+
"label": "Date of Surgery",
|
| 76 |
+
"type": "date",
|
| 77 |
+
"anchor_bbox_norm": null,
|
| 78 |
+
"value_bbox_norm": null,
|
| 79 |
+
"value_offset_norm": null
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"field_id": "items",
|
| 83 |
+
"label": "Items / Line Items",
|
| 84 |
+
"type": "table",
|
| 85 |
+
"table_bbox_norm": {
|
| 86 |
+
"x": 0.143791,
|
| 87 |
+
"y": 0.409091,
|
| 88 |
+
"w": 0.676471,
|
| 89 |
+
"h": 0.132576
|
| 90 |
+
},
|
| 91 |
+
"header_bbox_norm": {
|
| 92 |
+
"x": 0.143791,
|
| 93 |
+
"y": 0.409091,
|
| 94 |
+
"w": 0.676471,
|
| 95 |
+
"h": 0.018939
|
| 96 |
+
},
|
| 97 |
+
"row_height_hint_norm": null,
|
| 98 |
+
"columns": [
|
| 99 |
+
{
|
| 100 |
+
"key": "item_number",
|
| 101 |
+
"label": "Item Number",
|
| 102 |
+
"bbox_rel_norm": {
|
| 103 |
+
"x": 0.717391,
|
| 104 |
+
"y": 0.114286,
|
| 105 |
+
"w": 0.089372,
|
| 106 |
+
"h": 0.857143
|
| 107 |
+
}
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"key": "lot_number",
|
| 111 |
+
"label": "Lot Number",
|
| 112 |
+
"bbox_rel_norm": null
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"key": "description",
|
| 116 |
+
"label": "Description",
|
| 117 |
+
"bbox_rel_norm": {
|
| 118 |
+
"x": 0.2657,
|
| 119 |
+
"y": 0.114286,
|
| 120 |
+
"w": 0.376812,
|
| 121 |
+
"h": 0.87619
|
| 122 |
+
}
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"key": "qty",
|
| 126 |
+
"label": "Qty",
|
| 127 |
+
"bbox_rel_norm": null
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"key": "price",
|
| 131 |
+
"label": "Price",
|
| 132 |
+
"bbox_rel_norm": null
|
| 133 |
+
}
|
| 134 |
+
],
|
| 135 |
+
"table_anchors": [
|
| 136 |
+
{
|
| 137 |
+
"key": "item_number",
|
| 138 |
+
"expected_text": "Item Number",
|
| 139 |
+
"bbox_norm": {
|
| 140 |
+
"x": 0.632353,
|
| 141 |
+
"y": 0.409091,
|
| 142 |
+
"w": 0.045752,
|
| 143 |
+
"h": 0.017677
|
| 144 |
+
}
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"key": "description",
|
| 148 |
+
"expected_text": "Description",
|
| 149 |
+
"bbox_norm": {
|
| 150 |
+
"x": 0.325163,
|
| 151 |
+
"y": 0.409091,
|
| 152 |
+
"w": 0.248366,
|
| 153 |
+
"h": 0.017677
|
| 154 |
+
}
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"key": "qty",
|
| 158 |
+
"expected_text": "Qty",
|
| 159 |
+
"bbox_norm": null
|
| 160 |
+
}
|
| 161 |
+
],
|
| 162 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 163 |
+
}
|
| 164 |
+
],
|
| 165 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 166 |
+
}
|
| 167 |
+
}
|
backend/templates/T5_CLINICAL_PROGRESS_NOTE_POSTOP.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"template_id": "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
|
| 3 |
+
"name": "Clinical Progress Note Postop",
|
| 4 |
+
"status": "active",
|
| 5 |
+
"version": 2,
|
| 6 |
+
"match": {
|
| 7 |
+
"keywords_all": [],
|
| 8 |
+
"keywords_any": [
|
| 9 |
+
"clinical progress note",
|
| 10 |
+
"progress note",
|
| 11 |
+
"post-op",
|
| 12 |
+
"assessment",
|
| 13 |
+
"plan"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
"schema": {
|
| 17 |
+
"form_id": "trainer_e75eb5b93bb54c28934f43cacc406cc8",
|
| 18 |
+
"version": 3,
|
| 19 |
+
"page": 1,
|
| 20 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 21 |
+
"fields": [
|
| 22 |
+
{
|
| 23 |
+
"field_id": "facility_organization",
|
| 24 |
+
"label": "Facility / Organization",
|
| 25 |
+
"type": "entity",
|
| 26 |
+
"anchor_bbox_norm": null,
|
| 27 |
+
"value_bbox_norm": null,
|
| 28 |
+
"value_offset_norm": null
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"field_id": "case_location",
|
| 32 |
+
"label": "Case Location / Address",
|
| 33 |
+
"type": "text",
|
| 34 |
+
"anchor_bbox_norm": null,
|
| 35 |
+
"value_bbox_norm": null,
|
| 36 |
+
"value_offset_norm": null
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"field_id": "vendor",
|
| 40 |
+
"label": "Vendor",
|
| 41 |
+
"type": "entity",
|
| 42 |
+
"anchor_bbox_norm": null,
|
| 43 |
+
"value_bbox_norm": null,
|
| 44 |
+
"value_offset_norm": null
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"field_id": "physician_name",
|
| 48 |
+
"label": "Physician Name",
|
| 49 |
+
"type": "person",
|
| 50 |
+
"anchor_bbox_norm": null,
|
| 51 |
+
"value_bbox_norm": null,
|
| 52 |
+
"value_offset_norm": null
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"field_id": "date_of_surgery",
|
| 56 |
+
"label": "Date of Surgery",
|
| 57 |
+
"type": "date",
|
| 58 |
+
"anchor_bbox_norm": null,
|
| 59 |
+
"value_bbox_norm": null,
|
| 60 |
+
"value_offset_norm": null
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"field_id": "items",
|
| 64 |
+
"label": "Items / Line Items",
|
| 65 |
+
"type": "table",
|
| 66 |
+
"table_bbox_norm": null,
|
| 67 |
+
"header_bbox_norm": null,
|
| 68 |
+
"row_height_hint_norm": null,
|
| 69 |
+
"columns": [
|
| 70 |
+
{
|
| 71 |
+
"key": "item_number",
|
| 72 |
+
"label": "Item Number",
|
| 73 |
+
"bbox_rel_norm": null
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"key": "lot_number",
|
| 77 |
+
"label": "Lot Number",
|
| 78 |
+
"bbox_rel_norm": null
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"key": "description",
|
| 82 |
+
"label": "Description",
|
| 83 |
+
"bbox_rel_norm": null
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"key": "qty",
|
| 87 |
+
"label": "Qty",
|
| 88 |
+
"bbox_rel_norm": null
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"key": "price",
|
| 92 |
+
"label": "Price",
|
| 93 |
+
"bbox_rel_norm": null
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"table_anchors": [
|
| 97 |
+
{
|
| 98 |
+
"key": "item_number",
|
| 99 |
+
"expected_text": "Item Number",
|
| 100 |
+
"bbox_norm": null
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"key": "description",
|
| 104 |
+
"expected_text": "Description",
|
| 105 |
+
"bbox_norm": null
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"key": "qty",
|
| 109 |
+
"expected_text": "Qty",
|
| 110 |
+
"bbox_norm": null
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 114 |
+
}
|
| 115 |
+
],
|
| 116 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 117 |
+
}
|
| 118 |
+
}
|
backend/templates/T6_CUSTOMER_CHARGE_SHEET_SPINE.json
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"template_id": "T6_CUSTOMER_CHARGE_SHEET_SPINE",
|
| 3 |
+
"name": "Customer Charge Sheet Spine",
|
| 4 |
+
"status": "active",
|
| 5 |
+
"version": 2,
|
| 6 |
+
"match": {
|
| 7 |
+
"keywords_all": [],
|
| 8 |
+
"keywords_any": [
|
| 9 |
+
"customer charge sheet",
|
| 10 |
+
"charge sheet",
|
| 11 |
+
"spine",
|
| 12 |
+
"qty",
|
| 13 |
+
"unit price",
|
| 14 |
+
"total"
|
| 15 |
+
]
|
| 16 |
+
},
|
| 17 |
+
"schema": {
|
| 18 |
+
"form_id": "trainer_6b04e85b60a9470588be4f7541029d71",
|
| 19 |
+
"version": 3,
|
| 20 |
+
"page": 1,
|
| 21 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 22 |
+
"fields": [
|
| 23 |
+
{
|
| 24 |
+
"field_id": "facility_organization",
|
| 25 |
+
"label": "Facility / Organization",
|
| 26 |
+
"type": "entity",
|
| 27 |
+
"anchor_bbox_norm": {
|
| 28 |
+
"x": 0.388386,
|
| 29 |
+
"y": 0.27195,
|
| 30 |
+
"w": 0.096782,
|
| 31 |
+
"h": 0.013598
|
| 32 |
+
},
|
| 33 |
+
"value_bbox_norm": null,
|
| 34 |
+
"value_offset_norm": {
|
| 35 |
+
"dx": 0,
|
| 36 |
+
"dy": 0.011655,
|
| 37 |
+
"w": 0.096782,
|
| 38 |
+
"h": 0.01554
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"field_id": "case_location",
|
| 43 |
+
"label": "Case Location / Address",
|
| 44 |
+
"type": "text",
|
| 45 |
+
"anchor_bbox_norm": {
|
| 46 |
+
"x": 0.483912,
|
| 47 |
+
"y": 0.297203,
|
| 48 |
+
"w": 0.13826,
|
| 49 |
+
"h": 0.011655
|
| 50 |
+
},
|
| 51 |
+
"value_bbox_norm": null,
|
| 52 |
+
"value_offset_norm": {
|
| 53 |
+
"dx": 0.005028,
|
| 54 |
+
"dy": 0.00777,
|
| 55 |
+
"w": 0.124434,
|
| 56 |
+
"h": 0.035936
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"field_id": "vendor",
|
| 61 |
+
"label": "Vendor",
|
| 62 |
+
"type": "entity",
|
| 63 |
+
"anchor_bbox_norm": {
|
| 64 |
+
"x": 0.618401,
|
| 65 |
+
"y": 0.190365,
|
| 66 |
+
"w": 0.137004,
|
| 67 |
+
"h": 0.047591
|
| 68 |
+
},
|
| 69 |
+
"value_bbox_norm": null,
|
| 70 |
+
"value_offset_norm": null
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"field_id": "physician_name",
|
| 74 |
+
"label": "Physician Name",
|
| 75 |
+
"type": "person",
|
| 76 |
+
"anchor_bbox_norm": {
|
| 77 |
+
"x": 0.218703,
|
| 78 |
+
"y": 0.296232,
|
| 79 |
+
"w": 0.042735,
|
| 80 |
+
"h": 0.019425
|
| 81 |
+
},
|
| 82 |
+
"value_bbox_norm": null,
|
| 83 |
+
"value_offset_norm": {
|
| 84 |
+
"dx": 0.042735,
|
| 85 |
+
"dy": 0,
|
| 86 |
+
"w": 0.124434,
|
| 87 |
+
"h": 0.020396
|
| 88 |
+
}
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"field_id": "date_of_surgery",
|
| 92 |
+
"label": "Date of Surgery",
|
| 93 |
+
"type": "date",
|
| 94 |
+
"anchor_bbox_norm": {
|
| 95 |
+
"x": 0.221217,
|
| 96 |
+
"y": 0.308858,
|
| 97 |
+
"w": 0.081699,
|
| 98 |
+
"h": 0.018454
|
| 99 |
+
},
|
| 100 |
+
"value_bbox_norm": null,
|
| 101 |
+
"value_offset_norm": {
|
| 102 |
+
"dx": 0.084213,
|
| 103 |
+
"dy": 0.001943,
|
| 104 |
+
"w": 0.08547,
|
| 105 |
+
"h": 0.018454
|
| 106 |
+
}
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"field_id": "items",
|
| 110 |
+
"label": "Items / Line Items",
|
| 111 |
+
"type": "table",
|
| 112 |
+
"table_bbox_norm": {
|
| 113 |
+
"x": 0.224987,
|
| 114 |
+
"y": 0.373932,
|
| 115 |
+
"w": 0.549271,
|
| 116 |
+
"h": 0.305944
|
| 117 |
+
},
|
| 118 |
+
"header_bbox_norm": {
|
| 119 |
+
"x": 0.226244,
|
| 120 |
+
"y": 0.373932,
|
| 121 |
+
"w": 0.548014,
|
| 122 |
+
"h": 0.012626
|
| 123 |
+
},
|
| 124 |
+
"row_height_hint_norm": null,
|
| 125 |
+
"columns": [
|
| 126 |
+
{
|
| 127 |
+
"key": "item_number",
|
| 128 |
+
"label": "Item Number",
|
| 129 |
+
"bbox_rel_norm": {
|
| 130 |
+
"x": 0,
|
| 131 |
+
"y": 0.050794,
|
| 132 |
+
"w": 0.144165,
|
| 133 |
+
"h": 0.949206
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"key": "lot_number",
|
| 138 |
+
"label": "Lot Number",
|
| 139 |
+
"bbox_rel_norm": null
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"key": "description",
|
| 143 |
+
"label": "Description",
|
| 144 |
+
"bbox_rel_norm": {
|
| 145 |
+
"x": 0.15103,
|
| 146 |
+
"y": 0.057143,
|
| 147 |
+
"w": 0.157895,
|
| 148 |
+
"h": 0.942857
|
| 149 |
+
}
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"key": "qty",
|
| 153 |
+
"label": "Qty",
|
| 154 |
+
"bbox_rel_norm": {
|
| 155 |
+
"x": 0.414188,
|
| 156 |
+
"y": 0.044444,
|
| 157 |
+
"w": 0.059497,
|
| 158 |
+
"h": 0.952381
|
| 159 |
+
}
|
| 160 |
+
},
|
| 161 |
+
{
|
| 162 |
+
"key": "price",
|
| 163 |
+
"label": "Price",
|
| 164 |
+
"bbox_rel_norm": null
|
| 165 |
+
}
|
| 166 |
+
],
|
| 167 |
+
"table_anchors": [
|
| 168 |
+
{
|
| 169 |
+
"key": "item_number",
|
| 170 |
+
"expected_text": "Item Number",
|
| 171 |
+
"bbox_norm": {
|
| 172 |
+
"x": 0.224987,
|
| 173 |
+
"y": 0.373932,
|
| 174 |
+
"w": 0.080442,
|
| 175 |
+
"h": 0.016511
|
| 176 |
+
}
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"key": "description",
|
| 180 |
+
"expected_text": "Description",
|
| 181 |
+
"bbox_norm": {
|
| 182 |
+
"x": 0.306687,
|
| 183 |
+
"y": 0.373932,
|
| 184 |
+
"w": 0.081699,
|
| 185 |
+
"h": 0.019425
|
| 186 |
+
}
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"key": "qty",
|
| 190 |
+
"expected_text": "Qty",
|
| 191 |
+
"bbox_norm": {
|
| 192 |
+
"x": 0.453746,
|
| 193 |
+
"y": 0.376845,
|
| 194 |
+
"w": 0.030166,
|
| 195 |
+
"h": 0.013598
|
| 196 |
+
}
|
| 197 |
+
}
|
| 198 |
+
],
|
| 199 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 200 |
+
}
|
| 201 |
+
],
|
| 202 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 203 |
+
}
|
| 204 |
+
}
|
backend/templates/T7_SALES_ORDER_ZIMMER.json
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"template_id": "T7_SALES_ORDER_ZIMMER",
|
| 3 |
+
"name": "Zimmer Sales Order",
|
| 4 |
+
"status": "active",
|
| 5 |
+
"version": 2,
|
| 6 |
+
"match": {
|
| 7 |
+
"keywords_all": [],
|
| 8 |
+
"keywords_any": [
|
| 9 |
+
"zimmer",
|
| 10 |
+
"zimmer biomet",
|
| 11 |
+
"biomet",
|
| 12 |
+
"sales order",
|
| 13 |
+
"purchase order",
|
| 14 |
+
"po number"
|
| 15 |
+
]
|
| 16 |
+
},
|
| 17 |
+
"schema": {
|
| 18 |
+
"form_id": "trainer_2a12b374e66646689308af1beea88933",
|
| 19 |
+
"version": 3,
|
| 20 |
+
"page": 1,
|
| 21 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 22 |
+
"fields": [
|
| 23 |
+
{
|
| 24 |
+
"field_id": "facility_organization",
|
| 25 |
+
"label": "Facility / Organization",
|
| 26 |
+
"type": "entity",
|
| 27 |
+
"anchor_bbox_norm": {
|
| 28 |
+
"x": 0.292484,
|
| 29 |
+
"y": 0.183081,
|
| 30 |
+
"w": 0.01634,
|
| 31 |
+
"h": 0.045455
|
| 32 |
+
},
|
| 33 |
+
"value_bbox_norm": null,
|
| 34 |
+
"value_offset_norm": {
|
| 35 |
+
"dx": -0.003268,
|
| 36 |
+
"dy": 0.045455,
|
| 37 |
+
"w": 0.017974,
|
| 38 |
+
"h": 0.162879
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"field_id": "case_location",
|
| 43 |
+
"label": "Case Location / Address",
|
| 44 |
+
"type": "text",
|
| 45 |
+
"anchor_bbox_norm": {
|
| 46 |
+
"x": 0.271242,
|
| 47 |
+
"y": 0.14899,
|
| 48 |
+
"w": 0.013072,
|
| 49 |
+
"h": 0.080808
|
| 50 |
+
},
|
| 51 |
+
"value_bbox_norm": null,
|
| 52 |
+
"value_offset_norm": {
|
| 53 |
+
"dx": 0,
|
| 54 |
+
"dy": 0.079545,
|
| 55 |
+
"w": 0.017974,
|
| 56 |
+
"h": 0.165404
|
| 57 |
+
}
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"field_id": "vendor",
|
| 61 |
+
"label": "Vendor",
|
| 62 |
+
"type": "entity",
|
| 63 |
+
"anchor_bbox_norm": {
|
| 64 |
+
"x": 0.785948,
|
| 65 |
+
"y": 0.147727,
|
| 66 |
+
"w": 0.027778,
|
| 67 |
+
"h": 0.151515
|
| 68 |
+
},
|
| 69 |
+
"value_bbox_norm": null,
|
| 70 |
+
"value_offset_norm": null
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"field_id": "physician_name",
|
| 74 |
+
"label": "Physician Name",
|
| 75 |
+
"type": "person",
|
| 76 |
+
"anchor_bbox_norm": {
|
| 77 |
+
"x": 0.248366,
|
| 78 |
+
"y": 0.145202,
|
| 79 |
+
"w": 0.022876,
|
| 80 |
+
"h": 0.084596
|
| 81 |
+
},
|
| 82 |
+
"value_bbox_norm": null,
|
| 83 |
+
"value_offset_norm": {
|
| 84 |
+
"dx": 0.003268,
|
| 85 |
+
"dy": 0.084596,
|
| 86 |
+
"w": 0.02451,
|
| 87 |
+
"h": 0.165404
|
| 88 |
+
}
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"field_id": "date_of_surgery",
|
| 92 |
+
"label": "Date of Surgery",
|
| 93 |
+
"type": "date",
|
| 94 |
+
"anchor_bbox_norm": {
|
| 95 |
+
"x": 0.21732,
|
| 96 |
+
"y": 0.156566,
|
| 97 |
+
"w": 0.013072,
|
| 98 |
+
"h": 0.074495
|
| 99 |
+
},
|
| 100 |
+
"value_bbox_norm": null,
|
| 101 |
+
"value_offset_norm": {
|
| 102 |
+
"dx": -0.006536,
|
| 103 |
+
"dy": 0.073232,
|
| 104 |
+
"w": 0.027778,
|
| 105 |
+
"h": 0.167929
|
| 106 |
+
}
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"field_id": "items",
|
| 110 |
+
"label": "Items / Line Items",
|
| 111 |
+
"type": "table",
|
| 112 |
+
"table_bbox_norm": {
|
| 113 |
+
"x": 0.473856,
|
| 114 |
+
"y": 0.109848,
|
| 115 |
+
"w": 0.256536,
|
| 116 |
+
"h": 0.707071
|
| 117 |
+
},
|
| 118 |
+
"header_bbox_norm": {
|
| 119 |
+
"x": 0.707516,
|
| 120 |
+
"y": 0.109848,
|
| 121 |
+
"w": 0.021242,
|
| 122 |
+
"h": 0.707071
|
| 123 |
+
},
|
| 124 |
+
"row_height_hint_norm": null,
|
| 125 |
+
"columns": [
|
| 126 |
+
{
|
| 127 |
+
"key": "item_number",
|
| 128 |
+
"label": "Item Number",
|
| 129 |
+
"bbox_rel_norm": null
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"key": "lot_number",
|
| 133 |
+
"label": "Lot Number",
|
| 134 |
+
"bbox_rel_norm": null
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"key": "description",
|
| 138 |
+
"label": "Description",
|
| 139 |
+
"bbox_rel_norm": null
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"key": "qty",
|
| 143 |
+
"label": "Qty",
|
| 144 |
+
"bbox_rel_norm": null
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"key": "price",
|
| 148 |
+
"label": "Price",
|
| 149 |
+
"bbox_rel_norm": null
|
| 150 |
+
}
|
| 151 |
+
],
|
| 152 |
+
"table_anchors": [
|
| 153 |
+
{
|
| 154 |
+
"key": "item_number",
|
| 155 |
+
"expected_text": "Item Number",
|
| 156 |
+
"bbox_norm": null
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"key": "description",
|
| 160 |
+
"expected_text": "Description",
|
| 161 |
+
"bbox_norm": null
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"key": "qty",
|
| 165 |
+
"expected_text": "Qty",
|
| 166 |
+
"bbox_norm": null
|
| 167 |
+
}
|
| 168 |
+
],
|
| 169 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 170 |
+
}
|
| 171 |
+
],
|
| 172 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 173 |
+
}
|
| 174 |
+
}
|
backend/trainer_schemas/T1_IFACTOR_DELIVERED_ORDER.schema.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"form_id": "trainer_2f7cdbc443f040c79723c74490f6282f",
|
| 3 |
+
"version": 3,
|
| 4 |
+
"page": 1,
|
| 5 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 6 |
+
"fields": [
|
| 7 |
+
{
|
| 8 |
+
"field_id": "facility_organization",
|
| 9 |
+
"label": "Facility / Organization",
|
| 10 |
+
"type": "entity",
|
| 11 |
+
"anchor_bbox_norm": { "x": 0.138889, "y": 0.328283, "w": 0.047386, "h": 0.027778 },
|
| 12 |
+
"value_bbox_norm": null,
|
| 13 |
+
"value_offset_norm": { "dx": 0.052288, "dy": -0.001263, "w": 0.294118, "h": 0.045455 }
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"field_id": "case_location",
|
| 17 |
+
"label": "Case Location / Address",
|
| 18 |
+
"type": "text",
|
| 19 |
+
"anchor_bbox_norm": { "x": 0.140523, "y": 0.353535, "w": 0.055556, "h": 0.02399 },
|
| 20 |
+
"value_bbox_norm": null,
|
| 21 |
+
"value_offset_norm": { "dx": 0.062092, "dy": 0.005051, "w": 0.292484, "h": 0.056818 }
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"field_id": "vendor",
|
| 25 |
+
"label": "Vendor",
|
| 26 |
+
"type": "entity",
|
| 27 |
+
"anchor_bbox_norm": { "x": 0.215686, "y": 0.170455, "w": 0.205882, "h": 0.059343 },
|
| 28 |
+
"value_bbox_norm": null,
|
| 29 |
+
"value_offset_norm": null
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"field_id": "physician_name",
|
| 33 |
+
"label": "Physician Name",
|
| 34 |
+
"type": "person",
|
| 35 |
+
"anchor_bbox_norm": { "x": 0.522876, "y": 0.497475, "w": 0.062092, "h": 0.020202 },
|
| 36 |
+
"value_bbox_norm": null,
|
| 37 |
+
"value_offset_norm": { "dx": 0.060458, "dy": -0.005051, "w": 0.214052, "h": 0.025253 }
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"field_id": "date_of_surgery",
|
| 41 |
+
"label": "Date of Surgery",
|
| 42 |
+
"type": "date",
|
| 43 |
+
"anchor_bbox_norm": { "x": 0.138889, "y": 0.57197, "w": 0.160131, "h": 0.026515 },
|
| 44 |
+
"value_bbox_norm": null,
|
| 45 |
+
"value_offset_norm": { "dx": 0.165033, "dy": -0.002525, "w": 0.205882, "h": 0.02399 }
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"field_id": "items",
|
| 49 |
+
"label": "Items / Line Items",
|
| 50 |
+
"type": "table",
|
| 51 |
+
"table_bbox_norm": { "x": 0.138889, "y": 0.632576, "w": 0.732026, "h": 0.122475 },
|
| 52 |
+
"header_bbox_norm": { "x": 0.142157, "y": 0.632576, "w": 0.727124, "h": 0.034091 },
|
| 53 |
+
"row_height_hint_norm": null,
|
| 54 |
+
"columns": [
|
| 55 |
+
{ "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.004464, "y": 0.28866, "w": 0.196429, "h": 0.701031 } },
|
| 56 |
+
{ "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
|
| 57 |
+
{ "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.209821, "y": 0.278351, "w": 0.241071, "h": 0.639175 } },
|
| 58 |
+
{ "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.647321, "y": 0.247423, "w": 0.058036, "h": 0.71134 } },
|
| 59 |
+
{ "key": "price", "label": "Price", "bbox_rel_norm": null }
|
| 60 |
+
],
|
| 61 |
+
"table_anchors": [
|
| 62 |
+
{ "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.140523, "y": 0.652778, "w": 0.145425, "h": 0.016414 } },
|
| 63 |
+
{ "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.287582, "y": 0.650253, "w": 0.181373, "h": 0.018939 } },
|
| 64 |
+
{ "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.614379, "y": 0.647727, "w": 0.047386, "h": 0.016414 } }
|
| 65 |
+
],
|
| 66 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 67 |
+
}
|
| 68 |
+
],
|
| 69 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 70 |
+
}
|
backend/trainer_schemas/T2_SEASPINE_DELIVERED_GOODS_FORM.schema.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"form_id": "trainer_245e70e31b1f4eb1b26fad626365e9ad",
|
| 3 |
+
"version": 3,
|
| 4 |
+
"page": 1,
|
| 5 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 6 |
+
"fields": [
|
| 7 |
+
{
|
| 8 |
+
"field_id": "facility_organization",
|
| 9 |
+
"label": "Facility / Organization",
|
| 10 |
+
"type": "entity",
|
| 11 |
+
"anchor_bbox_norm": { "x": 0.179739, "y": 0.284091, "w": 0.04085, "h": 0.020202 },
|
| 12 |
+
"value_bbox_norm": null,
|
| 13 |
+
"value_offset_norm": { "dx": 0.044118, "dy": -0.002525, "w": 0.246732, "h": 0.021465 }
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"field_id": "case_location",
|
| 17 |
+
"label": "Case Location / Address",
|
| 18 |
+
"type": "text",
|
| 19 |
+
"anchor_bbox_norm": { "x": 0.181373, "y": 0.310606, "w": 0.135621, "h": 0.016414 },
|
| 20 |
+
"value_bbox_norm": null,
|
| 21 |
+
"value_offset_norm": { "dx": 0.001634, "dy": 0.013889, "w": 0.295752, "h": 0.027778 }
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"field_id": "vendor",
|
| 25 |
+
"label": "Vendor",
|
| 26 |
+
"type": "entity",
|
| 27 |
+
"anchor_bbox_norm": { "x": 0.606209, "y": 0.152778, "w": 0.173203, "h": 0.068182 },
|
| 28 |
+
"value_bbox_norm": null,
|
| 29 |
+
"value_offset_norm": null
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"field_id": "physician_name",
|
| 33 |
+
"label": "Physician Name",
|
| 34 |
+
"type": "person",
|
| 35 |
+
"anchor_bbox_norm": { "x": 0.179739, "y": 0.508838, "w": 0.104575, "h": 0.016414 },
|
| 36 |
+
"value_bbox_norm": null,
|
| 37 |
+
"value_offset_norm": { "dx": 0.106209, "dy": -0.001263, "w": 0.372549, "h": 0.015152 }
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"field_id": "date_of_surgery",
|
| 41 |
+
"label": "Date of Surgery",
|
| 42 |
+
"type": "date",
|
| 43 |
+
"anchor_bbox_norm": { "x": 0.179739, "y": 0.521465, "w": 0.081699, "h": 0.021465 },
|
| 44 |
+
"value_bbox_norm": null,
|
| 45 |
+
"value_offset_norm": { "dx": 0.083333, "dy": 0.005051, "w": 0.068627, "h": 0.015152 }
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"field_id": "items",
|
| 49 |
+
"label": "Items / Line Items",
|
| 50 |
+
"type": "table",
|
| 51 |
+
"table_bbox_norm": { "x": 0.178105, "y": 0.388889, "w": 0.609477, "h": 0.118687 },
|
| 52 |
+
"header_bbox_norm": { "x": 0.178105, "y": 0.390152, "w": 0.609477, "h": 0.02399 },
|
| 53 |
+
"row_height_hint_norm": null,
|
| 54 |
+
"columns": [
|
| 55 |
+
{ "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.718499, "y": 0.170213, "w": 0.072386, "h": 0.797872 } },
|
| 56 |
+
{ "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": { "x": 0.168901, "y": 0.223404, "w": 0.171582, "h": 0.776596 } },
|
| 57 |
+
{ "key": "description", "label": "Description", "bbox_rel_norm": null },
|
| 58 |
+
{ "key": "qty", "label": "Qty", "bbox_rel_norm": null },
|
| 59 |
+
{ "key": "price", "label": "Price", "bbox_rel_norm": null }
|
| 60 |
+
],
|
| 61 |
+
"table_anchors": [
|
| 62 |
+
{ "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.178105, "y": 0.388889, "w": 0.101307, "h": 0.02399 } },
|
| 63 |
+
{ "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.488562, "y": 0.388889, "w": 0.129085, "h": 0.025253 } },
|
| 64 |
+
{ "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.617647, "y": 0.388889, "w": 0.045752, "h": 0.02399 } }
|
| 65 |
+
],
|
| 66 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 67 |
+
}
|
| 68 |
+
],
|
| 69 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 70 |
+
}
|
backend/trainer_schemas/T3_ASTURA_SALES_ORDER_FORM.schema.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"form_id": "trainer_b931186e13eb45d2a9a1ded8ff8641bb",
|
| 3 |
+
"version": 3,
|
| 4 |
+
"page": 1,
|
| 5 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 6 |
+
"fields": [
|
| 7 |
+
{
|
| 8 |
+
"field_id": "facility_organization",
|
| 9 |
+
"label": "Facility / Organization",
|
| 10 |
+
"type": "entity",
|
| 11 |
+
"anchor_bbox_norm": { "x": 0.156863, "y": 0.194444, "w": 0.053922, "h": 0.012626 },
|
| 12 |
+
"value_bbox_norm": null,
|
| 13 |
+
"value_offset_norm": { "dx": 0.076797, "dy": -0.002525, "w": 0.205882, "h": 0.021465 }
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"field_id": "case_location",
|
| 17 |
+
"label": "Case Location / Address",
|
| 18 |
+
"type": "text",
|
| 19 |
+
"anchor_bbox_norm": { "x": 0.155229, "y": 0.224747, "w": 0.05719, "h": 0.016414 },
|
| 20 |
+
"value_bbox_norm": null,
|
| 21 |
+
"value_offset_norm": { "dx": 0.075163, "dy": 0, "w": 0.212418, "h": 0.034091 }
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"field_id": "vendor",
|
| 25 |
+
"label": "Vendor",
|
| 26 |
+
"type": "entity",
|
| 27 |
+
"anchor_bbox_norm": { "x": 0.160131, "y": 0.117424, "w": 0.098039, "h": 0.064394 },
|
| 28 |
+
"value_bbox_norm": null,
|
| 29 |
+
"value_offset_norm": null
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"field_id": "physician_name",
|
| 33 |
+
"label": "Physician Name",
|
| 34 |
+
"type": "person",
|
| 35 |
+
"anchor_bbox_norm": { "x": 0.158497, "y": 0.289141, "w": 0.062092, "h": 0.013889 },
|
| 36 |
+
"value_bbox_norm": null,
|
| 37 |
+
"value_offset_norm": { "dx": 0.068627, "dy": -0.002525, "w": 0.212418, "h": 0.022727 }
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"field_id": "date_of_surgery",
|
| 41 |
+
"label": "Date of Surgery",
|
| 42 |
+
"type": "date",
|
| 43 |
+
"anchor_bbox_norm": { "x": 0.160131, "y": 0.256313, "w": 0.053922, "h": 0.016414 },
|
| 44 |
+
"value_bbox_norm": null,
|
| 45 |
+
"value_offset_norm": { "dx": 0.071895, "dy": 0, "w": 0.124183, "h": 0.018939 }
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"field_id": "items",
|
| 49 |
+
"label": "Items / Line Items",
|
| 50 |
+
"type": "table",
|
| 51 |
+
"table_bbox_norm": { "x": 0.153595, "y": 0.339646, "w": 0.620915, "h": 0.180556 },
|
| 52 |
+
"header_bbox_norm": { "x": 0.156863, "y": 0.339646, "w": 0.617647, "h": 0.018939 },
|
| 53 |
+
"row_height_hint_norm": null,
|
| 54 |
+
"columns": [
|
| 55 |
+
{ "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0, "y": 0.104895, "w": 0.171053, "h": 0.895105 } },
|
| 56 |
+
{ "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
|
| 57 |
+
{ "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.171053, "y": 0.111888, "w": 0.323684, "h": 0.888112 } },
|
| 58 |
+
{ "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.644737, "y": 0.104895, "w": 0.047368, "h": 0.895105 } },
|
| 59 |
+
{ "key": "price", "label": "Price", "bbox_rel_norm": null }
|
| 60 |
+
],
|
| 61 |
+
"table_anchors": [
|
| 62 |
+
{ "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.153595, "y": 0.342172, "w": 0.104575, "h": 0.016414 } },
|
| 63 |
+
{ "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.259804, "y": 0.339646, "w": 0.202614, "h": 0.021465 } },
|
| 64 |
+
{ "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.555556, "y": 0.342172, "w": 0.034314, "h": 0.015152 } }
|
| 65 |
+
],
|
| 66 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 67 |
+
}
|
| 68 |
+
],
|
| 69 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 70 |
+
}
|
backend/trainer_schemas/T4_MEDICAL_ESTIMATION_OF_CHARGES.schema.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"form_id": "trainer_20c968bf41ac4b1c8ee12a9bb15b2bfb",
|
| 3 |
+
"version": 3,
|
| 4 |
+
"page": 1,
|
| 5 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 6 |
+
"fields": [
|
| 7 |
+
{
|
| 8 |
+
"field_id": "facility_organization",
|
| 9 |
+
"label": "Facility / Organization",
|
| 10 |
+
"type": "entity",
|
| 11 |
+
"anchor_bbox_norm": { "x": 0.142157, "y": 0.25, "w": 0.042484, "h": 0.015152 },
|
| 12 |
+
"value_bbox_norm": null,
|
| 13 |
+
"value_offset_norm": { "dx": 0.068627, "dy": -0.003788, "w": 0.117647, "h": 0.018939 }
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"field_id": "case_location",
|
| 17 |
+
"label": "Case Location / Address",
|
| 18 |
+
"type": "text",
|
| 19 |
+
"anchor_bbox_norm": { "x": 0.143791, "y": 0.271465, "w": 0.047386, "h": 0.017677 },
|
| 20 |
+
"value_bbox_norm": null,
|
| 21 |
+
"value_offset_norm": { "dx": 0.071895, "dy": -0.001263, "w": 0.127451, "h": 0.039141 }
|
| 22 |
+
},
|
| 23 |
+
{ "field_id": "vendor", "label": "Vendor", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
|
| 24 |
+
{ "field_id": "physician_name", "label": "Physician Name", "type": "person", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
|
| 25 |
+
{ "field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
|
| 26 |
+
{
|
| 27 |
+
"field_id": "items",
|
| 28 |
+
"label": "Items / Line Items",
|
| 29 |
+
"type": "table",
|
| 30 |
+
"table_bbox_norm": { "x": 0.143791, "y": 0.409091, "w": 0.676471, "h": 0.132576 },
|
| 31 |
+
"header_bbox_norm": { "x": 0.143791, "y": 0.409091, "w": 0.676471, "h": 0.018939 },
|
| 32 |
+
"row_height_hint_norm": null,
|
| 33 |
+
"columns": [
|
| 34 |
+
{ "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.717391, "y": 0.114286, "w": 0.089372, "h": 0.857143 } },
|
| 35 |
+
{ "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
|
| 36 |
+
{ "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.2657, "y": 0.114286, "w": 0.376812, "h": 0.87619 } },
|
| 37 |
+
{ "key": "qty", "label": "Qty", "bbox_rel_norm": null },
|
| 38 |
+
{ "key": "price", "label": "Price", "bbox_rel_norm": null }
|
| 39 |
+
],
|
| 40 |
+
"table_anchors": [
|
| 41 |
+
{ "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.632353, "y": 0.409091, "w": 0.045752, "h": 0.017677 } },
|
| 42 |
+
{ "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.325163, "y": 0.409091, "w": 0.248366, "h": 0.017677 } },
|
| 43 |
+
{ "key": "qty", "expected_text": "Qty", "bbox_norm": null }
|
| 44 |
+
],
|
| 45 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 46 |
+
}
|
| 47 |
+
],
|
| 48 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 49 |
+
}
|
backend/trainer_schemas/T5_CLINICAL_PROGRESS_NOTE_POSTOP.schema.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"form_id": "trainer_e75eb5b93bb54c28934f43cacc406cc8",
|
| 3 |
+
"version": 3,
|
| 4 |
+
"page": 1,
|
| 5 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 6 |
+
"fields": [
|
| 7 |
+
{ "field_id": "facility_organization", "label": "Facility / Organization", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
|
| 8 |
+
{ "field_id": "case_location", "label": "Case Location / Address", "type": "text", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
|
| 9 |
+
{ "field_id": "vendor", "label": "Vendor", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
|
| 10 |
+
{ "field_id": "physician_name", "label": "Physician Name", "type": "person", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
|
| 11 |
+
{ "field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
|
| 12 |
+
{
|
| 13 |
+
"field_id": "items",
|
| 14 |
+
"label": "Items / Line Items",
|
| 15 |
+
"type": "table",
|
| 16 |
+
"table_bbox_norm": null,
|
| 17 |
+
"header_bbox_norm": null,
|
| 18 |
+
"row_height_hint_norm": null,
|
| 19 |
+
"columns": [
|
| 20 |
+
{ "key": "item_number", "label": "Item Number", "bbox_rel_norm": null },
|
| 21 |
+
{ "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
|
| 22 |
+
{ "key": "description", "label": "Description", "bbox_rel_norm": null },
|
| 23 |
+
{ "key": "qty", "label": "Qty", "bbox_rel_norm": null },
|
| 24 |
+
{ "key": "price", "label": "Price", "bbox_rel_norm": null }
|
| 25 |
+
],
|
| 26 |
+
"table_anchors": [
|
| 27 |
+
{ "key": "item_number", "expected_text": "Item Number", "bbox_norm": null },
|
| 28 |
+
{ "key": "description", "expected_text": "Description", "bbox_norm": null },
|
| 29 |
+
{ "key": "qty", "expected_text": "Qty", "bbox_norm": null }
|
| 30 |
+
],
|
| 31 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 32 |
+
}
|
| 33 |
+
],
|
| 34 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 35 |
+
}
|
backend/trainer_schemas/T6_CUSTOMER_CHARGE_SHEET_SPINE.schema.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"form_id": "trainer_6b04e85b60a9470588be4f7541029d71",
|
| 3 |
+
"version": 3,
|
| 4 |
+
"page": 1,
|
| 5 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 6 |
+
"fields": [
|
| 7 |
+
{
|
| 8 |
+
"field_id": "facility_organization",
|
| 9 |
+
"label": "Facility / Organization",
|
| 10 |
+
"type": "entity",
|
| 11 |
+
"anchor_bbox_norm": { "x": 0.388386, "y": 0.27195, "w": 0.096782, "h": 0.013598 },
|
| 12 |
+
"value_bbox_norm": null,
|
| 13 |
+
"value_offset_norm": { "dx": 0, "dy": 0.011655, "w": 0.096782, "h": 0.01554 }
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"field_id": "case_location",
|
| 17 |
+
"label": "Case Location / Address",
|
| 18 |
+
"type": "text",
|
| 19 |
+
"anchor_bbox_norm": { "x": 0.483912, "y": 0.297203, "w": 0.13826, "h": 0.011655 },
|
| 20 |
+
"value_bbox_norm": null,
|
| 21 |
+
"value_offset_norm": { "dx": 0.005028, "dy": 0.00777, "w": 0.124434, "h": 0.035936 }
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"field_id": "vendor",
|
| 25 |
+
"label": "Vendor",
|
| 26 |
+
"type": "entity",
|
| 27 |
+
"anchor_bbox_norm": { "x": 0.618401, "y": 0.190365, "w": 0.137004, "h": 0.047591 },
|
| 28 |
+
"value_bbox_norm": null,
|
| 29 |
+
"value_offset_norm": null
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"field_id": "physician_name",
|
| 33 |
+
"label": "Physician Name",
|
| 34 |
+
"type": "person",
|
| 35 |
+
"anchor_bbox_norm": { "x": 0.218703, "y": 0.296232, "w": 0.042735, "h": 0.019425 },
|
| 36 |
+
"value_bbox_norm": null,
|
| 37 |
+
"value_offset_norm": { "dx": 0.042735, "dy": 0, "w": 0.124434, "h": 0.020396 }
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"field_id": "date_of_surgery",
|
| 41 |
+
"label": "Date of Surgery",
|
| 42 |
+
"type": "date",
|
| 43 |
+
"anchor_bbox_norm": { "x": 0.221217, "y": 0.308858, "w": 0.081699, "h": 0.018454 },
|
| 44 |
+
"value_bbox_norm": null,
|
| 45 |
+
"value_offset_norm": { "dx": 0.084213, "dy": 0.001943, "w": 0.08547, "h": 0.018454 }
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"field_id": "items",
|
| 49 |
+
"label": "Items / Line Items",
|
| 50 |
+
"type": "table",
|
| 51 |
+
"table_bbox_norm": { "x": 0.224987, "y": 0.373932, "w": 0.549271, "h": 0.305944 },
|
| 52 |
+
"header_bbox_norm": { "x": 0.226244, "y": 0.373932, "w": 0.548014, "h": 0.012626 },
|
| 53 |
+
"row_height_hint_norm": null,
|
| 54 |
+
"columns": [
|
| 55 |
+
{ "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0, "y": 0.050794, "w": 0.144165, "h": 0.949206 } },
|
| 56 |
+
{ "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
|
| 57 |
+
{ "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.15103, "y": 0.057143, "w": 0.157895, "h": 0.942857 } },
|
| 58 |
+
{ "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.414188, "y": 0.044444, "w": 0.059497, "h": 0.952381 } },
|
| 59 |
+
{ "key": "price", "label": "Price", "bbox_rel_norm": null }
|
| 60 |
+
],
|
| 61 |
+
"table_anchors": [
|
| 62 |
+
{ "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.224987, "y": 0.373932, "w": 0.080442, "h": 0.016511 } },
|
| 63 |
+
{ "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.306687, "y": 0.373932, "w": 0.081699, "h": 0.019425 } },
|
| 64 |
+
{ "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.453746, "y": 0.376845, "w": 0.030166, "h": 0.013598 } }
|
| 65 |
+
],
|
| 66 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 67 |
+
}
|
| 68 |
+
],
|
| 69 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 70 |
+
}
|
backend/trainer_schemas/T7_SALES_ORDER_ZIMMER.schema.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"form_id": "trainer_2a12b374e66646689308af1beea88933",
|
| 3 |
+
"version": 3,
|
| 4 |
+
"page": 1,
|
| 5 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 6 |
+
"fields": [
|
| 7 |
+
{
|
| 8 |
+
"field_id": "facility_organization",
|
| 9 |
+
"label": "Facility / Organization",
|
| 10 |
+
"type": "entity",
|
| 11 |
+
"anchor_bbox_norm": { "x": 0.292484, "y": 0.183081, "w": 0.01634, "h": 0.045455 },
|
| 12 |
+
"value_bbox_norm": null,
|
| 13 |
+
"value_offset_norm": { "dx": -0.003268, "dy": 0.045455, "w": 0.017974, "h": 0.162879 }
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"field_id": "case_location",
|
| 17 |
+
"label": "Case Location / Address",
|
| 18 |
+
"type": "text",
|
| 19 |
+
"anchor_bbox_norm": { "x": 0.271242, "y": 0.14899, "w": 0.013072, "h": 0.080808 },
|
| 20 |
+
"value_bbox_norm": null,
|
| 21 |
+
"value_offset_norm": { "dx": 0, "dy": 0.079545, "w": 0.017974, "h": 0.165404 }
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"field_id": "vendor",
|
| 25 |
+
"label": "Vendor",
|
| 26 |
+
"type": "entity",
|
| 27 |
+
"anchor_bbox_norm": { "x": 0.785948, "y": 0.147727, "w": 0.027778, "h": 0.151515 },
|
| 28 |
+
"value_bbox_norm": null,
|
| 29 |
+
"value_offset_norm": null
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"field_id": "physician_name",
|
| 33 |
+
"label": "Physician Name",
|
| 34 |
+
"type": "person",
|
| 35 |
+
"anchor_bbox_norm": { "x": 0.248366, "y": 0.145202, "w": 0.022876, "h": 0.084596 },
|
| 36 |
+
"value_bbox_norm": null,
|
| 37 |
+
"value_offset_norm": { "dx": 0.003268, "dy": 0.084596, "w": 0.02451, "h": 0.165404 }
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"field_id": "date_of_surgery",
|
| 41 |
+
"label": "Date of Surgery",
|
| 42 |
+
"type": "date",
|
| 43 |
+
"anchor_bbox_norm": { "x": 0.21732, "y": 0.156566, "w": 0.013072, "h": 0.074495 },
|
| 44 |
+
"value_bbox_norm": null,
|
| 45 |
+
"value_offset_norm": { "dx": -0.006536, "dy": 0.073232, "w": 0.027778, "h": 0.167929 }
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"field_id": "items",
|
| 49 |
+
"label": "Items / Line Items",
|
| 50 |
+
"type": "table",
|
| 51 |
+
"table_bbox_norm": { "x": 0.473856, "y": 0.109848, "w": 0.256536, "h": 0.707071 },
|
| 52 |
+
"header_bbox_norm": { "x": 0.707516, "y": 0.109848, "w": 0.021242, "h": 0.707071 },
|
| 53 |
+
"row_height_hint_norm": null,
|
| 54 |
+
"columns": [
|
| 55 |
+
{ "key": "item_number", "label": "Item Number", "bbox_rel_norm": null },
|
| 56 |
+
{ "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
|
| 57 |
+
{ "key": "description", "label": "Description", "bbox_rel_norm": null },
|
| 58 |
+
{ "key": "qty", "label": "Qty", "bbox_rel_norm": null },
|
| 59 |
+
{ "key": "price", "label": "Price", "bbox_rel_norm": null }
|
| 60 |
+
],
|
| 61 |
+
"table_anchors": [
|
| 62 |
+
{ "key": "item_number", "expected_text": "Item Number", "bbox_norm": null },
|
| 63 |
+
{ "key": "description", "expected_text": "Description", "bbox_norm": null },
|
| 64 |
+
{ "key": "qty", "expected_text": "Qty", "bbox_norm": null }
|
| 65 |
+
],
|
| 66 |
+
"notes": "Anchors are used at runtime to localize table/header/columns under drift."
|
| 67 |
+
}
|
| 68 |
+
],
|
| 69 |
+
"notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
|
| 70 |
+
}
|
backend/worker/__init__.py
ADDED
|
File without changes
|
backend/worker/config.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@dataclass(frozen=True)
|
| 9 |
+
class Settings:
|
| 10 |
+
# Repo paths
|
| 11 |
+
repo_root: Path
|
| 12 |
+
backend_dir: Path
|
| 13 |
+
worker_dir: Path
|
| 14 |
+
|
| 15 |
+
# Gmail
|
| 16 |
+
credentials_path: Path
|
| 17 |
+
token_path: Path
|
| 18 |
+
|
| 19 |
+
label_incoming: str
|
| 20 |
+
label_known: str
|
| 21 |
+
label_unknown: str
|
| 22 |
+
label_train: str
|
| 23 |
+
|
| 24 |
+
# Notification
|
| 25 |
+
notify_to_email: str
|
| 26 |
+
notify_from_email: str
|
| 27 |
+
|
| 28 |
+
# Trainer
|
| 29 |
+
trainer_base_url: str
|
| 30 |
+
|
| 31 |
+
# OpenAI
|
| 32 |
+
openai_api_key: str
|
| 33 |
+
openai_model: str
|
| 34 |
+
|
| 35 |
+
# Worker behavior
|
| 36 |
+
poll_seconds: int
|
| 37 |
+
max_messages_per_poll: int
|
| 38 |
+
render_pages: int
|
| 39 |
+
render_dpi: int
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def load_settings(repo_root: Path) -> "Settings":
    """Build a ``Settings`` instance from environment variables.

    Args:
        repo_root: Repository root; ``backend/`` and ``backend/worker/`` are
            derived from it and used as defaults for the Gmail JSON files.

    Returns:
        A fully populated ``Settings`` object.

    Raises:
        RuntimeError: If a required variable (OpenAI key, notification
            addresses, trainer base URL) is missing or blank.
        ValueError: If an integer-valued variable is set to a non-integer.
    """

    def env_text(name: str, default: str = "") -> str:
        # Whitespace-only values are treated the same as unset values.
        return os.environ.get(name, default).strip()

    def env_int(name: str, default: int) -> int:
        # A blank value falls back to the default instead of crashing in int().
        raw = env_text(name)
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError as exc:
            raise ValueError(f"{name} must be an integer, got {raw!r}") from exc

    backend_dir = repo_root / "backend"
    worker_dir = backend_dir / "worker"

    # OPENAI_API_KEY_TEST is the primary name; OPENAI_API_KEY is accepted as a
    # fallback, matching the env-based classifier wrapper.
    openai_api_key = env_text("OPENAI_API_KEY_TEST") or env_text("OPENAI_API_KEY")
    if not openai_api_key:
        raise RuntimeError(
            "Missing OPENAI_API_KEY_TEST (or OPENAI_API_KEY) env var in backend/.env"
        )

    notify_to = env_text("PDF_PIPELINE_NOTIFY_TO")
    if not notify_to:
        raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_TO env var")

    notify_from = env_text("PDF_PIPELINE_NOTIFY_FROM")
    if not notify_from:
        raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_FROM env var")

    trainer_base_url = env_text("PDF_TRAINER_BASE_URL", "http://localhost:5173")
    if not trainer_base_url:
        raise RuntimeError("Missing PDF_TRAINER_BASE_URL env var")

    return Settings(
        repo_root=repo_root,
        backend_dir=backend_dir,
        worker_dir=worker_dir,
        credentials_path=Path(
            os.environ.get("GMAIL_CREDENTIALS_JSON", str(backend_dir / "credentials.json"))
        ),
        token_path=Path(
            os.environ.get("GMAIL_TOKEN_JSON", str(backend_dir / "token.json"))
        ),
        label_incoming=os.environ.get("PDF_PIPELINE_LABEL_INCOMING", "PDF_PIPELINE/INCOMING"),
        label_known=os.environ.get("PDF_PIPELINE_LABEL_KNOWN", "PDF_PIPELINE/KNOWN"),
        label_unknown=os.environ.get("PDF_PIPELINE_LABEL_UNKNOWN", "PDF_PIPELINE/UNKNOWN"),
        label_train=os.environ.get("PDF_PIPELINE_LABEL_TRAIN", "PDF_PIPELINE/TRAIN"),
        notify_to_email=notify_to,
        notify_from_email=notify_from,
        trainer_base_url=trainer_base_url,
        openai_api_key=openai_api_key,
        # A blank OPENAI_MODEL would otherwise propagate an empty model name.
        openai_model=env_text("OPENAI_MODEL") or "gpt-4.1-mini",
        poll_seconds=env_int("PDF_PIPELINE_POLL_SECONDS", 20),
        max_messages_per_poll=env_int("PDF_PIPELINE_MAX_PER_POLL", 5),
        render_pages=env_int("PDF_PIPELINE_RENDER_PAGES", 2),
        render_dpi=env_int("PDF_PIPELINE_RENDER_DPI", 200),
    )
|
backend/worker/gmail_client.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import base64
|
| 3 |
+
import os
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from email.message import EmailMessage
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import List, Optional, Tuple
|
| 8 |
+
|
| 9 |
+
from google.oauth2.credentials import Credentials
|
| 10 |
+
from googleapiclient.discovery import build
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
SCOPES = [
|
| 14 |
+
"https://www.googleapis.com/auth/gmail.modify",
|
| 15 |
+
"https://www.googleapis.com/auth/gmail.send",
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@dataclass
class GmailMessage:
    """Pair of identifiers describing one Gmail message from a search result."""

    # Gmail message id (the "id" key of a messages.list entry).
    msg_id: str
    # Gmail thread id (the "threadId" key of the same entry).
    thread_id: str
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class GmailClient:
    """Small wrapper around the Gmail v1 API client used by the PDF pipeline.

    All calls are issued for ``userId="me"``, i.e. the account that owns the
    authorized-user token.
    """

    def __init__(self, credentials_path: Path, token_path: Path):
        """Load the stored token and build the Gmail service object.

        Raises:
            FileNotFoundError: If either JSON file does not exist.
        """
        if not credentials_path.exists():
            raise FileNotFoundError(f"Missing OAuth client json: {credentials_path}")
        if not token_path.exists():
            raise FileNotFoundError(f"Missing token json: {token_path}")

        creds = Credentials.from_authorized_user_file(str(token_path), SCOPES)
        self.service = build("gmail", "v1", credentials=creds, cache_discovery=False)

    def list_labels(self) -> List[dict]:
        """Return the label resources of the mailbox (empty list if none)."""
        resp = self.service.users().labels().list(userId="me").execute()
        return resp.get("labels", [])

    def get_label_id(self, name: str) -> Optional[str]:
        """Return the id of the label called *name*, or ``None`` if absent."""
        for lbl in self.list_labels():
            if lbl.get("name") == name:
                return lbl.get("id")
        return None

    def ensure_label(self, name: str) -> str:
        """Return the id of label *name*, creating the label when missing."""
        existing = self.get_label_id(name)
        if existing:
            return existing

        body = {
            "name": name,
            "labelListVisibility": "labelShow",
            "messageListVisibility": "show",
        }
        created = self.service.users().labels().create(userId="me", body=body).execute()
        return created["id"]

    def search_unread_pdf_messages(self, label_name: str, max_results: int = 10) -> "List[GmailMessage]":
        """Return up to *max_results* unread messages under *label_name* that carry a PDF."""
        # Gmail search query: label + unread + pdf attachments
        query = f'label:"{label_name}" is:unread has:attachment filename:pdf'
        resp = self.service.users().messages().list(userId="me", q=query, maxResults=max_results).execute()
        msgs = resp.get("messages", []) or []
        return [GmailMessage(msg_id=m["id"], thread_id=m.get("threadId", "")) for m in msgs]

    def get_message_full(self, msg_id: str) -> dict:
        """Fetch the message resource in ``format="full"``."""
        return self.service.users().messages().get(userId="me", id=msg_id, format="full").execute()

    def _walk_parts(self, payload: dict) -> List[dict]:
        """Flatten a MIME payload tree into a list containing every dict node.

        The traversal is stack based, so siblings are visited last-to-first.
        Non-dict entries are ignored.
        """
        parts = []
        stack = [payload]
        while stack:
            node = stack.pop()
            if not isinstance(node, dict):
                continue
            if node.get("parts"):
                stack.extend(node["parts"])
            parts.append(node)
        return parts

    def list_pdf_attachments(self, msg_full: dict) -> List[Tuple[str, str]]:
        """
        Returns [(filename, attachmentId), ...] for parts that look like PDFs.

        A part qualifies when its filename ends with ".pdf" (case-insensitive)
        or its MIME type is application/pdf; parts without a filename or
        without an attachmentId are skipped.
        """
        payload = msg_full.get("payload", {}) or {}

        out: List[Tuple[str, str]] = []
        for p in self._walk_parts(payload):
            filename = (p.get("filename") or "").strip()
            att_id = (p.get("body") or {}).get("attachmentId")
            mime = (p.get("mimeType") or "").lower()

            looks_like_pdf = filename.lower().endswith(".pdf") or mime == "application/pdf"
            if looks_like_pdf and filename and att_id:
                out.append((filename, att_id))
        return out

    def download_attachment(self, msg_id: str, attachment_id: str) -> bytes:
        """Download one attachment and return its decoded bytes."""
        att = (
            self.service.users()
            .messages()
            .attachments()
            .get(userId="me", messageId=msg_id, id=attachment_id)
            .execute()
        )
        data = att.get("data", "") or ""
        # urlsafe_b64decode rejects input whose length is not a multiple of 4;
        # re-add any "=" padding the server may have omitted.
        data += "=" * (-len(data) % 4)
        return base64.urlsafe_b64decode(data.encode("utf-8"))

    def move_message(
        self,
        msg_id: str,
        add_labels: List[str],
        remove_labels: List[str],
        mark_read: bool = True,
    ) -> None:
        """Add/remove labels on a message and optionally mark it as read.

        Labels in *add_labels* are created when they do not exist yet. Labels
        in *remove_labels* that do not exist are skipped: a message cannot
        carry a label that does not exist, so there is nothing to remove and
        no reason to create it.
        """
        add_ids: List[str] = []
        remove_ids: List[str] = []

        if add_labels or remove_labels:
            # One labels.list round-trip instead of one per label name.
            name_to_id = {lbl.get("name"): lbl.get("id") for lbl in self.list_labels()}
            add_ids = [name_to_id.get(n) or self.ensure_label(n) for n in add_labels]
            remove_ids = [name_to_id[n] for n in remove_labels if name_to_id.get(n)]

        if mark_read:
            remove_ids.append("UNREAD")

        if not add_ids and not remove_ids:
            # Nothing to change; skip the API call entirely.
            return

        body = {"addLabelIds": add_ids, "removeLabelIds": remove_ids}
        self.service.users().messages().modify(userId="me", id=msg_id, body=body).execute()

    def send_email(self, to_email: str, subject: str, body_text: str, from_email: Optional[str] = None, attachments: Optional[List[Tuple[str, bytes]]] = None) -> None:
        """Send a plain-text email, optionally with ``(filename, bytes)`` attachments."""
        msg = EmailMessage()
        msg["To"] = to_email
        msg["Subject"] = subject
        if from_email:
            msg["From"] = from_email
        msg.set_content(body_text)

        for filename, data in attachments or []:
            # basic content type guess for pdf/json
            lowered = filename.lower()
            if lowered.endswith(".pdf"):
                maintype, subtype = "application", "pdf"
            elif lowered.endswith(".json"):
                maintype, subtype = "application", "json"
            else:
                maintype, subtype = "application", "octet-stream"
            msg.add_attachment(data, maintype=maintype, subtype=subtype, filename=filename)

        # The Gmail API expects the RFC 822 message as base64url text in "raw".
        raw = base64.urlsafe_b64encode(msg.as_bytes()).decode("utf-8")
        self.service.users().messages().send(userId="me", body={"raw": raw}).execute()
|
backend/worker/openai_classifier.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import base64
|
| 4 |
+
import json
|
| 5 |
+
import re
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 8 |
+
|
| 9 |
+
from openai import OpenAI
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# ----------------------------
|
| 13 |
+
# Known templates (mirror your main system)
|
| 14 |
+
# ----------------------------
|
| 15 |
+
KNOWN_TEMPLATES: List[Dict[str, Any]] = [
|
| 16 |
+
{
|
| 17 |
+
"template_id": "T1_IFACTOR_DELIVERED_ORDER",
|
| 18 |
+
"name": "I-FACTOR Delivered Order Form",
|
| 19 |
+
"keywords_all": ["delivered order form"],
|
| 20 |
+
"keywords_any": ["i-factor", "cerapedics", "product information", "stickers", "bill to", "delivered to"],
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"template_id": "T2_SEASPINE_DELIVERED_GOODS_FORM",
|
| 24 |
+
"name": "SeaSpine Delivered Goods Form",
|
| 25 |
+
"keywords_all": ["delivered goods form"],
|
| 26 |
+
"keywords_any": ["seaspine", "isotis", "handling fee", "sales order", "invoice"],
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"template_id": "T3_ASTURA_SALES_ORDER_FORM",
|
| 30 |
+
"name": "Astura Sales Order Form",
|
| 31 |
+
"keywords_all": [],
|
| 32 |
+
"keywords_any": ["astura", "dc141", "ca200", "cbba", "sales order"],
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"template_id": "T4_MEDICAL_ESTIMATION_OF_CHARGES",
|
| 36 |
+
"name": "Medical Estimation of Charges",
|
| 37 |
+
"keywords_all": [],
|
| 38 |
+
"keywords_any": ["estimation of charges", "good faith estimate", "patient responsibility", "insurance"],
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"template_id": "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
|
| 42 |
+
"name": "Clinical Progress Note Postop",
|
| 43 |
+
"keywords_all": [],
|
| 44 |
+
"keywords_any": ["clinical progress note", "progress note", "post-op", "assessment", "plan"],
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"template_id": "T6_CUSTOMER_CHARGE_SHEET_SPINE",
|
| 48 |
+
"name": "Customer Charge Sheet Spine",
|
| 49 |
+
"keywords_all": [],
|
| 50 |
+
"keywords_any": ["customer charge sheet", "charge sheet", "spine", "qty", "unit price", "total"],
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"template_id": "T7_SALES_ORDER_ZIMMER",
|
| 54 |
+
"name": "Zimmer Sales Order",
|
| 55 |
+
"keywords_all": [],
|
| 56 |
+
"keywords_any": ["zimmer", "zimmer biomet", "biomet", "sales order", "purchase order", "po number"],
|
| 57 |
+
},
|
| 58 |
+
]
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# ----------------------------
|
| 62 |
+
# Public API (EXPLICIT key/model)
|
| 63 |
+
# ----------------------------
|
| 64 |
+
def classify_with_openai(
    image_paths: List[str],
    *,
    api_key: str,
    model: str,
    max_pages: int = 2,
) -> Dict[str, Any]:
    """
    Classify rendered PDF pages against KNOWN_TEMPLATES using an OpenAI chat model.

    Input: list of PNG file paths (page renders); only the first ``max_pages``
    are sent (one page when ``max_pages`` is not positive).

    Output:
    {
      "template_id": "T1_..." OR "UNKNOWN",
      "confidence": 0..1,
      "reason": "short string" (truncated to 500 characters),
      "trainer_schema": {}   # reserved for later
    }

    Raises RuntimeError when ``api_key`` or ``model`` is blank. The function
    itself never reads environment variables; both must be supplied.
    """
    key = (api_key or "").strip()
    model_name = (model or "").strip()

    if not key:
        raise RuntimeError("classify_with_openai: api_key is empty")
    if not model_name:
        raise RuntimeError("classify_with_openai: model is empty")

    if not image_paths:
        return {
            "template_id": "UNKNOWN",
            "confidence": 0.0,
            "reason": "No rendered images provided.",
            "trainer_schema": {},
        }

    # Base64-encode the leading pages before any network object is created.
    page_limit = max_pages if max_pages > 0 else 1
    encoded_pages: List[str] = [
        _png_file_to_b64(Path(item)) for item in image_paths[:page_limit]
    ]

    client = OpenAI(api_key=key)

    system = (
        "You are a strict document template classifier.\n"
        "You will be shown PNG images of PDF pages (scanned forms).\n"
        "Your job is to decide which known template matches.\n\n"
        "Hard rules:\n"
        "1) Output VALID JSON only. No markdown. No extra text.\n"
        "2) Choose ONE template_id from the provided list OR return template_id='UNKNOWN'.\n"
        "3) If uncertain, return UNKNOWN.\n"
        "4) Use printed headers, vendor branding, and distinctive layout cues.\n"
        "5) confidence must be 0..1.\n"
    )

    prompt_payload = {
        "known_templates": KNOWN_TEMPLATES,
        "output_schema": {
            "template_id": "string (one of known template_ids) OR 'UNKNOWN'",
            "confidence": "number 0..1",
            "reason": "short string",
        },
    }

    user_text = (
        "Classify the attached document images against known_templates.\n"
        "Return JSON matching output_schema.\n\n"
        f"{json.dumps(prompt_payload, indent=2)}"
    )

    # One text part followed by one image part per encoded page.
    image_parts: List[Dict[str, Any]] = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{page}"},
        }
        for page in encoded_pages
    ]
    content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}, *image_parts]

    resp = client.chat.completions.create(
        model=model_name,
        temperature=0.0,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": content},
        ],
    )

    reply = (resp.choices[0].message.content or "").strip()
    parsed = _parse_json_object(reply)

    # Only known template ids (or UNKNOWN) survive normalization.
    template_id = _normalize_template_id(str(parsed.get("template_id") or "").strip())

    raw_confidence = _to_float(parsed.get("confidence"), default=0.0)
    confidence = max(0.0, min(1.0, raw_confidence))

    # An UNKNOWN verdict is never reported with more than 0.6 confidence.
    if template_id == "UNKNOWN" and confidence > 0.6:
        confidence = 0.6

    reason = str(parsed.get("reason") or "").strip()

    return {
        "template_id": template_id,
        "confidence": confidence,
        "reason": reason[:500],
        "trainer_schema": {},
    }
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# ----------------------------
|
| 179 |
+
# Legacy wrapper (ENV-based) - keep only if you want
|
| 180 |
+
# ----------------------------
|
| 181 |
+
def classify_with_openai_from_env(image_paths: List[str]) -> Dict[str, Any]:
    """
    Environment-driven convenience wrapper around classify_with_openai.

    The API key is taken from OPENAI_API_KEY_TEST, falling back to
    OPENAI_API_KEY; the model comes from OPENAI_MODEL and defaults to
    "gpt-4o-mini". Raises RuntimeError when no key is configured.

    Intended for legacy callers that do not pass the key/model explicitly.
    """
    import os

    env = os.environ

    key = (env.get("OPENAI_API_KEY_TEST") or env.get("OPENAI_API_KEY") or "").strip()
    if not key:
        raise RuntimeError("Missing OPENAI_API_KEY_TEST (or OPENAI_API_KEY)")

    chosen_model = (env.get("OPENAI_MODEL") or "gpt-4o-mini").strip()

    # Delegate so that there is exactly one classification implementation.
    return classify_with_openai(image_paths, api_key=key, model=chosen_model)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# ----------------------------
|
| 205 |
+
# Helpers
|
| 206 |
+
# ----------------------------
|
| 207 |
+
def _normalize_template_id(template_id: str) -> str:
|
| 208 |
+
tid = (template_id or "").strip()
|
| 209 |
+
if not tid:
|
| 210 |
+
return "UNKNOWN"
|
| 211 |
+
|
| 212 |
+
known_ids = {t["template_id"] for t in KNOWN_TEMPLATES}
|
| 213 |
+
if tid in known_ids:
|
| 214 |
+
return tid
|
| 215 |
+
|
| 216 |
+
# common garbage patterns (model returns name instead of id, etc.)
|
| 217 |
+
low = tid.lower()
|
| 218 |
+
for t in KNOWN_TEMPLATES:
|
| 219 |
+
if t["name"].lower() == low:
|
| 220 |
+
return t["template_id"]
|
| 221 |
+
|
| 222 |
+
return "UNKNOWN"
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def _png_file_to_b64(path: Path) -> str:
|
| 226 |
+
data = path.read_bytes()
|
| 227 |
+
return base64.b64encode(data).decode("utf-8")
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
_JSON_BLOCK_RE = re.compile(r"\{.*\}", re.DOTALL)
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def _parse_json_object(text: str) -> Dict[str, Any]:
|
| 234 |
+
"""
|
| 235 |
+
Extract and parse the first {...} JSON object from model output.
|
| 236 |
+
Handles:
|
| 237 |
+
- pure JSON
|
| 238 |
+
- JSON embedded in text
|
| 239 |
+
- fenced code blocks (we strip fences)
|
| 240 |
+
"""
|
| 241 |
+
if not text:
|
| 242 |
+
return {}
|
| 243 |
+
|
| 244 |
+
s = text.strip()
|
| 245 |
+
|
| 246 |
+
# Strip ```json fences if present
|
| 247 |
+
s = _strip_code_fences(s)
|
| 248 |
+
|
| 249 |
+
# Fast path: starts with "{"
|
| 250 |
+
if s.startswith("{"):
|
| 251 |
+
try:
|
| 252 |
+
return json.loads(s)
|
| 253 |
+
except Exception:
|
| 254 |
+
pass
|
| 255 |
+
|
| 256 |
+
# Try to find a JSON-looking block
|
| 257 |
+
m = _JSON_BLOCK_RE.search(s)
|
| 258 |
+
if not m:
|
| 259 |
+
return {}
|
| 260 |
+
|
| 261 |
+
chunk = m.group(0)
|
| 262 |
+
try:
|
| 263 |
+
return json.loads(chunk)
|
| 264 |
+
except Exception:
|
| 265 |
+
# last attempt: remove trailing commas (common model mistake)
|
| 266 |
+
cleaned = _remove_trailing_commas(chunk)
|
| 267 |
+
try:
|
| 268 |
+
return json.loads(cleaned)
|
| 269 |
+
except Exception:
|
| 270 |
+
return {}
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def _strip_code_fences(s: str) -> str:
|
| 274 |
+
# remove leading ```json / ``` and trailing ```
|
| 275 |
+
if s.startswith("```"):
|
| 276 |
+
s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
|
| 277 |
+
s = re.sub(r"\s*```$", "", s)
|
| 278 |
+
return s.strip()
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def _remove_trailing_commas(s: str) -> str:
|
| 282 |
+
# naive but effective: remove ",}" and ",]" patterns repeatedly
|
| 283 |
+
prev = None
|
| 284 |
+
cur = s
|
| 285 |
+
while prev != cur:
|
| 286 |
+
prev = cur
|
| 287 |
+
cur = re.sub(r",\s*}", "}", cur)
|
| 288 |
+
cur = re.sub(r",\s*]", "]", cur)
|
| 289 |
+
return cur
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _to_float(x: Any, default: float = 0.0) -> float:
|
| 293 |
+
try:
|
| 294 |
+
return float(x)
|
| 295 |
+
except Exception:
|
| 296 |
+
return default
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
# ----------------------------
|
| 300 |
+
# Optional: quick self-check (manual)
|
| 301 |
+
# ----------------------------
|
| 302 |
+
def _debug_summarize_result(res: Dict[str, Any]) -> str:
|
| 303 |
+
return f"template_id={res.get('template_id')} conf={res.get('confidence')} reason={str(res.get('reason') or '')[:80]}"
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def _validate_known_templates() -> Tuple[bool, str]:
    """Sanity-check KNOWN_TEMPLATES.

    Returns ``(True, "ok")`` when every entry has a non-empty template_id and
    all ids are distinct; otherwise ``(False, <explanation>)``.
    """
    ids = [entry.get("template_id") for entry in KNOWN_TEMPLATES]

    if not all(ids):
        return False, "One or more templates missing template_id"

    if len(ids) > len(set(ids)):
        return False, "Duplicate template_id in KNOWN_TEMPLATES"

    return True, "ok"
|
backend/worker/out/.keep
ADDED
|
File without changes
|
backend/worker/pdf_render.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import List
|
| 6 |
+
|
| 7 |
+
import fitz # PyMuPDF
|
| 8 |
+
from PIL import Image
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@dataclass
class RenderedImage:
    """One PDF page rendered to an image file."""

    # Location of the written PNG file.
    path: Path
    # Zero-based index of the source page within the PDF.
    page_index: int
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def render_pdf_to_pngs(pdf_path: Path, out_dir: Path, pages: int = 2, dpi: int = 200) -> "List[RenderedImage]":
    """Render the first pages of a PDF to RGB PNG files.

    Args:
        pdf_path: PDF to render.
        out_dir: Directory for the PNGs; created if it does not exist.
        pages: Maximum number of leading pages to render.
        dpi: Target resolution; PDF user space is 72 units per inch, so the
            render matrix scales by ``dpi / 72``.

    Returns:
        One ``RenderedImage`` per rendered page, in page order. Files are
        named ``<pdf stem>_p<1-based page number>.png``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)

    rendered: List[RenderedImage] = []

    doc = fitz.open(pdf_path)
    try:
        n = min(pages, doc.page_count)
        for i in range(n):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat, alpha=False)

            img_path = out_dir / f"{pdf_path.stem}_p{i+1}.png"
            pix.save(str(img_path))

            # normalize to RGB with PIL (avoids weird modes); the source file
            # handle is closed before the same path is overwritten.
            with Image.open(img_path) as im:
                rgb = im.convert("RGB")
            rgb.save(img_path)

            rendered.append(RenderedImage(path=img_path, page_index=i))
    finally:
        # Release the document even when rendering a page fails.
        doc.close()

    return rendered
|
backend/worker/prompts.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TEMPLATE_IDS = [
|
| 2 |
+
"T1_IFACTOR_DELIVERED_ORDER",
|
| 3 |
+
"T2_SEASPINE_DELIVERED_GOODS_FORM",
|
| 4 |
+
"T3_ASTURA_SALES_ORDER_FORM",
|
| 5 |
+
"T4_MEDICAL_ESTIMATION_OF_CHARGES",
|
| 6 |
+
"T5_CLINICAL_PROGRESS_NOTE_POSTOP",
|
| 7 |
+
"T6_CUSTOMER_CHARGE_SHEET_SPINE",
|
| 8 |
+
"T7_SALES_ORDER_ZIMMER",
|
| 9 |
+
]
|
| 10 |
+
|
| 11 |
+
SYSTEM_PROMPT = f"""
|
| 12 |
+
You are classifying a medical/healthcare sales/order PDF form into one of the known templates,
|
| 13 |
+
and extracting a "trainer schema" for onboarding.
|
| 14 |
+
|
| 15 |
+
Known template_ids:
|
| 16 |
+
{TEMPLATE_IDS}
|
| 17 |
+
|
| 18 |
+
Rules:
|
| 19 |
+
- You MUST return JSON only (no markdown, no extra text).
|
| 20 |
+
- If none match confidently, return template_id "UNKNOWN".
|
| 21 |
+
- Always produce a schema object (even for UNKNOWN) so onboarding can proceed.
|
| 22 |
+
|
| 23 |
+
Output JSON shape (strict):
|
| 24 |
+
{{
|
| 25 |
+
"template_id": "<one of known template_ids or UNKNOWN>",
|
| 26 |
+
"confidence": 0.0,
|
| 27 |
+
"reason": "<short reason>",
|
| 28 |
+
"trainer_schema": {{
|
| 29 |
+
"form_id": "<suggested id>",
|
| 30 |
+
"version": 1,
|
| 31 |
+
"page": 1,
|
| 32 |
+
"scalar_value_region_mode": "offset_from_anchor_v1",
|
| 33 |
+
"fields": [
|
| 34 |
+
{{
|
| 35 |
+
"field_id": "facility_organization",
|
| 36 |
+
"label": "Facility / Organization",
|
| 37 |
+
"type": "entity",
|
| 38 |
+
"anchor_hint": "<printed label text or None>",
|
| 39 |
+
"value_hint": "<what to extract>"
|
| 40 |
+
}},
|
| 41 |
+
{{
|
| 42 |
+
"field_id": "case_location_address",
|
| 43 |
+
"label": "Case Location / Address",
|
| 44 |
+
"type": "entity",
|
| 45 |
+
"anchor_hint": "<printed label text or None>",
|
| 46 |
+
"value_hint": "<what to extract>"
|
| 47 |
+
}},
|
| 48 |
+
{{
|
| 49 |
+
"field_id": "vendor",
|
| 50 |
+
"label": "Vendor",
|
| 51 |
+
"type": "entity",
|
| 52 |
+
"anchor_hint": "<printed label text or None>",
|
| 53 |
+
"value_hint": "<what to extract>"
|
| 54 |
+
}},
|
| 55 |
+
{{
|
| 56 |
+
"field_id": "physician_name",
|
| 57 |
+
"label": "Physician Name",
|
| 58 |
+
"type": "person",
|
| 59 |
+
"anchor_hint": "<printed label text or None>",
|
| 60 |
+
"value_hint": "<what to extract>"
|
| 61 |
+
}},
|
| 62 |
+
{{
|
| 63 |
+
"field_id": "date_of_surgery",
|
| 64 |
+
"label": "Date of Surgery",
|
| 65 |
+
"type": "date",
|
| 66 |
+
"anchor_hint": "<printed label text or None>",
|
| 67 |
+
"value_hint": "<what to extract>"
|
| 68 |
+
}},
|
| 69 |
+
{{
|
| 70 |
+
"field_id": "items",
|
| 71 |
+
"label": "Items / Line Items",
|
| 72 |
+
"type": "table",
|
| 73 |
+
"table_hint": {{
|
| 74 |
+
"expected_columns": ["item_number","description","qty","lot_number","price","extended_price"],
|
| 75 |
+
"where_on_page": "<short description>",
|
| 76 |
+
"header_text_examples": ["Item Number","Description","Qty"]
|
| 77 |
+
}}
|
| 78 |
+
}}
|
| 79 |
+
]
|
| 80 |
+
}}
|
| 81 |
+
}}
|
| 82 |
+
"""
|
| 83 |
+
|
| 84 |
+
USER_PROMPT = """
|
| 85 |
+
Classify the form template and generate trainer_schema based on the provided page images.
|
| 86 |
+
Focus on printed structure, titles, logos, and table headers.
|
| 87 |
+
"""
|
backend/worker/template_registry_snapshot.py
ADDED
|
File without changes
|
backend/worker/template_store.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Any, Dict, List
|
| 5 |
+
|
| 6 |
+
TEMPLATE_DIR = Path(__file__).resolve().parent / "trainer_templates"
|
| 7 |
+
|
| 8 |
+
def list_trainer_templates() -> List[Dict[str, Any]]:
    """List the trainer templates stored as ``*.json`` files in TEMPLATE_DIR.

    The directory is created when missing. Files that cannot be read, are not
    valid JSON, or whose top-level value is not a JSON object are skipped.

    Returns:
        One ``{"template_id", "name", "has_config"}`` dict per usable file,
        ordered by file path.
    """
    TEMPLATE_DIR.mkdir(parents=True, exist_ok=True)
    out: List[Dict[str, Any]] = []

    for p in sorted(TEMPLATE_DIR.glob("*.json")):
        try:
            cfg = json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable, undecodable or malformed file: best-effort skip.
            # (JSONDecodeError and UnicodeDecodeError are ValueError subclasses.)
            continue

        if not isinstance(cfg, dict):
            # e.g. a JSON list or scalar: there are no keys to read.
            continue

        # Fall back from explicit id -> form id -> file name.
        template_id = cfg.get("template_id") or cfg.get("form_id") or p.stem
        name = cfg.get("name") or cfg.get("form_id") or template_id

        out.append({
            "template_id": template_id,
            "name": name,
            # optional: trainer config itself (don't spam prompt if huge)
            "has_config": True,
        })

    return out
|
| 29 |
+
|
| 30 |
+
def save_trainer_template(template_id: str, cfg: Dict[str, Any]) -> Path:
    """Write *cfg* to ``TEMPLATE_DIR/<template_id>.json`` and return the path.

    The stored copy always carries ``"template_id": template_id``; the
    caller's dict is not modified.

    Raises:
        ValueError: If *template_id* is empty or is not a plain file name
            (contains a path separator or is "." / ".."), which would let the
            file be written outside TEMPLATE_DIR.
    """
    if (
        not template_id
        or template_id in {".", ".."}
        or "/" in template_id
        or "\\" in template_id
    ):
        raise ValueError(f"Invalid template_id for a file name: {template_id!r}")

    TEMPLATE_DIR.mkdir(parents=True, exist_ok=True)
    cfg = dict(cfg)  # shallow copy so the caller's mapping stays untouched
    cfg["template_id"] = template_id  # enforce
    path = TEMPLATE_DIR / f"{template_id}.json"
    path.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
    return path
|
backend/worker/tmp/.keep
ADDED
|
File without changes
|
backend/worker/uploads/.keep
ADDED
|
File without changes
|
backend/worker/worker.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
import uuid
|
| 6 |
+
from dataclasses import dataclass
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import List, Tuple
|
| 9 |
+
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
|
| 12 |
+
from .gmail_client import GmailClient
|
| 13 |
+
from .openai_classifier import classify_with_openai
|
| 14 |
+
from .pdf_render import render_pdf_to_pngs
|
| 15 |
+
|
| 16 |
+
# Single source of truth for configuration: <repo_root>/backend/.env.
# override=True makes values from that file win over variables that are
# already present in the process environment.
REPO_ROOT = Path(__file__).resolve().parents[2]
load_dotenv(REPO_ROOT.joinpath("backend", ".env"), override=True)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass
class Settings:
    """Runtime configuration of the Gmail PDF pipeline worker.

    Instances are produced by ``load_settings`` from environment variables.
    """

    # Paths handed to GmailClient (credentials file and token file).
    creds_path: Path
    token_path: Path

    # Gmail label names for the pipeline lanes.
    label_incoming: str
    label_known: str
    label_unknown: str
    label_train: str

    # Notification e-mail: recipient (the rep told about UNKNOWN PDFs)
    # and the sender address.
    rep_notify_to: str
    notify_from: str

    # OpenAI access used by the classifier.
    openai_api_key: str
    openai_model: str

    # Polling cadence (seconds) and the per-poll message cap.
    poll_seconds: int
    max_messages_per_poll: int

    # How many pages are rendered to PNG, and at what DPI.
    render_pages: int
    render_dpi: int

    # Base URL used to build the "?pdf_id=..." trainer links.
    trainer_base_url: str
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def load_settings() -> Settings:
    """Build a ``Settings`` instance from environment variables.

    Every variable has a fallback; a variable that is unset, empty or
    whitespace-only is treated as "not provided" and replaced by its default.
    Required values (notify addresses, API key) default to ``""`` here and are
    left for the caller to validate.

    Returns:
        The populated ``Settings`` object.

    Raises:
        ValueError: if an integer setting holds a non-numeric value; the
            message names the offending variable.
    """

    def env_str(name: str, default: str = "") -> str:
        # Blank values count as unset so that an empty line in .env cannot
        # produce an empty label name, an empty path or an empty URL.
        return (os.environ.get(name) or "").strip() or default

    def env_int(name: str, default: int) -> int:
        raw = env_str(name)
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError as exc:
            raise ValueError(f"{name} must be an integer, got {raw!r}") from exc

    backend_dir = Path(__file__).resolve().parents[1]  # backend/

    return Settings(
        creds_path=Path(env_str("GMAIL_CREDENTIALS_JSON", str(backend_dir / "credentials.json"))),
        token_path=Path(env_str("GMAIL_TOKEN_JSON", str(backend_dir / "token.json"))),

        label_incoming=env_str("PDF_PIPELINE_LABEL_INCOMING", "PDF_PIPELINE/INCOMING"),
        label_known=env_str("PDF_PIPELINE_LABEL_KNOWN", "PDF_PIPELINE/KNOWN"),
        label_unknown=env_str("PDF_PIPELINE_LABEL_UNKNOWN", "PDF_PIPELINE/UNKNOWN"),
        label_train=env_str("PDF_PIPELINE_LABEL_TRAIN", "PDF_PIPELINE/TRAIN"),

        notify_from=env_str("PDF_PIPELINE_NOTIFY_FROM"),
        rep_notify_to=env_str("PDF_PIPELINE_NOTIFY_TO"),

        # The *_TEST key wins when present; a blank one no longer hides
        # the regular key.
        openai_api_key=env_str("OPENAI_API_KEY_TEST") or env_str("OPENAI_API_KEY"),
        openai_model=env_str("OPENAI_MODEL", "gpt-4o-mini"),

        poll_seconds=env_int("PDF_PIPELINE_POLL_SECONDS", 20),
        max_messages_per_poll=env_int("PDF_PIPELINE_MAX_PER_POLL", 5),

        render_pages=env_int("PDF_PIPELINE_RENDER_PAGES", 2),
        render_dpi=env_int("PDF_PIPELINE_RENDER_DPI", 200),

        trainer_base_url=env_str("PDF_TRAINER_BASE_URL", "http://localhost:5173"),
    )
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _safe_name(s: str) -> str:
|
| 82 |
+
return "".join(c if c.isalnum() or c in ("-", "_", ".", " ") else "_" for c in s).strip()
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _write_pipeline_pdf(root_worker_dir: Path, filename: str, pdf_bytes: bytes) -> Tuple[str, Path]:
|
| 86 |
+
"""
|
| 87 |
+
Persist PDF for the trainer to fetch later.
|
| 88 |
+
Returns (pdf_id, pdf_path_on_disk).
|
| 89 |
+
"""
|
| 90 |
+
uploads_dir = root_worker_dir / "uploads"
|
| 91 |
+
uploads_dir.mkdir(parents=True, exist_ok=True)
|
| 92 |
+
|
| 93 |
+
pdf_id = uuid.uuid4().hex
|
| 94 |
+
pdf_path = uploads_dir / f"{pdf_id}.pdf"
|
| 95 |
+
name_path = uploads_dir / f"{pdf_id}.name.txt"
|
| 96 |
+
|
| 97 |
+
pdf_path.write_bytes(pdf_bytes)
|
| 98 |
+
name_path.write_text(filename, encoding="utf-8")
|
| 99 |
+
|
| 100 |
+
return pdf_id, pdf_path
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def _process_train_label(gmail: GmailClient, s: Settings, root: Path) -> None:
    """Handle the TRAIN lane: store PDFs for the trainer without classifying them.

    For every unread message carrying the TRAIN label, each PDF attachment is
    written to ``<root>/uploads`` and a trainer link is printed. The message
    is then marked read; its labels are left untouched.

    The message is marked read exactly once, and only after all of its
    attachments were stored. If a download or write fails part-way, the
    exception propagates with the message still unread, so a later poll
    retries it instead of silently dropping the remaining attachments.

    Args:
        gmail: Client used to search, download and modify messages.
        s: Worker settings (TRAIN label, batch size, trainer base URL).
        root: Worker directory under which ``uploads/`` lives.
    """
    msgs = gmail.search_unread_pdf_messages(s.label_train, max_results=s.max_messages_per_poll)
    if not msgs:
        return

    base_url = s.trainer_base_url.rstrip("/")

    for m in msgs:
        msg_full = gmail.get_message_full(m.msg_id)
        pdf_atts = gmail.list_pdf_attachments(msg_full) or []

        for raw_name, att_id in pdf_atts:
            filename = _safe_name(raw_name or "attachment.pdf")
            pdf_bytes = gmail.download_attachment(m.msg_id, att_id)

            pdf_id, stored_pdf_path = _write_pipeline_pdf(root, filename, pdf_bytes)

            print(
                f"[worker][TRAIN] stored PDF msg={m.msg_id} file={filename} "
                f"pdf_id={pdf_id} stored={stored_pdf_path}"
            )
            print(f"[worker][TRAIN] open: {base_url}/?pdf_id={pdf_id}")

        # One modify call per message (also covers messages that turned out
        # to have no PDF attachment, so they do not come back on every poll).
        gmail.move_message(m.msg_id, add_labels=[], remove_labels=[], mark_read=True)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# PDFs at or above this size are not attached to the notification e-mail.
_MAX_EMAIL_ATTACHMENT_BYTES = 20 * 1024 * 1024


def _validate_settings(s: Settings) -> None:
    """Raise ``RuntimeError`` for the first required setting that is empty."""
    required = (
        (s.rep_notify_to, "Missing PDF_PIPELINE_NOTIFY_TO (rep email for UNKNOWN detection)"),
        (s.notify_from, "Missing PDF_PIPELINE_NOTIFY_FROM (OAuth Gmail account email)"),
        (s.trainer_base_url, "Missing PDF_TRAINER_BASE_URL (base URL for trainer link)"),
        (s.openai_api_key, "Missing OPENAI_API_KEY_TEST (or OPENAI_API_KEY) in backend/.env"),
    )
    for value, message in required:
        if not value:
            raise RuntimeError(message)


def _classify_attachment(
    s: Settings, tmp_dir: Path, msg_id: str, filename: str, pdf_bytes: bytes
) -> Tuple[str, float]:
    """Render a PDF to PNGs, classify it and return ``(template_id, confidence)``.

    All intermediate files live in a private scratch directory below
    *tmp_dir* that is removed when classification finishes or fails, so the
    tmp folder no longer grows with every processed attachment.
    """
    import tempfile

    with tempfile.TemporaryDirectory(dir=str(tmp_dir), prefix=f"{msg_id}_") as scratch:
        scratch_dir = Path(scratch)

        # The PDF gets its own sub-directory so that no attachment name can
        # collide with the image output directory.
        src_dir = scratch_dir / "src"
        src_dir.mkdir()
        pdf_path = src_dir / filename
        pdf_path.write_bytes(pdf_bytes)

        # NOTE(review): the renderer was previously handed a directory that
        # did not exist yet, so it is presumably the one creating it.
        img_dir = scratch_dir / "png"
        rendered = render_pdf_to_pngs(pdf_path, img_dir, pages=s.render_pages, dpi=s.render_dpi)
        image_paths = [str(r.path) for r in rendered]

        result = classify_with_openai(
            image_paths,
            api_key=s.openai_api_key,
            model=s.openai_model,
        )

    template_id = (result.get("template_id") or "UNKNOWN").strip()
    conf = float(result.get("confidence") or 0.0)
    return template_id, conf


def _notify_unknown(
    gmail: GmailClient, s: Settings, root: Path, msg_id: str, filename: str, pdf_bytes: bytes
) -> None:
    """Store an unrecognised PDF for the trainer and e-mail the rep a link to it."""
    pdf_id, stored_pdf_path = _write_pipeline_pdf(root, filename, pdf_bytes)
    trainer_link = f"{s.trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"

    subject = "Action required: Unknown PDF format (template not found)"
    body = (
        "Hi,\n\n"
        "We received a PDF that does not match any existing templates in the system.\n\n"
        "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
        f"{trainer_link}\n\n"
        "The original PDF is attached for reference.\n\n"
        "Thank you,\n"
        "Inserio Automation\n"
    )

    attachments: List[Tuple[str, bytes]] = []
    if len(pdf_bytes) < _MAX_EMAIL_ATTACHMENT_BYTES:
        attachments.append((filename, pdf_bytes))
    else:
        body += "\nNote: The PDF was too large to attach.\n"

    gmail.send_email(
        to_email=s.rep_notify_to,
        from_email=s.notify_from,
        subject=subject,
        body_text=body,
        attachments=attachments,
    )

    print(
        f"[worker] UNKNOWN: emailed rep {s.rep_notify_to} msg={msg_id} file={filename} "
        f"pdf_id={pdf_id} stored={stored_pdf_path}"
    )


def _process_incoming_message(gmail: GmailClient, s: Settings, root: Path, tmp_dir: Path, m) -> None:
    """Classify every PDF of one INCOMING message, relabel it and notify the rep.

    The message receives the UNKNOWN label if at least one attachment is
    unrecognised, otherwise KNOWN. The label is applied once per message,
    before the notification e-mails are sent.
    """
    msg_full = gmail.get_message_full(m.msg_id)
    pdf_atts = gmail.list_pdf_attachments(msg_full)

    if not pdf_atts:
        # Remove INCOMING + mark read so it doesn't loop forever
        gmail.move_message(m.msg_id, add_labels=[], remove_labels=[s.label_incoming], mark_read=True)
        return

    unknown_payloads: List[Tuple[str, bytes]] = []

    for raw_name, att_id in pdf_atts:
        # The second fallback guards against names that sanitise to "".
        filename = _safe_name(raw_name or "attachment.pdf") or "attachment.pdf"
        pdf_bytes = gmail.download_attachment(m.msg_id, att_id)

        template_id, conf = _classify_attachment(s, tmp_dir, m.msg_id, filename, pdf_bytes)

        if template_id == "UNKNOWN":
            unknown_payloads.append((filename, pdf_bytes))
            print(f"[worker] UNKNOWN attachment conf={conf:.3f} msg={m.msg_id} file={filename}")
        else:
            print(
                f"[worker] KNOWN attachment template={template_id} conf={conf:.3f} "
                f"msg={m.msg_id} file={filename}"
            )

    # Apply Gmail label ONCE per message
    target_label = s.label_unknown if unknown_payloads else s.label_known
    gmail.move_message(
        m.msg_id,
        add_labels=[target_label],
        remove_labels=[s.label_incoming],
        mark_read=True,
    )

    # Notify rep for each unknown PDF attachment
    for filename, pdf_bytes in unknown_payloads:
        _notify_unknown(gmail, s, root, m.msg_id, filename, pdf_bytes)


def main():
    """Run the worker: poll Gmail forever and process the TRAIN and INCOMING lanes.

    Settings are loaded and validated first and the four pipeline labels are
    created if needed. Each poll handles the TRAIN lane, then up to
    ``max_messages_per_poll`` INCOMING messages, then sleeps ``poll_seconds``.

    A failure while handling one INCOMING message is logged with its
    traceback and does not stop the remaining messages of the batch; any
    other error is logged and the loop continues with the next poll.

    Raises:
        RuntimeError: if a required setting is missing.
    """
    import traceback

    s = load_settings()
    _validate_settings(s)

    gmail = GmailClient(s.creds_path, s.token_path)

    # Ensure labels exist
    for label in (s.label_incoming, s.label_known, s.label_unknown, s.label_train):
        gmail.ensure_label(label)

    root = Path(__file__).resolve().parents[0]  # backend/worker
    tmp_dir = root / "tmp"
    tmp_dir.mkdir(parents=True, exist_ok=True)

    print(f"[worker] Watching label: {s.label_incoming}")
    print(f"[worker] Known label: {s.label_known}")
    print(f"[worker] Unknown label: {s.label_unknown}")
    print(f"[worker] Train label: {s.label_train}")
    print(f"[worker] Rep notify to: {s.rep_notify_to}")
    print(f"[worker] OpenAI model: {s.openai_model}")

    while True:
        try:
            # 1) TRAIN lane
            _process_train_label(gmail, s, root)

            # 2) Main pipeline (INCOMING -> KNOWN/UNKNOWN)
            msgs = gmail.search_unread_pdf_messages(s.label_incoming, max_results=s.max_messages_per_poll)
            for m in msgs or []:
                try:
                    _process_incoming_message(gmail, s, root, tmp_dir, m)
                except Exception as e:
                    # Keep going: one broken message must not block the batch.
                    print(f"[worker] ERROR: msg={m.msg_id}: {e}")
                    print(traceback.format_exc())

        except Exception as e:
            print(f"[worker] ERROR: {e}")
            print(traceback.format_exc())

        time.sleep(s.poll_seconds)


if __name__ == "__main__":
    main()
|
requirements.txt
CHANGED
|
@@ -1,2 +1,3 @@
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
|
|
|
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
| 3 |
+
python-dotenv
|