Spaces:

SAadmin
/

pdf-trainer-api

Sleeping

App Files Files Community

Avinash committed on Jan 30

Commit

4a5269c

1 Parent(s): 7dac948

integrate real backend api

Browse files

Files changed (35) hide show

.gitignore +8 -0
api.py +1 -7
backend/__init__.py +0 -0
backend/api.py +213 -0
backend/oauth_bootstrap.py +51 -0
backend/scripts/apply_trainer_schemas.py +48 -0
backend/scripts/generate_template_schema_skeletons.py +137 -0
backend/scripts/migrate_hardcoded_templates.py +99 -0
backend/templates/T1_IFACTOR_DELIVERED_ORDER.json +206 -0
backend/templates/T2_SEASPINE_DELIVERED_GOODS_FORM.json +200 -0
backend/templates/T3_ASTURA_SALES_ORDER_FORM.json +203 -0
backend/templates/T4_MEDICAL_ESTIMATION_OF_CHARGES.json +167 -0
backend/templates/T5_CLINICAL_PROGRESS_NOTE_POSTOP.json +118 -0
backend/templates/T6_CUSTOMER_CHARGE_SHEET_SPINE.json +204 -0
backend/templates/T7_SALES_ORDER_ZIMMER.json +174 -0
backend/trainer_schemas/T1_IFACTOR_DELIVERED_ORDER.schema.json +70 -0
backend/trainer_schemas/T2_SEASPINE_DELIVERED_GOODS_FORM.schema.json +70 -0
backend/trainer_schemas/T3_ASTURA_SALES_ORDER_FORM.schema.json +70 -0
backend/trainer_schemas/T4_MEDICAL_ESTIMATION_OF_CHARGES.schema.json +49 -0
backend/trainer_schemas/T5_CLINICAL_PROGRESS_NOTE_POSTOP.schema.json +35 -0
backend/trainer_schemas/T6_CUSTOMER_CHARGE_SHEET_SPINE.schema.json +70 -0
backend/trainer_schemas/T7_SALES_ORDER_ZIMMER.schema.json +70 -0
backend/worker/__init__.py +0 -0
backend/worker/config.py +89 -0
backend/worker/gmail_client.py +149 -0
backend/worker/openai_classifier.py +312 -0
backend/worker/out/.keep +0 -0
backend/worker/pdf_render.py +41 -0
backend/worker/prompts.py +87 -0
backend/worker/template_registry_snapshot.py +0 -0
backend/worker/template_store.py +36 -0
backend/worker/tmp/.keep +0 -0
backend/worker/uploads/.keep +0 -0
backend/worker/worker.py +286 -0
requirements.txt +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+__pycache__/
+*.pyc
+.venv/
+.env
+.env.*
+node_modules/
+dist/
+.DS_Store

api.py CHANGED Viewed

@@ -1,7 +1 @@
-from fastapi import FastAPI
-app = FastAPI()
-@app.get("/health")
-def health():
-    return {"ok": True}


1	+ from backend.api import app

backend/__init__.py ADDED Viewed

File without changes

backend/api.py ADDED Viewed

	@@ -0,0 +1,213 @@

+from __future__ import annotations
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict
+from dotenv import load_dotenv
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse, PlainTextResponse
+from backend.worker.gmail_client import GmailClient
+# FastAPI application object; the repository-root api.py re-exports it.
+app = FastAPI(title="PDF Trainer API", version="1.0")
+# Allow Vite dev server
+# Only these two local origins are allowed; a frontend served from any other
+# origin has to be added to allow_origins.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[
+        "http://localhost:5173",
+        "http://127.0.0.1:5173",
+    ],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# This file is backend/api.py, so parents[1] is the repository root.
+REPO_ROOT = Path(__file__).resolve().parents[1]
+BACKEND_DIR = REPO_ROOT / "backend"
+# Directory the endpoints below read <pdf_id>.pdf and <pdf_id>.name.txt from.
+UPLOADS_DIR = BACKEND_DIR / "worker" / "uploads"
+# Load backend/.env explicitly ONCE for this process
+# override=True: values from the .env file replace variables already set in the environment.
+# NOTE(review): this runs after backend.worker.gmail_client has been imported;
+# confirm that module does not read these variables at import time.
+load_dotenv(BACKEND_DIR / ".env", override=True)
+# Gmail OAuth client-secret and token files; overridable through the environment,
+# defaulting to files inside backend/.
+CREDENTIALS_JSON = Path(os.environ.get("GMAIL_CREDENTIALS_JSON", str(BACKEND_DIR / "credentials.json")))
+TOKEN_JSON = Path(os.environ.get("GMAIL_TOKEN_JSON", str(BACKEND_DIR / "token.json")))
+def _gmail() -> GmailClient:
+    return GmailClient(CREDENTIALS_JSON, TOKEN_JSON)
+def _get_env_required(key: str) -> str:
+    v = (os.environ.get(key) or "").strip()
+    if not v:
+        raise HTTPException(status_code=500, detail=f"Server missing {key} env var")
+    return v
+@app.get("/health")
+def health():
+    return {"ok": True}
+@app.get("/api/pdf/{pdf_id}")
+def get_pdf(pdf_id: str):
+    path = UPLOADS_DIR / f"{pdf_id}.pdf"
+    if not path.exists():
+        raise HTTPException(status_code=404, detail="PDF not found")
+    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
+    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else f"{pdf_id}.pdf"
+    resp = FileResponse(path, media_type="application/pdf", filename=pdf_name)
+    resp.headers["X-PDF-Name"] = pdf_name
+    return resp
+@app.post("/api/send-config")
+async def send_config(payload: Dict[str, Any]):
+    """
+    PIPELINE SUBMISSION EMAIL (after rep saves config)
+    REQUIRED payload:
+      - pdf_id: str
+      - template_id: str
+      - config: dict
+    Sends to PIPELINE inbox:
+      - PDF_PIPELINE_PIPELINE_NOTIFY_TO
+    Requirements:
+      - Subject includes template_id
+      - Body includes pdf_id
+      - Attachments: JSON + PDF
+    """
+    pdf_id = (payload.get("pdf_id") or "").strip()
+    template_id = (payload.get("template_id") or "").strip()
+    config = payload.get("config")
+    if not pdf_id:
+        raise HTTPException(status_code=400, detail="Missing pdf_id")
+    if not template_id:
+        raise HTTPException(status_code=400, detail="Missing template_id")
+    if not isinstance(config, dict):
+        raise HTTPException(status_code=400, detail="Missing config object")
+    pipeline_to = _get_env_required("PDF_PIPELINE_PIPELINE_NOTIFY_TO")
+    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
+    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()
+    pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
+    if not pdf_path.exists():
+        raise HTTPException(status_code=404, detail="PDF not found for pdf_id")
+    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
+    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else f"{pdf_id}.pdf"
+    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
+    subject = f"PDF_TRAINER_CONFIG_SUBMITTED | template_id={template_id}"
+    body = (
+        "Hi,\n\n"
+        "A PDF Trainer configuration was submitted.\n\n"
+        f"template_id: {template_id}\n"
+        f"pdf_id: {pdf_id}\n"
+        f"trainer_link: {trainer_link}\n\n"
+        "Attachments:\n"
+        f"- trainer_config_{pdf_id}_{template_id}.json\n"
+        f"- {pdf_name}\n\n"
+        "Thank you,\n"
+        "Inserio Automation\n"
+    )
+    cfg_bytes = json.dumps(
+        {"pdf_id": pdf_id, "template_id": template_id, "config": config},
+        indent=2,
+    ).encode("utf-8")
+    attachments = [
+        (f"trainer_config_{pdf_id}_{template_id}.json", cfg_bytes),
+        (pdf_name, pdf_path.read_bytes()),
+    ]
+    gmail = _gmail()
+    gmail.send_email(
+        to_email=pipeline_to,
+        from_email=notify_from,
+        subject=subject,
+        body_text=body,
+        attachments=attachments,
+    )
+    return {"ok": True}
+@app.post("/api/notify-unknown")
+async def notify_unknown(payload: Dict[str, Any]):
+    """
+    UNKNOWN TEMPLATE NOTIFICATION (rep email)
+    REQUIRED payload:
+      - pdf_id: str
+    OPTIONAL:
+      - reason: str
+    Sends to REP inbox:
+      - PDF_PIPELINE_NOTIFY_TO
+    Requirements:
+      - Includes trainer link with PDF pre-loaded
+      - Attaches PDF
+      - No JSON
+    """
+    pdf_id = (payload.get("pdf_id") or "").strip()
+    reason = (payload.get("reason") or "").strip()
+    if not pdf_id:
+        raise HTTPException(status_code=400, detail="Missing pdf_id")
+    rep_to = _get_env_required("PDF_PIPELINE_NOTIFY_TO")
+    notify_from = _get_env_required("PDF_PIPELINE_NOTIFY_FROM")
+    trainer_base_url = (os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip()
+    pdf_path = UPLOADS_DIR / f"{pdf_id}.pdf"
+    if not pdf_path.exists():
+        raise HTTPException(status_code=404, detail="PDF not found for pdf_id")
+    name_path = UPLOADS_DIR / f"{pdf_id}.name.txt"
+    pdf_name = name_path.read_text(encoding="utf-8").strip() if name_path.exists() else f"{pdf_id}.pdf"
+    trainer_link = f"{trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
+    subject = "Action required: Unknown PDF format (template not found)"
+    body = (
+        "Hi,\n\n"
+        "We received a PDF that does not match any existing templates in the system.\n\n"
+        + (f"Reason: {reason}\n\n" if reason else "")
+        + "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
+        f"{trainer_link}\n\n"
+        "The original PDF is attached for reference.\n\n"
+        "Thank you,\n"
+        "Inserio Automation\n"
+    )
+    attachments = [(pdf_name, pdf_path.read_bytes())]
+    gmail = _gmail()
+    gmail.send_email(
+        to_email=rep_to,
+        from_email=notify_from,
+        subject=subject,
+        body_text=body,
+        attachments=attachments,
+    )
+    return {"ok": True}
+@app.get("/", response_class=PlainTextResponse)
+def root():
+    return "PDF Trainer API. Use /health"

backend/oauth_bootstrap.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from __future__ import annotations
+import os
+from pathlib import Path
+from google_auth_oauthlib.flow import InstalledAppFlow
+from google.auth.transport.requests import Request
+from google.oauth2.credentials import Credentials
+# REQUIRED scopes based on your plan:
+# - read messages, move labels, mark read => modify
+# - send mail => send
+SCOPES = [
+    "https://www.googleapis.com/auth/gmail.modify",
+    "https://www.googleapis.com/auth/gmail.send",
+]
+ROOT = Path(__file__).resolve().parent
+CREDS_PATH = Path("backend/credentials.json")
+TOKEN_PATH = Path("backend/token.json")
+def main() -> None:
+    if not CREDS_PATH.exists():
+        raise FileNotFoundError(
+            f"Missing {CREDS_PATH}. Download OAuth client JSON from Google Cloud and save as credentials.json in this folder."
+        )
+    creds: Credentials | None = None
+    # Load existing token if present
+    if TOKEN_PATH.exists():
+        creds = Credentials.from_authorized_user_file(str(TOKEN_PATH), SCOPES)
+    # Refresh or re-authenticate
+    if not creds or not creds.valid:
+        if creds and creds.expired and creds.refresh_token:
+            creds.refresh(Request())
+        else:
+            flow = InstalledAppFlow.from_client_secrets_file(str(CREDS_PATH), SCOPES)
+            # Local loopback server OAuth (Desktop app)
+            creds = flow.run_local_server(port=0)
+        # Save token
+        TOKEN_PATH.write_text(creds.to_json(), encoding="utf-8")
+    print("✅ OAuth complete.")
+    print(f"Saved token: {TOKEN_PATH}")
+    print("Scopes granted:", creds.scopes)
+if __name__ == "__main__":
+    main()

backend/scripts/apply_trainer_schemas.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# backend/scripts/apply_trainer_schemas.py
+from __future__ import annotations
+import json
+from pathlib import Path
+REPO_ROOT = Path(__file__).resolve().parents[2]
+TEMPLATES_DIR = REPO_ROOT / "backend" / "templates"
+SCHEMAS_DIR = REPO_ROOT / "backend" / "trainer_schemas"
+def main() -> None:
+    if not SCHEMAS_DIR.exists():
+        raise RuntimeError(f"Missing schemas dir: {SCHEMAS_DIR}")
+    schema_files = sorted(SCHEMAS_DIR.glob("*.schema.json"))
+    if not schema_files:
+        raise RuntimeError(f"No schema files found in: {SCHEMAS_DIR}")
+    applied = 0
+    for sf in schema_files:
+        template_id = sf.name.replace(".schema.json", "")
+        template_path = TEMPLATES_DIR / f"{template_id}.json"
+        if not template_path.exists():
+            print(f"⚠️ skip (no template file): {template_path}")
+            continue
+        new_schema = json.loads(sf.read_text(encoding="utf-8"))
+        if not isinstance(new_schema, dict):
+            raise RuntimeError(f"Invalid schema json (not object): {sf}")
+        if not isinstance(new_schema.get("fields"), list):
+            raise RuntimeError(f"Invalid schema json (missing fields[]): {sf}")
+        t = json.loads(template_path.read_text(encoding="utf-8"))
+        t["schema"] = new_schema
+        # Optional: bump template version when schema changes
+        # t["version"] = int(t.get("version") or 0) + 1
+        template_path.write_text(json.dumps(t, indent=2) + "\n", encoding="utf-8")
+        print(f"✅ updated {template_path}  fields={len(new_schema['fields'])}")
+        applied += 1
+    print(f"done. applied={applied}")
+if __name__ == "__main__":
+    main()

backend/scripts/generate_template_schema_skeletons.py ADDED Viewed

	@@ -0,0 +1,137 @@

+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+TEMPLATES_DIR = Path(__file__).resolve().parents[1] / "templates"
+FIELDS: List[Dict[str, Any]] = [
+    {"field_id": "facility_organization", "label": "Facility / Organization", "type": "entity"},
+    {"field_id": "case_location", "label": "Case Location / Address", "type": "text"},
+    {"field_id": "vendor", "label": "Vendor", "type": "entity"},
+    {"field_id": "physician_name", "label": "Physician Name", "type": "person"},
+    {"field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date"},
+    {"field_id": "items", "label": "Items / Line Items", "type": "table"},
+]
+TABLE_ANCHORS = [
+    {"key": "item_number", "expected_text": "Item Number"},
+    {"key": "description", "expected_text": "Description"},
+    {"key": "qty", "expected_text": "Qty"},
+]
+TABLE_COLUMNS = [
+    {"key": "item_number", "label": "Item Number"},
+    {"key": "lot_number", "label": "Lot Number"},
+    {"key": "description", "label": "Description"},
+    {"key": "qty", "label": "Qty"},
+    {"key": "price", "label": "Price"},
+]
+def schema_skeleton(form_id: str) -> Dict[str, Any]:
+    return {
+        "form_id": form_id,
+        "version": 3,
+        "page": 1,
+        "scalar_value_region_mode": "offset_from_anchor_v1",
+        "fields": [
+            # scalar fields
+            {
+                "field_id": "facility_organization",
+                "label": "Facility / Organization",
+                "type": "entity",
+                "anchor_bbox_norm": None,
+                "value_bbox_norm": None,
+                "value_offset_norm": None,
+            },
+            {
+                "field_id": "case_location",
+                "label": "Case Location / Address",
+                "type": "text",
+                "anchor_bbox_norm": None,
+                "value_bbox_norm": None,
+                "value_offset_norm": None,
+            },
+            {
+                "field_id": "vendor",
+                "label": "Vendor",
+                "type": "entity",
+                "anchor_bbox_norm": None,
+                "value_bbox_norm": None,
+                "value_offset_norm": None,
+            },
+            {
+                "field_id": "physician_name",
+                "label": "Physician Name",
+                "type": "person",
+                "anchor_bbox_norm": None,
+                "value_bbox_norm": None,
+                "value_offset_norm": None,
+            },
+            {
+                "field_id": "date_of_surgery",
+                "label": "Date of Surgery",
+                "type": "date",
+                "anchor_bbox_norm": None,
+                "value_bbox_norm": None,
+                "value_offset_norm": None,
+            },
+            # table field
+            {
+                "field_id": "items",
+                "label": "Items / Line Items",
+                "type": "table",
+                "table_bbox_norm": None,
+                "header_bbox_norm": None,
+                "row_height_hint_norm": None,
+                "columns": [
+                    {"key": "item_number", "label": "Item Number", "bbox_rel_norm": None},
+                    {"key": "lot_number", "label": "Lot Number", "bbox_rel_norm": None},
+                    {"key": "description", "label": "Description", "bbox_rel_norm": None},
+                    {"key": "qty", "label": "Qty", "bbox_rel_norm": None},
+                    {"key": "price", "label": "Price", "bbox_rel_norm": None},
+                ],
+                "table_anchors": [
+                    {"key": "item_number", "expected_text": "Item Number", "bbox_norm": None},
+                    {"key": "description", "expected_text": "Description", "bbox_norm": None},
+                    {"key": "qty", "expected_text": "Qty", "bbox_norm": None},
+                ],
+                "notes": "Anchors are used at runtime to localize table/header/columns under drift.",
+            },
+        ],
+        "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items.",
+    }
+def main() -> None:
+    if not TEMPLATES_DIR.exists():
+        raise SystemExit(f"templates dir not found: {TEMPLATES_DIR}")
+    files = sorted(TEMPLATES_DIR.glob("*.json"))
+    if not files:
+        raise SystemExit(f"No template json files found in: {TEMPLATES_DIR}")
+    updated = 0
+    for fp in files:
+        data = json.loads(fp.read_text(encoding="utf-8"))
+        template_id = (data.get("template_id") or fp.stem).strip()
+        # Only touch your known template IDs if you want:
+        # if not template_id.startswith("T"): continue
+        # Overwrite or create schema skeleton
+        data["schema"] = schema_skeleton(form_id=f"template_{template_id}")
+        fp.write_text(json.dumps(data, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+        print(f"updated schema skeleton: {fp}")
+        updated += 1
+    print(f"done. updated {updated} template files.")
+if __name__ == "__main__":
+    main()

backend/scripts/migrate_hardcoded_templates.py ADDED Viewed

	@@ -0,0 +1,99 @@

+# backend/scripts/migrate_hardcoded_templates.py
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+# Templates live in backend/templates, one directory above this scripts/ folder.
+TEMPLATES_DIR = Path(__file__).resolve().parents[1] / "templates"
+# NOTE: this runs at import time, so merely importing the module creates the directory.
+TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
+KNOWN_TEMPLATES: List[Dict[str, Any]] = [
+    {
+        "template_id": "T1_IFACTOR_DELIVERED_ORDER",
+        "name": "I-FACTOR Delivered Order Form",
+        "status": "active",
+        "version": 1,
+        "match": {
+            "keywords_all": ["delivered order form"],
+            "keywords_any": ["i-factor", "cerapedics", "product information", "stickers", "bill to", "delivered to"],
+        },
+        "schema": {},
+    },
+    {
+        "template_id": "T2_SEASPINE_DELIVERED_GOODS_FORM",
+        "name": "SeaSpine Delivered Goods Form",
+        "status": "active",
+        "version": 1,
+        "match": {
+            "keywords_all": ["delivered goods form"],
+            "keywords_any": ["seaspine", "isotis", "handling fee", "sales order", "invoice"],
+        },
+        "schema": {},
+    },
+    {
+        "template_id": "T3_ASTURA_SALES_ORDER_FORM",
+        "name": "Astura Sales Order Form",
+        "status": "active",
+        "version": 1,
+        "match": {
+            "keywords_all": [],
+            "keywords_any": ["astura", "dc141", "ca200", "cbba", "sales order"],
+        },
+        "schema": {},
+    },
+    {
+        "template_id": "T4_MEDICAL_ESTIMATION_OF_CHARGES",
+        "name": "Medical Estimation of Charges",
+        "status": "active",
+        "version": 1,
+        "match": {
+            "keywords_all": [],
+            "keywords_any": ["estimation of charges", "good faith estimate", "patient responsibility", "insurance"],
+        },
+        "schema": {},
+    },
+    {
+        "template_id": "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
+        "name": "Clinical Progress Note Postop",
+        "status": "active",
+        "version": 1,
+        "match": {
+            "keywords_all": [],
+            "keywords_any": ["clinical progress note", "progress note", "post-op", "assessment", "plan"],
+        },
+        "schema": {},
+    },
+    {
+        "template_id": "T6_CUSTOMER_CHARGE_SHEET_SPINE",
+        "name": "Customer Charge Sheet Spine",
+        "status": "active",
+        "version": 1,
+        "match": {
+            "keywords_all": [],
+            "keywords_any": ["customer charge sheet", "charge sheet", "spine", "qty", "unit price", "total"],
+        },
+        "schema": {},
+    },
+    {
+        "template_id": "T7_SALES_ORDER_ZIMMER",
+        "name": "Zimmer Sales Order",
+        "status": "active",
+        "version": 1,
+        "match": {
+            "keywords_all": [],
+            "keywords_any": ["zimmer", "zimmer biomet", "biomet", "sales order", "purchase order", "po number"],
+        },
+        "schema": {},
+    },
+]
+def main() -> None:
+    for t in KNOWN_TEMPLATES:
+        out_path = TEMPLATES_DIR / f"{t['template_id']}.json"
+        out_path.write_text(json.dumps(t, indent=2), encoding="utf-8")
+        print(f"wrote {out_path}")
+if __name__ == "__main__":
+    main()

backend/templates/T1_IFACTOR_DELIVERED_ORDER.json ADDED Viewed

	@@ -0,0 +1,206 @@

+{
+  "template_id": "T1_IFACTOR_DELIVERED_ORDER",
+  "name": "I-FACTOR Delivered Order Form",
+  "status": "active",
+  "version": 2,
+  "match": {
+    "keywords_all": [
+      "delivered order form"
+    ],
+    "keywords_any": [
+      "i-factor",
+      "cerapedics",
+      "product information",
+      "stickers",
+      "bill to",
+      "delivered to"
+    ]
+  },
+  "schema": {
+    "form_id": "trainer_2f7cdbc443f040c79723c74490f6282f",
+    "version": 3,
+    "page": 1,
+    "scalar_value_region_mode": "offset_from_anchor_v1",
+    "fields": [
+      {
+        "field_id": "facility_organization",
+        "label": "Facility / Organization",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.138889,
+          "y": 0.328283,
+          "w": 0.047386,
+          "h": 0.027778
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.052288,
+          "dy": -0.001263,
+          "w": 0.294118,
+          "h": 0.045455
+        }
+      },
+      {
+        "field_id": "case_location",
+        "label": "Case Location / Address",
+        "type": "text",
+        "anchor_bbox_norm": {
+          "x": 0.140523,
+          "y": 0.353535,
+          "w": 0.055556,
+          "h": 0.02399
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.062092,
+          "dy": 0.005051,
+          "w": 0.292484,
+          "h": 0.056818
+        }
+      },
+      {
+        "field_id": "vendor",
+        "label": "Vendor",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.215686,
+          "y": 0.170455,
+          "w": 0.205882,
+          "h": 0.059343
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "physician_name",
+        "label": "Physician Name",
+        "type": "person",
+        "anchor_bbox_norm": {
+          "x": 0.522876,
+          "y": 0.497475,
+          "w": 0.062092,
+          "h": 0.020202
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.060458,
+          "dy": -0.005051,
+          "w": 0.214052,
+          "h": 0.025253
+        }
+      },
+      {
+        "field_id": "date_of_surgery",
+        "label": "Date of Surgery",
+        "type": "date",
+        "anchor_bbox_norm": {
+          "x": 0.138889,
+          "y": 0.57197,
+          "w": 0.160131,
+          "h": 0.026515
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.165033,
+          "dy": -0.002525,
+          "w": 0.205882,
+          "h": 0.02399
+        }
+      },
+      {
+        "field_id": "items",
+        "label": "Items / Line Items",
+        "type": "table",
+        "table_bbox_norm": {
+          "x": 0.138889,
+          "y": 0.632576,
+          "w": 0.732026,
+          "h": 0.122475
+        },
+        "header_bbox_norm": {
+          "x": 0.142157,
+          "y": 0.632576,
+          "w": 0.727124,
+          "h": 0.034091
+        },
+        "row_height_hint_norm": null,
+        "columns": [
+          {
+            "key": "item_number",
+            "label": "Item Number",
+            "bbox_rel_norm": {
+              "x": 0.004464,
+              "y": 0.28866,
+              "w": 0.196429,
+              "h": 0.701031
+            }
+          },
+          {
+            "key": "lot_number",
+            "label": "Lot Number",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "description",
+            "label": "Description",
+            "bbox_rel_norm": {
+              "x": 0.209821,
+              "y": 0.278351,
+              "w": 0.241071,
+              "h": 0.639175
+            }
+          },
+          {
+            "key": "qty",
+            "label": "Qty",
+            "bbox_rel_norm": {
+              "x": 0.647321,
+              "y": 0.247423,
+              "w": 0.058036,
+              "h": 0.71134
+            }
+          },
+          {
+            "key": "price",
+            "label": "Price",
+            "bbox_rel_norm": null
+          }
+        ],
+        "table_anchors": [
+          {
+            "key": "item_number",
+            "expected_text": "Item Number",
+            "bbox_norm": {
+              "x": 0.140523,
+              "y": 0.652778,
+              "w": 0.145425,
+              "h": 0.016414
+            }
+          },
+          {
+            "key": "description",
+            "expected_text": "Description",
+            "bbox_norm": {
+              "x": 0.287582,
+              "y": 0.650253,
+              "w": 0.181373,
+              "h": 0.018939
+            }
+          },
+          {
+            "key": "qty",
+            "expected_text": "Qty",
+            "bbox_norm": {
+              "x": 0.614379,
+              "y": 0.647727,
+              "w": 0.047386,
+              "h": 0.016414
+            }
+          }
+        ],
+        "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+      }
+    ],
+    "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+  }
+}

backend/templates/T2_SEASPINE_DELIVERED_GOODS_FORM.json ADDED Viewed

	@@ -0,0 +1,200 @@

+{
+  "template_id": "T2_SEASPINE_DELIVERED_GOODS_FORM",
+  "name": "SeaSpine Delivered Goods Form",
+  "status": "active",
+  "version": 2,
+  "match": {
+    "keywords_all": [
+      "delivered goods form"
+    ],
+    "keywords_any": [
+      "seaspine",
+      "isotis",
+      "handling fee",
+      "sales order",
+      "invoice"
+    ]
+  },
+  "schema": {
+    "form_id": "trainer_245e70e31b1f4eb1b26fad626365e9ad",
+    "version": 3,
+    "page": 1,
+    "scalar_value_region_mode": "offset_from_anchor_v1",
+    "fields": [
+      {
+        "field_id": "facility_organization",
+        "label": "Facility / Organization",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.179739,
+          "y": 0.284091,
+          "w": 0.04085,
+          "h": 0.020202
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.044118,
+          "dy": -0.002525,
+          "w": 0.246732,
+          "h": 0.021465
+        }
+      },
+      {
+        "field_id": "case_location",
+        "label": "Case Location / Address",
+        "type": "text",
+        "anchor_bbox_norm": {
+          "x": 0.181373,
+          "y": 0.310606,
+          "w": 0.135621,
+          "h": 0.016414
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.001634,
+          "dy": 0.013889,
+          "w": 0.295752,
+          "h": 0.027778
+        }
+      },
+      {
+        "field_id": "vendor",
+        "label": "Vendor",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.606209,
+          "y": 0.152778,
+          "w": 0.173203,
+          "h": 0.068182
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "physician_name",
+        "label": "Physician Name",
+        "type": "person",
+        "anchor_bbox_norm": {
+          "x": 0.179739,
+          "y": 0.508838,
+          "w": 0.104575,
+          "h": 0.016414
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.106209,
+          "dy": -0.001263,
+          "w": 0.372549,
+          "h": 0.015152
+        }
+      },
+      {
+        "field_id": "date_of_surgery",
+        "label": "Date of Surgery",
+        "type": "date",
+        "anchor_bbox_norm": {
+          "x": 0.179739,
+          "y": 0.521465,
+          "w": 0.081699,
+          "h": 0.021465
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.083333,
+          "dy": 0.005051,
+          "w": 0.068627,
+          "h": 0.015152
+        }
+      },
+      {
+        "field_id": "items",
+        "label": "Items / Line Items",
+        "type": "table",
+        "table_bbox_norm": {
+          "x": 0.178105,
+          "y": 0.388889,
+          "w": 0.609477,
+          "h": 0.118687
+        },
+        "header_bbox_norm": {
+          "x": 0.178105,
+          "y": 0.390152,
+          "w": 0.609477,
+          "h": 0.02399
+        },
+        "row_height_hint_norm": null,
+        "columns": [
+          {
+            "key": "item_number",
+            "label": "Item Number",
+            "bbox_rel_norm": {
+              "x": 0.718499,
+              "y": 0.170213,
+              "w": 0.072386,
+              "h": 0.797872
+            }
+          },
+          {
+            "key": "lot_number",
+            "label": "Lot Number",
+            "bbox_rel_norm": {
+              "x": 0.168901,
+              "y": 0.223404,
+              "w": 0.171582,
+              "h": 0.776596
+            }
+          },
+          {
+            "key": "description",
+            "label": "Description",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "qty",
+            "label": "Qty",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "price",
+            "label": "Price",
+            "bbox_rel_norm": null
+          }
+        ],
+        "table_anchors": [
+          {
+            "key": "item_number",
+            "expected_text": "Item Number",
+            "bbox_norm": {
+              "x": 0.178105,
+              "y": 0.388889,
+              "w": 0.101307,
+              "h": 0.02399
+            }
+          },
+          {
+            "key": "description",
+            "expected_text": "Description",
+            "bbox_norm": {
+              "x": 0.488562,
+              "y": 0.388889,
+              "w": 0.129085,
+              "h": 0.025253
+            }
+          },
+          {
+            "key": "qty",
+            "expected_text": "Qty",
+            "bbox_norm": {
+              "x": 0.617647,
+              "y": 0.388889,
+              "w": 0.045752,
+              "h": 0.02399
+            }
+          }
+        ],
+        "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+      }
+    ],
+    "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+  }
+}

backend/templates/T3_ASTURA_SALES_ORDER_FORM.json ADDED Viewed

	@@ -0,0 +1,203 @@

+{
+  "template_id": "T3_ASTURA_SALES_ORDER_FORM",
+  "name": "Astura Sales Order Form",
+  "status": "active",
+  "version": 2,
+  "match": {
+    "keywords_all": [],
+    "keywords_any": [
+      "astura",
+      "dc141",
+      "ca200",
+      "cbba",
+      "sales order"
+    ]
+  },
+  "schema": {
+    "form_id": "trainer_b931186e13eb45d2a9a1ded8ff8641bb",
+    "version": 3,
+    "page": 1,
+    "scalar_value_region_mode": "offset_from_anchor_v1",
+    "fields": [
+      {
+        "field_id": "facility_organization",
+        "label": "Facility / Organization",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.156863,
+          "y": 0.194444,
+          "w": 0.053922,
+          "h": 0.012626
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.076797,
+          "dy": -0.002525,
+          "w": 0.205882,
+          "h": 0.021465
+        }
+      },
+      {
+        "field_id": "case_location",
+        "label": "Case Location / Address",
+        "type": "text",
+        "anchor_bbox_norm": {
+          "x": 0.155229,
+          "y": 0.224747,
+          "w": 0.05719,
+          "h": 0.016414
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.075163,
+          "dy": 0,
+          "w": 0.212418,
+          "h": 0.034091
+        }
+      },
+      {
+        "field_id": "vendor",
+        "label": "Vendor",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.160131,
+          "y": 0.117424,
+          "w": 0.098039,
+          "h": 0.064394
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "physician_name",
+        "label": "Physician Name",
+        "type": "person",
+        "anchor_bbox_norm": {
+          "x": 0.158497,
+          "y": 0.289141,
+          "w": 0.062092,
+          "h": 0.013889
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.068627,
+          "dy": -0.002525,
+          "w": 0.212418,
+          "h": 0.022727
+        }
+      },
+      {
+        "field_id": "date_of_surgery",
+        "label": "Date of Surgery",
+        "type": "date",
+        "anchor_bbox_norm": {
+          "x": 0.160131,
+          "y": 0.256313,
+          "w": 0.053922,
+          "h": 0.016414
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.071895,
+          "dy": 0,
+          "w": 0.124183,
+          "h": 0.018939
+        }
+      },
+      {
+        "field_id": "items",
+        "label": "Items / Line Items",
+        "type": "table",
+        "table_bbox_norm": {
+          "x": 0.153595,
+          "y": 0.339646,
+          "w": 0.620915,
+          "h": 0.180556
+        },
+        "header_bbox_norm": {
+          "x": 0.156863,
+          "y": 0.339646,
+          "w": 0.617647,
+          "h": 0.018939
+        },
+        "row_height_hint_norm": null,
+        "columns": [
+          {
+            "key": "item_number",
+            "label": "Item Number",
+            "bbox_rel_norm": {
+              "x": 0,
+              "y": 0.104895,
+              "w": 0.171053,
+              "h": 0.895105
+            }
+          },
+          {
+            "key": "lot_number",
+            "label": "Lot Number",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "description",
+            "label": "Description",
+            "bbox_rel_norm": {
+              "x": 0.171053,
+              "y": 0.111888,
+              "w": 0.323684,
+              "h": 0.888112
+            }
+          },
+          {
+            "key": "qty",
+            "label": "Qty",
+            "bbox_rel_norm": {
+              "x": 0.644737,
+              "y": 0.104895,
+              "w": 0.047368,
+              "h": 0.895105
+            }
+          },
+          {
+            "key": "price",
+            "label": "Price",
+            "bbox_rel_norm": null
+          }
+        ],
+        "table_anchors": [
+          {
+            "key": "item_number",
+            "expected_text": "Item Number",
+            "bbox_norm": {
+              "x": 0.153595,
+              "y": 0.342172,
+              "w": 0.104575,
+              "h": 0.016414
+            }
+          },
+          {
+            "key": "description",
+            "expected_text": "Description",
+            "bbox_norm": {
+              "x": 0.259804,
+              "y": 0.339646,
+              "w": 0.202614,
+              "h": 0.021465
+            }
+          },
+          {
+            "key": "qty",
+            "expected_text": "Qty",
+            "bbox_norm": {
+              "x": 0.555556,
+              "y": 0.342172,
+              "w": 0.034314,
+              "h": 0.015152
+            }
+          }
+        ],
+        "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+      }
+    ],
+    "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+  }
+}

backend/templates/T4_MEDICAL_ESTIMATION_OF_CHARGES.json ADDED Viewed

	@@ -0,0 +1,167 @@

+{
+  "template_id": "T4_MEDICAL_ESTIMATION_OF_CHARGES",
+  "name": "Medical Estimation of Charges",
+  "status": "active",
+  "version": 2,
+  "match": {
+    "keywords_all": [],
+    "keywords_any": [
+      "estimation of charges",
+      "good faith estimate",
+      "patient responsibility",
+      "insurance"
+    ]
+  },
+  "schema": {
+    "form_id": "trainer_20c968bf41ac4b1c8ee12a9bb15b2bfb",
+    "version": 3,
+    "page": 1,
+    "scalar_value_region_mode": "offset_from_anchor_v1",
+    "fields": [
+      {
+        "field_id": "facility_organization",
+        "label": "Facility / Organization",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.142157,
+          "y": 0.25,
+          "w": 0.042484,
+          "h": 0.015152
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.068627,
+          "dy": -0.003788,
+          "w": 0.117647,
+          "h": 0.018939
+        }
+      },
+      {
+        "field_id": "case_location",
+        "label": "Case Location / Address",
+        "type": "text",
+        "anchor_bbox_norm": {
+          "x": 0.143791,
+          "y": 0.271465,
+          "w": 0.047386,
+          "h": 0.017677
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.071895,
+          "dy": -0.001263,
+          "w": 0.127451,
+          "h": 0.039141
+        }
+      },
+      {
+        "field_id": "vendor",
+        "label": "Vendor",
+        "type": "entity",
+        "anchor_bbox_norm": null,
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "physician_name",
+        "label": "Physician Name",
+        "type": "person",
+        "anchor_bbox_norm": null,
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "date_of_surgery",
+        "label": "Date of Surgery",
+        "type": "date",
+        "anchor_bbox_norm": null,
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "items",
+        "label": "Items / Line Items",
+        "type": "table",
+        "table_bbox_norm": {
+          "x": 0.143791,
+          "y": 0.409091,
+          "w": 0.676471,
+          "h": 0.132576
+        },
+        "header_bbox_norm": {
+          "x": 0.143791,
+          "y": 0.409091,
+          "w": 0.676471,
+          "h": 0.018939
+        },
+        "row_height_hint_norm": null,
+        "columns": [
+          {
+            "key": "item_number",
+            "label": "Item Number",
+            "bbox_rel_norm": {
+              "x": 0.717391,
+              "y": 0.114286,
+              "w": 0.089372,
+              "h": 0.857143
+            }
+          },
+          {
+            "key": "lot_number",
+            "label": "Lot Number",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "description",
+            "label": "Description",
+            "bbox_rel_norm": {
+              "x": 0.2657,
+              "y": 0.114286,
+              "w": 0.376812,
+              "h": 0.87619
+            }
+          },
+          {
+            "key": "qty",
+            "label": "Qty",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "price",
+            "label": "Price",
+            "bbox_rel_norm": null
+          }
+        ],
+        "table_anchors": [
+          {
+            "key": "item_number",
+            "expected_text": "Item Number",
+            "bbox_norm": {
+              "x": 0.632353,
+              "y": 0.409091,
+              "w": 0.045752,
+              "h": 0.017677
+            }
+          },
+          {
+            "key": "description",
+            "expected_text": "Description",
+            "bbox_norm": {
+              "x": 0.325163,
+              "y": 0.409091,
+              "w": 0.248366,
+              "h": 0.017677
+            }
+          },
+          {
+            "key": "qty",
+            "expected_text": "Qty",
+            "bbox_norm": null
+          }
+        ],
+        "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+      }
+    ],
+    "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+  }
+}

backend/templates/T5_CLINICAL_PROGRESS_NOTE_POSTOP.json ADDED Viewed

	@@ -0,0 +1,118 @@

+{
+  "template_id": "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
+  "name": "Clinical Progress Note Postop",
+  "status": "active",
+  "version": 2,
+  "match": {
+    "keywords_all": [],
+    "keywords_any": [
+      "clinical progress note",
+      "progress note",
+      "post-op",
+      "assessment",
+      "plan"
+    ]
+  },
+  "schema": {
+    "form_id": "trainer_e75eb5b93bb54c28934f43cacc406cc8",
+    "version": 3,
+    "page": 1,
+    "scalar_value_region_mode": "offset_from_anchor_v1",
+    "fields": [
+      {
+        "field_id": "facility_organization",
+        "label": "Facility / Organization",
+        "type": "entity",
+        "anchor_bbox_norm": null,
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "case_location",
+        "label": "Case Location / Address",
+        "type": "text",
+        "anchor_bbox_norm": null,
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "vendor",
+        "label": "Vendor",
+        "type": "entity",
+        "anchor_bbox_norm": null,
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "physician_name",
+        "label": "Physician Name",
+        "type": "person",
+        "anchor_bbox_norm": null,
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "date_of_surgery",
+        "label": "Date of Surgery",
+        "type": "date",
+        "anchor_bbox_norm": null,
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "items",
+        "label": "Items / Line Items",
+        "type": "table",
+        "table_bbox_norm": null,
+        "header_bbox_norm": null,
+        "row_height_hint_norm": null,
+        "columns": [
+          {
+            "key": "item_number",
+            "label": "Item Number",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "lot_number",
+            "label": "Lot Number",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "description",
+            "label": "Description",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "qty",
+            "label": "Qty",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "price",
+            "label": "Price",
+            "bbox_rel_norm": null
+          }
+        ],
+        "table_anchors": [
+          {
+            "key": "item_number",
+            "expected_text": "Item Number",
+            "bbox_norm": null
+          },
+          {
+            "key": "description",
+            "expected_text": "Description",
+            "bbox_norm": null
+          },
+          {
+            "key": "qty",
+            "expected_text": "Qty",
+            "bbox_norm": null
+          }
+        ],
+        "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+      }
+    ],
+    "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+  }
+}

backend/templates/T6_CUSTOMER_CHARGE_SHEET_SPINE.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "template_id": "T6_CUSTOMER_CHARGE_SHEET_SPINE",
+  "name": "Customer Charge Sheet Spine",
+  "status": "active",
+  "version": 2,
+  "match": {
+    "keywords_all": [],
+    "keywords_any": [
+      "customer charge sheet",
+      "charge sheet",
+      "spine",
+      "qty",
+      "unit price",
+      "total"
+    ]
+  },
+  "schema": {
+    "form_id": "trainer_6b04e85b60a9470588be4f7541029d71",
+    "version": 3,
+    "page": 1,
+    "scalar_value_region_mode": "offset_from_anchor_v1",
+    "fields": [
+      {
+        "field_id": "facility_organization",
+        "label": "Facility / Organization",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.388386,
+          "y": 0.27195,
+          "w": 0.096782,
+          "h": 0.013598
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0,
+          "dy": 0.011655,
+          "w": 0.096782,
+          "h": 0.01554
+        }
+      },
+      {
+        "field_id": "case_location",
+        "label": "Case Location / Address",
+        "type": "text",
+        "anchor_bbox_norm": {
+          "x": 0.483912,
+          "y": 0.297203,
+          "w": 0.13826,
+          "h": 0.011655
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.005028,
+          "dy": 0.00777,
+          "w": 0.124434,
+          "h": 0.035936
+        }
+      },
+      {
+        "field_id": "vendor",
+        "label": "Vendor",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.618401,
+          "y": 0.190365,
+          "w": 0.137004,
+          "h": 0.047591
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "physician_name",
+        "label": "Physician Name",
+        "type": "person",
+        "anchor_bbox_norm": {
+          "x": 0.218703,
+          "y": 0.296232,
+          "w": 0.042735,
+          "h": 0.019425
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.042735,
+          "dy": 0,
+          "w": 0.124434,
+          "h": 0.020396
+        }
+      },
+      {
+        "field_id": "date_of_surgery",
+        "label": "Date of Surgery",
+        "type": "date",
+        "anchor_bbox_norm": {
+          "x": 0.221217,
+          "y": 0.308858,
+          "w": 0.081699,
+          "h": 0.018454
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.084213,
+          "dy": 0.001943,
+          "w": 0.08547,
+          "h": 0.018454
+        }
+      },
+      {
+        "field_id": "items",
+        "label": "Items / Line Items",
+        "type": "table",
+        "table_bbox_norm": {
+          "x": 0.224987,
+          "y": 0.373932,
+          "w": 0.549271,
+          "h": 0.305944
+        },
+        "header_bbox_norm": {
+          "x": 0.226244,
+          "y": 0.373932,
+          "w": 0.548014,
+          "h": 0.012626
+        },
+        "row_height_hint_norm": null,
+        "columns": [
+          {
+            "key": "item_number",
+            "label": "Item Number",
+            "bbox_rel_norm": {
+              "x": 0,
+              "y": 0.050794,
+              "w": 0.144165,
+              "h": 0.949206
+            }
+          },
+          {
+            "key": "lot_number",
+            "label": "Lot Number",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "description",
+            "label": "Description",
+            "bbox_rel_norm": {
+              "x": 0.15103,
+              "y": 0.057143,
+              "w": 0.157895,
+              "h": 0.942857
+            }
+          },
+          {
+            "key": "qty",
+            "label": "Qty",
+            "bbox_rel_norm": {
+              "x": 0.414188,
+              "y": 0.044444,
+              "w": 0.059497,
+              "h": 0.952381
+            }
+          },
+          {
+            "key": "price",
+            "label": "Price",
+            "bbox_rel_norm": null
+          }
+        ],
+        "table_anchors": [
+          {
+            "key": "item_number",
+            "expected_text": "Item Number",
+            "bbox_norm": {
+              "x": 0.224987,
+              "y": 0.373932,
+              "w": 0.080442,
+              "h": 0.016511
+            }
+          },
+          {
+            "key": "description",
+            "expected_text": "Description",
+            "bbox_norm": {
+              "x": 0.306687,
+              "y": 0.373932,
+              "w": 0.081699,
+              "h": 0.019425
+            }
+          },
+          {
+            "key": "qty",
+            "expected_text": "Qty",
+            "bbox_norm": {
+              "x": 0.453746,
+              "y": 0.376845,
+              "w": 0.030166,
+              "h": 0.013598
+            }
+          }
+        ],
+        "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+      }
+    ],
+    "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+  }
+}

backend/templates/T7_SALES_ORDER_ZIMMER.json ADDED Viewed

	@@ -0,0 +1,174 @@

+{
+  "template_id": "T7_SALES_ORDER_ZIMMER",
+  "name": "Zimmer Sales Order",
+  "status": "active",
+  "version": 2,
+  "match": {
+    "keywords_all": [],
+    "keywords_any": [
+      "zimmer",
+      "zimmer biomet",
+      "biomet",
+      "sales order",
+      "purchase order",
+      "po number"
+    ]
+  },
+  "schema": {
+    "form_id": "trainer_2a12b374e66646689308af1beea88933",
+    "version": 3,
+    "page": 1,
+    "scalar_value_region_mode": "offset_from_anchor_v1",
+    "fields": [
+      {
+        "field_id": "facility_organization",
+        "label": "Facility / Organization",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.292484,
+          "y": 0.183081,
+          "w": 0.01634,
+          "h": 0.045455
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": -0.003268,
+          "dy": 0.045455,
+          "w": 0.017974,
+          "h": 0.162879
+        }
+      },
+      {
+        "field_id": "case_location",
+        "label": "Case Location / Address",
+        "type": "text",
+        "anchor_bbox_norm": {
+          "x": 0.271242,
+          "y": 0.14899,
+          "w": 0.013072,
+          "h": 0.080808
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0,
+          "dy": 0.079545,
+          "w": 0.017974,
+          "h": 0.165404
+        }
+      },
+      {
+        "field_id": "vendor",
+        "label": "Vendor",
+        "type": "entity",
+        "anchor_bbox_norm": {
+          "x": 0.785948,
+          "y": 0.147727,
+          "w": 0.027778,
+          "h": 0.151515
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": null
+      },
+      {
+        "field_id": "physician_name",
+        "label": "Physician Name",
+        "type": "person",
+        "anchor_bbox_norm": {
+          "x": 0.248366,
+          "y": 0.145202,
+          "w": 0.022876,
+          "h": 0.084596
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": 0.003268,
+          "dy": 0.084596,
+          "w": 0.02451,
+          "h": 0.165404
+        }
+      },
+      {
+        "field_id": "date_of_surgery",
+        "label": "Date of Surgery",
+        "type": "date",
+        "anchor_bbox_norm": {
+          "x": 0.21732,
+          "y": 0.156566,
+          "w": 0.013072,
+          "h": 0.074495
+        },
+        "value_bbox_norm": null,
+        "value_offset_norm": {
+          "dx": -0.006536,
+          "dy": 0.073232,
+          "w": 0.027778,
+          "h": 0.167929
+        }
+      },
+      {
+        "field_id": "items",
+        "label": "Items / Line Items",
+        "type": "table",
+        "table_bbox_norm": {
+          "x": 0.473856,
+          "y": 0.109848,
+          "w": 0.256536,
+          "h": 0.707071
+        },
+        "header_bbox_norm": {
+          "x": 0.707516,
+          "y": 0.109848,
+          "w": 0.021242,
+          "h": 0.707071
+        },
+        "row_height_hint_norm": null,
+        "columns": [
+          {
+            "key": "item_number",
+            "label": "Item Number",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "lot_number",
+            "label": "Lot Number",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "description",
+            "label": "Description",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "qty",
+            "label": "Qty",
+            "bbox_rel_norm": null
+          },
+          {
+            "key": "price",
+            "label": "Price",
+            "bbox_rel_norm": null
+          }
+        ],
+        "table_anchors": [
+          {
+            "key": "item_number",
+            "expected_text": "Item Number",
+            "bbox_norm": null
+          },
+          {
+            "key": "description",
+            "expected_text": "Description",
+            "bbox_norm": null
+          },
+          {
+            "key": "qty",
+            "expected_text": "Qty",
+            "bbox_norm": null
+          }
+        ],
+        "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+      }
+    ],
+    "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+  }
+}

backend/trainer_schemas/T1_IFACTOR_DELIVERED_ORDER.schema.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "form_id": "trainer_2f7cdbc443f040c79723c74490f6282f",
+  "version": 3,
+  "page": 1,
+  "scalar_value_region_mode": "offset_from_anchor_v1",
+  "fields": [
+    {
+      "field_id": "facility_organization",
+      "label": "Facility / Organization",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.138889, "y": 0.328283, "w": 0.047386, "h": 0.027778 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.052288, "dy": -0.001263, "w": 0.294118, "h": 0.045455 }
+    },
+    {
+      "field_id": "case_location",
+      "label": "Case Location / Address",
+      "type": "text",
+      "anchor_bbox_norm": { "x": 0.140523, "y": 0.353535, "w": 0.055556, "h": 0.02399 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.062092, "dy": 0.005051, "w": 0.292484, "h": 0.056818 }
+    },
+    {
+      "field_id": "vendor",
+      "label": "Vendor",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.215686, "y": 0.170455, "w": 0.205882, "h": 0.059343 },
+      "value_bbox_norm": null,
+      "value_offset_norm": null
+    },
+    {
+      "field_id": "physician_name",
+      "label": "Physician Name",
+      "type": "person",
+      "anchor_bbox_norm": { "x": 0.522876, "y": 0.497475, "w": 0.062092, "h": 0.020202 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.060458, "dy": -0.005051, "w": 0.214052, "h": 0.025253 }
+    },
+    {
+      "field_id": "date_of_surgery",
+      "label": "Date of Surgery",
+      "type": "date",
+      "anchor_bbox_norm": { "x": 0.138889, "y": 0.57197, "w": 0.160131, "h": 0.026515 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.165033, "dy": -0.002525, "w": 0.205882, "h": 0.02399 }
+    },
+    {
+      "field_id": "items",
+      "label": "Items / Line Items",
+      "type": "table",
+      "table_bbox_norm": { "x": 0.138889, "y": 0.632576, "w": 0.732026, "h": 0.122475 },
+      "header_bbox_norm": { "x": 0.142157, "y": 0.632576, "w": 0.727124, "h": 0.034091 },
+      "row_height_hint_norm": null,
+      "columns": [
+        { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.004464, "y": 0.28866, "w": 0.196429, "h": 0.701031 } },
+        { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
+        { "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.209821, "y": 0.278351, "w": 0.241071, "h": 0.639175 } },
+        { "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.647321, "y": 0.247423, "w": 0.058036, "h": 0.71134 } },
+        { "key": "price", "label": "Price", "bbox_rel_norm": null }
+      ],
+      "table_anchors": [
+        { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.140523, "y": 0.652778, "w": 0.145425, "h": 0.016414 } },
+        { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.287582, "y": 0.650253, "w": 0.181373, "h": 0.018939 } },
+        { "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.614379, "y": 0.647727, "w": 0.047386, "h": 0.016414 } }
+      ],
+      "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+    }
+  ],
+  "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+}

backend/trainer_schemas/T2_SEASPINE_DELIVERED_GOODS_FORM.schema.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "form_id": "trainer_245e70e31b1f4eb1b26fad626365e9ad",
+  "version": 3,
+  "page": 1,
+  "scalar_value_region_mode": "offset_from_anchor_v1",
+  "fields": [
+    {
+      "field_id": "facility_organization",
+      "label": "Facility / Organization",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.179739, "y": 0.284091, "w": 0.04085, "h": 0.020202 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.044118, "dy": -0.002525, "w": 0.246732, "h": 0.021465 }
+    },
+    {
+      "field_id": "case_location",
+      "label": "Case Location / Address",
+      "type": "text",
+      "anchor_bbox_norm": { "x": 0.181373, "y": 0.310606, "w": 0.135621, "h": 0.016414 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.001634, "dy": 0.013889, "w": 0.295752, "h": 0.027778 }
+    },
+    {
+      "field_id": "vendor",
+      "label": "Vendor",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.606209, "y": 0.152778, "w": 0.173203, "h": 0.068182 },
+      "value_bbox_norm": null,
+      "value_offset_norm": null
+    },
+    {
+      "field_id": "physician_name",
+      "label": "Physician Name",
+      "type": "person",
+      "anchor_bbox_norm": { "x": 0.179739, "y": 0.508838, "w": 0.104575, "h": 0.016414 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.106209, "dy": -0.001263, "w": 0.372549, "h": 0.015152 }
+    },
+    {
+      "field_id": "date_of_surgery",
+      "label": "Date of Surgery",
+      "type": "date",
+      "anchor_bbox_norm": { "x": 0.179739, "y": 0.521465, "w": 0.081699, "h": 0.021465 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.083333, "dy": 0.005051, "w": 0.068627, "h": 0.015152 }
+    },
+    {
+      "field_id": "items",
+      "label": "Items / Line Items",
+      "type": "table",
+      "table_bbox_norm": { "x": 0.178105, "y": 0.388889, "w": 0.609477, "h": 0.118687 },
+      "header_bbox_norm": { "x": 0.178105, "y": 0.390152, "w": 0.609477, "h": 0.02399 },
+      "row_height_hint_norm": null,
+      "columns": [
+        { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.718499, "y": 0.170213, "w": 0.072386, "h": 0.797872 } },
+        { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": { "x": 0.168901, "y": 0.223404, "w": 0.171582, "h": 0.776596 } },
+        { "key": "description", "label": "Description", "bbox_rel_norm": null },
+        { "key": "qty", "label": "Qty", "bbox_rel_norm": null },
+        { "key": "price", "label": "Price", "bbox_rel_norm": null }
+      ],
+      "table_anchors": [
+        { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.178105, "y": 0.388889, "w": 0.101307, "h": 0.02399 } },
+        { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.488562, "y": 0.388889, "w": 0.129085, "h": 0.025253 } },
+        { "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.617647, "y": 0.388889, "w": 0.045752, "h": 0.02399 } }
+      ],
+      "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+    }
+  ],
+  "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+}

backend/trainer_schemas/T3_ASTURA_SALES_ORDER_FORM.schema.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "form_id": "trainer_b931186e13eb45d2a9a1ded8ff8641bb",
+  "version": 3,
+  "page": 1,
+  "scalar_value_region_mode": "offset_from_anchor_v1",
+  "fields": [
+    {
+      "field_id": "facility_organization",
+      "label": "Facility / Organization",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.156863, "y": 0.194444, "w": 0.053922, "h": 0.012626 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.076797, "dy": -0.002525, "w": 0.205882, "h": 0.021465 }
+    },
+    {
+      "field_id": "case_location",
+      "label": "Case Location / Address",
+      "type": "text",
+      "anchor_bbox_norm": { "x": 0.155229, "y": 0.224747, "w": 0.05719, "h": 0.016414 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.075163, "dy": 0, "w": 0.212418, "h": 0.034091 }
+    },
+    {
+      "field_id": "vendor",
+      "label": "Vendor",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.160131, "y": 0.117424, "w": 0.098039, "h": 0.064394 },
+      "value_bbox_norm": null,
+      "value_offset_norm": null
+    },
+    {
+      "field_id": "physician_name",
+      "label": "Physician Name",
+      "type": "person",
+      "anchor_bbox_norm": { "x": 0.158497, "y": 0.289141, "w": 0.062092, "h": 0.013889 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.068627, "dy": -0.002525, "w": 0.212418, "h": 0.022727 }
+    },
+    {
+      "field_id": "date_of_surgery",
+      "label": "Date of Surgery",
+      "type": "date",
+      "anchor_bbox_norm": { "x": 0.160131, "y": 0.256313, "w": 0.053922, "h": 0.016414 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.071895, "dy": 0, "w": 0.124183, "h": 0.018939 }
+    },
+    {
+      "field_id": "items",
+      "label": "Items / Line Items",
+      "type": "table",
+      "table_bbox_norm": { "x": 0.153595, "y": 0.339646, "w": 0.620915, "h": 0.180556 },
+      "header_bbox_norm": { "x": 0.156863, "y": 0.339646, "w": 0.617647, "h": 0.018939 },
+      "row_height_hint_norm": null,
+      "columns": [
+        { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0, "y": 0.104895, "w": 0.171053, "h": 0.895105 } },
+        { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
+        { "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.171053, "y": 0.111888, "w": 0.323684, "h": 0.888112 } },
+        { "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.644737, "y": 0.104895, "w": 0.047368, "h": 0.895105 } },
+        { "key": "price", "label": "Price", "bbox_rel_norm": null }
+      ],
+      "table_anchors": [
+        { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.153595, "y": 0.342172, "w": 0.104575, "h": 0.016414 } },
+        { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.259804, "y": 0.339646, "w": 0.202614, "h": 0.021465 } },
+        { "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.555556, "y": 0.342172, "w": 0.034314, "h": 0.015152 } }
+      ],
+      "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+    }
+  ],
+  "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+}

backend/trainer_schemas/T4_MEDICAL_ESTIMATION_OF_CHARGES.schema.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "form_id": "trainer_20c968bf41ac4b1c8ee12a9bb15b2bfb",
+  "version": 3,
+  "page": 1,
+  "scalar_value_region_mode": "offset_from_anchor_v1",
+  "fields": [
+    {
+      "field_id": "facility_organization",
+      "label": "Facility / Organization",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.142157, "y": 0.25, "w": 0.042484, "h": 0.015152 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.068627, "dy": -0.003788, "w": 0.117647, "h": 0.018939 }
+    },
+    {
+      "field_id": "case_location",
+      "label": "Case Location / Address",
+      "type": "text",
+      "anchor_bbox_norm": { "x": 0.143791, "y": 0.271465, "w": 0.047386, "h": 0.017677 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.071895, "dy": -0.001263, "w": 0.127451, "h": 0.039141 }
+    },
+    { "field_id": "vendor", "label": "Vendor", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
+    { "field_id": "physician_name", "label": "Physician Name", "type": "person", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
+    { "field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
+    {
+      "field_id": "items",
+      "label": "Items / Line Items",
+      "type": "table",
+      "table_bbox_norm": { "x": 0.143791, "y": 0.409091, "w": 0.676471, "h": 0.132576 },
+      "header_bbox_norm": { "x": 0.143791, "y": 0.409091, "w": 0.676471, "h": 0.018939 },
+      "row_height_hint_norm": null,
+      "columns": [
+        { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0.717391, "y": 0.114286, "w": 0.089372, "h": 0.857143 } },
+        { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
+        { "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.2657, "y": 0.114286, "w": 0.376812, "h": 0.87619 } },
+        { "key": "qty", "label": "Qty", "bbox_rel_norm": null },
+        { "key": "price", "label": "Price", "bbox_rel_norm": null }
+      ],
+      "table_anchors": [
+        { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.632353, "y": 0.409091, "w": 0.045752, "h": 0.017677 } },
+        { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.325163, "y": 0.409091, "w": 0.248366, "h": 0.017677 } },
+        { "key": "qty", "expected_text": "Qty", "bbox_norm": null }
+      ],
+      "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+    }
+  ],
+  "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+}

backend/trainer_schemas/T5_CLINICAL_PROGRESS_NOTE_POSTOP.schema.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "form_id": "trainer_e75eb5b93bb54c28934f43cacc406cc8",
+  "version": 3,
+  "page": 1,
+  "scalar_value_region_mode": "offset_from_anchor_v1",
+  "fields": [
+    { "field_id": "facility_organization", "label": "Facility / Organization", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
+    { "field_id": "case_location", "label": "Case Location / Address", "type": "text", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
+    { "field_id": "vendor", "label": "Vendor", "type": "entity", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
+    { "field_id": "physician_name", "label": "Physician Name", "type": "person", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
+    { "field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date", "anchor_bbox_norm": null, "value_bbox_norm": null, "value_offset_norm": null },
+    {
+      "field_id": "items",
+      "label": "Items / Line Items",
+      "type": "table",
+      "table_bbox_norm": null,
+      "header_bbox_norm": null,
+      "row_height_hint_norm": null,
+      "columns": [
+        { "key": "item_number", "label": "Item Number", "bbox_rel_norm": null },
+        { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
+        { "key": "description", "label": "Description", "bbox_rel_norm": null },
+        { "key": "qty", "label": "Qty", "bbox_rel_norm": null },
+        { "key": "price", "label": "Price", "bbox_rel_norm": null }
+      ],
+      "table_anchors": [
+        { "key": "item_number", "expected_text": "Item Number", "bbox_norm": null },
+        { "key": "description", "expected_text": "Description", "bbox_norm": null },
+        { "key": "qty", "expected_text": "Qty", "bbox_norm": null }
+      ],
+      "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+    }
+  ],
+  "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+}

backend/trainer_schemas/T6_CUSTOMER_CHARGE_SHEET_SPINE.schema.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "form_id": "trainer_6b04e85b60a9470588be4f7541029d71",
+  "version": 3,
+  "page": 1,
+  "scalar_value_region_mode": "offset_from_anchor_v1",
+  "fields": [
+    {
+      "field_id": "facility_organization",
+      "label": "Facility / Organization",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.388386, "y": 0.27195, "w": 0.096782, "h": 0.013598 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0, "dy": 0.011655, "w": 0.096782, "h": 0.01554 }
+    },
+    {
+      "field_id": "case_location",
+      "label": "Case Location / Address",
+      "type": "text",
+      "anchor_bbox_norm": { "x": 0.483912, "y": 0.297203, "w": 0.13826, "h": 0.011655 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.005028, "dy": 0.00777, "w": 0.124434, "h": 0.035936 }
+    },
+    {
+      "field_id": "vendor",
+      "label": "Vendor",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.618401, "y": 0.190365, "w": 0.137004, "h": 0.047591 },
+      "value_bbox_norm": null,
+      "value_offset_norm": null
+    },
+    {
+      "field_id": "physician_name",
+      "label": "Physician Name",
+      "type": "person",
+      "anchor_bbox_norm": { "x": 0.218703, "y": 0.296232, "w": 0.042735, "h": 0.019425 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.042735, "dy": 0, "w": 0.124434, "h": 0.020396 }
+    },
+    {
+      "field_id": "date_of_surgery",
+      "label": "Date of Surgery",
+      "type": "date",
+      "anchor_bbox_norm": { "x": 0.221217, "y": 0.308858, "w": 0.081699, "h": 0.018454 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.084213, "dy": 0.001943, "w": 0.08547, "h": 0.018454 }
+    },
+    {
+      "field_id": "items",
+      "label": "Items / Line Items",
+      "type": "table",
+      "table_bbox_norm": { "x": 0.224987, "y": 0.373932, "w": 0.549271, "h": 0.305944 },
+      "header_bbox_norm": { "x": 0.226244, "y": 0.373932, "w": 0.548014, "h": 0.012626 },
+      "row_height_hint_norm": null,
+      "columns": [
+        { "key": "item_number", "label": "Item Number", "bbox_rel_norm": { "x": 0, "y": 0.050794, "w": 0.144165, "h": 0.949206 } },
+        { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
+        { "key": "description", "label": "Description", "bbox_rel_norm": { "x": 0.15103, "y": 0.057143, "w": 0.157895, "h": 0.942857 } },
+        { "key": "qty", "label": "Qty", "bbox_rel_norm": { "x": 0.414188, "y": 0.044444, "w": 0.059497, "h": 0.952381 } },
+        { "key": "price", "label": "Price", "bbox_rel_norm": null }
+      ],
+      "table_anchors": [
+        { "key": "item_number", "expected_text": "Item Number", "bbox_norm": { "x": 0.224987, "y": 0.373932, "w": 0.080442, "h": 0.016511 } },
+        { "key": "description", "expected_text": "Description", "bbox_norm": { "x": 0.306687, "y": 0.373932, "w": 0.081699, "h": 0.019425 } },
+        { "key": "qty", "expected_text": "Qty", "bbox_norm": { "x": 0.453746, "y": 0.376845, "w": 0.030166, "h": 0.013598 } }
+      ],
+      "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+    }
+  ],
+  "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+}

backend/trainer_schemas/T7_SALES_ORDER_ZIMMER.schema.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "form_id": "trainer_2a12b374e66646689308af1beea88933",
+  "version": 3,
+  "page": 1,
+  "scalar_value_region_mode": "offset_from_anchor_v1",
+  "fields": [
+    {
+      "field_id": "facility_organization",
+      "label": "Facility / Organization",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.292484, "y": 0.183081, "w": 0.01634, "h": 0.045455 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": -0.003268, "dy": 0.045455, "w": 0.017974, "h": 0.162879 }
+    },
+    {
+      "field_id": "case_location",
+      "label": "Case Location / Address",
+      "type": "text",
+      "anchor_bbox_norm": { "x": 0.271242, "y": 0.14899, "w": 0.013072, "h": 0.080808 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0, "dy": 0.079545, "w": 0.017974, "h": 0.165404 }
+    },
+    {
+      "field_id": "vendor",
+      "label": "Vendor",
+      "type": "entity",
+      "anchor_bbox_norm": { "x": 0.785948, "y": 0.147727, "w": 0.027778, "h": 0.151515 },
+      "value_bbox_norm": null,
+      "value_offset_norm": null
+    },
+    {
+      "field_id": "physician_name",
+      "label": "Physician Name",
+      "type": "person",
+      "anchor_bbox_norm": { "x": 0.248366, "y": 0.145202, "w": 0.022876, "h": 0.084596 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": 0.003268, "dy": 0.084596, "w": 0.02451, "h": 0.165404 }
+    },
+    {
+      "field_id": "date_of_surgery",
+      "label": "Date of Surgery",
+      "type": "date",
+      "anchor_bbox_norm": { "x": 0.21732, "y": 0.156566, "w": 0.013072, "h": 0.074495 },
+      "value_bbox_norm": null,
+      "value_offset_norm": { "dx": -0.006536, "dy": 0.073232, "w": 0.027778, "h": 0.167929 }
+    },
+    {
+      "field_id": "items",
+      "label": "Items / Line Items",
+      "type": "table",
+      "table_bbox_norm": { "x": 0.473856, "y": 0.109848, "w": 0.256536, "h": 0.707071 },
+      "header_bbox_norm": { "x": 0.707516, "y": 0.109848, "w": 0.021242, "h": 0.707071 },
+      "row_height_hint_norm": null,
+      "columns": [
+        { "key": "item_number", "label": "Item Number", "bbox_rel_norm": null },
+        { "key": "lot_number", "label": "Lot Number", "bbox_rel_norm": null },
+        { "key": "description", "label": "Description", "bbox_rel_norm": null },
+        { "key": "qty", "label": "Qty", "bbox_rel_norm": null },
+        { "key": "price", "label": "Price", "bbox_rel_norm": null }
+      ],
+      "table_anchors": [
+        { "key": "item_number", "expected_text": "Item Number", "bbox_norm": null },
+        { "key": "description", "expected_text": "Description", "bbox_norm": null },
+        { "key": "qty", "expected_text": "Qty", "bbox_norm": null }
+      ],
+      "notes": "Anchors are used at runtime to localize table/header/columns under drift."
+    }
+  ],
+  "notes": "Trainer exports config only. Runtime should localize anchors then apply offsets/table mappings to extract values + line items."
+}

backend/worker/__init__.py ADDED Viewed

File without changes

backend/worker/config.py ADDED Viewed

	@@ -0,0 +1,89 @@

+from __future__ import annotations
+import os
+from dataclasses import dataclass
+from pathlib import Path
@dataclass(frozen=True)
class Settings:
    """Immutable bundle of configuration values for the PDF pipeline worker.

    Instances are produced by ``load_settings``. The dataclass is frozen, so
    attributes cannot be reassigned after construction.
    """

    # --- Repository layout ---
    repo_root: Path
    backend_dir: Path
    worker_dir: Path

    # --- Gmail: OAuth files and label names ---
    credentials_path: Path
    token_path: Path
    label_incoming: str
    label_known: str
    label_unknown: str
    label_train: str

    # --- Notification e-mail addresses ---
    notify_to_email: str
    notify_from_email: str

    # --- Trainer ---
    trainer_base_url: str

    # --- OpenAI ---
    openai_api_key: str
    openai_model: str

    # --- Worker behaviour (polling and PDF rendering) ---
    poll_seconds: int
    max_messages_per_poll: int
    render_pages: int
    render_dpi: int
def _required_env(name: str, default: str = "", hint: str = "") -> str:
    """Return the stripped value of env var *name*; raise RuntimeError if blank."""
    value = os.environ.get(name, default).strip()
    if not value:
        raise RuntimeError(f"Missing {name} env var{hint}")
    return value


def _int_env(name: str, default: str) -> int:
    """Parse env var *name* as an int, naming the variable when parsing fails."""
    raw = os.environ.get(name, default)
    try:
        return int(raw)
    except ValueError as err:
        # Same exception type as a bare int() call, but the message now says
        # which variable is misconfigured.
        raise ValueError(f"{name} must be an integer, got {raw!r}") from err


def load_settings(repo_root: Path) -> Settings:
    """Build a ``Settings`` instance from environment variables.

    Args:
        repo_root: Repository root; ``backend/`` and ``backend/worker/`` are
            derived from it.

    Returns:
        A fully populated ``Settings`` object.

    Raises:
        RuntimeError: If OPENAI_API_KEY_TEST, PDF_PIPELINE_NOTIFY_TO or
            PDF_PIPELINE_NOTIFY_FROM is unset/blank, or if
            PDF_TRAINER_BASE_URL is set to a blank value.
        ValueError: If one of the numeric PDF_PIPELINE_* variables is not an
            integer.
    """
    backend_dir = repo_root / "backend"
    worker_dir = backend_dir / "worker"

    # The API key is deliberately read from OPENAI_API_KEY_TEST, the name
    # stored in backend/.env.
    openai_api_key = _required_env("OPENAI_API_KEY_TEST", hint=" in backend/.env")
    notify_to = _required_env("PDF_PIPELINE_NOTIFY_TO")
    notify_from = _required_env("PDF_PIPELINE_NOTIFY_FROM")
    # Has a default, so this only fails when the variable is explicitly blank.
    trainer_base_url = _required_env("PDF_TRAINER_BASE_URL", default="http://localhost:5173")

    return Settings(
        repo_root=repo_root,
        backend_dir=backend_dir,
        worker_dir=worker_dir,
        credentials_path=Path(os.environ.get("GMAIL_CREDENTIALS_JSON", str(backend_dir / "credentials.json"))),
        token_path=Path(os.environ.get("GMAIL_TOKEN_JSON", str(backend_dir / "token.json"))),
        label_incoming=os.environ.get("PDF_PIPELINE_LABEL_INCOMING", "PDF_PIPELINE/INCOMING"),
        label_known=os.environ.get("PDF_PIPELINE_LABEL_KNOWN", "PDF_PIPELINE/KNOWN"),
        label_unknown=os.environ.get("PDF_PIPELINE_LABEL_UNKNOWN", "PDF_PIPELINE/UNKNOWN"),
        label_train=os.environ.get("PDF_PIPELINE_LABEL_TRAIN", "PDF_PIPELINE/TRAIN"),
        notify_to_email=notify_to,
        notify_from_email=notify_from,
        trainer_base_url=trainer_base_url,
        openai_api_key=openai_api_key,
        openai_model=os.environ.get("OPENAI_MODEL", "gpt-4.1-mini"),
        poll_seconds=_int_env("PDF_PIPELINE_POLL_SECONDS", "20"),
        max_messages_per_poll=_int_env("PDF_PIPELINE_MAX_PER_POLL", "5"),
        render_pages=_int_env("PDF_PIPELINE_RENDER_PAGES", "2"),
        render_dpi=_int_env("PDF_PIPELINE_RENDER_DPI", "200"),
    )

backend/worker/gmail_client.py ADDED Viewed

	@@ -0,0 +1,149 @@

+from __future__ import annotations
+import base64
+import os
+from dataclasses import dataclass
+from email.message import EmailMessage
+from pathlib import Path
+from typing import List, Optional, Tuple
+from google.oauth2.credentials import Credentials
+from googleapiclient.discovery import build
# OAuth scopes passed to Credentials.from_authorized_user_file by GmailClient.
SCOPES = [
    "https://www.googleapis.com/auth/gmail.modify",
    "https://www.googleapis.com/auth/gmail.send",
]
@dataclass
class GmailMessage:
    """Identifier pair for one Gmail message returned by a search."""

    # Gmail message id (the "id" key of a messages.list entry).
    msg_id: str
    # Thread id (the "threadId" key); empty string when the entry has none.
    thread_id: str
class GmailClient:
    """Small wrapper around the Gmail v1 API service used by the PDF pipeline.

    All calls are made for ``userId="me"``, i.e. the account that owns the
    stored token.
    """

    def __init__(self, credentials_path: Path, token_path: Path):
        """Load the authorized-user token and build the Gmail service.

        Raises:
            FileNotFoundError: If either file does not exist.
        """
        if not credentials_path.exists():
            raise FileNotFoundError(f"Missing OAuth client json: {credentials_path}")
        if not token_path.exists():
            raise FileNotFoundError(f"Missing token json: {token_path}")
        # Only the token file is read here; credentials_path is checked for
        # existence but not otherwise used by this constructor.
        creds = Credentials.from_authorized_user_file(str(token_path), SCOPES)
        self.service = build("gmail", "v1", credentials=creds, cache_discovery=False)

    def list_labels(self) -> List[dict]:
        """Return the label resources of the mailbox (empty list if none)."""
        resp = self.service.users().labels().list(userId="me").execute()
        return resp.get("labels", [])

    def get_label_id(self, name: str) -> Optional[str]:
        """Return the id of the first label whose name equals *name*, else None."""
        for lbl in self.list_labels():
            if lbl.get("name") == name:
                return lbl.get("id")
        return None

    def _create_label(self, name: str) -> str:
        """Create a visible label called *name* and return its id."""
        body = {
            "name": name,
            "labelListVisibility": "labelShow",
            "messageListVisibility": "show",
        }
        created = self.service.users().labels().create(userId="me", body=body).execute()
        return created["id"]

    def ensure_label(self, name: str) -> str:
        """Return the id of label *name*, creating the label if it is missing."""
        existing = self.get_label_id(name)
        if existing:
            return existing
        return self._create_label(name)

    def search_unread_pdf_messages(self, label_name: str, max_results: int = 10) -> List["GmailMessage"]:
        """Find unread messages under *label_name* that have a PDF attachment."""
        # Gmail search query: label + unread + pdf attachments
        query = f'label:"{label_name}" is:unread has:attachment filename:pdf'
        resp = self.service.users().messages().list(userId="me", q=query, maxResults=max_results).execute()
        entries = resp.get("messages", []) or []
        return [GmailMessage(msg_id=m["id"], thread_id=m.get("threadId", "")) for m in entries]

    def get_message_full(self, msg_id: str) -> dict:
        """Fetch message *msg_id* with ``format="full"``."""
        return self.service.users().messages().get(userId="me", id=msg_id, format="full").execute()

    def _walk_parts(self, payload: dict) -> List[dict]:
        """Flatten a MIME payload tree into a list of part dicts.

        Traversal is stack-based (LIFO); non-dict nodes are skipped.
        """
        parts = []
        stack = [payload]
        while stack:
            node = stack.pop()
            if not isinstance(node, dict):
                continue
            if node.get("parts"):
                stack.extend(node["parts"])
            parts.append(node)
        return parts

    def list_pdf_attachments(self, msg_full: dict) -> List[Tuple[str, str]]:
        """
        Returns [(filename, attachmentId), ...] for application/pdf parts.

        A part qualifies when its filename ends in ".pdf" or its MIME type is
        application/pdf, and it has both a filename and an attachment id.
        """
        payload = msg_full.get("payload", {}) or {}
        out: List[Tuple[str, str]] = []
        for part in self._walk_parts(payload):
            filename = (part.get("filename") or "").strip()
            att_id = (part.get("body") or {}).get("attachmentId")
            mime = (part.get("mimeType") or "").lower()
            looks_like_pdf = filename.lower().endswith(".pdf") or mime == "application/pdf"
            if looks_like_pdf and filename and att_id:
                out.append((filename, att_id))
        return out

    def download_attachment(self, msg_id: str, attachment_id: str) -> bytes:
        """Download one attachment and return its decoded bytes."""
        att = (
            self.service.users()
            .messages()
            .attachments()
            .get(userId="me", messageId=msg_id, id=attachment_id)
            .execute()
        )
        data = att.get("data", "") or ""
        # base64url data may arrive without "=" padding, which
        # urlsafe_b64decode rejects; restore it before decoding.
        data += "=" * (-len(data) % 4)
        return base64.urlsafe_b64decode(data.encode("utf-8"))

    def move_message(
        self,
        msg_id: str,
        add_labels: List[str],
        remove_labels: List[str],
        mark_read: bool = True,
    ) -> None:
        """Add and remove labels on a message, optionally marking it read.

        Labels in *add_labels* are created when missing. Labels in
        *remove_labels* that do not exist are skipped: a label that does not
        exist cannot be on the message, so it is never created just to be
        removed.
        """
        # Resolve every name from a single labels.list call; the first entry
        # wins on duplicate names, matching get_label_id.
        ids_by_name = {}
        for lbl in self.list_labels():
            ids_by_name.setdefault(lbl.get("name"), lbl.get("id"))

        add_ids = []
        for name in add_labels:
            label_id = ids_by_name.get(name)
            if not label_id:
                label_id = self._create_label(name)
                ids_by_name[name] = label_id
            add_ids.append(label_id)

        remove_ids = [ids_by_name[n] for n in remove_labels if ids_by_name.get(n)]
        if mark_read:
            remove_ids.append("UNREAD")
        body = {"addLabelIds": add_ids, "removeLabelIds": remove_ids}
        self.service.users().messages().modify(userId="me", id=msg_id, body=body).execute()

    def send_email(self, to_email: str, subject: str, body_text: str, from_email: Optional[str] = None, attachments: Optional[List[Tuple[str, bytes]]] = None) -> None:
        """Send a plain-text e-mail, optionally with (filename, bytes) attachments."""
        msg = EmailMessage()
        msg["To"] = to_email
        msg["Subject"] = subject
        if from_email:
            msg["From"] = from_email
        msg.set_content(body_text)
        for filename, data in attachments or []:
            # basic content type guess for pdf/json
            lowered = filename.lower()
            if lowered.endswith(".pdf"):
                maintype, subtype = "application", "pdf"
            elif lowered.endswith(".json"):
                maintype, subtype = "application", "json"
            else:
                maintype, subtype = "application", "octet-stream"
            msg.add_attachment(data, maintype=maintype, subtype=subtype, filename=filename)
        raw = base64.urlsafe_b64encode(msg.as_bytes()).decode("utf-8")
        self.service.users().messages().send(userId="me", body={"raw": raw}).execute()

backend/worker/openai_classifier.py ADDED Viewed

	@@ -0,0 +1,312 @@

+from __future__ import annotations
+import base64
+import json
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+from openai import OpenAI
+# ----------------------------
+# Known templates (mirror your main system)
+# ----------------------------
# Registry of classifiable templates. The whole list is serialized into the
# classifier prompt, so the key order and keyword lists are part of the
# prompt text.
KNOWN_TEMPLATES: List[Dict[str, Any]] = [
    {
        "template_id": "T1_IFACTOR_DELIVERED_ORDER",
        "name": "I-FACTOR Delivered Order Form",
        "keywords_all": ["delivered order form"],
        "keywords_any": ["i-factor", "cerapedics", "product information", "stickers", "bill to", "delivered to"],
    },
    {
        "template_id": "T2_SEASPINE_DELIVERED_GOODS_FORM",
        "name": "SeaSpine Delivered Goods Form",
        "keywords_all": ["delivered goods form"],
        "keywords_any": ["seaspine", "isotis", "handling fee", "sales order", "invoice"],
    },
    {
        "template_id": "T3_ASTURA_SALES_ORDER_FORM",
        "name": "Astura Sales Order Form",
        "keywords_all": [],
        "keywords_any": ["astura", "dc141", "ca200", "cbba", "sales order"],
    },
    {
        "template_id": "T4_MEDICAL_ESTIMATION_OF_CHARGES",
        "name": "Medical Estimation of Charges",
        "keywords_all": [],
        "keywords_any": ["estimation of charges", "good faith estimate", "patient responsibility", "insurance"],
    },
    {
        "template_id": "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
        "name": "Clinical Progress Note Postop",
        "keywords_all": [],
        "keywords_any": ["clinical progress note", "progress note", "post-op", "assessment", "plan"],
    },
    {
        "template_id": "T6_CUSTOMER_CHARGE_SHEET_SPINE",
        "name": "Customer Charge Sheet Spine",
        "keywords_all": [],
        "keywords_any": ["customer charge sheet", "charge sheet", "spine", "qty", "unit price", "total"],
    },
    {
        "template_id": "T7_SALES_ORDER_ZIMMER",
        "name": "Zimmer Sales Order",
        "keywords_all": [],
        "keywords_any": ["zimmer", "zimmer biomet", "biomet", "sales order", "purchase order", "po number"],
    },
]
+# ----------------------------
+# Public API (EXPLICIT key/model)
+# ----------------------------
def classify_with_openai(
    image_paths: List[str],
    *,
    api_key: str,
    model: str,
    max_pages: int = 2,
) -> Dict[str, Any]:
    """
    Classify rendered PDF pages against KNOWN_TEMPLATES with an OpenAI model.

    Input: list of PNG file paths (page renders).
    Output:
      {
        "template_id": "T1_..." OR "UNKNOWN",
        "confidence": 0..1,
        "reason": "short string",
        "trainer_schema": {}   # reserved for later
      }
    Hard guarantees:
      - does NOT read environment variables
      - does NOT guess api keys
      - strict normalization to known template_ids

    Raises:
      RuntimeError: if api_key or model is empty after stripping.
    """
    import math

    api_key = (api_key or "").strip()
    model = (model or "").strip()
    if not api_key:
        raise RuntimeError("classify_with_openai: api_key is empty")
    if not model:
        raise RuntimeError("classify_with_openai: model is empty")
    if not image_paths:
        return {
            "template_id": "UNKNOWN",
            "confidence": 0.0,
            "reason": "No rendered images provided.",
            "trainer_schema": {},
        }

    # Encode first N pages (keep small + deterministic); a non-positive
    # max_pages still sends one page.
    page_limit = max_pages if max_pages > 0 else 1
    pages_b64: List[str] = [_png_file_to_b64(Path(p)) for p in image_paths[:page_limit]]

    client = OpenAI(api_key=api_key)
    system = (
        "You are a strict document template classifier.\n"
        "You will be shown PNG images of PDF pages (scanned forms).\n"
        "Your job is to decide which known template matches.\n\n"
        "Hard rules:\n"
        "1) Output VALID JSON only. No markdown. No extra text.\n"
        "2) Choose ONE template_id from the provided list OR return template_id='UNKNOWN'.\n"
        "3) If uncertain, return UNKNOWN.\n"
        "4) Use printed headers, vendor branding, and distinctive layout cues.\n"
        "5) confidence must be 0..1.\n"
    )
    prompt_payload = {
        "known_templates": KNOWN_TEMPLATES,
        "output_schema": {
            "template_id": "string (one of known template_ids) OR 'UNKNOWN'",
            "confidence": "number 0..1",
            "reason": "short string",
        },
    }
    user_text = (
        "Classify the attached document images against known_templates.\n"
        "Return JSON matching output_schema.\n\n"
        f"{json.dumps(prompt_payload, indent=2)}"
    )

    # Multi-modal message: text + images
    content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}]
    content.extend(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{b64png}"},
        }
        for b64png in pages_b64
    )
    resp = client.chat.completions.create(
        model=model,
        temperature=0.0,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": content},
        ],
    )

    # A response without choices is treated like empty model output, which
    # falls through to UNKNOWN instead of raising IndexError.
    choices = resp.choices or []
    raw = ((choices[0].message.content or "") if choices else "").strip()
    parsed = _parse_json_object(raw)

    # Normalize: only allow known template ids or UNKNOWN
    template_id = _normalize_template_id(str(parsed.get("template_id") or "").strip())

    confidence = _to_float(parsed.get("confidence"), default=0.0)
    # min()/max() do not order NaN: max(0.0, min(1.0, nan)) evaluates to 1.0,
    # so reject non-finite values before clamping.
    if not math.isfinite(confidence):
        confidence = 0.0
    confidence = max(0.0, min(1.0, confidence))
    reason = str(parsed.get("reason") or "").strip()

    # If model returns UNKNOWN but gives high confidence, clamp confidence.
    if template_id == "UNKNOWN" and confidence > 0.6:
        confidence = 0.6
    return {
        "template_id": template_id,
        "confidence": confidence,
        "reason": reason[:500],
        "trainer_schema": {},
    }
+# ----------------------------
+# Legacy wrapper (ENV-based) - keep only if you want
+# ----------------------------
def classify_with_openai_from_env(image_paths: List[str]) -> Dict[str, Any]:
    """
    Legacy entry point that takes its configuration from the environment.

    Looks up OPENAI_API_KEY_TEST (falling back to OPENAI_API_KEY) and
    OPENAI_MODEL (default "gpt-4o-mini"), then delegates to
    classify_with_openai so there is a single implementation.

    Raises:
      RuntimeError: if neither key variable yields a non-blank value.
    """
    import os

    key_candidates = (os.getenv("OPENAI_API_KEY_TEST"), os.getenv("OPENAI_API_KEY"))
    # First non-empty candidate wins; stripping happens afterwards, so a
    # whitespace-only first candidate still ends up as "missing".
    api_key = next((key for key in key_candidates if key), "").strip()
    if not api_key:
        raise RuntimeError("Missing OPENAI_API_KEY_TEST (or OPENAI_API_KEY)")

    configured_model = os.getenv("OPENAI_MODEL")
    model = (configured_model if configured_model else "gpt-4o-mini").strip()

    # IMPORTANT: call the explicit version (one implementation only)
    return classify_with_openai(image_paths, api_key=api_key, model=model)
+# ----------------------------
+# Helpers
+# ----------------------------
def _normalize_template_id(template_id: str) -> str:
    """Map a model-supplied identifier to a known template_id or "UNKNOWN".

    Matching ignores case and surrounding whitespace. Template ids are tried
    first, then template names (the model sometimes returns the name instead
    of the id). Anything else, including empty input, yields "UNKNOWN".
    """
    wanted = (template_id or "").strip().lower()
    if not wanted:
        return "UNKNOWN"
    # Pass 1: ids. Case-insensitive so a correctly identified template is not
    # rejected for a casing difference; the result is still strictly one of
    # the known ids.
    for tpl in KNOWN_TEMPLATES:
        if tpl["template_id"].lower() == wanted:
            return tpl["template_id"]
    # Pass 2: common garbage pattern - model returns name instead of id.
    for tpl in KNOWN_TEMPLATES:
        if tpl["name"].lower() == wanted:
            return tpl["template_id"]
    return "UNKNOWN"
+def _png_file_to_b64(path: Path) -> str:
+    data = path.read_bytes()
+    return base64.b64encode(data).decode("utf-8")
+_JSON_BLOCK_RE = re.compile(r"\{.*\}", re.DOTALL)
+def _parse_json_object(text: str) -> Dict[str, Any]:
+    """
+    Extract and parse the first {...} JSON object from model output.
+    Handles:
+      - pure JSON
+      - JSON embedded in text
+      - fenced code blocks (we strip fences)
+    """
+    if not text:
+        return {}
+    s = text.strip()
+    # Strip ```json fences if present
+    s = _strip_code_fences(s)
+    # Fast path: starts with "{"
+    if s.startswith("{"):
+        try:
+            return json.loads(s)
+        except Exception:
+            pass
+    # Try to find a JSON-looking block
+    m = _JSON_BLOCK_RE.search(s)
+    if not m:
+        return {}
+    chunk = m.group(0)
+    try:
+        return json.loads(chunk)
+    except Exception:
+        # last attempt: remove trailing commas (common model mistake)
+        cleaned = _remove_trailing_commas(chunk)
+        try:
+            return json.loads(cleaned)
+        except Exception:
+            return {}
+def _strip_code_fences(s: str) -> str:
+    # remove leading ```json / ``` and trailing ```
+    if s.startswith("```"):
+        s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
+        s = re.sub(r"\s*```$", "", s)
+    return s.strip()
+def _remove_trailing_commas(s: str) -> str:
+    # naive but effective: remove ",}" and ",]" patterns repeatedly
+    prev = None
+    cur = s
+    while prev != cur:
+        prev = cur
+        cur = re.sub(r",\s*}", "}", cur)
+        cur = re.sub(r",\s*]", "]", cur)
+    return cur
+def _to_float(x: Any, default: float = 0.0) -> float:
+    try:
+        return float(x)
+    except Exception:
+        return default
+# ----------------------------
+# Optional: quick self-check (manual)
+# ----------------------------
+def _debug_summarize_result(res: Dict[str, Any]) -> str:
+    return f"template_id={res.get('template_id')} conf={res.get('confidence')} reason={str(res.get('reason') or '')[:80]}"
def _validate_known_templates() -> Tuple[bool, str]:
    """Sanity-check KNOWN_TEMPLATES.

    Returns (True, "ok") when every entry has a non-empty template_id and no
    id occurs twice; otherwise (False, <message>). Missing ids are reported
    before duplicates.
    """
    template_ids = [entry.get("template_id") for entry in KNOWN_TEMPLATES]
    if not all(template_ids):
        return False, "One or more templates missing template_id"
    unique_count = len(set(template_ids))
    if unique_count != len(template_ids):
        return False, "Duplicate template_id in KNOWN_TEMPLATES"
    return True, "ok"

backend/worker/out/.keep ADDED Viewed

File without changes

backend/worker/pdf_render.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List
+import fitz  # PyMuPDF
+from PIL import Image
@dataclass
class RenderedImage:
    """One PNG written by ``render_pdf_to_pngs``."""

    # Location of the PNG file on disk.
    path: Path
    # Zero-based index of the source page within the PDF.
    page_index: int
def render_pdf_to_pngs(pdf_path: Path, out_dir: Path, pages: int = 2, dpi: int = 200) -> List[RenderedImage]:
    """Render the first pages of a PDF to RGB PNG files.

    Args:
        pdf_path: PDF to render.
        out_dir: Output directory; created (with parents) if missing.
        pages: Maximum number of leading pages to render.
        dpi: Render resolution; converted to a zoom factor relative to 72.

    Returns:
        One ``RenderedImage`` per rendered page, in page order. Files are
        named ``<pdf stem>_p<1-based page number>.png``.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)
    rendered: List[RenderedImage] = []

    doc = fitz.open(pdf_path)
    try:
        for i in range(min(pages, doc.page_count)):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            img_path = out_dir / f"{pdf_path.stem}_p{i+1}.png"
            pix.save(str(img_path))
            # normalize to RGB with PIL (avoids weird modes). The source file
            # handle is closed before the same path is overwritten.
            with Image.open(img_path) as im:
                rgb = im.convert("RGB")
            rgb.save(img_path)
            rendered.append(RenderedImage(path=img_path, page_index=i))
    finally:
        # Release the document even when a page fails to render or save.
        doc.close()
    return rendered

backend/worker/prompts.py ADDED Viewed

	@@ -0,0 +1,87 @@

# Known template identifiers. The list is interpolated as-is into
# SYSTEM_PROMPT, so its type and ordering are part of the prompt text.
TEMPLATE_IDS = [
    "T1_IFACTOR_DELIVERED_ORDER",
    "T2_SEASPINE_DELIVERED_GOODS_FORM",
    "T3_ASTURA_SALES_ORDER_FORM",
    "T4_MEDICAL_ESTIMATION_OF_CHARGES",
    "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
    "T6_CUSTOMER_CHARGE_SHEET_SPINE",
    "T7_SALES_ORDER_ZIMMER",
]
+# Instruction text describing the classification task and the JSON shape to return.
+# This is an f-string: {TEMPLATE_IDS} is interpolated (as the Python list repr),
+# while doubled braces {{ }} render as literal JSON braces.
+SYSTEM_PROMPT = f"""
+You are classifying a medical/healthcare sales/order PDF form into one of the known templates,
+and extracting a "trainer schema" for onboarding.
+Known template_ids:
+{TEMPLATE_IDS}
+Rules:
+- You MUST return JSON only (no markdown, no extra text).
+- If none match confidently, return template_id "UNKNOWN".
+- Always produce a schema object (even for UNKNOWN) so onboarding can proceed.
+Output JSON shape (strict):
+{{
+  "template_id": "<one of known template_ids or UNKNOWN>",
+  "confidence": 0.0,
+  "reason": "<short reason>",
+  "trainer_schema": {{
+    "form_id": "<suggested id>",
+    "version": 1,
+    "page": 1,
+    "scalar_value_region_mode": "offset_from_anchor_v1",
+    "fields": [
+      {{
+        "field_id": "facility_organization",
+        "label": "Facility / Organization",
+        "type": "entity",
+        "anchor_hint": "<printed label text or None>",
+        "value_hint": "<what to extract>"
+      }},
+      {{
+        "field_id": "case_location_address",
+        "label": "Case Location / Address",
+        "type": "entity",
+        "anchor_hint": "<printed label text or None>",
+        "value_hint": "<what to extract>"
+      }},
+      {{
+        "field_id": "vendor",
+        "label": "Vendor",
+        "type": "entity",
+        "anchor_hint": "<printed label text or None>",
+        "value_hint": "<what to extract>"
+      }},
+      {{
+        "field_id": "physician_name",
+        "label": "Physician Name",
+        "type": "person",
+        "anchor_hint": "<printed label text or None>",
+        "value_hint": "<what to extract>"
+      }},
+      {{
+        "field_id": "date_of_surgery",
+        "label": "Date of Surgery",
+        "type": "date",
+        "anchor_hint": "<printed label text or None>",
+        "value_hint": "<what to extract>"
+      }},
+      {{
+        "field_id": "items",
+        "label": "Items / Line Items",
+        "type": "table",
+        "table_hint": {{
+          "expected_columns": ["item_number","description","qty","lot_number","price","extended_price"],
+          "where_on_page": "<short description>",
+          "header_text_examples": ["Item Number","Description","Qty"]
+        }}
+      }}
+    ]
+  }}
+}}
+"""
+# Request text; per its wording it is meant to accompany the rendered page images.
+USER_PROMPT = """
+Classify the form template and generate trainer_schema based on the provided page images.
+Focus on printed structure, titles, logos, and table headers.
+"""

backend/worker/template_registry_snapshot.py ADDED Viewed

File without changes

backend/worker/template_store.py ADDED Viewed

	@@ -0,0 +1,36 @@

+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+# Directory of saved trainer template JSON files: "trainer_templates" next to this module.
+TEMPLATE_DIR = Path(__file__).resolve().parent / "trainer_templates"
+def list_trainer_templates() -> List[Dict[str, Any]]:
+    TEMPLATE_DIR.mkdir(parents=True, exist_ok=True)
+    out: List[Dict[str, Any]] = []
+    for p in sorted(TEMPLATE_DIR.glob("*.json")):
+        try:
+            cfg = json.loads(p.read_text(encoding="utf-8"))
+        except Exception:
+            continue
+        template_id = cfg.get("template_id") or cfg.get("form_id") or p.stem
+        name = cfg.get("name") or cfg.get("form_id") or template_id
+        out.append({
+            "template_id": template_id,
+            "name": name,
+            # optional: trainer config itself (don’t spam prompt if huge)
+            "has_config": True,
+        })
+    return out
+def save_trainer_template(template_id: str, cfg: Dict[str, Any]) -> Path:
+    TEMPLATE_DIR.mkdir(parents=True, exist_ok=True)
+    cfg = dict(cfg)
+    cfg["template_id"] = template_id  # enforce
+    path = TEMPLATE_DIR / f"{template_id}.json"
+    path.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
+    return path

backend/worker/tmp/.keep ADDED Viewed

File without changes

backend/worker/uploads/.keep ADDED Viewed

File without changes

backend/worker/worker.py ADDED Viewed

	@@ -0,0 +1,286 @@

+from __future__ import annotations
+import os
+import time
+import uuid
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List, Tuple
+from dotenv import load_dotenv
+from .gmail_client import GmailClient
+from .openai_classifier import classify_with_openai
+from .pdf_render import render_pdf_to_pngs
+# Force load repo_root/backend/.env (single source of truth)
+# parents[2] is two directories above the one containing this file.
+REPO_ROOT = Path(__file__).resolve().parents[2]
+# override=True: values from the .env file replace variables already set in the environment.
+load_dotenv(REPO_ROOT / "backend" / ".env", override=True)
+@dataclass
+class Settings:
+    """Worker configuration; load_settings() fills it from environment variables."""
+    # Files handed to GmailClient (GMAIL_CREDENTIALS_JSON / GMAIL_TOKEN_JSON)
+    creds_path: Path
+    token_path: Path
+    # Gmail label names for the pipeline lanes
+    label_incoming: str
+    label_known: str
+    label_unknown: str
+    label_train: str
+    # Rep email for UNKNOWN detection
+    rep_notify_to: str
+    # Sender address used for notification emails
+    notify_from: str
+    # OpenAI
+    openai_api_key: str
+    openai_model: str
+    # Seconds to sleep between poll cycles, and max messages fetched per lane per cycle
+    poll_seconds: int
+    max_messages_per_poll: int
+    # PDF rendering: number of leading pages and resolution passed to render_pdf_to_pngs
+    render_pages: int
+    render_dpi: int
+    # Base URL used to build the "?pdf_id=" trainer links
+    trainer_base_url: str
+def load_settings() -> Settings:
+    base = Path(__file__).resolve().parents[1]  # backend/
+    creds = Path(os.environ.get("GMAIL_CREDENTIALS_JSON", str(base / "credentials.json")))
+    token = Path(os.environ.get("GMAIL_TOKEN_JSON", str(base / "token.json")))
+    openai_api_key = (os.environ.get("OPENAI_API_KEY_TEST") or os.environ.get("OPENAI_API_KEY") or "").strip()
+    openai_model = (os.environ.get("OPENAI_MODEL") or "gpt-4o-mini").strip()
+    return Settings(
+        creds_path=creds,
+        token_path=token,
+        label_incoming=os.environ.get("PDF_PIPELINE_LABEL_INCOMING", "PDF_PIPELINE/INCOMING"),
+        label_known=os.environ.get("PDF_PIPELINE_LABEL_KNOWN", "PDF_PIPELINE/KNOWN"),
+        label_unknown=os.environ.get("PDF_PIPELINE_LABEL_UNKNOWN", "PDF_PIPELINE/UNKNOWN"),
+        label_train=os.environ.get("PDF_PIPELINE_LABEL_TRAIN", "PDF_PIPELINE/TRAIN"),
+        notify_from=(os.environ.get("PDF_PIPELINE_NOTIFY_FROM") or "").strip(),
+        rep_notify_to=(os.environ.get("PDF_PIPELINE_NOTIFY_TO") or "").strip(),
+        openai_api_key=openai_api_key,
+        openai_model=openai_model,
+        poll_seconds=int(os.environ.get("PDF_PIPELINE_POLL_SECONDS", "20")),
+        max_messages_per_poll=int(os.environ.get("PDF_PIPELINE_MAX_PER_POLL", "5")),
+        render_pages=int(os.environ.get("PDF_PIPELINE_RENDER_PAGES", "2")),
+        render_dpi=int(os.environ.get("PDF_PIPELINE_RENDER_DPI", "200")),
+        trainer_base_url=(os.environ.get("PDF_TRAINER_BASE_URL") or "http://localhost:5173").strip(),
+    )
+def _safe_name(s: str) -> str:
+    return "".join(c if c.isalnum() or c in ("-", "_", ".", " ") else "_" for c in s).strip()
+def _write_pipeline_pdf(root_worker_dir: Path, filename: str, pdf_bytes: bytes) -> Tuple[str, Path]:
+    """
+    Persist PDF for the trainer to fetch later.
+    Returns (pdf_id, pdf_path_on_disk).
+    """
+    uploads_dir = root_worker_dir / "uploads"
+    uploads_dir.mkdir(parents=True, exist_ok=True)
+    pdf_id = uuid.uuid4().hex
+    pdf_path = uploads_dir / f"{pdf_id}.pdf"
+    name_path = uploads_dir / f"{pdf_id}.name.txt"
+    pdf_path.write_bytes(pdf_bytes)
+    name_path.write_text(filename, encoding="utf-8")
+    return pdf_id, pdf_path
+def _process_train_label(gmail: GmailClient, s: Settings, root: Path) -> None:
+    """
+    TRAIN behavior:
+      - Pull unread PDFs from TRAIN label
+      - Store into uploads/ and print trainer link
+      - Mark read
+      - Do NOT classify
+      - Do NOT move labels
+    """
+    msgs = gmail.search_unread_pdf_messages(s.label_train, max_results=s.max_messages_per_poll)
+    if not msgs:
+        return
+    for m in msgs:
+        msg_full = gmail.get_message_full(m.msg_id)
+        pdf_atts = gmail.list_pdf_attachments(msg_full)
+        if not pdf_atts:
+            gmail.move_message(m.msg_id, add_labels=[], remove_labels=[], mark_read=True)
+            continue
+        for (filename, att_id) in pdf_atts:
+            filename = _safe_name(filename or "attachment.pdf")
+            pdf_bytes = gmail.download_attachment(m.msg_id, att_id)
+            pdf_id, stored_pdf_path = _write_pipeline_pdf(root, filename, pdf_bytes)
+            trainer_link = f"{s.trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
+            gmail.move_message(m.msg_id, add_labels=[], remove_labels=[], mark_read=True)
+            print(
+                f"[worker][TRAIN] stored PDF msg={m.msg_id} file={filename} "
+                f"pdf_id={pdf_id} stored={stored_pdf_path}"
+            )
+            print(f"[worker][TRAIN] open: {trainer_link}")
+def main():
+    s = load_settings()
+    # Validate settings
+    if not s.rep_notify_to:
+        raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_TO (rep email for UNKNOWN detection)")
+    if not s.notify_from:
+        raise RuntimeError("Missing PDF_PIPELINE_NOTIFY_FROM (OAuth Gmail account email)")
+    if not s.trainer_base_url:
+        raise RuntimeError("Missing PDF_TRAINER_BASE_URL (base URL for trainer link)")
+    if not s.openai_api_key:
+        raise RuntimeError("Missing OPENAI_API_KEY_TEST (or OPENAI_API_KEY) in backend/.env")
+    gmail = GmailClient(s.creds_path, s.token_path)
+    # Ensure labels exist
+    gmail.ensure_label(s.label_incoming)
+    gmail.ensure_label(s.label_known)
+    gmail.ensure_label(s.label_unknown)
+    gmail.ensure_label(s.label_train)
+    root = Path(__file__).resolve().parents[0]  # backend/worker
+    tmp_dir = root / "tmp"
+    tmp_dir.mkdir(parents=True, exist_ok=True)
+    print(f"[worker] Watching label: {s.label_incoming}")
+    print(f"[worker] Known label:   {s.label_known}")
+    print(f"[worker] Unknown label: {s.label_unknown}")
+    print(f"[worker] Train label:   {s.label_train}")
+    print(f"[worker] Rep notify to: {s.rep_notify_to}")
+    print(f"[worker] OpenAI model:  {s.openai_model}")
+    while True:
+        try:
+            # 1) TRAIN lane
+            _process_train_label(gmail, s, root)
+            # 2) Main pipeline (INCOMING -> KNOWN/UNKNOWN)
+            msgs = gmail.search_unread_pdf_messages(s.label_incoming, max_results=s.max_messages_per_poll)
+            if not msgs:
+                time.sleep(s.poll_seconds)
+                continue
+            for m in msgs:
+                msg_full = gmail.get_message_full(m.msg_id)
+                pdf_atts = gmail.list_pdf_attachments(msg_full)
+                if not pdf_atts:
+                    # Remove INCOMING + mark read so it doesn't loop forever
+                    gmail.move_message(m.msg_id, add_labels=[], remove_labels=[s.label_incoming], mark_read=True)
+                    continue
+                any_unknown = False
+                unknown_payloads: List[Tuple[str, bytes]] = []
+                # Classify all PDF attachments for this message
+                for (filename, att_id) in pdf_atts:
+                    filename = _safe_name(filename or "attachment.pdf")
+                    pdf_bytes = gmail.download_attachment(m.msg_id, att_id)
+                    stamp = str(int(time.time()))
+                    pdf_path = tmp_dir / f"{stamp}_{m.msg_id}_{filename}"
+                    pdf_path.write_bytes(pdf_bytes)
+                    img_dir = tmp_dir / f"{stamp}_{m.msg_id}_{pdf_path.stem}"
+                    rendered = render_pdf_to_pngs(pdf_path, img_dir, pages=s.render_pages, dpi=s.render_dpi)
+                    image_paths = [str(r.path) for r in rendered]
+                    result = classify_with_openai(
+                        image_paths,
+                        api_key=s.openai_api_key,
+                        model=s.openai_model,
+                    )
+                    template_id = (result.get("template_id") or "UNKNOWN").strip()
+                    conf = float(result.get("confidence") or 0.0)
+                    if template_id == "UNKNOWN":
+                        any_unknown = True
+                        unknown_payloads.append((filename, pdf_bytes))
+                        print(f"[worker] UNKNOWN attachment conf={conf:.3f} msg={m.msg_id} file={filename}")
+                    else:
+                        print(
+                            f"[worker] KNOWN attachment template={template_id} conf={conf:.3f} "
+                            f"msg={m.msg_id} file={filename}"
+                        )
+                # Apply Gmail label ONCE per message
+                if any_unknown:
+                    gmail.move_message(
+                        m.msg_id,
+                        add_labels=[s.label_unknown],
+                        remove_labels=[s.label_incoming],
+                        mark_read=True,
+                    )
+                else:
+                    gmail.move_message(
+                        m.msg_id,
+                        add_labels=[s.label_known],
+                        remove_labels=[s.label_incoming],
+                        mark_read=True,
+                    )
+                # Notify rep for each unknown PDF attachment
+                if any_unknown:
+                    for (filename, pdf_bytes) in unknown_payloads:
+                        pdf_id, stored_pdf_path = _write_pipeline_pdf(root, filename, pdf_bytes)
+                        trainer_link = f"{s.trainer_base_url.rstrip('/')}/?pdf_id={pdf_id}"
+                        subject = "Action required: Unknown PDF format (template not found)"
+                        body = (
+                            "Hi,\n\n"
+                            "We received a PDF that does not match any existing templates in the system.\n\n"
+                            "Please open the PDF Trainer using the link below and create or update the template configuration:\n"
+                            f"{trainer_link}\n\n"
+                            "The original PDF is attached for reference.\n\n"
+                            "Thank you,\n"
+                            "Inserio Automation\n"
+                        )
+                        attachments: List[Tuple[str, bytes]] = []
+                        if len(pdf_bytes) < 20 * 1024 * 1024:
+                            attachments.append((filename, pdf_bytes))
+                        else:
+                            body += "\nNote: The PDF was too large to attach.\n"
+                        gmail.send_email(
+                            to_email=s.rep_notify_to,
+                            from_email=s.notify_from,
+                            subject=subject,
+                            body_text=body,
+                            attachments=attachments,
+                        )
+                        print(
+                            f"[worker] UNKNOWN: emailed rep {s.rep_notify_to} msg={m.msg_id} file={filename} "
+                            f"pdf_id={pdf_id} stored={stored_pdf_path}"
+                        )
+        except Exception as e:
+            print(f"[worker] ERROR: {e}")
+        time.sleep(s.poll_seconds)
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 fastapi
 uvicorn[standard]

 fastapi
 uvicorn[standard]
+python-dotenv