File size: 2,640 Bytes
4a5269c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Identifiers of the form templates the classifier may choose from.
# The list is interpolated verbatim into SYSTEM_PROMPT below, so it appears
# in the prompt as the Python repr of a list (single-quoted items).
TEMPLATE_IDS = [
    "T1_IFACTOR_DELIVERED_ORDER",
    "T2_SEASPINE_DELIVERED_GOODS_FORM",
    "T3_ASTURA_SALES_ORDER_FORM",
    "T4_MEDICAL_ESTIMATION_OF_CHARGES",
    "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
    "T6_CUSTOMER_CHARGE_SHEET_SPINE",
    "T7_SALES_ORDER_ZIMMER",
]

# System prompt: asks for a template classification plus a "trainer schema",
# returned as JSON only.
#
# It is assembled from two literals:
#   1. an f-string holding the instructions (the only part that needs
#      interpolation, for TEMPLATE_IDS), and
#   2. a plain string holding the example JSON shape, kept out of the f-string
#      so its braces can be written literally instead of doubled.
# The resulting text starts with a newline and ends with "}\n".
SYSTEM_PROMPT = (
    f"""
You are classifying a medical/healthcare sales/order PDF form into one of the known templates,
and extracting a "trainer schema" for onboarding.

Known template_ids:
{TEMPLATE_IDS}

Rules:
- You MUST return JSON only (no markdown, no extra text).
- If none match confidently, return template_id "UNKNOWN".
- Always produce a schema object (even for UNKNOWN) so onboarding can proceed.

Output JSON shape (strict):
"""
    + """{
  "template_id": "<one of known template_ids or UNKNOWN>",
  "confidence": 0.0,
  "reason": "<short reason>",
  "trainer_schema": {
    "form_id": "<suggested id>",
    "version": 1,
    "page": 1,
    "scalar_value_region_mode": "offset_from_anchor_v1",
    "fields": [
      {
        "field_id": "facility_organization",
        "label": "Facility / Organization",
        "type": "entity",
        "anchor_hint": "<printed label text or None>",
        "value_hint": "<what to extract>"
      },
      {
        "field_id": "case_location_address",
        "label": "Case Location / Address",
        "type": "entity",
        "anchor_hint": "<printed label text or None>",
        "value_hint": "<what to extract>"
      },
      {
        "field_id": "vendor",
        "label": "Vendor",
        "type": "entity",
        "anchor_hint": "<printed label text or None>",
        "value_hint": "<what to extract>"
      },
      {
        "field_id": "physician_name",
        "label": "Physician Name",
        "type": "person",
        "anchor_hint": "<printed label text or None>",
        "value_hint": "<what to extract>"
      },
      {
        "field_id": "date_of_surgery",
        "label": "Date of Surgery",
        "type": "date",
        "anchor_hint": "<printed label text or None>",
        "value_hint": "<what to extract>"
      },
      {
        "field_id": "items",
        "label": "Items / Line Items",
        "type": "table",
        "table_hint": {
          "expected_columns": ["item_number","description","qty","lot_number","price","extended_price"],
          "where_on_page": "<short description>",
          "header_text_examples": ["Item Number","Description","Qty"]
        }
      }
    ]
  }
}
"""
)

# Request text asking for the classification and trainer_schema from page
# images. Presumably sent as the user turn alongside the images -- confirm
# against the caller. The leading and trailing newlines are part of the value.
USER_PROMPT = (
    "\n"
    "Classify the form template and generate trainer_schema based on the provided page images.\n"
    "Focus on printed structure, titles, logos, and table headers.\n"
)