Spaces:
Sleeping
Sleeping
File size: 2,640 Bytes
# Identifiers of the known form templates. The list is interpolated into
# SYSTEM_PROMPT, so the names here are exactly what the model is told it may
# return as "template_id" (the prompt adds "UNKNOWN" as the fallback value,
# which is deliberately not part of this list).
TEMPLATE_IDS = [
    "T1_IFACTOR_DELIVERED_ORDER",
    "T2_SEASPINE_DELIVERED_GOODS_FORM",
    "T3_ASTURA_SALES_ORDER_FORM",
    "T4_MEDICAL_ESTIMATION_OF_CHARGES",
    "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
    "T6_CUSTOMER_CHARGE_SHEET_SPINE",
    "T7_SALES_ORDER_ZIMMER",
]
# Instruction prompt for the template-classification / trainer-schema step.
# NOTE(review): presumably sent as the system message of the model call --
# confirm at the call site, which is not visible here.
#
# This is an f-string with a single substitution, {TEMPLATE_IDS}, so the
# prompt shows the known ids as the Python rendering of that list (square
# brackets, single-quoted names). Every other brace is doubled ("{{" / "}}"),
# which is the f-string escape for a literal "{" / "}" in the JSON example.
#
# What the prompt text asks the model for:
#   - JSON only, with no markdown or surrounding text;
#   - "template_id" set to one of the known ids, or "UNKNOWN" when nothing
#     matches confidently;
#   - a "trainer_schema" object in every case, including UNKNOWN, listing
#     five scalar fields (facility_organization, case_location_address,
#     vendor, physician_name, date_of_surgery) and one "items" table field
#     described by a "table_hint".
#
# The whole literal is runtime text: editing any character changes what the
# model is told.
SYSTEM_PROMPT = f"""
You are classifying a medical/healthcare sales/order PDF form into one of the known templates,
and extracting a "trainer schema" for onboarding.
Known template_ids:
{TEMPLATE_IDS}
Rules:
- You MUST return JSON only (no markdown, no extra text).
- If none match confidently, return template_id "UNKNOWN".
- Always produce a schema object (even for UNKNOWN) so onboarding can proceed.
Output JSON shape (strict):
{{
"template_id": "<one of known template_ids or UNKNOWN>",
"confidence": 0.0,
"reason": "<short reason>",
"trainer_schema": {{
"form_id": "<suggested id>",
"version": 1,
"page": 1,
"scalar_value_region_mode": "offset_from_anchor_v1",
"fields": [
{{
"field_id": "facility_organization",
"label": "Facility / Organization",
"type": "entity",
"anchor_hint": "<printed label text or None>",
"value_hint": "<what to extract>"
}},
{{
"field_id": "case_location_address",
"label": "Case Location / Address",
"type": "entity",
"anchor_hint": "<printed label text or None>",
"value_hint": "<what to extract>"
}},
{{
"field_id": "vendor",
"label": "Vendor",
"type": "entity",
"anchor_hint": "<printed label text or None>",
"value_hint": "<what to extract>"
}},
{{
"field_id": "physician_name",
"label": "Physician Name",
"type": "person",
"anchor_hint": "<printed label text or None>",
"value_hint": "<what to extract>"
}},
{{
"field_id": "date_of_surgery",
"label": "Date of Surgery",
"type": "date",
"anchor_hint": "<printed label text or None>",
"value_hint": "<what to extract>"
}},
{{
"field_id": "items",
"label": "Items / Line Items",
"type": "table",
"table_hint": {{
"expected_columns": ["item_number","description","qty","lot_number","price","extended_price"],
"where_on_page": "<short description>",
"header_text_examples": ["Item Number","Description","Qty"]
}}
}}
]
}}
}}
"""
# Short task message that accompanies the page images: it asks for the
# template classification plus the trainer_schema, and points the model at
# printed structure, titles, logos, and table headers.
# NOTE(review): presumably sent as the user turn of the same model call that
# uses SYSTEM_PROMPT -- confirm at the call site.
USER_PROMPT = """
Classify the form template and generate trainer_schema based on the provided page images.
Focus on printed structure, titles, logos, and table headers.
"""