# Identifiers of the form templates the classifier is allowed to choose from.
# "UNKNOWN" is deliberately absent: SYSTEM_PROMPT tells the model to return it
# only when none of these match confidently.
TEMPLATE_IDS = [
    "T1_IFACTOR_DELIVERED_ORDER",
    "T2_SEASPINE_DELIVERED_GOODS_FORM",
    "T3_ASTURA_SALES_ORDER_FORM",
    "T4_MEDICAL_ESTIMATION_OF_CHARGES",
    "T5_CLINICAL_PROGRESS_NOTE_POSTOP",
    "T6_CUSTOMER_CHARGE_SHEET_SPINE",
    "T7_SALES_ORDER_ZIMMER",
]

# One bare ID per line for embedding in the prompt. Interpolating the list
# object itself would leak its Python repr (['A', 'B'] with single quotes and
# brackets) into a prompt that demands strict JSON. Built outside the f-string
# because a backslash inside an f-string expression is a syntax error before
# Python 3.12.
_TEMPLATE_ID_LINES = "\n".join(f"- {template_id}" for template_id in TEMPLATE_IDS)

# System prompt: classify the form into one of TEMPLATE_IDS (or "UNKNOWN") and
# emit a trainer schema. This is an f-string, so every literal JSON brace in
# the example shape is doubled ({{ / }}); only {_TEMPLATE_ID_LINES} is
# substituted.
SYSTEM_PROMPT = f"""
You are classifying a medical/healthcare sales/order PDF form into one of the known templates,
and extracting a "trainer schema" for onboarding.

Known template_ids:
{_TEMPLATE_ID_LINES}

Rules:
- You MUST return JSON only (no markdown, no extra text).
- If none match confidently, return template_id "UNKNOWN".
- Always produce a schema object (even for UNKNOWN) so onboarding can proceed.

Output JSON shape (strict):
{{
  "template_id": "",
  "confidence": 0.0,
  "reason": "",
  "trainer_schema": {{
    "form_id": "",
    "version": 1,
    "page": 1,
    "scalar_value_region_mode": "offset_from_anchor_v1",
    "fields": [
      {{ "field_id": "facility_organization", "label": "Facility / Organization", "type": "entity", "anchor_hint": "", "value_hint": "" }},
      {{ "field_id": "case_location_address", "label": "Case Location / Address", "type": "entity", "anchor_hint": "", "value_hint": "" }},
      {{ "field_id": "vendor", "label": "Vendor", "type": "entity", "anchor_hint": "", "value_hint": "" }},
      {{ "field_id": "physician_name", "label": "Physician Name", "type": "person", "anchor_hint": "", "value_hint": "" }},
      {{ "field_id": "date_of_surgery", "label": "Date of Surgery", "type": "date", "anchor_hint": "", "value_hint": "" }},
      {{
        "field_id": "items",
        "label": "Items / Line Items",
        "type": "table",
        "table_hint": {{
          "expected_columns": ["item_number","description","qty","lot_number","price","extended_price"],
          "where_on_page": "",
          "header_text_examples": ["Item Number","Description","Qty"]
        }}
      }}
    ]
  }}
}}
"""

# User-turn prompt sent alongside the page images the text refers to.
# NOTE(review): presumably paired with SYSTEM_PROMPT in a single request to a
# vision-capable model; confirm against the caller.
USER_PROMPT = """
Classify the form template and generate trainer_schema based on the provided page images.
Focus on printed structure, titles, logos, and table headers.
"""