Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -117,17 +117,23 @@ def fallback_supplier(text):
|
|
| 117 |
|
| 118 |
def get_extraction_prompt(model_choice, txt):
|
| 119 |
return (
|
| 120 |
-
"
|
| 121 |
-
"
|
| 122 |
-
"
|
| 123 |
-
"
|
|
|
|
|
|
|
| 124 |
'{\n'
|
| 125 |
' "invoice_header": {\n'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
' "invoice_number": "string or null",\n'
|
| 127 |
' "invoice_date": "string or null",\n'
|
| 128 |
' "order_number": "string or null",\n'
|
| 129 |
-
' "
|
| 130 |
-
' "
|
| 131 |
' "sales_order_number": "string or null",\n'
|
| 132 |
' "purchase_order_number": "string or null",\n'
|
| 133 |
' "order_date": "string or null",\n'
|
|
@@ -148,7 +154,6 @@ def get_extraction_prompt(model_choice, txt):
|
|
| 148 |
' "tax_id": "string or null",\n'
|
| 149 |
' "tax_registration_number": "string or null",\n'
|
| 150 |
' "vat_number": "string or null",\n'
|
| 151 |
-
' "currency": "string or null",\n'
|
| 152 |
' "payment_terms": "string or null",\n'
|
| 153 |
' "payment_method": "string or null",\n'
|
| 154 |
' "payment_reference": "string or null",\n'
|
|
@@ -174,32 +179,24 @@ def get_extraction_prompt(model_choice, txt):
|
|
| 174 |
' },\n'
|
| 175 |
' "line_items": [\n'
|
| 176 |
' {\n'
|
| 177 |
-
' "item_number": "string or null",\n'
|
| 178 |
-
' "line_number": "string or null",\n'
|
| 179 |
-
' "product_code": "string or null",\n'
|
| 180 |
-
' "sku": "string or null",\n'
|
| 181 |
-
' "description": "string or null",\n'
|
| 182 |
' "quantity": "string or null",\n'
|
| 183 |
-
' "
|
| 184 |
-
' "
|
| 185 |
-
' "
|
| 186 |
-
' "
|
| 187 |
-
' "
|
| 188 |
-
' "
|
| 189 |
-
' "delivery_date": "string or null",\n'
|
| 190 |
-
' "gl_code": "string or null",\n'
|
| 191 |
-
' "cost_center": "string or null",\n'
|
| 192 |
-
' "project_code": "string or null",\n'
|
| 193 |
-
' "any_other_line_item_field": "string or null"\n'
|
| 194 |
' }\n'
|
| 195 |
' ]\n'
|
| 196 |
'}'
|
| 197 |
-
"\
|
|
|
|
| 198 |
"\nInvoice Text:\n"
|
| 199 |
f"{txt}"
|
| 200 |
)
|
| 201 |
|
| 202 |
|
|
|
|
| 203 |
def extract_invoice_info(model_choice, text):
|
| 204 |
prompt = get_extraction_prompt(model_choice, text)
|
| 205 |
raw = query_llm(model_choice, prompt)
|
|
|
|
| 117 |
|
| 118 |
def get_extraction_prompt(model_choice, txt):
|
| 119 |
return (
|
| 120 |
+
"You are an expert invoice parser. "
|
| 121 |
+
"Extract data according to the visible table structure and column headers in the invoice. "
|
| 122 |
+
"For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
|
| 123 |
+
"Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
|
| 124 |
+
"Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
|
| 125 |
+
"Use this schema:\n"
|
| 126 |
'{\n'
|
| 127 |
' "invoice_header": {\n'
|
| 128 |
+
' "car_number": "string or null",\n'
|
| 129 |
+
' "shipment_number": "string or null",\n'
|
| 130 |
+
' "shipping_point": "string or null",\n'
|
| 131 |
+
' "currency": "string or null",\n'
|
| 132 |
' "invoice_number": "string or null",\n'
|
| 133 |
' "invoice_date": "string or null",\n'
|
| 134 |
' "order_number": "string or null",\n'
|
| 135 |
+
' "customer_order_number": "string or null",\n'
|
| 136 |
+
' "our_order_number": "string or null",\n'
|
| 137 |
' "sales_order_number": "string or null",\n'
|
| 138 |
' "purchase_order_number": "string or null",\n'
|
| 139 |
' "order_date": "string or null",\n'
|
|
|
|
| 154 |
' "tax_id": "string or null",\n'
|
| 155 |
' "tax_registration_number": "string or null",\n'
|
| 156 |
' "vat_number": "string or null",\n'
|
|
|
|
| 157 |
' "payment_terms": "string or null",\n'
|
| 158 |
' "payment_method": "string or null",\n'
|
| 159 |
' "payment_reference": "string or null",\n'
|
|
|
|
| 179 |
' },\n'
|
| 180 |
' "line_items": [\n'
|
| 181 |
' {\n'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
' "quantity": "string or null",\n'
|
| 183 |
+
' "units": "string or null",\n'
|
| 184 |
+
' "description": "string or null",\n'
|
| 185 |
+
' "footage": "string or null",\n'
|
| 186 |
+
' "price": "string or null",\n'
|
| 187 |
+
' "amount": "string or null",\n'
|
| 188 |
+
' "notes": "string or null"\n'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
' }\n'
|
| 190 |
' ]\n'
|
| 191 |
'}'
|
| 192 |
+
"\nIf a field is missing for a line item or header, use null. "
|
| 193 |
+
"Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
|
| 194 |
"\nInvoice Text:\n"
|
| 195 |
f"{txt}"
|
| 196 |
)
|
| 197 |
|
| 198 |
|
| 199 |
+
|
| 200 |
def extract_invoice_info(model_choice, text):
|
| 201 |
prompt = get_extraction_prompt(model_choice, text)
|
| 202 |
raw = query_llm(model_choice, prompt)
|