Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -116,15 +116,49 @@ def fallback_supplier(text):
|
|
| 116 |
return None
|
| 117 |
|
| 118 |
def get_extraction_prompt(model_choice, txt):
|
| 119 |
-
#
|
| 120 |
return (
|
| 121 |
-
"Extract
|
| 122 |
-
"Return a
|
| 123 |
-
"If any field is
|
| 124 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
f"{txt}"
|
| 126 |
)
|
| 127 |
|
|
|
|
| 128 |
def extract_invoice_info(model_choice, text):
|
| 129 |
prompt = get_extraction_prompt(model_choice, text)
|
| 130 |
raw = query_llm(model_choice, prompt)
|
|
|
|
| 116 |
return None
|
| 117 |
|
| 118 |
def get_extraction_prompt(model_choice, txt):
    """Build the LLM prompt that asks for invoice data as structured JSON.

    The prompt has four parts, concatenated in this order: the extraction
    instructions, an example JSON object showing both the ``invoice_header``
    object and the ``line_items`` array, a reminder to return only JSON, and
    finally the invoice text itself.

    Args:
        model_choice: Identifier of the selected model. It does not affect
            the prompt text; it is accepted so the call signature stays
            compatible with existing callers.
        txt: Invoice text to append after the ``Invoice Text:`` marker. It is
            formatted into the prompt as-is.

    Returns:
        The complete prompt as a single string.
    """
    # Instructions: what to extract and how the result must be shaped.
    instructions = (
        "Extract every possible piece of metadata and detail from the "
        "following invoice text—including all header information, "
        "supplier details, customer details, addresses, invoice numbers, "
        "dates, tax information, payment terms, references, summary totals, "
        "and a full list of line items with as many columns as possible. "
        "Return a structured JSON with two keys: 'invoice_header' (an object "
        "with all header fields found) and 'line_items' (an array of all "
        "detected line items and their attributes). "
        "If any field is not present, use null. "
        "Do not invent/hallucinate fields not present. "
        "Your output must match the format of this example "
        "(but include only fields found in the invoice):\n"
    )

    # Example output shows both header & line items.
    example = (
        '{\n'
        ' "invoice_header": {\n'
        ' "invoice_number": "string or null",\n'
        ' "invoice_date": "string or null",\n'
        ' "supplier_name": "string or null",\n'
        ' "supplier_address": "string or null",\n'
        ' "customer_name": "string or null",\n'
        ' "customer_address": "string or null",\n'
        ' "po_number": "string or null",\n'
        ' "tax_id": "string or null",\n'
        ' "payment_terms": "string or null",\n'
        ' "total_before_tax": "string or null",\n'
        ' "tax_amount": "string or null",\n'
        ' "total_due": "string or null",\n'
        ' "currency": "string or null",\n'
        ' "due_date": "string or null",\n'
        ' "any_other_metadata": "string or null"\n'
        ' },\n'
        ' "line_items": [\n'
        ' {\n'
        ' "item_number": "string or null",\n'
        ' "description": "string or null",\n'
        ' "quantity": "string or null",\n'
        ' "unit_price": "string or null",\n'
        ' "total_price": "string or null",\n'
        ' "tax_rate": "string or null",\n'
        ' "sku": "string or null",\n'
        ' "any_other_line_item_field": "string or null"\n'
        ' }\n'
        ' ]\n'
        '}'
    )

    # The invoice text goes last so the instructions and example precede it.
    closing = (
        "\nReturn ONLY the JSON object, no explanations.\n"
        "\nInvoice Text:\n"
    )

    return instructions + example + closing + f"{txt}"
|
| 160 |
|
| 161 |
+
|
| 162 |
def extract_invoice_info(model_choice, text):
|
| 163 |
prompt = get_extraction_prompt(model_choice, text)
|
| 164 |
raw = query_llm(model_choice, prompt)
|