Seth0330 committed on
Commit
784a877
·
verified ·
1 Parent(s): 65db264

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -24
app.py CHANGED
@@ -117,17 +117,23 @@ def fallback_supplier(text):
117
 
118
  def get_extraction_prompt(model_choice, txt):
119
  return (
120
- "Extract every possible piece of metadata and detail from the following invoice text—including all header information, supplier details, customer details, addresses, invoice numbers, dates, tax information, payment terms, references, summary totals, and a full list of line items with as many columns as possible. "
121
- "Return a structured JSON with two keys: 'invoice_header' (an object with all header fields found) and 'line_items' (an array of all detected line items and their attributes). "
122
- "If any field is not present, use null. Do not invent/hallucinate fields not present. "
123
- "Your output must match the format of this example (but include only fields found in the invoice):\n"
 
 
124
  '{\n'
125
  ' "invoice_header": {\n'
 
 
 
 
126
  ' "invoice_number": "string or null",\n'
127
  ' "invoice_date": "string or null",\n'
128
  ' "order_number": "string or null",\n'
129
- ' "Customer_order_number": "string or null",\n'
130
- ' "Our_order_number": "string or null",\n'
131
  ' "sales_order_number": "string or null",\n'
132
  ' "purchase_order_number": "string or null",\n'
133
  ' "order_date": "string or null",\n'
@@ -148,7 +154,6 @@ def get_extraction_prompt(model_choice, txt):
148
  ' "tax_id": "string or null",\n'
149
  ' "tax_registration_number": "string or null",\n'
150
  ' "vat_number": "string or null",\n'
151
- ' "currency": "string or null",\n'
152
  ' "payment_terms": "string or null",\n'
153
  ' "payment_method": "string or null",\n'
154
  ' "payment_reference": "string or null",\n'
@@ -174,32 +179,24 @@ def get_extraction_prompt(model_choice, txt):
174
  ' },\n'
175
  ' "line_items": [\n'
176
  ' {\n'
177
- ' "item_number": "string or null",\n'
178
- ' "line_number": "string or null",\n'
179
- ' "product_code": "string or null",\n'
180
- ' "sku": "string or null",\n'
181
- ' "description": "string or null",\n'
182
  ' "quantity": "string or null",\n'
183
- ' "unit_of_measure": "string or null",\n'
184
- ' "unit_price": "string or null",\n'
185
- ' "discount": "string or null",\n'
186
- ' "tax_rate": "string or null",\n'
187
- ' "tax_amount": "string or null",\n'
188
- ' "total_price": "string or null",\n'
189
- ' "delivery_date": "string or null",\n'
190
- ' "gl_code": "string or null",\n'
191
- ' "cost_center": "string or null",\n'
192
- ' "project_code": "string or null",\n'
193
- ' "any_other_line_item_field": "string or null"\n'
194
  ' }\n'
195
  ' ]\n'
196
  '}'
197
- "\nReturn ONLY the JSON object, no explanations.\n"
 
198
  "\nInvoice Text:\n"
199
  f"{txt}"
200
  )
201
 
202
 
 
203
  def extract_invoice_info(model_choice, text):
204
  prompt = get_extraction_prompt(model_choice, text)
205
  raw = query_llm(model_choice, prompt)
 
117
 
118
  def get_extraction_prompt(model_choice, txt):
119
  return (
120
+ "You are an expert invoice parser. "
121
+ "Extract data according to the visible table structure and column headers in the invoice. "
122
+ "For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
123
+ "Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
124
+ "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
125
+ "Use this schema:\n"
126
  '{\n'
127
  ' "invoice_header": {\n'
128
+ ' "car_number": "string or null",\n'
129
+ ' "shipment_number": "string or null",\n'
130
+ ' "shipping_point": "string or null",\n'
131
+ ' "currency": "string or null",\n'
132
  ' "invoice_number": "string or null",\n'
133
  ' "invoice_date": "string or null",\n'
134
  ' "order_number": "string or null",\n'
135
+ ' "customer_order_number": "string or null",\n'
136
+ ' "our_order_number": "string or null",\n'
137
  ' "sales_order_number": "string or null",\n'
138
  ' "purchase_order_number": "string or null",\n'
139
  ' "order_date": "string or null",\n'
 
154
  ' "tax_id": "string or null",\n'
155
  ' "tax_registration_number": "string or null",\n'
156
  ' "vat_number": "string or null",\n'
 
157
  ' "payment_terms": "string or null",\n'
158
  ' "payment_method": "string or null",\n'
159
  ' "payment_reference": "string or null",\n'
 
179
  ' },\n'
180
  ' "line_items": [\n'
181
  ' {\n'
 
 
 
 
 
182
  ' "quantity": "string or null",\n'
183
+ ' "units": "string or null",\n'
184
+ ' "description": "string or null",\n'
185
+ ' "footage": "string or null",\n'
186
+ ' "price": "string or null",\n'
187
+ ' "amount": "string or null",\n'
188
+ ' "notes": "string or null"\n'
 
 
 
 
 
189
  ' }\n'
190
  ' ]\n'
191
  '}'
192
+ "\nIf a field is missing for a line item or header, use null. "
193
+ "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
194
  "\nInvoice Text:\n"
195
  f"{txt}"
196
  )
197
 
198
 
199
+
200
  def extract_invoice_info(model_choice, text):
201
  prompt = get_extraction_prompt(model_choice, text)
202
  raw = query_llm(model_choice, prompt)