Seth0330 committed on
Commit
498d8b2
·
verified ·
1 Parent(s): e73b3fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -108
app.py CHANGED
@@ -131,114 +131,86 @@ def fallback_supplier(text):
131
  return None
132
 
133
  def get_extraction_prompt(model_choice, txt):
134
- return f"""
135
- You are an expert invoice parser.
136
- Extract data according to the visible table structure and column headers in the invoice.
137
- For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items).
138
- Merge all multi-line content within a single cell into that field (especially for the "description" and "notes").
139
- Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the "invoice_header", not as line item fields.
140
-
141
- **For example, if the source document shows Payment Terms: 1% 10 ADI, ADF, NET 20 DAYS**
142
- **Do not alter the sequence, combine, or separate phrases in any way.**
143
-
144
- Discount Extraction:
145
- Extract only fields explicitly labeled "Discount", "Special Discount", "Rebate", or "Early Payment Discount".
146
- Do not extract "deduct" amounts as a discount if they are part of payment terms or incentives.
147
- Exclude any ambiguous phrases from the discount field.
148
-
149
- Use this schema:
150
- {{
151
- // ============================
152
- // Invoice Header Details
153
- // ============================
154
- "invoice_header": {{
155
- "id_number": "string or null",
156
- "document_type": "string or null",
157
- "invoice_number": "string or null",
158
- "invoice_date": "string or null",
159
- "due_date": "string or null",
160
- "invoice_status": "string or null",
161
- "mobile_no": "string or null",
162
- "landline_no": "string or null",
163
- "fax": "string or null",
164
- "e_mail_id": "string or null",
165
- "taxable_amount": "string or null",
166
- "tax_amount": "string or null",
167
- "tax_rate": "string or null",
168
- "total_before_tax": "string or null",
169
- "shipping_charges": "string or null",
170
- "discount": "string or null",
171
- "round_off": "string or null",
172
- "grand_total": "string or null",
173
- "order_number": "string or null",
174
- "order_date": "string or null",
175
- "purchase_order_number": "string or null",
176
- "purchase_order_date": "string or null",
177
- "customer_order_number": "string or null",
178
- "our_order_number": "string or null",
179
- "sales_order_number": "string or null",
180
- "supplier_name": "string or null",
181
- "supplier_address": "string or null",
182
- "supplier_phone": "string or null",
183
- "supplier_email": "string or null",
184
- "supplier_tax_id": "string or null",
185
- "customer_name": "string or null",
186
- "customer_no": "string or null",
187
- "customer_address": "string or null",
188
- "customer_phone": "string or null",
189
- "customer_email": "string or null",
190
- "customer_tax_id": "string or null",
191
- "bill_to_name": "string or null",
192
- "bill_to_address": "string or null",
193
- "ship_to_address": "string or null",
194
- "ship_to_name": "string or null",
195
- "remit_to": "string or null",
196
- "pay_to": "string or null",
197
- "shipment_number": "string or null",
198
- "shipping_point": "string or null",
199
- "shipped_via": "string or null",
200
- "transportation_mode": "string or null",
201
- "delivery_doc_no": "string or null",
202
- "delivery_doc_date": "string or null",
203
- "payment_terms": "string or null",
204
- "payment_method": "string or null",
205
- "payment_reference": "string or null",
206
- "bank_account_number": "string or null",
207
- "iban": "string or null",
208
- "bank_account_name": "string or null",
209
- "bank_name": "string or null",
210
- "bank_branch": "string or null",
211
- "notes_remarks": "string or null",
212
- "additional_info": "string or null",
213
- "delivery_terms": "string or null",
214
- "aca_admin_fee": "string or null"
215
- }},
216
- // ============================
217
- // Line Items Details
218
- // ============================
219
- "line_items": [
220
- {{
221
- "line_no": "string or null",
222
- "item_no": "string or null",
223
- "quantity": "string or null",
224
- "uom": "string or null",
225
- "units": "string or null",
226
- "rate": "string or null",
227
- "rate_per": "string or null",
228
- "price": "string or null",
229
- "line_amount": "string or null",
230
- "description": "string or null",
231
- "footage": "string or null",
232
- "notes": "string or null"
233
- }}
234
- ]
235
- }}
236
- If a field is missing for a line item or header, use null.
237
- Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.
238
- Invoice Text:
239
- {txt}
240
- """
241
-
242
 
243
  def ensure_total_due(invoice_header):
244
  if invoice_header.get("total_due") in [None, ""]:
 
131
  return None
132
 
133
  def get_extraction_prompt(model_choice, txt):
134
+ return (
135
+ "You are an expert invoice parser. "
136
+ "Extract data according to the visible table structure and column headers in the invoice. "
137
+ "For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
138
+ "Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
139
+ "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
140
+ "Use this schema:\n"
141
+ '{\n'
142
+ ' "invoice_header": {\n'
143
+ ' "car_number": "string or null",\n'
144
+ ' "shipment_number": "string or null",\n'
145
+ ' "shipping_point": "string or null",\n'
146
+ ' "currency": "string or null",\n'
147
+ ' "invoice_number": "string or null",\n'
148
+ ' "invoice_date": "string or null",\n'
149
+ ' "order_number": "string or null",\n'
150
+ ' "customer_order_number": "string or null",\n'
151
+ ' "our_order_number": "string or null",\n'
152
+ ' "sales_order_number": "string or null",\n'
153
+ ' "purchase_order_number": "string or null",\n'
154
+ ' "order_date": "string or null",\n'
155
+ ' "supplier_name": "string or null",\n'
156
+ ' "supplier_address": "string or null",\n'
157
+ ' "supplier_phone": "string or null",\n'
158
+ ' "supplier_email": "string or null",\n'
159
+ ' "supplier_tax_id": "string or null",\n'
160
+ ' "customer_name": "string or null",\n'
161
+ ' "customer_address": "string or null",\n'
162
+ ' "customer_phone": "string or null",\n'
163
+ ' "customer_email": "string or null",\n'
164
+ ' "customer_tax_id": "string or null",\n'
165
+ ' "ship_to_name": "string or null",\n'
166
+ ' "ship_to_address": "string or null",\n'
167
+ ' "bill_to_name": "string or null",\n'
168
+ ' "bill_to_address": "string or null",\n'
169
+ ' "remit_to_name": "string or null",\n'
170
+ ' "remit_to_address": "string or null",\n'
171
+ ' "tax_id": "string or null",\n'
172
+ ' "tax_registration_number": "string or null",\n'
173
+ ' "vat_number": "string or null",\n'
174
+ ' "payment_terms": "string or null",\n'
175
+ ' "payment_method": "string or null",\n'
176
+ ' "payment_reference": "string or null",\n'
177
+ ' "bank_account_number": "string or null",\n'
178
+ ' "iban": "string or null",\n'
179
+ ' "swift_code": "string or null",\n'
180
+ ' "total_before_tax": "string or null",\n'
181
+ ' "tax_amount": "string or null",\n'
182
+ ' "tax_rate": "string or null",\n'
183
+ ' "shipping_charges": "string or null",\n'
184
+ ' "discount": "string or null",\n'
185
+ ' "total_due": "string or null",\n'
186
+ ' "amount_paid": "string or null",\n'
187
+ ' "balance_due": "string or null",\n'
188
+ ' "due_date": "string or null",\n'
189
+ ' "invoice_status": "string or null",\n'
190
+ ' "reference_number": "string or null",\n'
191
+ ' "project_code": "string or null",\n'
192
+ ' "department": "string or null",\n'
193
+ ' "contact_person": "string or null",\n'
194
+ ' "notes": "string or null",\n'
195
+ ' "additional_info": "string or null"\n'
196
+ ' },\n'
197
+ ' "line_items": [\n'
198
+ ' {\n'
199
+ ' "quantity": "string or null",\n'
200
+ ' "units": "string or null",\n'
201
+ ' "description": "string or null",\n'
202
+ ' "footage": "string or null",\n'
203
+ ' "price": "string or null",\n'
204
+ ' "amount": "string or null",\n'
205
+ ' "notes": "string or null"\n'
206
+ ' }\n'
207
+ ' ]\n'
208
+ '}'
209
+ "\nIf a field is missing for a line item or header, use null. "
210
+ "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
211
+ "\nInvoice Text:\n"
212
+ f"{txt}"
213
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
  def ensure_total_due(invoice_header):
216
  if invoice_header.get("total_due") in [None, ""]: