Seth0330 committed on
Commit
e73b3fb
·
verified ·
1 Parent(s): ee4e57c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -80
app.py CHANGED
@@ -131,86 +131,114 @@ def fallback_supplier(text):
131
  return None
132
 
133
  def get_extraction_prompt(model_choice, txt):
134
- return (
135
- "You are an expert invoice parser. "
136
- "Extract data according to the visible table structure and column headers in the invoice. "
137
- "For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
138
- "Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
139
- "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
140
- "Use this schema:\n"
141
- '{\n'
142
- ' "invoice_header": {\n'
143
- ' "car_number": "string or null",\n'
144
- ' "shipment_number": "string or null",\n'
145
- ' "shipping_point": "string or null",\n'
146
- ' "currency": "string or null",\n'
147
- ' "invoice_number": "string or null",\n'
148
- ' "invoice_date": "string or null",\n'
149
- ' "order_number": "string or null",\n'
150
- ' "customer_order_number": "string or null",\n'
151
- ' "our_order_number": "string or null",\n'
152
- ' "sales_order_number": "string or null",\n'
153
- ' "purchase_order_number": "string or null",\n'
154
- ' "order_date": "string or null",\n'
155
- ' "supplier_name": "string or null",\n'
156
- ' "supplier_address": "string or null",\n'
157
- ' "supplier_phone": "string or null",\n'
158
- ' "supplier_email": "string or null",\n'
159
- ' "supplier_tax_id": "string or null",\n'
160
- ' "customer_name": "string or null",\n'
161
- ' "customer_address": "string or null",\n'
162
- ' "customer_phone": "string or null",\n'
163
- ' "customer_email": "string or null",\n'
164
- ' "customer_tax_id": "string or null",\n'
165
- ' "ship_to_name": "string or null",\n'
166
- ' "ship_to_address": "string or null",\n'
167
- ' "bill_to_name": "string or null",\n'
168
- ' "bill_to_address": "string or null",\n'
169
- ' "remit_to_name": "string or null",\n'
170
- ' "remit_to_address": "string or null",\n'
171
- ' "tax_id": "string or null",\n'
172
- ' "tax_registration_number": "string or null",\n'
173
- ' "vat_number": "string or null",\n'
174
- ' "payment_terms": "string or null",\n'
175
- ' "payment_method": "string or null",\n'
176
- ' "payment_reference": "string or null",\n'
177
- ' "bank_account_number": "string or null",\n'
178
- ' "iban": "string or null",\n'
179
- ' "swift_code": "string or null",\n'
180
- ' "total_before_tax": "string or null",\n'
181
- ' "tax_amount": "string or null",\n'
182
- ' "tax_rate": "string or null",\n'
183
- ' "shipping_charges": "string or null",\n'
184
- ' "discount": "string or null",\n'
185
- ' "total_due": "string or null",\n'
186
- ' "amount_paid": "string or null",\n'
187
- ' "balance_due": "string or null",\n'
188
- ' "due_date": "string or null",\n'
189
- ' "invoice_status": "string or null",\n'
190
- ' "reference_number": "string or null",\n'
191
- ' "project_code": "string or null",\n'
192
- ' "department": "string or null",\n'
193
- ' "contact_person": "string or null",\n'
194
- ' "notes": "string or null",\n'
195
- ' "additional_info": "string or null"\n'
196
- ' },\n'
197
- ' "line_items": [\n'
198
- ' {\n'
199
- ' "quantity": "string or null",\n'
200
- ' "units": "string or null",\n'
201
- ' "description": "string or null",\n'
202
- ' "footage": "string or null",\n'
203
- ' "price": "string or null",\n'
204
- ' "amount": "string or null",\n'
205
- ' "notes": "string or null"\n'
206
- ' }\n'
207
- ' ]\n'
208
- '}'
209
- "\nIf a field is missing for a line item or header, use null. "
210
- "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
211
- "\nInvoice Text:\n"
212
- f"{txt}"
213
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
  def ensure_total_due(invoice_header):
216
  if invoice_header.get("total_due") in [None, ""]:
 
131
  return None
132
 
133
  def get_extraction_prompt(model_choice, txt):
134
+ return f"""
135
+ You are an expert invoice parser.
136
+ Extract data according to the visible table structure and column headers in the invoice.
137
+ For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items).
138
+ Merge all multi-line content within a single cell into that field (especially for the "description" and "notes").
139
+ Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the "invoice_header", not as line item fields.
140
+
141
+ **For example, if the source document shows Payment Terms: 1% 10 ADI, ADF, NET 20 DAYS**
142
+ **Do not alter the sequence, combine, or separate phrases in any way.**
143
+
144
+ Discount Extraction:
145
+ Extract only fields explicitly labeled "Discount", "Special Discount", "Rebate", or "Early Payment Discount".
146
+ Do not extract "deduct" amounts as a discount if they are part of payment terms or incentives.
147
+ Exclude any ambiguous phrases from the discount field.
148
+
149
+ Use this schema:
150
+ {{
151
+ // ============================
152
+ // Invoice Header Details
153
+ // ============================
154
+ "invoice_header": {{
155
+ "id_number": "string or null",
156
+ "document_type": "string or null",
157
+ "invoice_number": "string or null",
158
+ "invoice_date": "string or null",
159
+ "due_date": "string or null",
160
+ "invoice_status": "string or null",
161
+ "mobile_no": "string or null",
162
+ "landline_no": "string or null",
163
+ "fax": "string or null",
164
+ "e_mail_id": "string or null",
165
+ "taxable_amount": "string or null",
166
+ "tax_amount": "string or null",
167
+ "tax_rate": "string or null",
168
+ "total_before_tax": "string or null",
169
+ "shipping_charges": "string or null",
170
+ "discount": "string or null",
171
+ "round_off": "string or null",
172
+ "grand_total": "string or null",
173
+ "order_number": "string or null",
174
+ "order_date": "string or null",
175
+ "purchase_order_number": "string or null",
176
+ "purchase_order_date": "string or null",
177
+ "customer_order_number": "string or null",
178
+ "our_order_number": "string or null",
179
+ "sales_order_number": "string or null",
180
+ "supplier_name": "string or null",
181
+ "supplier_address": "string or null",
182
+ "supplier_phone": "string or null",
183
+ "supplier_email": "string or null",
184
+ "supplier_tax_id": "string or null",
185
+ "customer_name": "string or null",
186
+ "customer_no": "string or null",
187
+ "customer_address": "string or null",
188
+ "customer_phone": "string or null",
189
+ "customer_email": "string or null",
190
+ "customer_tax_id": "string or null",
191
+ "bill_to_name": "string or null",
192
+ "bill_to_address": "string or null",
193
+ "ship_to_address": "string or null",
194
+ "ship_to_name": "string or null",
195
+ "remit_to": "string or null",
196
+ "pay_to": "string or null",
197
+ "shipment_number": "string or null",
198
+ "shipping_point": "string or null",
199
+ "shipped_via": "string or null",
200
+ "transportation_mode": "string or null",
201
+ "delivery_doc_no": "string or null",
202
+ "delivery_doc_date": "string or null",
203
+ "payment_terms": "string or null",
204
+ "payment_method": "string or null",
205
+ "payment_reference": "string or null",
206
+ "bank_account_number": "string or null",
207
+ "iban": "string or null",
208
+ "bank_account_name": "string or null",
209
+ "bank_name": "string or null",
210
+ "bank_branch": "string or null",
211
+ "notes_remarks": "string or null",
212
+ "additional_info": "string or null",
213
+ "delivery_terms": "string or null",
214
+ "aca_admin_fee": "string or null"
215
+ }},
216
+ // ============================
217
+ // Line Items Details
218
+ // ============================
219
+ "line_items": [
220
+ {{
221
+ "line_no": "string or null",
222
+ "item_no": "string or null",
223
+ "quantity": "string or null",
224
+ "uom": "string or null",
225
+ "units": "string or null",
226
+ "rate": "string or null",
227
+ "rate_per": "string or null",
228
+ "price": "string or null",
229
+ "line_amount": "string or null",
230
+ "description": "string or null",
231
+ "footage": "string or null",
232
+ "notes": "string or null"
233
+ }}
234
+ ]
235
+ }}
236
+ If a field is missing for a line item or header, use null.
237
+ Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.
238
+ Invoice Text:
239
+ {txt}
240
+ """
241
+
242
 
243
  def ensure_total_due(invoice_header):
244
  if invoice_header.get("total_due") in [None, ""]: