Seth0330 committed on
Commit
877a665
·
verified ·
1 Parent(s): 50aaed9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -78
app.py CHANGED
@@ -113,84 +113,137 @@ def fallback_supplier(text):
113
 
114
  def get_extraction_prompt(model_choice, txt):
115
  return (
116
- "You are an expert invoice parser. "
117
- "Extract data according to the visible table structure and column headers in the invoice. "
118
- "For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
119
- "Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
120
- "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
121
- "Use this schema:\n"
122
- '{\n'
123
- ' "invoice_header": {\n'
124
- ' "car_number": "string or null",\n'
125
- ' "shipment_number": "string or null",\n'
126
- ' "shipping_point": "string or null",\n'
127
- ' "currency": "string or null",\n'
128
- ' "invoice_number": "string or null",\n'
129
- ' "invoice_date": "string or null",\n'
130
- ' "order_number": "string or null",\n'
131
- ' "customer_order_number": "string or null",\n'
132
- ' "our_order_number": "string or null",\n'
133
- ' "sales_order_number": "string or null",\n'
134
- ' "purchase_order_number": "string or null",\n'
135
- ' "order_date": "string or null",\n'
136
- ' "supplier_name": "string or null",\n'
137
- ' "supplier_address": "string or null",\n'
138
- ' "supplier_phone": "string or null",\n'
139
- ' "supplier_email": "string or null",\n'
140
- ' "supplier_tax_id": "string or null",\n'
141
- ' "customer_name": "string or null",\n'
142
- ' "customer_address": "string or null",\n'
143
- ' "customer_phone": "string or null",\n'
144
- ' "customer_email": "string or null",\n'
145
- ' "customer_tax_id": "string or null",\n'
146
- ' "ship_to_name": "string or null",\n'
147
- ' "ship_to_address": "string or null",\n'
148
- ' "bill_to_name": "string or null",\n'
149
- ' "bill_to_address": "string or null",\n'
150
- ' "remit_to_name": "string or null",\n'
151
- ' "remit_to_address": "string or null",\n'
152
- ' "tax_id": "string or null",\n'
153
- ' "tax_registration_number": "string or null",\n'
154
- ' "vat_number": "string or null",\n'
155
- ' "payment_terms": "string or null",\n'
156
- ' "payment_method": "string or null",\n'
157
- ' "payment_reference": "string or null",\n'
158
- ' "bank_account_number": "string or null",\n'
159
- ' "iban": "string or null",\n'
160
- ' "swift_code": "string or null",\n'
161
- ' "total_before_tax": "string or null",\n'
162
- ' "tax_amount": "string or null",\n'
163
- ' "tax_rate": "string or null",\n'
164
- ' "shipping_charges": "string or null",\n'
165
- ' "discount": "string or null",\n'
166
- ' "total_due": "string or null",\n'
167
- ' "amount_paid": "string or null",\n'
168
- ' "balance_due": "string or null",\n'
169
- ' "due_date": "string or null",\n'
170
- ' "invoice_status": "string or null",\n'
171
- ' "reference_number": "string or null",\n'
172
- ' "project_code": "string or null",\n'
173
- ' "department": "string or null",\n'
174
- ' "contact_person": "string or null",\n'
175
- ' "notes": "string or null",\n'
176
- ' "additional_info": "string or null"\n'
177
- ' },\n'
178
- ' "line_items": [\n'
179
- ' {\n'
180
- ' "quantity": "string or null",\n'
181
- ' "units": "string or null",\n'
182
- ' "description": "string or null",\n'
183
- ' "footage": "string or null",\n'
184
- ' "price": "string or null",\n'
185
- ' "amount": "string or null",\n'
186
- ' "notes": "string or null"\n'
187
- ' }\n'
188
- ' ]\n'
189
- '}'
190
- "\nIf a field is missing for a line item or header, use null. "
191
- "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
192
- "\nInvoice Text:\n"
193
- f"{txt}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  )
195
 
196
  def extract_invoice_info(model_choice, text):
 
113
 
114
  def get_extraction_prompt(model_choice, txt):
115
  return (
116
+ 'You are an expert invoice parser.
117
+ Extract data according to the visible table structure and column headers in the invoice.
118
+ For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items).
119
+ Merge all multi-line content within a single cell into that field (especially for the "description" and "notes").
120
+ Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the "invoice_header", not as line item fields.
121
+
122
+ **For example, if the source document shows Payment Terms: 1% 10 ADI, ADF, NET 20 DAYS
123
+ **Do not alter the sequence, combine, or separate phrases in any way.
124
+
125
+ Discount Extraction:
126
+ Extract only fields explicitly labeled "Discount", "Special Discount", "Rebate", or "Early Payment Discount".
127
+ Do not extract "deduct" amounts as a discount if they are part of payment terms or incentives.
128
+ Exclude any ambiguous phrases from the discount field.
129
+
130
+ Use this schema:
131
+ {
132
+ // ============================
133
+ // Invoice Header Details
134
+ // ============================
135
+ "invoice_header": {
136
+
137
+ // General Document Information
138
+ "id_number": "string or null",
139
+ "document_type": "string or null",
140
+ "invoice_number": "string or null",
141
+ "invoice_date": "string or null",
142
+ "due_date": "string or null",
143
+ "invoice_status": "string or null",
144
+
145
+ // Contact Information (Phone, Email, etc.)
146
+ "mobile_no": "string or null",
147
+ "landline_no": "string or null",
148
+ "fax": "string or null",
149
+ "e_mail_id": "string or null",
150
+
151
+ // Financial and Tax Information
152
+ "taxable_amount": "string or null",
153
+ "tax_amount": "string or null",
154
+ "tax_rate": "string or null",
155
+ "total_before_tax": "string or null",
156
+ "shipping_charges": "string or null",
157
+ "discount": "string or null",
158
+ "round_off": "string or null",
159
+ "grand_total": "string or null",
160
+
161
+ // Order and Reference Information
162
+ "order_number": "string or null",
163
+ "order_date": "string or null",
164
+ "purchase_order_number": "string or null",
165
+ "purchase_order_date": "string or null",
166
+ "customer_order_number": "string or null",
167
+ "our_order_number": "string or null",
168
+ "sales_order_number": "string or null",
169
+
170
+ // Supplier Information
171
+ "supplier_name": "string or null",
172
+ "supplier_address": "string or null",
173
+ "supplier_phone": "string or null",
174
+ "supplier_email": "string or null",
175
+ "supplier_tax_id": "string or null",
176
+
177
+ // Customer Information
178
+ "customer_name": "string or null",
179
+ "customer_no": "string or null",
180
+ "customer_address": "string or null",
181
+ "customer_phone": "string or null",
182
+ "customer_email": "string or null",
183
+ "customer_tax_id": "string or null",
184
+
185
+ // Shipping and Payment Details
186
+ "bill_to_name": "string or null",
187
+ "bill_to_address": "string or null",
188
+ "ship_to_address": "string or null",
189
+ "ship_to_name": "string or null",
190
+ "remit_to": "string or null",
191
+ "pay_to": "string or null",
192
+ "shipment_number": "string or null",
193
+ "shipping_point": "string or null",
194
+ "shipped_via": "string or null",
195
+ "transportation_mode": "string or null",
196
+ "delivery_doc_no": "string or null",
197
+ "delivery_doc_date": "string or null",
198
+
199
+ // Bank & Payment Information
200
+ "payment_terms": "string or null",
201
+ "payment_method": "string or null",
202
+ "payment_reference": "string or null",
203
+ "bank_account_number": "string or null",
204
+ "iban": "string or null",
205
+ "bank_account_name": "string or null",
206
+ "bank_name": "string or null",
207
+ "bank_branch": "string or null",
208
+
209
+ // Additional Information & Notes
210
+ "notes_remarks": "string or null",
211
+ "additional_info": "string or null",
212
+ "delivery_terms": "string or null",
213
+ "aca_admin_fee": "string or null"
214
+ },
215
+
216
+ // ============================
217
+ // Line Items Details
218
+ // ============================
219
+ "line_items": [
220
+ {
221
+ // Item Identification
222
+ "line_no": "string or null",
223
+ "item_no": "string or null",
224
+
225
+ // Quantity and Unit Details
226
+ "quantity": "string or null",
227
+ "uom": "string or null",
228
+ "units": "string or null",
229
+
230
+ // Pricing Details
231
+ "rate": "string or null",
232
+ "rate_per": "string or null",
233
+ "price": "string or null",
234
+ "line_amount": "string or null",
235
+
236
+ // Item Description & Additional Notes
237
+ "description": "string or null",
238
+ "footage": "string or null",
239
+ "notes": "string or null"
240
+ }
241
+ ]
242
+ }
243
+ If a field is missing for a line item or header, use null.
244
+ Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.
245
+ "Invoice Text:"
246
+ "f@txt"
247
  )
248
 
249
  def extract_invoice_info(model_choice, text):