Seth0330 committed on
Commit
1108365
·
verified ·
1 Parent(s): dc0c728

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -13
app.py CHANGED
@@ -246,28 +246,94 @@ def find_best_po_match(inv, po_df):
246
  po_unit = str(row.get("Item Unit Price", ""))
247
  po_line_total = clean_num(row.get("Line Item Total", ""))
248
 
249
- # Weighted fuzzy scores
 
250
  s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
 
 
 
 
 
 
 
251
  s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
 
 
 
 
 
 
 
252
  s_bill_to = weighted_fuzzy_score(inv_bill_to, po_bill_to)
 
 
 
 
 
 
 
253
  s_terms = weighted_fuzzy_score(inv_payment_terms, po_payment_terms)
 
 
 
 
 
 
 
254
  s_currency = weighted_fuzzy_score(inv_currency, po_currency)
 
 
 
 
 
 
 
255
  s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0
 
 
 
 
 
 
256
 
257
  # Check for at least one line item strong match
258
  line_item_score = 0
259
  line_reason = ""
 
260
  for line in inv_line_items:
261
  desc_score = weighted_fuzzy_score(line.get("description", ""), po_desc)
262
  qty_score = 100 if clean_num(line.get("quantity")) == clean_num(po_qty) else 0
263
  unit_score = 100 if clean_num(line.get("price")) == clean_num(po_unit) else 0
264
  amount_score = 100 if clean_num(line.get("amount")) == po_line_total else 0
265
  total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  if total > line_item_score:
267
  line_item_score = total
268
- line_reason = (f"Best line item: desc_score={desc_score}, qty_score={qty_score}, "
269
- f"unit_score={unit_score}, amount_score={amount_score}")
270
- # Score weights (tune as needed)
 
 
 
271
  total_score = (
272
  s_supplier * 0.25 +
273
  s_ship_to * 0.1 +
@@ -277,25 +343,26 @@ def find_best_po_match(inv, po_df):
277
  s_total * 0.2 +
278
  line_item_score * 0.2
279
  )
 
280
  reason = (
281
- f"Supplier match: {s_supplier}/100, Ship To: {s_ship_to}/100, "
282
- f"Bill To: {s_bill_to}/100, Payment Terms: {s_terms}/100, Currency: {s_currency}/100, "
283
- f"Total Due: {'match' if s_total else 'no match'}, "
 
 
 
284
  f"Line item best match: {int(line_item_score)}/100. {line_reason}"
285
  )
 
286
  debug = {
287
  "po_idx": idx,
288
  "po_supplier": po_supplier,
289
  "po_ship_to": po_ship_to,
290
  "po_bill_to": po_bill_to,
291
  "po_total": po_total,
292
- "s_supplier": s_supplier,
293
- "s_ship_to": s_ship_to,
294
- "s_bill_to": s_bill_to,
295
- "s_terms": s_terms,
296
- "s_currency": s_currency,
297
- "s_total": s_total,
298
  "line_item_score": line_item_score,
 
299
  "total_score": total_score,
300
  "line_reason": line_reason,
301
  "inv_total_due": inv_total_due
@@ -309,6 +376,7 @@ def find_best_po_match(inv, po_df):
309
  best_row, best_score, reason, debug = scores[0]
310
  return best_row, best_score, reason, debug
311
 
 
312
  def extract_invoice_info(model_choice, text):
313
  prompt = get_extraction_prompt(model_choice, text)
314
  raw = query_llm(model_choice, prompt)
 
246
  po_unit = str(row.get("Item Unit Price", ""))
247
  po_line_total = clean_num(row.get("Line Item Total", ""))
248
 
249
+ field_details = []
250
+
251
  s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
252
+ field_details.append({
253
+ "field": "Supplier Name",
254
+ "invoice": inv_supplier,
255
+ "po": po_supplier,
256
+ "score": s_supplier
257
+ })
258
+
259
  s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
260
+ field_details.append({
261
+ "field": "Ship To",
262
+ "invoice": inv_ship_to,
263
+ "po": po_ship_to,
264
+ "score": s_ship_to
265
+ })
266
+
267
  s_bill_to = weighted_fuzzy_score(inv_bill_to, po_bill_to)
268
+ field_details.append({
269
+ "field": "Bill To",
270
+ "invoice": inv_bill_to,
271
+ "po": po_bill_to,
272
+ "score": s_bill_to
273
+ })
274
+
275
  s_terms = weighted_fuzzy_score(inv_payment_terms, po_payment_terms)
276
+ field_details.append({
277
+ "field": "Payment Terms",
278
+ "invoice": inv_payment_terms,
279
+ "po": po_payment_terms,
280
+ "score": s_terms
281
+ })
282
+
283
  s_currency = weighted_fuzzy_score(inv_currency, po_currency)
284
+ field_details.append({
285
+ "field": "Currency",
286
+ "invoice": inv_currency,
287
+ "po": po_currency,
288
+ "score": s_currency
289
+ })
290
+
291
  s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0
292
+ field_details.append({
293
+ "field": "Total Due",
294
+ "invoice": inv_total_due,
295
+ "po": po_total,
296
+ "score": s_total
297
+ })
298
 
299
  # Check for at least one line item strong match
300
  line_item_score = 0
301
  line_reason = ""
302
+ best_line_detail = None
303
  for line in inv_line_items:
304
  desc_score = weighted_fuzzy_score(line.get("description", ""), po_desc)
305
  qty_score = 100 if clean_num(line.get("quantity")) == clean_num(po_qty) else 0
306
  unit_score = 100 if clean_num(line.get("price")) == clean_num(po_unit) else 0
307
  amount_score = 100 if clean_num(line.get("amount")) == po_line_total else 0
308
  total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
309
+ detail = {
310
+ "field": "Line Item",
311
+ "invoice": {
312
+ "description": line.get("description", ""),
313
+ "quantity": line.get("quantity", ""),
314
+ "price": line.get("price", ""),
315
+ "amount": line.get("amount", ""),
316
+ },
317
+ "po": {
318
+ "description": po_desc,
319
+ "quantity": po_qty,
320
+ "price": po_unit,
321
+ "amount": po_line_total,
322
+ },
323
+ "desc_score": desc_score,
324
+ "qty_score": qty_score,
325
+ "unit_score": unit_score,
326
+ "amount_score": amount_score,
327
+ "line_item_score": total
328
+ }
329
  if total > line_item_score:
330
  line_item_score = total
331
+ best_line_detail = detail
332
+ line_reason = (
333
+ f"Best line item: desc_score={desc_score}, qty_score={qty_score}, "
334
+ f"unit_score={unit_score}, amount_score={amount_score}"
335
+ )
336
+
337
  total_score = (
338
  s_supplier * 0.25 +
339
  s_ship_to * 0.1 +
 
343
  s_total * 0.2 +
344
  line_item_score * 0.2
345
  )
346
+
347
  reason = (
348
+ f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
349
+ f"Ship To: {s_ship_to}/100 (invoice: '{inv_ship_to}' vs PO: '{po_ship_to}'), "
350
+ f"Bill To: {s_bill_to}/100 (invoice: '{inv_bill_to}' vs PO: '{po_bill_to}'), "
351
+ f"Payment Terms: {s_terms}/100 (invoice: '{inv_payment_terms}' vs PO: '{po_payment_terms}'), "
352
+ f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
353
+ f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
354
  f"Line item best match: {int(line_item_score)}/100. {line_reason}"
355
  )
356
+
357
  debug = {
358
  "po_idx": idx,
359
  "po_supplier": po_supplier,
360
  "po_ship_to": po_ship_to,
361
  "po_bill_to": po_bill_to,
362
  "po_total": po_total,
363
+ "scores": field_details,
 
 
 
 
 
364
  "line_item_score": line_item_score,
365
+ "best_line_detail": best_line_detail,
366
  "total_score": total_score,
367
  "line_reason": line_reason,
368
  "inv_total_due": inv_total_due
 
376
  best_row, best_score, reason, debug = scores[0]
377
  return best_row, best_score, reason, debug
378
 
379
+
380
  def extract_invoice_info(model_choice, text):
381
  prompt = get_extraction_prompt(model_choice, text)
382
  raw = query_llm(model_choice, prompt)