Update app.py
Browse files
app.py
CHANGED
|
@@ -246,28 +246,94 @@ def find_best_po_match(inv, po_df):
|
|
| 246 |
po_unit = str(row.get("Item Unit Price", ""))
|
| 247 |
po_line_total = clean_num(row.get("Line Item Total", ""))
|
| 248 |
|
| 249 |
-
|
|
|
|
| 250 |
s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
s_bill_to = weighted_fuzzy_score(inv_bill_to, po_bill_to)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
s_terms = weighted_fuzzy_score(inv_payment_terms, po_payment_terms)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
s_currency = weighted_fuzzy_score(inv_currency, po_currency)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
# Check for at least one line item strong match
|
| 258 |
line_item_score = 0
|
| 259 |
line_reason = ""
|
|
|
|
| 260 |
for line in inv_line_items:
|
| 261 |
desc_score = weighted_fuzzy_score(line.get("description", ""), po_desc)
|
| 262 |
qty_score = 100 if clean_num(line.get("quantity")) == clean_num(po_qty) else 0
|
| 263 |
unit_score = 100 if clean_num(line.get("price")) == clean_num(po_unit) else 0
|
| 264 |
amount_score = 100 if clean_num(line.get("amount")) == po_line_total else 0
|
| 265 |
total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
if total > line_item_score:
|
| 267 |
line_item_score = total
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
| 271 |
total_score = (
|
| 272 |
s_supplier * 0.25 +
|
| 273 |
s_ship_to * 0.1 +
|
|
@@ -277,25 +343,26 @@ def find_best_po_match(inv, po_df):
|
|
| 277 |
s_total * 0.2 +
|
| 278 |
line_item_score * 0.2
|
| 279 |
)
|
|
|
|
| 280 |
reason = (
|
| 281 |
-
f"Supplier match: {s_supplier}/100
|
| 282 |
-
f"
|
| 283 |
-
f"
|
|
|
|
|
|
|
|
|
|
| 284 |
f"Line item best match: {int(line_item_score)}/100. {line_reason}"
|
| 285 |
)
|
|
|
|
| 286 |
debug = {
|
| 287 |
"po_idx": idx,
|
| 288 |
"po_supplier": po_supplier,
|
| 289 |
"po_ship_to": po_ship_to,
|
| 290 |
"po_bill_to": po_bill_to,
|
| 291 |
"po_total": po_total,
|
| 292 |
-
"
|
| 293 |
-
"s_ship_to": s_ship_to,
|
| 294 |
-
"s_bill_to": s_bill_to,
|
| 295 |
-
"s_terms": s_terms,
|
| 296 |
-
"s_currency": s_currency,
|
| 297 |
-
"s_total": s_total,
|
| 298 |
"line_item_score": line_item_score,
|
|
|
|
| 299 |
"total_score": total_score,
|
| 300 |
"line_reason": line_reason,
|
| 301 |
"inv_total_due": inv_total_due
|
|
@@ -309,6 +376,7 @@ def find_best_po_match(inv, po_df):
|
|
| 309 |
best_row, best_score, reason, debug = scores[0]
|
| 310 |
return best_row, best_score, reason, debug
|
| 311 |
|
|
|
|
| 312 |
def extract_invoice_info(model_choice, text):
|
| 313 |
prompt = get_extraction_prompt(model_choice, text)
|
| 314 |
raw = query_llm(model_choice, prompt)
|
|
|
|
| 246 |
po_unit = str(row.get("Item Unit Price", ""))
|
| 247 |
po_line_total = clean_num(row.get("Line Item Total", ""))
|
| 248 |
|
| 249 |
+
field_details = []
|
| 250 |
+
|
| 251 |
s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
|
| 252 |
+
field_details.append({
|
| 253 |
+
"field": "Supplier Name",
|
| 254 |
+
"invoice": inv_supplier,
|
| 255 |
+
"po": po_supplier,
|
| 256 |
+
"score": s_supplier
|
| 257 |
+
})
|
| 258 |
+
|
| 259 |
s_ship_to = weighted_fuzzy_score(inv_ship_to, po_ship_to)
|
| 260 |
+
field_details.append({
|
| 261 |
+
"field": "Ship To",
|
| 262 |
+
"invoice": inv_ship_to,
|
| 263 |
+
"po": po_ship_to,
|
| 264 |
+
"score": s_ship_to
|
| 265 |
+
})
|
| 266 |
+
|
| 267 |
s_bill_to = weighted_fuzzy_score(inv_bill_to, po_bill_to)
|
| 268 |
+
field_details.append({
|
| 269 |
+
"field": "Bill To",
|
| 270 |
+
"invoice": inv_bill_to,
|
| 271 |
+
"po": po_bill_to,
|
| 272 |
+
"score": s_bill_to
|
| 273 |
+
})
|
| 274 |
+
|
| 275 |
s_terms = weighted_fuzzy_score(inv_payment_terms, po_payment_terms)
|
| 276 |
+
field_details.append({
|
| 277 |
+
"field": "Payment Terms",
|
| 278 |
+
"invoice": inv_payment_terms,
|
| 279 |
+
"po": po_payment_terms,
|
| 280 |
+
"score": s_terms
|
| 281 |
+
})
|
| 282 |
+
|
| 283 |
s_currency = weighted_fuzzy_score(inv_currency, po_currency)
|
| 284 |
+
field_details.append({
|
| 285 |
+
"field": "Currency",
|
| 286 |
+
"invoice": inv_currency,
|
| 287 |
+
"po": po_currency,
|
| 288 |
+
"score": s_currency
|
| 289 |
+
})
|
| 290 |
+
|
| 291 |
s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0
|
| 292 |
+
field_details.append({
|
| 293 |
+
"field": "Total Due",
|
| 294 |
+
"invoice": inv_total_due,
|
| 295 |
+
"po": po_total,
|
| 296 |
+
"score": s_total
|
| 297 |
+
})
|
| 298 |
|
| 299 |
# Check for at least one line item strong match
|
| 300 |
line_item_score = 0
|
| 301 |
line_reason = ""
|
| 302 |
+
best_line_detail = None
|
| 303 |
for line in inv_line_items:
|
| 304 |
desc_score = weighted_fuzzy_score(line.get("description", ""), po_desc)
|
| 305 |
qty_score = 100 if clean_num(line.get("quantity")) == clean_num(po_qty) else 0
|
| 306 |
unit_score = 100 if clean_num(line.get("price")) == clean_num(po_unit) else 0
|
| 307 |
amount_score = 100 if clean_num(line.get("amount")) == po_line_total else 0
|
| 308 |
total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
|
| 309 |
+
detail = {
|
| 310 |
+
"field": "Line Item",
|
| 311 |
+
"invoice": {
|
| 312 |
+
"description": line.get("description", ""),
|
| 313 |
+
"quantity": line.get("quantity", ""),
|
| 314 |
+
"price": line.get("price", ""),
|
| 315 |
+
"amount": line.get("amount", ""),
|
| 316 |
+
},
|
| 317 |
+
"po": {
|
| 318 |
+
"description": po_desc,
|
| 319 |
+
"quantity": po_qty,
|
| 320 |
+
"price": po_unit,
|
| 321 |
+
"amount": po_line_total,
|
| 322 |
+
},
|
| 323 |
+
"desc_score": desc_score,
|
| 324 |
+
"qty_score": qty_score,
|
| 325 |
+
"unit_score": unit_score,
|
| 326 |
+
"amount_score": amount_score,
|
| 327 |
+
"line_item_score": total
|
| 328 |
+
}
|
| 329 |
if total > line_item_score:
|
| 330 |
line_item_score = total
|
| 331 |
+
best_line_detail = detail
|
| 332 |
+
line_reason = (
|
| 333 |
+
f"Best line item: desc_score={desc_score}, qty_score={qty_score}, "
|
| 334 |
+
f"unit_score={unit_score}, amount_score={amount_score}"
|
| 335 |
+
)
|
| 336 |
+
|
| 337 |
total_score = (
|
| 338 |
s_supplier * 0.25 +
|
| 339 |
s_ship_to * 0.1 +
|
|
|
|
| 343 |
s_total * 0.2 +
|
| 344 |
line_item_score * 0.2
|
| 345 |
)
|
| 346 |
+
|
| 347 |
reason = (
|
| 348 |
+
f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
|
| 349 |
+
f"Ship To: {s_ship_to}/100 (invoice: '{inv_ship_to}' vs PO: '{po_ship_to}'), "
|
| 350 |
+
f"Bill To: {s_bill_to}/100 (invoice: '{inv_bill_to}' vs PO: '{po_bill_to}'), "
|
| 351 |
+
f"Payment Terms: {s_terms}/100 (invoice: '{inv_payment_terms}' vs PO: '{po_payment_terms}'), "
|
| 352 |
+
f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
|
| 353 |
+
f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
|
| 354 |
f"Line item best match: {int(line_item_score)}/100. {line_reason}"
|
| 355 |
)
|
| 356 |
+
|
| 357 |
debug = {
|
| 358 |
"po_idx": idx,
|
| 359 |
"po_supplier": po_supplier,
|
| 360 |
"po_ship_to": po_ship_to,
|
| 361 |
"po_bill_to": po_bill_to,
|
| 362 |
"po_total": po_total,
|
| 363 |
+
"scores": field_details,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
"line_item_score": line_item_score,
|
| 365 |
+
"best_line_detail": best_line_detail,
|
| 366 |
"total_score": total_score,
|
| 367 |
"line_reason": line_reason,
|
| 368 |
"inv_total_due": inv_total_due
|
|
|
|
| 376 |
best_row, best_score, reason, debug = scores[0]
|
| 377 |
return best_row, best_score, reason, debug
|
| 378 |
|
| 379 |
+
|
| 380 |
def extract_invoice_info(model_choice, text):
|
| 381 |
prompt = get_extraction_prompt(model_choice, text)
|
| 382 |
raw = query_llm(model_choice, prompt)
|