File size: 26,323 Bytes
c289504
cdae312
0592d14
 
cdae312
0592d14
2682cc6
dd32a84
 
e613cc6
d2967d8
 
f52c5eb
 
 
 
e40b807
 
f52c5eb
 
 
 
 
 
 
 
ee4e57c
f52c5eb
 
 
 
 
 
 
 
 
 
ee4e57c
f52c5eb
 
 
 
 
 
 
 
 
 
 
 
91108ba
 
e40b807
 
 
46902a8
 
 
 
 
 
 
 
0592d14
c289504
46902a8
 
0592d14
46902a8
0592d14
 
 
46902a8
 
7b9561b
46902a8
0592d14
 
46902a8
 
0592d14
46902a8
0592d14
 
 
7b9561b
46902a8
 
 
 
 
 
 
 
 
 
 
 
 
 
0592d14
796b6f2
0592d14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796b6f2
46902a8
 
 
 
 
 
 
 
498d8b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796b6f2
46902a8
 
91108ba
46902a8
 
 
 
1d73f48
dc0c728
 
0592d14
dc0c728
 
 
 
 
 
 
 
 
0592d14
2c7ba82
 
 
 
d7d969b
46902a8
 
 
 
 
 
 
 
 
 
 
 
f52c5eb
46902a8
 
 
 
 
f52c5eb
46902a8
 
 
 
 
 
 
 
 
 
e40b807
2c7ba82
 
46902a8
2c7ba82
 
 
f52c5eb
2c7ba82
 
 
0ee76cc
2c7ba82
 
46902a8
 
 
 
f52c5eb
46902a8
f52c5eb
2c7ba82
f52c5eb
 
 
 
 
 
 
46902a8
f52c5eb
 
 
 
 
 
 
2c7ba82
f52c5eb
 
 
 
 
 
 
91108ba
f52c5eb
91108ba
 
f52c5eb
 
 
 
 
2c7ba82
46902a8
 
2c7ba82
46902a8
 
 
 
2c7ba82
46902a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c7ba82
 
46902a8
f52c5eb
 
 
 
 
46902a8
2c7ba82
46902a8
 
 
 
 
 
f52c5eb
2c7ba82
46902a8
 
 
91108ba
46902a8
2c7ba82
f52c5eb
2c7ba82
 
46902a8
 
 
 
 
 
 
 
91108ba
2c7ba82
 
f52c5eb
2c7ba82
 
 
 
 
d7d969b
46902a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc0c728
 
 
46902a8
dc0c728
46902a8
dc0c728
 
46902a8
1d61d87
dc0c728
 
 
 
 
 
 
 
46902a8
 
dc0c728
 
46902a8
dc0c728
 
 
 
 
1d61d87
dc0c728
a34f52b
dc0c728
 
 
 
46902a8
 
 
dc0c728
 
 
 
 
 
 
 
 
f52c5eb
 
 
 
 
 
 
 
 
 
 
 
e40b807
f52c5eb
e40b807
f52c5eb
e40b807
f52c5eb
e40b807
 
 
 
 
 
 
 
 
 
f52c5eb
 
 
 
e40b807
 
 
 
 
 
 
 
 
 
 
f52c5eb
e40b807
 
 
f52c5eb
e40b807
f52c5eb
e40b807
f52c5eb
e40b807
f52c5eb
e40b807
 
f52c5eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e40b807
f52c5eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e40b807
 
 
 
 
 
 
 
f52c5eb
 
e40b807
 
46902a8
e40b807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f52c5eb
 
e40b807
 
 
 
f52c5eb
 
 
 
 
 
e40b807
 
 
f52c5eb
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
import streamlit as st
import requests
import json
import re
import os
import time
import mimetypes
import pandas as pd
from langchain_community.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool, AgentType
from fuzzywuzzy import fuzz

# --- Streamlit Page Settings ---
st.set_page_config(page_title="EZOFIS Accounts Payable Agent", layout="wide")

# --- Styles for SaaS Feel ---
st.markdown("""
    <style>
    .block-card {
        background: #fff;
        border-radius: 20px;
        box-shadow: 0 2px 16px rgba(25,39,64,0.05);
        padding: 32px 26px 24px 26px;
        margin-bottom: 24px;
    }
    .step-num {
        background: #A020F0;
        color: #fff;
        border-radius: 999px;
        padding: 6px 13px;
        font-weight: 700;
        margin-right: 14px;
        font-size: 20px;
        display: inline-block;
        vertical-align: middle;
    }
    .stButton>button {
        background: #A020F0 !important;
        color: white !important;
        border-radius: 12px !important;
        padding: 10px 32px !important;
        font-weight: 700;
        border: none !important;
        font-size: 18px !important;
        margin-top: 12px !important;
    }
    .stSlider>div>div>div>div {
        background: #F3F6FB !important;
        border-radius: 999px;
    }
    .css-12w0qpk {padding-top: 0rem;}
    .css-1kyxreq {padding-top: 0rem;}
    </style>
""", unsafe_allow_html=True)

MODELS = {
    "OpenAI GPT-4.1": {
        "api_url": "https://api.openai.com/v1/chat/completions",
        "model": "gpt-4-1106-preview",
        "key_env": "OPENAI_API_KEY",
        "response_format": None,
        "extra_headers": {},
    },
}

def get_api_key(model_choice):
    key = os.getenv(MODELS[model_choice]["key_env"])
    if not key:
        st.error(f"❌ {MODELS[model_choice]['key_env']} not set")
        st.stop()
    return key

def query_llm(model_choice, prompt):
    cfg = MODELS[model_choice]
    headers = {
        "Authorization": f"Bearer {get_api_key(model_choice)}",
        "Content-Type": "application/json",
    }
    if cfg.get("extra_headers"):
        headers.update(cfg["extra_headers"])
    payload = {
        "model": cfg["model"],
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.1,
        "max_tokens": 2000,
    }
    if cfg.get("response_format"):
        payload["response_format"] = cfg["response_format"]
    try:
        with st.spinner(f"🔍 Fine Tuning The Extracted Data..."):
            r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
        if r.status_code != 200:
            st.error(f"🚨 API Error {r.status_code}: {r.text}")
            return None
        content = r.json()["choices"][0]["message"]["content"]
        st.session_state.last_api = content
        st.session_state.last_raw = r.text
        return content
    except Exception as e:
        st.error(f"Connection error: {e}")
        return None

def clean_json_response(text):
    if not text:
        return None
    orig = text
    text = re.sub(r'```(?:json)?', '', text).strip()
    start, end = text.find('{'), text.rfind('}') + 1
    if start < 0 or end < 1:
        st.error("Couldn't locate JSON in response.")
        st.code(orig)
        return None
    frag = text[start:end]
    frag = re.sub(r',\s*([}\]])', r'\1', frag)
    try:
        return json.loads(frag)
    except json.JSONDecodeError as e:
        repaired = re.sub(r'"\s*"\s*(?="[^"]+"\s*:)', '","', frag)
        try:
            return json.loads(repaired)
        except json.JSONDecodeError:
            st.error(f"JSON parse error: {e}")
            st.code(frag)
            return None

def fallback_supplier(text):
    for line in text.splitlines():
        line = line.strip()
        if line:
            return line
    return None

def get_extraction_prompt(model_choice, txt):
    return (
        "You are an expert invoice parser. "
        "Extract data according to the visible table structure and column headers in the invoice. "
        "For every line item, only extract fields that correspond to the table columns for that row (do not include header/shipment fields in line items). "
        "Merge all multi-line content within a single cell into that field (especially for the 'description' and 'notes'). "
        "Shipment/invoice-level fields such as CAR NUMBER, SHIPPING POINT, SHIPMENT NUMBER, CURRENCY, etc., must go ONLY into the 'invoice_header', not as line item fields.\n"
        "Use this schema:\n"
        '{\n'
        '  "invoice_header": {\n'
        '    "car_number": "string or null",\n'
        '    "shipment_number": "string or null",\n'
        '    "shipping_point": "string or null",\n'
        '    "currency": "string or null",\n'
        '    "invoice_number": "string or null",\n'
        '    "invoice_date": "string or null",\n'
        '    "order_number": "string or null",\n'
        '    "customer_order_number": "string or null",\n'
        '    "our_order_number": "string or null",\n'
        '    "sales_order_number": "string or null",\n'
        '    "purchase_order_number": "string or null",\n'
        '    "order_date": "string or null",\n'
        '    "supplier_name": "string or null",\n'
        '    "supplier_address": "string or null",\n'
        '    "supplier_phone": "string or null",\n'
        '    "supplier_email": "string or null",\n'
        '    "supplier_tax_id": "string or null",\n'
        '    "customer_name": "string or null",\n'
        '    "customer_address": "string or null",\n'
        '    "customer_phone": "string or null",\n'
        '    "customer_email": "string or null",\n'
        '    "customer_tax_id": "string or null",\n'
        '    "ship_to_name": "string or null",\n'
        '    "ship_to_address": "string or null",\n'
        '    "bill_to_name": "string or null",\n'
        '    "bill_to_address": "string or null",\n'
        '    "remit_to_name": "string or null",\n'
        '    "remit_to_address": "string or null",\n'
        '    "tax_id": "string or null",\n'
        '    "tax_registration_number": "string or null",\n'
        '    "vat_number": "string or null",\n'
        '    "payment_terms": "string or null",\n'
        '    "payment_method": "string or null",\n'
        '    "payment_reference": "string or null",\n'
        '    "bank_account_number": "string or null",\n'
        '    "iban": "string or null",\n'
        '    "swift_code": "string or null",\n'
        '    "total_before_tax": "string or null",\n'
        '    "tax_amount": "string or null",\n'
        '    "tax_rate": "string or null",\n'
        '    "shipping_charges": "string or null",\n'
        '    "discount": "string or null",\n'
        '    "total_due": "string or null",\n'
        '    "amount_paid": "string or null",\n'
        '    "balance_due": "string or null",\n'
        '    "due_date": "string or null",\n'
        '    "invoice_status": "string or null",\n'
        '    "reference_number": "string or null",\n'
        '    "project_code": "string or null",\n'
        '    "department": "string or null",\n'
        '    "contact_person": "string or null",\n'
        '    "notes": "string or null",\n'
        '    "additional_info": "string or null"\n'
        '  },\n'
        '  "line_items": [\n'
        '    {\n'
        '      "quantity": "string or null",\n'
        '      "units": "string or null",\n'
        '      "description": "string or null",\n'
        '      "footage": "string or null",\n'
        '      "price": "string or null",\n'
        '      "amount": "string or null",\n'
        '      "notes": "string or null"\n'
        '    }\n'
        '  ]\n'
        '}'
        "\nIf a field is missing for a line item or header, use null. "
        "Do not invent fields. Do not add any header or shipment data to any line item. Return ONLY the JSON object, no explanation.\n"
        "\nInvoice Text:\n"
        f"{txt}"
    )

def ensure_total_due(invoice_header):
    if invoice_header.get("total_due") in [None, ""]:
        for field in ["invoice_total", "invoice_value", "total_before_tax", "balance_due", "amount_paid"]:
            if field in invoice_header and invoice_header[field]:
                invoice_header["total_due"] = invoice_header[field]
                break
    return invoice_header

def clean_num(val):
    if val is None:
        return None
    if isinstance(val, (int, float)):
        return float(val)
    matches = re.findall(r"[-+]?\d[\d,]*\.?\d*", str(val))
    if matches:
        cleaned = [m.replace(',', '') for m in matches if m]
        as_floats = [float(c) for c in cleaned if c.replace('.', '', 1).isdigit()]
        if as_floats:
            return max(as_floats)
    return None

def weighted_fuzzy_score(s1, s2):
    if not s1 and not s2:
        return 100
    return fuzz.token_set_ratio(str(s1).lower(), str(s2).lower())

def find_po_number_in_json(po_number, invoice_json):
    def _flatten(obj):
        fields = []
        if isinstance(obj, dict):
            for v in obj.values():
                fields.extend(_flatten(v))
        elif isinstance(obj, list):
            for item in obj:
                fields.extend(_flatten(item))
        elif obj is not None:
            fields.append(str(obj))
        return fields

    po_str = str(po_number).strip().replace(" ", "").replace(".0", "")
    try:
        po_int = str(int(float(po_number)))
    except:
        po_int = po_str

    all_strs = [str(s).strip().replace(" ", "").replace(".0", "") for s in _flatten(invoice_json)]
    for s in all_strs:
        if not s:
            continue
        if po_str and (po_str in s or s in po_str):
            return True
        if po_int and (po_int in s or s in po_int):
            return True
    return False

def find_best_po_match(inv, po_df, weight_supplier, weight_po_number, weight_currency, weight_total_due, weight_line_item):
    inv_hdr = inv["invoice_header"]
    inv_supplier = inv_hdr.get("supplier_name") or ""
    inv_po_number = inv_hdr.get("purchase_order_number") or inv_hdr.get("po_number") or inv_hdr.get("order_number") or ""
    inv_currency = inv_hdr.get("currency") or ""
    inv_total_due = clean_num(inv_hdr.get("total_due"))
    inv_line_items = inv.get("line_items", [])

    scores = []
    for idx, row in po_df.iterrows():
        po_supplier = row.get("Supplier Name", "")
        po_po_number = str(row.get("PO Number", ""))
        po_currency = row.get("Currency", "")
        po_total = clean_num(row.get("PO Total Value", ""))
        po_desc = row.get("Item Description", "")
        po_qty = str(row.get("Item Quantity", ""))
        po_unit = str(row.get("Item Unit Price", ""))
        po_line_total = clean_num(row.get("Line Item Total", ""))

        field_details = []

        s_supplier = weighted_fuzzy_score(inv_supplier, po_supplier)
        field_details.append({
            "field": "Supplier Name",
            "invoice": inv_supplier,
            "po": po_supplier,
            "score": s_supplier
        })

        s_po_number = 100 if find_po_number_in_json(po_po_number, inv) else 0
        field_details.append({
            "field": "PO Number (anywhere in JSON)",
            "invoice": "found" if s_po_number else "not found",
            "po": po_po_number,
            "score": s_po_number
        })

        s_currency = weighted_fuzzy_score(inv_currency, po_currency)
        field_details.append({
            "field": "Currency",
            "invoice": inv_currency,
            "po": po_currency,
            "score": s_currency
        })

        s_total = 100 if inv_total_due is not None and po_total is not None and abs(inv_total_due - po_total) < 2 else 0
        field_details.append({
            "field": "Total Due",
            "invoice": inv_total_due,
            "po": po_total,
            "score": s_total
        })

        # Line item logic as before
        line_item_score = 0
        line_reason = ""
        best_line_detail = None
        for line in inv_line_items:
            desc_score = weighted_fuzzy_score(line.get("description", ""), po_desc)
            qty_score = 100 if clean_num(line.get("quantity")) == clean_num(po_qty) else 0
            unit_score = 100 if clean_num(line.get("price")) == clean_num(po_unit) else 0
            amount_score = 100 if clean_num(line.get("amount")) == po_line_total else 0
            total = desc_score * 0.5 + qty_score * 0.2 + unit_score * 0.15 + amount_score * 0.15
            detail = {
                "field": "Line Item",
                "invoice": {
                    "description": line.get("description", ""),
                    "quantity": line.get("quantity", ""),
                    "price": line.get("price", ""),
                    "amount": line.get("amount", ""),
                },
                "po": {
                    "description": po_desc,
                    "quantity": po_qty,
                    "price": po_unit,
                    "amount": po_line_total,
                },
                "desc_score": desc_score,
                "qty_score": qty_score,
                "unit_score": unit_score,
                "amount_score": amount_score,
                "line_item_score": total
            }
            if total > line_item_score:
                line_item_score = total
                best_line_detail = detail
                line_reason = (
                    f"Best line item: desc_score={desc_score}, qty_score={qty_score}, "
                    f"unit_score={unit_score}, amount_score={amount_score}"
                )

        wsum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
        total_score = (
            s_supplier * weight_supplier/100 +
            s_po_number * weight_po_number/100 +
            s_currency * weight_currency/100 +
            s_total * weight_total_due/100 +
            line_item_score * weight_line_item/100
        ) if wsum == 100 else 0

        reason = (
            f"Supplier match: {s_supplier}/100 (invoice: '{inv_supplier}' vs PO: '{po_supplier}'), "
            f"PO Number: {s_po_number}/100 ({'found anywhere in JSON' if s_po_number else 'not found'}), "
            f"Currency: {s_currency}/100 (invoice: '{inv_currency}' vs PO: '{po_currency}'), "
            f"Total Due: {'match' if s_total else 'no match'} (invoice: {inv_total_due} vs PO: {po_total}), "
            f"Line item best match: {int(line_item_score)}/100. {line_reason}"
        )

        debug = {
            "po_idx": idx,
            "po_supplier": po_supplier,
            "po_po_number": po_po_number,
            "po_total": po_total,
            "scores": field_details,
            "line_item_score": line_item_score,
            "best_line_detail": best_line_detail,
            "total_score": total_score,
            "line_reason": line_reason,
            "inv_total_due": inv_total_due
        }
        scores.append((row, total_score, reason, debug))

    scores.sort(key=lambda tup: tup[1], reverse=True)
    if not scores:
        return None, 0, "No POs found.", {}
    best_row, best_score, reason, debug = scores[0]
    return best_row, best_score, reason, debug

def extract_invoice_info(model_choice, text):
    prompt = get_extraction_prompt(model_choice, text)
    raw = query_llm(model_choice, prompt)
    if not raw:
        return None
    data = clean_json_response(raw)
    if not data:
        return None
    hdr = data.get("invoice_header", {})
    if not hdr and any(k in data for k in ("invoice_number","supplier_name","customer_name")):
        hdr = data
    for k in ("invoice_number","invoice_date","po_number","invoice_value","supplier_name","customer_name"):
        hdr.setdefault(k, None)
    if not hdr.get("supplier_name"):
        hdr["supplier_name"] = fallback_supplier(text)
    hdr = ensure_total_due(hdr)
    items = data.get("line_items", [])
    if not isinstance(items, list):
        items = []
    for itm in items:
        if not isinstance(itm, dict):
            continue
        for k in ("item_number","description","quantity","unit_price","total_price"):
            itm.setdefault(k, None)
    return {"invoice_header": hdr, "line_items": items}

def get_content_type(filename):
    mime, _ = mimetypes.guess_type(filename)
    ext = filename.lower().split('.')[-1]
    if ext == "pdf":
        return "text/plain"
    if mime is None:
        return "application/octet-stream"
    return mime

UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")

def extract_text_from_unstract(uploaded_file):
    filename = getattr(uploaded_file, "name", "uploaded_file")
    file_bytes = uploaded_file.read()
    content_type = get_content_type(filename)
    headers = {
        "unstract-key": UNSTRACT_API_KEY,
        "Content-Type": content_type,
    }
    url = f"{UNSTRACT_BASE}/whisper"
    with st.spinner("Uploading and processing document with EZOFIS AI OCR AGENT..."):
        r = requests.post(url, headers=headers, data=file_bytes)
        if r.status_code != 202:
            st.error(f"Unstract: Error uploading file: {r.status_code} - {r.text}")
            return None
        whisper_hash = r.json().get("whisper_hash")
        if not whisper_hash:
            st.error("Unstract: No whisper_hash received.")
            return None

    status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
    status_placeholder = st.empty()
    for i in range(30):
        status_r = requests.get(status_url, headers={"unstract-key": UNSTRACT_API_KEY})
        if status_r.status_code != 200:
            st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
            return None
        status = status_r.json().get("status")
        if status == "processed":
            status_placeholder.info("EZOFIS AI OCR AGENT STATUS: processed! 🎉")
            break
        status_placeholder.info(f"EZOFIS AI OCR AGENT STATUS: {status or 'waiting'}... ({i+1})")
        time.sleep(2)
    else:
        status_placeholder.error("Unstract: Timeout waiting for OCR to finish.")
        return None

    retrieve_url = f"{UNSTRACT_BASE}/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
    r = requests.get(retrieve_url, headers={"unstract-key": UNSTRACT_API_KEY})
    if r.status_code != 200:
        st.error(f"Unstract: Error retrieving extracted text: {r.status_code} - {r.text}")
        return None
    try:
        data = r.json()
        return data.get("result_text") or r.text
    except Exception:
        return r.text

# ---------------- UI LAYOUT ----------------------
st.markdown(
    "<h1 style='font-weight:800; margin-bottom:8px;'>EZOFIS Accounts Payable Agent</h1>",
    unsafe_allow_html=True
)
st.markdown(
    "<div style='font-size:20px; margin-bottom:28px; color:#24345C;'>Modern workflow automation for finance teams</div>",
    unsafe_allow_html=True
)

# ---- Three columns layout for horizontal flow
col1, col2, col3 = st.columns([2,2,3])

# ---- Step 1: Upload POs (col1) ----
with col1:
    st.markdown("<span class='step-num'>1</span> <b>Upload Active Purchase Orders (POs)</b>", unsafe_allow_html=True)
    po_file = st.file_uploader(
        "CSV with PO number, Supplier, Items, etc.",
        type=["csv"],
        key="po_csv",
        label_visibility="collapsed"
    )
    po_df = None
    if po_file:
        po_df = pd.read_csv(po_file)
        st.success(f"Loaded {len(po_df)} records from uploaded CSV.")
        st.session_state['last_po_df'] = po_df

# ---- Step 2: Scoring Weights (col1) ----
with col1:
    st.markdown("<span class='step-num'>2</span> <b>Configure Scoring Weights</b>", unsafe_allow_html=True)
    st.markdown("Set weights for matching. Total must equal 100%.", unsafe_allow_html=True)
    def int_slider(label, value, key):
        return st.slider(label, 0, 100, value, 1, key=key, format="%d")
    weight_supplier = int_slider("Supplier Name (%)", 25, "w_supplier")
    weight_po_number = int_slider("PO Number (%)", 25, "w_po")
    weight_currency = int_slider("Currency (%)", 10, "w_curr")
    weight_total_due = int_slider("Total Due (%)", 20, "w_due")
    weight_line_item = int_slider("Line Item (%)", 20, "w_line")
    weight_sum = weight_supplier + weight_po_number + weight_currency + weight_total_due + weight_line_item
    if weight_sum != 100:
        st.warning(f"Sum of weights is {weight_sum}%. Adjust so it equals 100%.")

    st.markdown("<span class='step-num'>3</span> <b>Set Decision Thresholds</b>", unsafe_allow_html=True)
    approved_threshold = st.slider("Threshold for 'APPROVED'", min_value=0, max_value=100, value=85, format="%d")
    partial_threshold = st.slider("Threshold for 'PARTIALLY APPROVED'", min_value=0, max_value=approved_threshold-1, value=70, format="%d")

# ---- Step 4: Upload Invoice (col2) ----
with col2:
    st.markdown("<span class='step-num'>4</span> <b>Upload Invoice/Document</b>", unsafe_allow_html=True)
    inv_file = st.file_uploader(
        "Upload PDF, DOCX, XLSX, PNG, JPG, TIFF",
        type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"],
        key="invoice_file",
        label_visibility="collapsed"
    )

# ---- Step 5: Extract Data (col2) ----
with col2:
    st.markdown("<span class='step-num'>5</span> <b>Extract Data</b>", unsafe_allow_html=True)
    if st.button("Extract"):
        if inv_file:
            with st.spinner("Extracting text from document..."):
                text = extract_text_from_unstract(inv_file)
            if text:
                mdl = "OpenAI GPT-4.1"
                extracted_info = extract_invoice_info(mdl, text)
                if extracted_info:
                    if "invoice_header" in extracted_info:
                        extracted_info["invoice_header"] = ensure_total_due(extracted_info["invoice_header"])
                    st.success("Extraction Complete")
                    st.session_state['last_extracted_info'] = extracted_info
        else:
            st.warning("Please upload an invoice/document first.")

# ---- Step 6: AP Agent Decision (col3) ----
with col3:
    st.markdown("<span class='step-num'>6</span> <b>AP Agent Decision</b>", unsafe_allow_html=True)
    if st.button("Make a decision (EZOFIS AP AGENT)"):
        extracted_info = st.session_state.get('last_extracted_info', None)
        po_df = st.session_state.get('last_po_df', None)
        if extracted_info is not None and po_df is not None:
            def po_match_tool_func(input_text):
                invoice = st.session_state.get("last_extracted_info")
                po_df = st.session_state.get("last_po_df")
                if invoice is None or po_df is None:
                    return json.dumps({
                        "decision": "REJECTED",
                        "reason": "Invoice or PO data not found.",
                        "debug": {},
                    })
                best_row, best_score, reason, debug = find_best_po_match(
                    invoice, po_df, weight_supplier, weight_po_number, weight_currency, weight_total_due, weight_line_item
                )
                if best_score > approved_threshold:
                    status = "APPROVED"
                elif best_score > partial_threshold:
                    status = "PARTIALLY APPROVED"
                else:
                    status = "REJECTED"
                return json.dumps({
                    "decision": status,
                    "reason": f"Best match score: {int(best_score)}/100. {reason}",
                    "debug": debug,
                    "po_row": best_row.to_dict() if best_row is not None else None
                })
            tools = [
                Tool(
                    name="po_match_tool",
                    func=po_match_tool_func,
                    description="Smartly match invoice to PO using all possible fields.",
                )
            ]
            decision_llm = ChatOpenAI(
                openai_api_key=get_api_key("OpenAI GPT-4.1"),
                model=MODELS["OpenAI GPT-4.1"]["model"],
                temperature=0,
                streaming=False,
            )
            agent = initialize_agent(
                tools,
                decision_llm,
                agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                verbose=True,
            )
            prompt = (
                "You are an expert accounts payable agent. "
                "Use po_match_tool to check for the best possible match using supplier, PO number (which may appear anywhere in the invoice JSON, even within other fields), currency, line items, and total value. "
                "Weigh the importance of each field as an expert would, according to the user-configured weights. "
                "Return a JSON with decision (APPROVED, PARTIALLY APPROVED, REJECTED), reason (include field scores and reasoning), debug, and the best matched PO row.\n"
                f"Invoice JSON:\n{json.dumps(extracted_info, indent=2)}"
            )
            with st.spinner("AI is reasoning and making a decision..."):
                result = agent.run(prompt)
            # Always display debug/info
            st.markdown("<h3 style='margin-top:18px;'>AI Decision & Reason</h3>", unsafe_allow_html=True)
            try:
                result_json = json.loads(result)
                st.write(f"**Decision:** {result_json.get('decision', 'N/A')}")
                st.write(f"**Reason:** {result_json.get('reason', 'N/A')}")
                st.markdown("##### Debug & Matching Details")
                st.json(result_json.get('debug'))
                st.markdown("##### Extracted Invoice JSON")
                st.json(extracted_info)
                st.markdown("##### Matched PO Row")
                st.json(result_json.get('po_row'))
            except Exception:
                st.subheader("AI Decision & Reason")
                st.write(result)

# Always show extraction/decision debug in full for troubleshooting
if "last_api" in st.session_state:
    with st.expander("Debug"):
        st.code(st.session_state.last_api)
        st.code(st.session_state.last_raw)