Ankushbl6 committed on
Commit
28a8689
·
verified ·
1 Parent(s): 6156157

Create src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +1375 -0
src/app.py ADDED
@@ -0,0 +1,1375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =========================
2
+ # Invoice Extractor (Qwen3-VL via RunPod vLLM) - Batch Mode with Tax Validation
3
+ # =========================
4
+ import os
5
+ from pathlib import Path
6
+
7
+ # -----------------------------
8
+ # Environment hardening (HF Spaces, /.cache issue)
9
+ # -----------------------------
10
# Startup guard: when HOME is empty or "/", point it at a usable directory
# before anything tries to create dot-directories beneath it.
_home = os.environ.get("HOME", "")
if _home is None or _home == "" or _home == "/":
    repo_dir = os.getcwd()
    # Prefer the working directory when it is writable; otherwise use /tmp.
    if os.access(repo_dir, os.W_OK):
        safe_home = repo_dir
    else:
        safe_home = "/tmp"
    os.environ["HOME"] = safe_home
    print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}")

# Make sure $HOME/.streamlit exists; a failure is reported but is not fatal.
streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
try:
    streamlit_dir.mkdir(parents=True, exist_ok=True)
except Exception as exc:
    print(f"[startup] WARNING: could not create {streamlit_dir}: {exc}")
else:
    print(f"[startup] ensured {streamlit_dir}")
23
+
24
+ # -----------------------------
25
+ # Imports
26
+ # -----------------------------
27
+ import json
28
+ from io import BytesIO
29
+ import hashlib
30
+ from typing import Dict, Any
31
+ from datetime import datetime
32
+
33
+ import streamlit as st
34
+ import pandas as pd
35
+ from PIL import Image
36
+
37
+ # Optional: pdf2image is only needed for PDFs
38
+ try:
39
+ from pdf2image import convert_from_bytes
40
+ except Exception:
41
+ convert_from_bytes = None
42
+
43
+ # -----------------------------
44
+ # RunPod vLLM Configuration (from environment variables)
45
+ # -----------------------------
46
+ import requests
47
+ import base64
48
+ import re
49
+
50
# Endpoint and credentials are read from the environment; the model name is fixed.
POD_URL = os.environ.get("POD_URL", "")
VLLM_API_KEY = os.environ.get("VLLM_API_KEY", "")
MODEL_NAME = "qwen3-vl-8b-nu-merged"

# Halt the app with a visible message unless both secrets are present.
if not (POD_URL and VLLM_API_KEY):
    st.error("⚠️ API credentials not configured. Please set POD_URL and VLLM_API_KEY in Space settings.")
    st.stop()
58
+ # -----------------------------
59
+ # Page config & CSS
60
+ # -----------------------------
61
# Page-level Streamlit setup: browser tab title, wide layout, and the visible heading.
st.set_page_config(page_title="Invoice Extractor (Qwen3-VL) - Batch Mode", layout="wide")
st.title("Invoice Extraction")

# Inject custom CSS (background colours, container padding, tab spacing and a
# minimum height for the second column). unsafe_allow_html is needed so the
# <style> tag is emitted as markup rather than shown as text.
st.markdown(
    """
<style>
.stApp { background-color: #ECECEC !important; }
div.block-container { padding-top: 3rem; padding-bottom: 1rem; }
[data-testid="stSidebar"] { background-color: #F7F7F7 !important; }
div[data-testid="stTabs"] > div > div { padding-bottom: 6px !important; }
/* Keep right column steady on first render post-extraction */
[data-testid="column"]:nth-of-type(2) { min-height: 780px; }
</style>
""",
    unsafe_allow_html=True
)

# Fixed sizes to prevent reflow wobble
# NOTE(review): presumably pixel values consumed by the image preview and the
# data editor further down the file — confirm at the call sites.
FIXED_IMG_WIDTH = 640
DATA_EDITOR_HEIGHT = 380
81
+
82
+ # -----------------------------
83
+ # Helpers
84
+ # -----------------------------
85
def ensure_state(k: str, default):
    """Seed ``st.session_state[k]`` with ``default`` only if the key is absent.

    An existing value is never overwritten, so widgets bound with ``key=...``
    keep whatever the session already holds.
    """
    if k in st.session_state:
        return
    st.session_state[k] = default
89
+
90
def clean_float(x) -> float:
    """Coerce a loosely formatted numeric value to ``float``.

    ``None``, blank strings and text with no parseable number yield ``0.0``.
    ``int``/``float`` inputs are converted directly. Any other input is
    stringified, stripped of commas, whitespace and every character other
    than digits, ``.`` and ``-``, then parsed.
    """
    import re

    if x is None:
        return 0.0
    if isinstance(x, (int, float)):
        return float(x)

    text = str(x).strip()
    if not text:
        return 0.0

    # First drop thousands separators / whitespace, then anything non-numeric.
    without_separators = re.sub(r"[,\s]", "", text)
    numeric_text = re.sub(r"[^\d\.\-]", "", without_separators)
    if numeric_text in {"", ".", "-", "-."}:
        return 0.0

    try:
        parsed = float(numeric_text)
    except Exception:
        # Leftovers such as "1.2.3" are not numbers; fall back to zero.
        parsed = 0.0
    return parsed
107
+
108
def normalize_date(date_str) -> str:
    """Convert a date in one of several common layouts to ``dd-Mon-yyyy``.

    The layouts are tried in order and the first one that parses wins, so an
    ambiguous numeric date such as ``01-02-2025`` is read day-first.

    Returns:
        The date formatted like ``01-Jan-2025``, or ``""`` when the input is
        empty or matches none of the layouts.
    """
    if not date_str:
        return ""

    if isinstance(date_str, str):
        date_str = date_str.strip()
        if not date_str:
            return ""

    candidate = str(date_str)

    # Order matters: earlier layouts take precedence for ambiguous input.
    layouts = (
        "%Y-%m-%d",   # 2025-01-15 (ISO)
        "%d-%m-%Y",   # 15-01-2025 (EU)
        "%m-%d-%Y",   # 01-15-2025 (US)
        "%Y/%m/%d",   # 2025/01/15
        "%d/%m/%Y",   # 15/01/2025
        "%m/%d/%Y",   # 01/15/2025
        "%d.%m.%Y",   # 15.01.2025
        "%Y.%m.%d",   # 2025.01.15
        "%d %B %Y",   # 15 January 2025
        "%d %b %Y",   # 15 Jan 2025
        "%B %d, %Y",  # January 15, 2025
        "%b %d, %Y",  # Jan 15, 2025
        "%d-%b-%Y",   # 15-Jan-2025
        "%d-%B-%Y",   # 15-January-2025
        "%Y%m%d",     # 20250115
    )

    for layout in layouts:
        try:
            moment = datetime.strptime(candidate, layout)
        except (ValueError, TypeError):
            continue
        # Emit dd-MMM-yyyy (e.g., 01-Jan-2025).
        return moment.strftime("%d-%b-%Y")

    # Nothing matched.
    return ""
157
+
158
def parse_date_to_object(date_str):
    """Parse a date string into a ``datetime.date``.

    The supported layouts are tried in order; the first successful parse is
    returned, so ambiguous numeric dates are read day-first.

    Returns:
        A ``datetime.date``, or ``None`` when the input is empty or matches
        none of the layouts.
    """
    if not date_str:
        return None

    if isinstance(date_str, str):
        date_str = date_str.strip()
        if not date_str:
            return None

    text = str(date_str)

    # Order matters: earlier layouts take precedence for ambiguous input.
    known_layouts = (
        "%Y-%m-%d",   # 2025-01-15 (ISO)
        "%d-%m-%Y",   # 15-01-2025 (EU)
        "%m-%d-%Y",   # 01-15-2025 (US)
        "%Y/%m/%d",   # 2025/01/15
        "%d/%m/%Y",   # 15/01/2025
        "%m/%d/%Y",   # 01/15/2025
        "%d.%m.%Y",   # 15.01.2025
        "%Y.%m.%d",   # 2025.01.15
        "%d %B %Y",   # 15 January 2025
        "%d %b %Y",   # 15 Jan 2025
        "%B %d, %Y",  # January 15, 2025
        "%b %d, %Y",  # Jan 15, 2025
        "%d-%b-%Y",   # 15-Jan-2025
        "%d-%B-%Y",   # 15-January-2025
        "%Y%m%d",     # 20250115
    )

    for layout in known_layouts:
        try:
            return datetime.strptime(text, layout).date()
        except (ValueError, TypeError):
            pass

    return None
199
+
200
+ # -----------------------------
201
+ # HF login flow (REMOVED - No longer needed for vLLM API)
202
+ # -----------------------------
203
+ # Authentication is now handled via POD_URL and VLLM_API_KEY instead
204
+
205
+ # -----------------------------
206
+ # Model config
207
+ # -----------------------------
208
+ # OLD DONUT CODE (COMMENTED OUT - Now using vLLM API)
209
+ # -----------------------------
210
+ # HF_MODEL_ID = "Bhuvi13/model-V7"
211
+ # TASK_PROMPT = "<s_cord-v2>"
212
+ #
213
+ # @st.cache_resource(show_spinner=False)
214
+ # def load_model_and_processor(hf_model_id: str, task_prompt: str):
215
+ # ...
216
+
217
+ # -----------------------------
218
+ # vLLM Inference Function (RunPod API)
219
+ # -----------------------------
220
def run_inference_vllm(image: Image.Image):
    """Send one invoice image to the vLLM chat-completions endpoint.

    The image is converted to a PNG-encodable mode if necessary, downscaled
    so that neither side exceeds 2048 px, base64-encoded into a data URL and
    posted together with the extraction prompt.

    Args:
        image: The invoice page as a PIL image.

    Returns:
        The text content of the first completion choice on HTTP 200,
        otherwise ``None``. Failures are reported through Streamlit
        (``st.error`` / ``st.json`` / ``st.code``) rather than raised.
    """

    # Extraction prompt (JSON format)
    EXTRACTION_PROMPT = """Please carefully examine this invoice image and extract all the information into the following structured JSON format. Pay close attention to details and ensure accuracy in number formatting and text extraction.

Extract the data into this exact JSON structure (do not add or remove keys):

{
"header": {
"invoice_no": "Invoice number or reference ID",
"invoice_date": "Date the invoice was issued (maintain original format)",
"due_date": "Payment due date if specified",
"sender_name": "Name of the company/person issuing the invoice",
"sender_addr": "Complete address of the sender/issuer",
"rcpt_name": "Name of the recipient/customer",
"rcpt_addr": "Address of the recipient/customer",
"bank_iban": "International Bank Account Number",
"bank_name": "Name of the bank",
"bank_acc_no": "Bank account number",
"bank_routing": "Bank routing number",
"bank_swift": "SWIFT/BIC code",
"bank_acc_name": "Account holder name",
"bank_branch": "Bank branch information"
},
"items": [
{
"descriptions": "Detailed description of the item/service",
"SKU": "Stock Keeping Unit or item code",
"quantity": "Quantity of items",
"unit_price": "Price per unit",
"amount": "Total amount for this line item",
"tax": "Tax amount for this item",
"Line_total": "Total amount including tax for this line"
}
],
"summary": {
"subtotal": "Subtotal amount before tax",
"tax_rate": "Tax rate percentage or description",
"tax_amount": "Total tax amount",
"total_amount": "Final total amount to be paid",
"currency": "Currency code (USD, EUR, etc.)"
}
}

STRICT POLICY RULES (apply exactly, do not deviate):
1) Number formatting & types
- Preserve the original number formatting from the invoice (commas, decimal places, currency symbols in text fields if shown).
- In this JSON, output all values as strings. If a field is not present or cannot be determined with high confidence, output "" (empty string). Do not use null, 0, or placeholders.
2) Currency selection (multi-currency invoices)
- If multiple currencies are shown, ALWAYS choose the recipient/customer currency for all monetary fields in items and summary.
- Do NOT perform FX conversion. Select the column/figures that are explicitly in the recipient’s currency.
- For "summary.currency", prefer the printed 3-letter code (e.g., USD, EUR, INR). If only an unambiguous symbol is present, map it (₹→INR, €→EUR, $→USD when clearly USD). If ambiguous, leave "".
3) Tax handling (no rounding of rates; don’t recompute given totals)
- Do NOT round tax percentages. Use the original precision for any calculations; keep the printed formatting for "summary.tax_rate".
- If a TOTAL tax amount is explicitly printed on the invoice (e.g., “Tax”, “VAT”, “IGST”, “Total Tax”), TREAT IT AS AUTHORITATIVE. Do NOT recompute a new total.
a) If per-line tax amounts are printed, copy them directly.
b) If per-line tax amounts are not printed, allocate the printed TOTAL tax proportionally across line items by each line’s net amount (quantity * unit_price − discount). Use precise arithmetic; ensure the sum of allocated per-line taxes equals the printed TOTAL tax (adjust the last cent minimally if required).
- If NO total tax amount is printed but a tax rate is printed, compute per-line tax as: tax = (quantity * unit_price − discount) × (exact, unrounded tax rate). Then set "summary.tax_amount" = sum of per-line taxes.
- "items[].amount" is the pre-tax line amount AFTER discount. "items[].Line_total" = amount + tax.
4) Discounts
- If discounts are present (per-line or overall), compute tax on the discounted base: (quantity * unit_price − discount). Never compute tax on the undiscounted amount.
5) Due date calculation from payment terms
- Preserve the invoice’s original date format for both "invoice_date" and "due_date".
- If explicit due date is printed, use it as "due_date".
- If payment terms specify Net X (e.g., Net 30), set due_date = invoice_date + X days (same format as invoice_date).
- If terms say “upon receipt”, “upon publication”, or equivalent, due_date = invoice_date.
- If both a printed due date and terms exist and they conflict, prefer the printed due date.
6) Items array
- Include every visible line item. Preserve multi-line descriptions using literal "\\n" where line breaks exist.
- If SKU is not shown, set "SKU": "".
- Ensure "quantity", "unit_price", "amount", "tax", and "Line_total" are consistent with the rules above.
7) Summary invariants (when values are available on the invoice)
- "summary.subtotal" = sum of items[].amount.
- "summary.tax_amount" = sum of items[].tax (if you allocated or computed it). If the invoice prints a total tax amount, use that exact value and make per-line taxes sum to it.
- "summary.total_amount" = subtotal + tax_amount.
- If any of these values are not printed and cannot be derived reliably from the printed numbers, leave them as "".
8) Text extraction fidelity
- Extract text exactly as printed (names, addresses, bank fields, references). Keep special characters and spacing (normalize only obvious OCR artifacts).
- If a bank field is absent (IBAN/SWIFT/routing/etc.), set it to "".

Output constraints:
- Return ONLY the JSON object described above (no explanations, no code fences, no trailing commas).
- Keep all values as strings.
- Do not add extra keys or sections beyond the given schema."""

    try:
        # PNG cannot encode every PIL mode (CMYK scans, for example), so
        # convert anything outside the PNG-writable set before saving.
        if image.mode not in ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"):
            image = image.convert("RGB")

        # Resize image if too large (max dimension 2048px to avoid payload size issues)
        max_dimension = 2048
        width, height = image.size
        if width > max_dimension or height > max_dimension:
            ratio = min(max_dimension / width, max_dimension / height)
            # Clamp to 1 px so an extreme aspect ratio cannot yield a zero-sized side.
            new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
            image = image.resize(new_size, Image.Resampling.LANCZOS)
            st.info(f"Image resized from {width}x{height} to {new_size[0]}x{new_size[1]} to reduce payload size")

        # Convert image to base64
        buffer = BytesIO()
        image.save(buffer, format="PNG", optimize=True)
        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        # Check payload size (size of the base64 text, in MiB)
        payload_size_mb = len(image_base64) / (1024 * 1024)
        if payload_size_mb > 10:
            st.warning(f"Warning: Large image payload ({payload_size_mb:.2f} MB). This might cause issues.")

        data_url = f"data:image/png;base64,{image_base64}"

        # Build payload
        payload = {
            "model": MODEL_NAME,
            "messages": [
                {"role": "system", "content": EXTRACTION_PROMPT},
                {"role": "user", "content": [
                    {"type": "image_url", "image_url": {"url": data_url}},
                    {"type": "text", "text": "Extract invoice data."}
                ]}
            ],
            "temperature": 0,
            "max_tokens": 1536
        }

        headers = {
            "Authorization": f"Bearer {VLLM_API_KEY}",
            "Content-Type": "application/json"
        }

        # Call API. A trailing slash on POD_URL would otherwise produce "//v1/...".
        st.info(f"Sending request to API (payload size: {payload_size_mb:.2f} MB)...")
        response = requests.post(
            f"{POD_URL.rstrip('/')}/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=90
        )

        if response.status_code == 200:
            result = response.json()
            return result["choices"][0]["message"]["content"]

        # Show detailed error for debugging
        st.error(f"❌ API Error {response.status_code}")
        try:
            error_detail = response.json()
        except ValueError:
            # Body was not JSON (requests' JSON decode errors subclass ValueError).
            st.code(response.text)  # Show raw text
        else:
            st.json(error_detail)  # Show as formatted JSON
        return None

    except Exception as e:
        # UI boundary: report any failure (network, encoding, unexpected
        # response shape) to the user instead of crashing the app.
        st.error(f"Error calling vLLM: {str(e)}")
        return None
372
+
373
+
374
+ # -----------------------------
375
+ # JSON Parser for vLLM Output
376
+ # -----------------------------
377
def parse_vllm_json(raw_json_text):
    """Parse the model's JSON reply into the flat dict the UI works with.

    Args:
        raw_json_text: JSON text with ``header``, ``items`` and ``summary``
            sections, as requested by the extraction prompt.

    Returns:
        A dict with invoice fields, sender/recipient/bank sub-dicts, numeric
        summary values and an ``"Itemized Data"`` list, or ``None`` (after
        showing ``st.error``) when the text is not valid JSON or is not a
        JSON object.
    """

    def clean_amount(value):
        """Return ``value`` as a float, or 0.0 when it holds no usable number."""
        if not value or value == "":
            return 0.0
        cleaned = re.sub(r'[^\d\.-]', '', str(value))
        try:
            return float(cleaned)
        except ValueError:
            # Placeholders such as "-" or "N/A", or garbled text like "1.2.3":
            # treat as zero rather than discarding the whole invoice.
            return 0.0

    try:
        data = json.loads(raw_json_text)

        # ``or`` also covers sections the model emitted as null.
        header = data.get("header") or {}
        summary = data.get("summary") or {}
        items = data.get("items") or []

        result = {
            "Invoice Number": header.get("invoice_no", ""),
            "Invoice Date": normalize_date(header.get("invoice_date", "")),
            "Due Date": normalize_date(header.get("due_date", "")),
            "Sender Name": header.get("sender_name", ""),
            "Sender Address": header.get("sender_addr", ""),
            "Sender": {
                "Name": header.get("sender_name", ""),
                "Address": header.get("sender_addr", "")
            },
            "Recipient Name": header.get("rcpt_name", ""),
            "Recipient Address": header.get("rcpt_addr", ""),
            "Recipient": {
                "Name": header.get("rcpt_name", ""),
                "Address": header.get("rcpt_addr", "")
            },
            "Bank Details": {
                "bank_iban": header.get("bank_iban", ""),
                "bank_name": header.get("bank_name", ""),
                "bank_account_number": header.get("bank_acc_no", ""),
                "bank_routing": header.get("bank_routing", ""),
                "bank_swift": header.get("bank_swift", ""),
                "bank_account_holder": header.get("bank_acc_name", ""),
                "bank_branch": header.get("bank_branch", "")
            },
            "Subtotal": clean_amount(summary.get("subtotal", "0")),
            "Tax Percentage": clean_amount(summary.get("tax_rate", "0")),
            "Total Tax": clean_amount(summary.get("tax_amount", "0")),
            "Total Amount": clean_amount(summary.get("total_amount", "0")),
            "Currency": summary.get("currency", ""),
            "Itemized Data": []
        }

        for item in items:
            # Skip stray non-object entries instead of failing the whole parse.
            if not isinstance(item, dict):
                continue

            # Store raw tax value to distinguish empty ("") from explicit "0" or "0.00"
            raw_tax = item.get("tax", "")

            result["Itemized Data"].append({
                "Description": item.get("descriptions", ""),
                "SKU": item.get("SKU", ""),
                "Quantity": clean_amount(item.get("quantity", "0")),
                "Unit Price": clean_amount(item.get("unit_price", "0")),
                "Amount": clean_amount(item.get("amount", "0")),
                "Tax": clean_amount(raw_tax),
                "Tax_Raw": raw_tax,  # Keep original to distinguish empty vs 0.00
                "Line Total": clean_amount(item.get("Line_total", "0"))
            })

        return result

    except Exception as e:
        # UI boundary: invalid JSON or an unexpected top-level shape is
        # reported to the user and signalled to the caller with None.
        st.error(f"JSON parse error: {str(e)}")
        return None
444
+
445
+
446
+ # -----------------------------
447
+ # Tax Validation Function
448
+ # -----------------------------
449
def _is_explicit_zero_tax(raw_tax_value) -> bool:
    """Tell whether a raw per-line tax value explicitly states "no tax".

    An empty string means the tax was simply not provided. A non-empty
    string counts as explicit zero when it parses to 0 or contains no digits
    at all (placeholders such as "-" or "N/A"). A numeric 0 is explicit too.
    """
    if isinstance(raw_tax_value, str):
        cleaned = raw_tax_value.strip()
        if cleaned == "":
            return False
        numeric_text = re.sub(r'[^\d\.-]', '', cleaned) or '0'
        try:
            return float(numeric_text) == 0.0
        except ValueError:
            # "-" and similar carry no number: treat as zero. Garbled text
            # that still has digits is not a trustworthy zero.
            return not any(ch.isdigit() for ch in numeric_text)
    return raw_tax_value == 0 or raw_tax_value == 0.0


def validate_and_calculate_taxes(structured_data):
    """
    Enhanced tax validation with smart line-item calculation:
    1. Calculate line-item tax ONLY when line item tax is empty/missing
       or needs recalculation against the authoritative rate
    2. Skip line items with explicit 0.00 tax (tax-exempt)
    3. Skip validation if tax_amount is 0 but tax_rate exists
    4. Ensure both Tax Percentage and Total Tax are properly filled

    The authoritative rate is whichever of the model's tax rate or tax
    amount reproduces the stated total more closely (ties go to the amount).

    Args:
        structured_data: dict with "Subtotal", "Total Amount",
            "Tax Percentage", "Total Tax" and "Itemized Data". It is
            modified in place.

    Returns:
        The same dict. "tax_validated" is always set. "tax_skip_reason" is
        set when validation is skipped; otherwise "tax_authority_source",
        "original_tax_rate" and "original_tax_amount" are added and the
        tax fields and line items are rewritten.
    """

    # ``or`` also maps a stored None to 0 so the comparisons below cannot fail.
    subtotal = structured_data.get("Subtotal") or 0.0
    total_amount = structured_data.get("Total Amount") or 0.0
    model_tax_rate = structured_data.get("Tax Percentage") or 0.0
    model_tax_amount = structured_data.get("Total Tax") or 0.0
    items = structured_data.get("Itemized Data") or []

    # SKIP VALIDATION if: No tax detected (subtotal >= total) OR subtotal is invalid
    if subtotal >= total_amount or subtotal <= 0:
        structured_data["tax_validated"] = False
        structured_data["tax_skip_reason"] = "No tax detected"
        return structured_data

    # SKIP if tax_rate exists but tax_amount is 0 (incomplete data)
    if model_tax_rate > 0 and model_tax_amount == 0.0:
        structured_data["tax_validated"] = False
        structured_data["tax_skip_reason"] = "Tax rate exists but tax amount is 0"
        return structured_data

    # No tax information available
    if model_tax_rate <= 0 and model_tax_amount <= 0:
        structured_data["tax_validated"] = False
        structured_data["tax_skip_reason"] = "No tax rate or amount provided"
        return structured_data

    # TEST SOURCE A: tax_rate — how far off is subtotal + rate-derived tax?
    error_from_rate = float('inf')
    if model_tax_rate > 0:
        expected_tax_from_rate = subtotal * (model_tax_rate / 100)
        error_from_rate = abs((subtotal + expected_tax_from_rate) - total_amount)

    # TEST SOURCE B: tax_amount — how far off is subtotal + stated tax?
    error_from_amount = float('inf')
    rate_from_amount = None  # stays None when no positive tax amount exists
    if model_tax_amount > 0:
        rate_from_amount = (model_tax_amount / subtotal) * 100
        error_from_amount = abs((subtotal + model_tax_amount) - total_amount)

    # PICK WINNER (or use whichever is available)
    if rate_from_amount is None or error_from_rate < error_from_amount:
        authoritative_rate = round(model_tax_rate, 4)
        authority_source = "tax_rate"
    else:
        authoritative_rate = round(rate_from_amount, 4)
        authority_source = "tax_amount"

    # APPLY to line items - BUT respect explicit 0.00 values
    calculated_total_tax = 0.0
    priced_line_seen = False

    for item in items:
        amount = item.get("Amount", 0.0)
        original_tax = item.get("Tax", 0.0)

        # If item amount is 0, keep tax at 0
        if amount == 0.0:
            item["Tax"] = 0.0
            item["Line Total"] = 0.0
            continue

        priced_line_seen = True

        # Explicit zero - tax-exempt item, don't calculate
        if original_tax == 0.0 and _is_explicit_zero_tax(item.get("Tax_Raw", "")):
            item["Tax"] = 0.0
            item["Line Total"] = amount
            continue

        # Either the tax was empty/missing, or it has a value that is
        # recalculated from the authoritative rate.
        corrected_tax = round(amount * (authoritative_rate / 100), 2)
        item["Tax"] = corrected_tax
        calculated_total_tax += corrected_tax
        item["Line Total"] = round(amount + corrected_tax, 2)

    # Without any priced line there is nothing to sum; derive the tax from
    # the subtotal so a valid summary is not overwritten with zero.
    if not priced_line_seen:
        calculated_total_tax = round(subtotal * (authoritative_rate / 100), 2)

    # Update summary - ENSURE BOTH FIELDS ARE FILLED
    structured_data["Tax Percentage"] = authoritative_rate
    structured_data["Total Tax"] = round(calculated_total_tax, 2)
    structured_data["Total Amount"] = round(subtotal + calculated_total_tax, 2)
    structured_data["tax_validated"] = True
    structured_data["tax_authority_source"] = authority_source
    structured_data["original_tax_rate"] = model_tax_rate
    structured_data["original_tax_amount"] = model_tax_amount

    return structured_data
562
+
563
+
564
+ # -----------------------------
565
+ # ORIGINAL (previous) mapping logic — restored verbatim
566
+ # -----------------------------
567
+ def map_prediction_to_ui(pred):
568
+ import json, re
569
+ from collections import defaultdict
570
+
571
+ def safe_json_load(s):
572
+ if s is None:
573
+ return None
574
+ if isinstance(s, (dict, list)):
575
+ return s
576
+ if isinstance(s, str):
577
+ s = s.strip()
578
+ if s == "":
579
+ return None
580
+ try:
581
+ return json.loads(s)
582
+ except Exception:
583
+ subs = []
584
+ stack = []
585
+ start = None
586
+ for i, ch in enumerate(s):
587
+ if ch == "{":
588
+ if not stack:
589
+ start = i
590
+ stack.append("{")
591
+ elif ch == "}":
592
+ if stack:
593
+ stack.pop()
594
+ if not stack and start is not None:
595
+ subs.append(s[start:i+1])
596
+ start = None
597
+ for sub in subs:
598
+ try:
599
+ return json.loads(sub)
600
+ except Exception:
601
+ continue
602
+ return None
603
+
604
+ def clean_number(x):
605
+ if x is None:
606
+ return 0.0
607
+ if isinstance(x, (int, float)):
608
+ return float(x)
609
+ s = str(x).strip()
610
+ if s == "":
611
+ return 0.0
612
+ s = re.sub(r"[,\s]", "", s)
613
+ s = re.sub(r"[^\d\.\-]", "", s)
614
+ if s in ("", ".", "-", "-."):
615
+ return 0.0
616
+ try:
617
+ return float(s)
618
+ except Exception:
619
+ return 0.0
620
+
621
+ def collect_keys(obj, out):
622
+ if isinstance(obj, dict):
623
+ for k, v in obj.items():
624
+ lk = str(k).strip().lower()
625
+ out[lk].append(v)
626
+ collect_keys(v, out)
627
+ elif isinstance(obj, list):
628
+ for it in obj:
629
+ collect_keys(it, out)
630
+
631
+ def collect_lists_of_dicts(obj, out_lists):
632
+ if isinstance(obj, dict):
633
+ for v in obj.values():
634
+ if isinstance(v, list) and v and isinstance(v[0], dict):
635
+ out_lists.append(v)
636
+ else:
637
+ collect_lists_of_dicts(v, out_lists)
638
+ elif isinstance(obj, list):
639
+ for it in obj:
640
+ if isinstance(it, list) and it and isinstance(it[0], dict):
641
+ out_lists.append(it)
642
+ else:
643
+ collect_lists_of_dicts(it, out_lists)
644
+
645
+ def map_item_dict(it):
646
+ if not isinstance(it, dict):
647
+ return None
648
+ lower = {str(k).strip().lower(): v for k, v in it.items()}
649
+ desc = (lower.get("descriptions") or lower.get("description") or lower.get("desc") or lower.get("item") or "")
650
+ qty = lower.get("quantity") or lower.get("qty") or lower.get("count") or ""
651
+ unit_price = lower.get("unit_price") or lower.get("price") or ""
652
+ amount = lower.get("amount") or lower.get("line_total") or lower.get("line total") or lower.get("total") or ""
653
+ tax = lower.get("tax") or lower.get("tax_amount") or ""
654
+ line_total = lower.get("line_total") or lower.get("line_total".lower()) or lower.get("line total") or amount
655
+
656
+ return {
657
+ "Description": str(desc).strip(),
658
+ "Quantity": float(clean_number(qty)),
659
+ "Unit Price": float(clean_number(unit_price)),
660
+ "Amount": float(clean_number(amount)),
661
+ "Tax": float(clean_number(tax)),
662
+ "Line Total": float(clean_number(line_total))
663
+ }
664
+
665
+ parsed = safe_json_load(pred) if isinstance(pred, str) else pred
666
+ if parsed is None and isinstance(pred, str):
667
+ parsed = None
668
+
669
+ if parsed is None and not isinstance(pred, dict):
670
+ parsed = pred
671
+
672
+ ui = {
673
+ "Invoice Number": "",
674
+ "Invoice Date": "",
675
+ "Due Date": "",
676
+ "Currency": "",
677
+ "Subtotal": 0.0,
678
+ "Tax Percentage": 0.0,
679
+ "Total Tax": 0.0,
680
+ "Total Amount": 0.0,
681
+ "Sender": {"Name": "", "Address": ""},
682
+ "Recipient": {"Name": "", "Address": ""},
683
+ "Sender Name": "",
684
+ "Sender Address": "",
685
+ "Recipient Name": "",
686
+ "Recipient Address": "",
687
+ "Bank Details": {},
688
+ "Itemized Data": []
689
+ }
690
+
691
+ key_map = defaultdict(list)
692
+ list_candidates = []
693
+ if isinstance(parsed, dict):
694
+ collect_keys(parsed, key_map)
695
+ collect_lists_of_dicts(parsed, list_candidates)
696
+ elif isinstance(pred, dict):
697
+ collect_keys(pred, key_map)
698
+ collect_lists_of_dicts(pred, list_candidates)
699
+
700
+ def pick_first(*candidate_keys):
701
+ for k in candidate_keys:
702
+ lk = k.strip().lower()
703
+ if lk in key_map:
704
+ for v in key_map[lk]:
705
+ if v is None:
706
+ continue
707
+ if isinstance(v, (dict, list)):
708
+ return v
709
+ s = str(v).strip()
710
+ if s != "":
711
+ return s
712
+ return None
713
+
714
+ ui["Invoice Number"] = pick_first("invoice_no", "invoice_number", "invoiceid", "invoice id") or ""
715
+ ui["Invoice Date"] = normalize_date(pick_first("invoice_date", "date", "invoice date") or "")
716
+ ui["Due Date"] = normalize_date(pick_first("due_date", "due_date", "due") or "")
717
+ ui["Sender Name"] = pick_first("sender_name", "sender") or ""
718
+ ui["Sender Address"] = pick_first("sender_addr", "sender_address", "sender addr") or ""
719
+ ui["Recipient Name"] = pick_first("rcpt_name", "recipient_name", "recipient", "rcpt") or ""
720
+ ui["Recipient Address"] = pick_first("rcpt_addr", "recipient_address", "recipient addr") or ""
721
+
722
+ bank = {}
723
+ for bk in ("bank_name", "bank_acc_no", "bank_account_number", "bank_acc_name", "bank_iban", "bank_swift", "bank_routing", "bank_branch", "iban"):
724
+ val = pick_first(bk, bk.replace("bank_", ""))
725
+ if val:
726
+ if bk == "iban":
727
+ bank["bank_iban"] = str(val)
728
+ else:
729
+ bank[bk if bk != "bank_acc_no" else "bank_account_number"] = str(val)
730
+ ui["Bank Details"] = bank
731
+
732
+ ui["Subtotal"] = clean_number(pick_first("subtotal", "sub_total", "sub total") or 0.0)
733
+ ui["Tax Percentage"] = clean_number(pick_first("tax_rate", "tax_percentage", "tax pct", "tax percentage") or 0.0)
734
+ ui["Total Tax"] = clean_number(pick_first("tax_amount", "tax", "total_tax") or 0.0)
735
+ ui["Total Amount"] = clean_number(pick_first("total_amount", "grand_total", "total", "amount") or 0.0)
736
+ ui["Currency"] = (pick_first("currency") or "").strip()
737
+
738
+ items_rows = []
739
+
740
def list_looks_like_items(lst):
    """Tell whether *lst* appears to be a list of invoice line-item dicts.

    Only the first element is inspected: it must be a dict and at least one
    of its keys (compared stripped and lower-cased) must be a recognised
    line-item field name.  Anything that is not a non-empty list is rejected.
    """
    recognised = {
        "descriptions", "description", "desc", "item",
        "quantity", "qty", "amount", "unit_price", "line_total",
    }
    if isinstance(lst, list) and lst and isinstance(lst[0], dict):
        first_keys = {str(name).strip().lower() for name in lst[0]}
        return not recognised.isdisjoint(first_keys)
    return False
748
+
749
+ for cand in list_candidates:
750
+ if list_looks_like_items(cand):
751
+ for it in cand:
752
+ row = map_item_dict(it)
753
+ if row is not None:
754
+ items_rows.append(row)
755
+ if items_rows:
756
+ break
757
+
758
+ if not items_rows:
759
+ single_candidate_keys = {k.strip().lower() for k in (parsed.keys() if isinstance(parsed, dict) else [])} if isinstance(parsed, dict) else set()
760
+ item_like_keys = {"descriptions", "description", "desc", "item", "quantity", "qty", "unit_price", "unit price", "price", "amount", "line_total", "line total", "line_total", "line_total".lower(), "sku", "tax", "tax_amount"}
761
+ if single_candidate_keys and single_candidate_keys.intersection(item_like_keys):
762
+ single_row = map_item_dict(parsed)
763
+ if single_row is not None:
764
+ items_rows.append(single_row)
765
+
766
+ if not items_rows:
767
+ for k, vals in key_map.items():
768
+ for v in vals:
769
+ if isinstance(v, dict):
770
+ lower_keys = {str(x).strip().lower() for x in v.keys()}
771
+ if lower_keys.intersection({"descriptions", "description", "desc", "amount", "line_total", "quantity", "qty", "unit_price"}):
772
+ row = map_item_dict(v)
773
+ if row is not None:
774
+ items_rows.append(row)
775
+
776
+ if not items_rows:
777
+ desc = pick_first("descriptions", "description")
778
+ amt = pick_first("amount", "line_total")
779
+ qty = pick_first("quantity", "qty")
780
+ unit_price = pick_first("unit_price", "price")
781
+ if desc or amt or qty or unit_price:
782
+ items_rows.append({
783
+ "Description": str(desc or ""),
784
+ "Quantity": float(clean_number(qty)),
785
+ "Unit Price": float(clean_number(unit_price)),
786
+ "Amount": float(clean_number(amt)),
787
+ "Tax": float(clean_number(pick_first("tax", "tax_amount") or 0.0)),
788
+ "Line Total": float(clean_number(amt or 0.0))
789
+ })
790
+
791
+ ui["Itemized Data"] = items_rows
792
+ ui["Sender"] = {"Name": ui["Sender Name"], "Address": ui["Sender Address"]}
793
+ ui["Recipient"] = {"Name": ui["Recipient Name"], "Address": ui["Recipient Address"]}
794
+
795
+ return ui
796
+
797
def flatten_invoice_to_rows(invoice_data) -> list:
    """Flatten one invoice dict into flat row dicts suitable for a CSV export.

    One row is produced per entry of ``invoice_data["Itemized Data"]``; every
    row repeats the invoice header fields and the bank fields.  When there are
    no line items a single row with placeholder item values is returned.

    Args:
        invoice_data: Invoice dict using the UI key names ("Invoice Number",
            "Bank Details", "Itemized Data", ...).  ``None`` or any non-dict
            value is treated as an empty invoice.

    Returns:
        A list of dicts.  Missing text values are rendered as ``"NA"`` and
        missing or unparseable amounts as ``0``.
    """
    EXPECTED_BANK_FIELDS = [
        "bank_name",
        "bank_account_number",
        "bank_acc_name",
        "bank_iban",
        "bank_swift",
        "bank_routing",
        "bank_branch"
    ]
    # Short spelling that other parts of this file accept as the account number.
    BANK_FIELD_ALIASES = {"bank_acc_no": "bank_account_number"}

    def is_missing(value):
        # NaN is the only float that is not equal to itself; pandas emits it
        # for empty cells when a DataFrame is converted to records.
        return value is None or (isinstance(value, float) and value != value)

    # Helper to format text fields (empty -> NA)
    def format_text_field(value):
        if is_missing(value):
            return "NA"
        text = str(value).strip()
        return text if text else "NA"

    # Helper to format amount fields (empty -> 0)
    def format_amount_field(value):
        if is_missing(value) or (isinstance(value, str) and not value.strip()):
            return 0
        try:
            number = float(value)
        except (ValueError, TypeError):
            return 0
        # float("nan") parses successfully, so filter it after conversion too.
        return 0 if number != number else number

    def nested_text(container_key, field):
        # Fallback lookup in the nested "Sender"/"Recipient" dicts.
        container = invoice_data.get(container_key)
        return container.get(field, "") if isinstance(container, dict) else ""

    def item_value(item, key, default):
        return item.get(key, default) if isinstance(item, dict) else default

    if not isinstance(invoice_data, dict):
        invoice_data = {}

    line_items = invoice_data.get("Itemized Data", []) or []
    if isinstance(line_items, dict):
        # A single item given as a bare dict: treat it as a one-element list
        # instead of iterating over its keys.
        line_items = [line_items]

    bank_details = {}

    def remember_bank_value(raw_key, value):
        name = str(raw_key).strip().lower()
        if not name.startswith("bank_"):
            name = f"bank_{name}"
        name = BANK_FIELD_ALIASES.get(name, name)
        # Never let a blank value hide a real one collected earlier.
        if name not in bank_details or format_text_field(value) != "NA":
            bank_details[name] = value

    nested = invoice_data.get("Bank Details", {}) or {}
    if isinstance(nested, dict):
        for k, v in nested.items():
            remember_bank_value(k, v)

    # Bank fields may also sit directly on the invoice dict.
    for k, v in invoice_data.items():
        if isinstance(k, str) and k.lower().startswith("bank_"):
            remember_bank_value(k, v)

    # Header + bank columns are identical for every row: build them once.
    shared = {
        "Invoice Number": format_text_field(invoice_data.get("Invoice Number", "")),
        "Invoice Date": format_text_field(invoice_data.get("Invoice Date", "")),
        "Due Date": format_text_field(invoice_data.get("Due Date", "")),
        "Currency": format_text_field(invoice_data.get("Currency", "")),
        "Subtotal": format_amount_field(invoice_data.get("Subtotal", 0.0)),
        "Tax Percentage": format_amount_field(invoice_data.get("Tax Percentage", 0.0)),
        "Total Tax": format_amount_field(invoice_data.get("Total Tax", 0.0)),
        "Total Amount": format_amount_field(invoice_data.get("Total Amount", 0.0)),
        "Sender Name": format_text_field(invoice_data.get("Sender Name", "") or nested_text("Sender", "Name")),
        "Sender Address": format_text_field(invoice_data.get("Sender Address", "") or nested_text("Sender", "Address")),
        "Recipient Name": format_text_field(invoice_data.get("Recipient Name", "") or nested_text("Recipient", "Name")),
        "Recipient Address": format_text_field(invoice_data.get("Recipient Address", "") or nested_text("Recipient", "Address")),
    }
    for field in EXPECTED_BANK_FIELDS:
        shared[field] = format_text_field(bank_details.get(field, ""))

    if not line_items:
        placeholder = dict(shared)
        placeholder.update({
            "Item Description": "NA",
            "Item Quantity": 0,
            "Item Unit Price": 0.0,
            "Item Amount": 0.0,
            "Item Tax": 0.0,
            "Item Line Total": 0.0,
        })
        return [placeholder]

    rows = []
    for item in line_items:
        row = dict(shared)
        amount = item_value(item, "Amount", 0.0)
        row.update({
            "Item Description": format_text_field(item_value(item, "Description", "")),
            "Item Quantity": format_amount_field(item_value(item, "Quantity", 0)),
            "Item Unit Price": format_amount_field(item_value(item, "Unit Price", 0.0)),
            "Item Amount": format_amount_field(amount),
            "Item Tax": format_amount_field(item_value(item, "Tax", 0.0)),
            # Line total falls back to the amount when the key is absent.
            "Item Line Total": format_amount_field(item_value(item, "Line Total", amount)),
        })
        rows.append(row)
    return rows
886
+
887
+ # -----------------------------
888
+ # Load model (COMMENTED OUT - Now using vLLM API)
889
+ # -----------------------------
890
+ # try:
891
+ # with st.spinner("Loading model & processor (cached) ..."):
892
+ # processor, model, device, decoder_input_ids = load_model_and_processor(HF_MODEL_ID, TASK_PROMPT)
893
+ # except Exception as e:
894
+ # st.error("Could not load model automatically. See details below.")
895
+ # st.exception(e)
896
+ # st.stop()
897
+
898
+ # -----------------------------
899
+ # Session scaffolding
900
+ # -----------------------------
901
# Seed the session keys the page logic below relies on; values already
# present from an earlier run of this session are left untouched.
for _state_key, _initial_value in (
    ("batch_results", {}),
    ("current_file_hash", None),
    ("is_processing_batch", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# -----------------------------
# Pre-mount two-column skeleton to avoid layout jump
# -----------------------------
frame_left, frame_right = st.columns([1, 1], vertical_alignment="top")
912
+
913
+ # -----------------------------
914
+ # Upload / Process
915
+ # -----------------------------
916
if not st.session_state.is_processing_batch and len(st.session_state.batch_results) == 0:
    # ------------------------------------------------------------------
    # Upload page: collect files, run extraction once per distinct file
    # (keyed by SHA-256 of its bytes) and store the results in the session.
    # ------------------------------------------------------------------
    with frame_left:
        st.header("📤 Upload Invoices")
        uploaded_files = st.file_uploader(
            "Upload invoice images (png/jpg/jpeg/pdf)",
            type=["png", "jpg", "jpeg", "pdf"],
            accept_multiple_files=True
        )

        if uploaded_files:
            st.session_state.is_processing_batch = True
            progress_bar = st.progress(0)
            status_text = st.empty()
            total_files = len(uploaded_files)

            try:
                for idx, uploaded_file in enumerate(uploaded_files):
                    # Updating at the top of the loop keeps the bar moving
                    # even for files that are skipped via `continue`.
                    progress_bar.progress(idx / total_files)
                    status_text.text(f"Processing {idx+1}/{total_files}: {uploaded_file.name}")
                    uploaded_bytes = uploaded_file.read()
                    file_hash = hashlib.sha256(uploaded_bytes).hexdigest()

                    if file_hash in st.session_state.batch_results:
                        continue

                    # Load image (first page for PDFs)
                    image = None
                    is_pdf = uploaded_file.name.lower().endswith('.pdf') or (hasattr(uploaded_file, 'type') and uploaded_file.type == 'application/pdf')
                    if is_pdf:
                        if convert_from_bytes is None:
                            st.warning(f"PDF {uploaded_file.name} could not be rendered (pdf2image/poppler missing).")
                            continue
                        try:
                            pages = convert_from_bytes(uploaded_bytes, dpi=200)
                            if len(pages) > 0:
                                image = pages[0].convert("RGB")
                            else:
                                st.warning(f"PDF {uploaded_file.name} has no pages.")
                                continue
                        except Exception:
                            st.warning(f"Could not render PDF {uploaded_file.name}. Ensure 'pdf2image' and poppler are installed.")
                            continue
                    else:
                        try:
                            image = Image.open(BytesIO(uploaded_bytes)).convert("RGB")
                        except Exception:
                            st.warning(f"Failed to open {uploaded_file.name}.")
                            continue

                    if image is None:
                        continue

                    # vLLM Inference + parsing + tax validation
                    try:
                        # Call vLLM API
                        raw_json = run_inference_vllm(image)

                        if raw_json:
                            # Parse JSON response
                            parsed_data = parse_vllm_json(raw_json)

                            if parsed_data:
                                # Apply tax validation
                                mapped = validate_and_calculate_taxes(parsed_data)
                            else:
                                st.warning(f"Failed to parse JSON for {uploaded_file.name}")
                                mapped = {}
                        else:
                            st.warning(f"No response from vLLM for {uploaded_file.name}")
                            mapped = {}

                        pred = raw_json  # Store raw JSON for debugging
                    except Exception as e:
                        st.warning(f"Error processing {uploaded_file.name}: {str(e)}")
                        pred = None
                        mapped = {}

                    safe_mapped = mapped if isinstance(mapped, dict) else {}

                    st.session_state.batch_results[file_hash] = {
                        "file_name": uploaded_file.name,
                        "image": image,
                        "raw_pred": pred,
                        "mapped_data": safe_mapped,
                        "edited_data": safe_mapped.copy()
                    }
            finally:
                # Always release the flag; otherwise an interrupted run would
                # leave the app stuck on the "Processing batch" screen.
                st.session_state.is_processing_batch = False

            progress_bar.progress(1.0)
            if st.session_state.batch_results:
                status_text.text("✅ All files processed!")
                st.rerun()
            else:
                # Nothing could be loaded.  Do NOT rerun here: the uploader
                # still holds the same files, so a rerun would process them
                # again and again, and would also wipe the warnings above.
                status_text.text("⚠️ No invoices could be processed. See the warnings above.")

    with frame_right:
        st.caption("Preview & editor will appear here after extraction.")

elif len(st.session_state.batch_results) > 0:

    # --------- Top row: All-results download + Back button ----------
    with frame_left:
        all_rows = []
        for file_hash, result in st.session_state.batch_results.items():
            rows = flatten_invoice_to_rows(result["edited_data"])
            for r in rows:
                r["Source File"] = result.get("file_name", file_hash)
            all_rows.extend(rows)

        if all_rows:
            full_df = pd.DataFrame(all_rows)
            cols = list(full_df.columns)
            if "Source File" in cols:
                cols = ["Source File"] + [c for c in cols if c != "Source File"]
                full_df = full_df[cols]
            csv_bytes = full_df.to_csv(index=False).encode("utf-8")
            st.download_button("📦 Download All Results (CSV)", csv_bytes,
                               file_name="all_extracted_invoices.csv", mime="text/csv", key="download_all_csv")

    with frame_right:
        if st.button("⬅️ Back to Upload"):
            st.session_state.batch_results.clear()
            st.session_state.current_file_hash = None
            st.session_state.is_processing_batch = False
            st.rerun()

    # --------- Selector ----------
    with frame_left:
        file_options = {f"{v['file_name']} ({k[:6]})": k for k, v in st.session_state.batch_results.items()}
        selected_display = st.selectbox("Select invoice to view/edit:", options=list(file_options.keys()), index=0, key="file_selector")
        selected_hash = file_options[selected_display]
        if st.session_state.current_file_hash != selected_hash:
            st.session_state.current_file_hash = selected_hash

    current = st.session_state.batch_results[selected_hash]
    image = current["image"]
    form_data = current["edited_data"]

    # --------- Small helpers local to the editor page ----------
    _BANK_WIDGET_FIELDS = (
        ("Bank Name", "bank_name"),
        ("Account Number", "bank_account_number"),
        ("Account Name", "bank_acc_name"),
        ("IBAN", "bank_iban"),
        ("SWIFT", "bank_swift"),
        ("Routing", "bank_routing"),
        ("Branch", "bank_branch"),
    )
    _TOTAL_FIELDS = ("Subtotal", "Tax Percentage", "Total Tax", "Total Amount")

    def _as_float(value, default=0.0):
        """Best-effort float conversion; None, blanks, junk and NaN give *default*."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        return default if number != number else number

    def _format_date(value):
        """Render a date object as dd-MMM-yyyy; anything else becomes ""."""
        if value is None:
            return ""
        try:
            return value.strftime("%d-%b-%Y")
        except (AttributeError, ValueError):
            return ""

    def _state(name, default=''):
        """Read the widget value stored for the selected invoice."""
        return st.session_state.get(f"{name}_{selected_hash}", default)

    def _current_form_values(line_items):
        """Snapshot the header/bank widget values plus the given line items."""
        chosen_currency = _state("Currency", 'USD')
        if chosen_currency == 'Other':
            chosen_currency = _state("Currency_Custom", '')
        return {
            'Invoice Number': _state("Invoice Number"),
            'Invoice Date': _format_date(_state("Invoice Date", None)),
            'Due Date': _format_date(_state("Due Date", None)),
            'Currency': chosen_currency,
            'Subtotal': _state("Subtotal", 0.0),
            'Tax Percentage': _state("Tax Percentage", 0.0),
            'Total Tax': _state("Total Tax", 0.0),
            'Total Amount': _state("Total Amount", 0.0),
            'Sender Name': _state("Sender Name"),
            'Sender Address': _state("Sender Address"),
            'Recipient Name': _state("Recipient Name"),
            'Recipient Address': _state("Recipient Address"),
            'Bank Details': {field: _state(f"Bank_{field}") for _label, field in _BANK_WIDGET_FIELDS},
            'Itemized Data': line_items,
        }

    # --------- Initialize widget state from form_data ----------
    bank = form_data.get("Bank Details", {}) if isinstance(form_data.get("Bank Details", {}), dict) else {}

    widget_defaults = {
        f"Invoice Number_{selected_hash}": form_data.get('Invoice Number', ''),
        # Parse dates to date objects for date_input widgets
        f"Invoice Date_{selected_hash}": parse_date_to_object(form_data.get('Invoice Date', '')),
        f"Due Date_{selected_hash}": parse_date_to_object(form_data.get('Due Date', '')),
        f"Currency_{selected_hash}": form_data.get('Currency', 'USD') or 'USD',
        f"Currency_Custom_{selected_hash}": form_data.get('Currency', '') if form_data.get('Currency') not in ['USD','EUR','GBP','INR'] else '',
        f"Subtotal_{selected_hash}": _as_float(form_data.get('Subtotal', 0.0)),
        f"Tax Percentage_{selected_hash}": _as_float(form_data.get('Tax Percentage', 0.0)),
        f"Total Tax_{selected_hash}": _as_float(form_data.get('Total Tax', 0.0)),
        f"Total Amount_{selected_hash}": _as_float(form_data.get('Total Amount', 0.0)),
        f"Sender Name_{selected_hash}": form_data.get('Sender Name', ''),
        f"Sender Address_{selected_hash}": form_data.get('Sender Address', ''),
        f"Recipient Name_{selected_hash}": form_data.get('Recipient Name', ''),
        f"Recipient Address_{selected_hash}": form_data.get('Recipient Address', ''),
        f"Bank_bank_name_{selected_hash}": bank.get('bank_name', ''),
        f"Bank_bank_account_number_{selected_hash}": bank.get('bank_account_number', '') or bank.get('bank_acc_no', ''),
        f"Bank_bank_acc_name_{selected_hash}": bank.get('bank_acc_name', ''),
        f"Bank_bank_iban_{selected_hash}": bank.get('bank_iban', ''),
        f"Bank_bank_swift_{selected_hash}": bank.get('bank_swift', ''),
        f"Bank_bank_routing_{selected_hash}": bank.get('bank_routing', ''),
        f"Bank_bank_branch_{selected_hash}": bank.get('bank_branch', ''),
    }

    # Only fill in keys that are missing.  Assigning unconditionally here runs
    # before the widgets are created, so it would replace the values the user
    # just submitted (or swapped) with the previously saved ones.  The totals
    # are the exception: after a save they are recomputed from the line items
    # and must be pushed into their widgets on the following run.
    refresh_flag = f"_refresh_totals_{selected_hash}"
    flash_key = f"_flash_{selected_hash}"
    refresh_totals = bool(st.session_state.get(refresh_flag, False))
    if refresh_totals:
        del st.session_state[refresh_flag]
    total_state_keys = {f"{name}_{selected_hash}" for name in _TOTAL_FIELDS}
    for state_key, default_value in widget_defaults.items():
        if state_key not in st.session_state or (refresh_totals and state_key in total_state_keys):
            st.session_state[state_key] = default_value

    # --------- Display (no wobble) ----------
    with frame_left:
        st.image(image, caption=current["file_name"], width=FIXED_IMG_WIDTH)
        st.write(f"**File Hash:** {selected_hash[:8]}...")
        if current.get('raw_pred') is not None:
            with st.expander("🔍 Show raw model output"):
                st.json(current['raw_pred'])

        if st.button("🔁 Re-Run Inference", key=f"rerun_{selected_hash}"):
            with st.spinner("Re-running inference..."):
                try:
                    # Call vLLM API
                    raw_json = run_inference_vllm(image)
                    # Parse JSON response
                    parsed_data = parse_vllm_json(raw_json) if raw_json else None

                    if not raw_json:
                        # Keep the existing extraction and skip the rerun so
                        # the error stays visible.
                        st.error("No response from vLLM")
                    elif not parsed_data:
                        st.error("Failed to parse JSON response")
                    else:
                        # Apply tax validation
                        mapped = validate_and_calculate_taxes(parsed_data)
                        safe_mapped = mapped if isinstance(mapped, dict) else {}

                        # Update stored results
                        stored = st.session_state.batch_results[selected_hash]
                        stored["raw_pred"] = raw_json
                        stored["mapped_data"] = safe_mapped
                        stored["edited_data"] = safe_mapped.copy()

                        # Clear widget state for this file so defaults refresh from new mapped data
                        for key in [k for k in st.session_state.keys() if isinstance(k, str) and k.endswith(f"_{selected_hash}")]:
                            del st.session_state[key]

                        st.success("✅ Re-run complete")
                        st.rerun()
                except Exception as e:
                    st.error(f"Re-run failed: {e}")

    with frame_right:
        st.subheader(f"Editable Invoice: {current['file_name']}")

        # Confirmation left behind by the save handler just before its rerun.
        flash_message = st.session_state.get(flash_key)
        if flash_message:
            del st.session_state[flash_key]
            st.success(flash_message)

        # Quick swap outside the form (one clean rerun)
        swap_cols = st.columns([1,1,2])
        with swap_cols[0]:
            if st.button("⇄ Swap Sender ↔ Recipient", key=f"swap_{selected_hash}"):
                sn = f"Sender Name_{selected_hash}"
                rn = f"Recipient Name_{selected_hash}"
                sa = f"Sender Address_{selected_hash}"
                ra = f"Recipient Address_{selected_hash}"
                # Allowed here because the form widgets using these keys are
                # created further down in the script.
                st.session_state[sn], st.session_state[rn] = st.session_state[rn], st.session_state[sn]
                st.session_state[sa], st.session_state[ra] = st.session_state[ra], st.session_state[sa]
                st.rerun()

        # ----------------- FORM START -----------------
        with st.form(key=f"edit_form_{selected_hash}", clear_on_submit=False):
            tabs = st.tabs(["Invoice Details", "Sender/Recipient", "Bank Details", "Line Items"])

            with tabs[0]:
                st.text_input("Invoice Number", key=f"Invoice Number_{selected_hash}")
                st.date_input("Invoice Date", key=f"Invoice Date_{selected_hash}", format="DD/MM/YYYY")
                st.date_input("Due Date", key=f"Due Date_{selected_hash}", format="DD/MM/YYYY")

                curr_options = ['USD', 'EUR', 'GBP', 'INR', 'Other']
                if st.session_state[f"Currency_{selected_hash}"] not in curr_options:
                    st.session_state[f"Currency_{selected_hash}"] = 'Other'
                st.selectbox("Currency", options=curr_options, key=f"Currency_{selected_hash}")

                if st.session_state.get(f"Currency_{selected_hash}") == 'Other':
                    st.text_input("Specify Currency", key=f"Currency_Custom_{selected_hash}")

                st.number_input("Subtotal", key=f"Subtotal_{selected_hash}")
                st.number_input("Tax %", key=f"Tax Percentage_{selected_hash}")
                st.number_input("Total Tax", key=f"Total Tax_{selected_hash}")
                st.number_input("Total Amount", key=f"Total Amount_{selected_hash}")

            with tabs[1]:
                st.text_input("Sender Name", key=f"Sender Name_{selected_hash}")
                st.text_area("Sender Address", key=f"Sender Address_{selected_hash}", height=80)
                st.text_input("Recipient Name", key=f"Recipient Name_{selected_hash}")
                st.text_area("Recipient Address", key=f"Recipient Address_{selected_hash}", height=80)

            with tabs[2]:
                for bank_label, bank_field in _BANK_WIDGET_FIELDS:
                    st.text_input(bank_label, key=f"Bank_{bank_field}_{selected_hash}")

            with tabs[3]:
                # Build base DF from current edited_data (not raw mapped) so it's always what the user last saved
                item_rows = form_data.get('Itemized Data', []) or []
                normalized = []
                for it in item_rows:
                    if not isinstance(it, dict):
                        it = {}
                    normalized.append({
                        "Description": it.get("Description", it.get("Item Description", "")),
                        "Quantity": it.get("Quantity", it.get("Item Quantity", 0)),
                        "Unit Price": it.get("Unit Price", it.get("Item Unit Price", 0.0)),
                        "Amount": it.get("Amount", it.get("Item Amount", 0.0)),
                        "Tax": it.get("Tax", it.get("Item Tax", 0.0)),
                        "Line Total": it.get("Line Total", it.get("Item Line Total", 0.0)),
                    })

                items_df = pd.DataFrame(normalized) if normalized else pd.DataFrame(
                    columns=["Description", "Quantity", "Unit Price", "Amount", "Tax", "Line Total"]
                )

                # Show editor without totals
                edited_df = st.data_editor(
                    items_df,
                    num_rows="dynamic",
                    key=f"items_editor_{selected_hash}",
                    use_container_width=True,
                    height=DATA_EDITOR_HEIGHT - 50,  # Reduce height slightly for totals below
                )

                # Display non-editable totals row immediately below (looks integrated)
                if len(edited_df) > 0:
                    # Coerce first: blank or textual cells would otherwise
                    # break the numeric formatting below.
                    total_amount = pd.to_numeric(edited_df["Amount"], errors="coerce").fillna(0).sum()
                    total_tax = pd.to_numeric(edited_df["Tax"], errors="coerce").fillna(0).sum()
                    total_line_total = pd.to_numeric(edited_df["Line Total"], errors="coerce").fillna(0).sum()

                    # Create totals display - styled to look like part of the table
                    totals_df = pd.DataFrame([{
                        "Description": "──── TOTAL ────",
                        "Quantity": "",
                        "Unit Price": "",
                        "Amount": f"${total_amount:,.2f}",
                        "Tax": f"${total_tax:,.2f}",
                        "Line Total": f"${total_line_total:,.2f}"
                    }])

                    st.dataframe(
                        totals_df,
                        use_container_width=True,
                        hide_index=True,
                        height=38  # Single row height
                    )

            saved = st.form_submit_button("💾 Save All Edits")
        # ----------------- FORM END -----------------

        if saved:
            # Calculate totals from line items (blank/NaN cells count as 0)
            line_items_list = edited_df.to_dict('records')
            calculated_subtotal = sum(_as_float(clean_float(item.get('Amount', 0))) for item in line_items_list)
            calculated_total_tax = sum(_as_float(clean_float(item.get('Tax', 0))) for item in line_items_list)
            calculated_total = sum(_as_float(clean_float(item.get('Line Total', 0))) for item in line_items_list)

            # Calculate tax percentage if possible
            calculated_tax_pct = 0.0
            if calculated_subtotal > 0 and calculated_total_tax > 0:
                calculated_tax_pct = round((calculated_total_tax / calculated_subtotal) * 100, 4)

            updated = _current_form_values(line_items_list)
            updated.update({
                'Subtotal': calculated_subtotal,        # Auto-calculated from line items
                'Tax Percentage': calculated_tax_pct,   # Auto-calculated
                'Total Tax': calculated_total_tax,      # Auto-calculated from line items
                'Total Amount': calculated_total,       # Auto-calculated from line items
                'Sender': {"Name": updated['Sender Name'], "Address": updated['Sender Address']},
                'Recipient': {"Name": updated['Recipient Name'], "Address": updated['Recipient Address']},
            })

            st.session_state.batch_results[selected_hash]["edited_data"] = updated

            # The totals widgets already exist in this run, and Streamlit
            # forbids assigning to an instantiated widget's key.  Ask the next
            # run to refresh them before the widgets are created instead.
            st.session_state[refresh_flag] = True
            st.session_state[flash_key] = f"✅ Saved: {current['file_name']} | Updated totals: Subtotal=${calculated_subtotal:,.2f}, Tax=${calculated_total_tax:,.2f}, Total=${calculated_total:,.2f}"
            st.rerun()  # Force rerun to show updated totals in the form fields

        # Per-file CSV download (uses the current editor contents even if not saved)
        download_data = _current_form_values(edited_df.to_dict('records'))
        rows = flatten_invoice_to_rows(download_data)
        full_df = pd.DataFrame(rows)
        csv_bytes_one = full_df.to_csv(index=False).encode("utf-8")
        st.download_button(
            "📥 Download This Invoice (CSV)",
            csv_bytes_one,
            file_name=f"{Path(current['file_name']).stem}_full.csv",
            mime="text/csv",
            key=f"dl_{selected_hash}"
        )

elif st.session_state.is_processing_batch:
    with frame_left:
        st.info("⏳ Processing batch... Please wait.")
        st.progress(0)
    with frame_right:
        st.caption("Preview & editor will appear here after extraction.")

else:
    # Shouldn't happen, but keeps skeleton steady
    with frame_left:
        st.caption("Ready when you are.")
    with frame_right:
        st.caption("Preview & editor will appear here after extraction.")