Ankushbl6 committed on
Commit
fcbb889
·
verified ·
1 Parent(s): 99902ac

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +833 -567
src/streamlit_app.py CHANGED
@@ -1,580 +1,846 @@
1
  import os
2
- from io import BytesIO
3
- import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  import streamlit as st
6
- from PIL import Image, ImageEnhance
 
 
 
 
7
  from streamlit_drawable_canvas import st_canvas
8
  import pytesseract
9
-
10
- # ---------------------------------
11
- # Page config
12
- # ---------------------------------
13
- st.set_page_config(
14
- page_title="Remittance GT Annotator - Interactive OCR",
15
- layout="wide"
16
- )
17
-
18
- st.title("Remittance GT Annotator - Interactive OCR")
19
-
20
- # ---------------------------------
21
- # Field definitions
22
- # ---------------------------------
23
- SINGLE_FIELDS = [
24
- "Remittance Advice Number",
25
- "Remittance Advice Date",
26
- "Payment Method",
27
- "FCY",
28
- "Total Payment Amount in FCY",
29
- "Payment Date",
30
- "Payment Reference Number/Check Number",
31
- "Customer Name",
32
- "Customer Address",
33
- "Customer Contact Information",
34
- "Supplier Name",
35
- "Supplier Address",
36
- "Supplier Contact Information",
37
- "Bank Name",
38
- "Bank Account Number",
39
- "Bank Routing Number",
40
- "SWIFT/BIC Code",
41
- ]
42
-
43
- LINE_ITEM_FIELDS = [
44
- "PO number",
45
- "Invoice number",
46
- "Other document reference number",
47
- "Invoice Date",
48
- "Invoice Amount in FCY",
49
- "Amount Paid for Each Invoice in FCY",
50
- "Outstanding Balance in FCY",
51
- "Discounts Taken in FCY",
52
- "Adjustments(Withholding Tax) in FCY",
53
- "Description",
54
- ]
55
-
56
- COLOR_PALETTE = [
57
- "#e6194b", "#3cb44b", "#ffe119", "#4363d8", "#f58231",
58
- "#911eb4", "#46f0f0", "#f032e6", "#bcf60c", "#fabebe",
59
- "#008080", "#e6beff", "#9a6324", "#fffac8", "#800000",
60
- "#aaffc3", "#808000", "#ffd8b1", "#000075", "#808080",
61
- "#ffe4e1", "#40e0d0", "#ff1493", "#7fffd4", "#b0e0e6",
62
- "#ffb6c1", "#add8e6",
63
- ]
64
-
65
- ALL_BASE_FIELDS = SINGLE_FIELDS + LINE_ITEM_FIELDS
66
- FIELD_COLORS = {field: COLOR_PALETTE[i % len(COLOR_PALETTE)] for i, field in enumerate(ALL_BASE_FIELDS)}
67
-
68
- # ---------------------------------
69
- # JSONL schema mappings
70
- # ---------------------------------
71
- HEADER_GROUPS = {
72
- "remittance_advice_details": {
73
- "Remittance Advice Number": "remittance_advice_number",
74
- "Remittance Advice Date": "remittance_advice_date",
75
- "Payment Method": "payment_method",
76
- "FCY": "fcy",
77
- "Total Payment Amount in FCY": "total_payment_amount_in_fcy",
78
- "Payment Date": "payment_date",
79
- "Payment Reference Number/Check Number": "payment_reference_number_check_number",
80
- },
81
- "customer_supplier_details": {
82
- "Customer Name": "customer_name",
83
- "Customer Address": "customer_address",
84
- "Customer Contact Information": "customer_contact_information",
85
- "Supplier Name": "supplier_name",
86
- "Supplier Address": "supplier_address",
87
- "Supplier Contact Information": "supplier_contact_information",
88
- },
89
- "bank_details": {
90
- "Bank Name": "bank_name",
91
- "Bank Account Number": "bank_account_number",
92
- "Bank Routing Number": "bank_routing_number",
93
- "SWIFT/BIC Code": "swift_bic_code",
94
- },
95
- }
96
-
97
- LINE_ITEM_FIELD_KEY_MAP = {
98
- "PO number": "po_number",
99
- "Invoice number": "invoice_number",
100
- "Other document reference number": "other_document_reference_number",
101
- "Invoice Date": "invoice_date",
102
- "Invoice Amount in FCY": "invoice_amount_in_fcy",
103
- "Amount Paid for Each Invoice in FCY": "amount_paid_for_each_invoice_in_fcy",
104
- "Outstanding Balance in FCY": "outstanding_balance_in_fcy",
105
- "Discounts Taken in FCY": "discounts_taken_in_fcy",
106
- "Adjustments(Withholding Tax) in FCY": "adjustments_withholding_tax_in_fcy",
107
- "Description": "description",
108
- }
109
-
110
- # Fixed zoom options
111
- ZOOM_OPTIONS = [25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 110, 120, 130, 140, 150]
112
-
113
- # ---------------------------------
114
- # Session state init
115
- # ---------------------------------
116
- if "field_values" not in st.session_state:
117
- st.session_state.field_values = {} # {image_name: {field_name: value}}
118
- if "field_rects_orig" not in st.session_state:
119
- st.session_state.field_rects_orig = {} # {image_name: {field_name: rect_in_orig_coords}}
120
- if "num_line_items" not in st.session_state:
121
- st.session_state.num_line_items = {} # {image_name: int}
122
- if "selected_image" not in st.session_state:
123
- st.session_state.selected_image = None
124
- if "zoom_values" not in st.session_state:
125
- st.session_state.zoom_values = {} # {image_name: zoom_int}
126
- if "rect_version" not in st.session_state:
127
- st.session_state.rect_version = {} # {image_name: int}
128
- if "image_data" not in st.session_state:
129
- st.session_state.image_data = {} # {image_name: bytes}
130
- if "pending_delete" not in st.session_state:
131
- st.session_state.pending_delete = None
132
-
133
- # Process pending delete early
134
- if st.session_state.pending_delete is not None:
135
- img_name, field_key = st.session_state.pending_delete
136
- if img_name in st.session_state.field_rects_orig:
137
- st.session_state.field_rects_orig[img_name].pop(field_key, None)
138
- if img_name in st.session_state.field_values:
139
- st.session_state.field_values[img_name].pop(field_key, None)
140
- if img_name in st.session_state.rect_version:
141
- st.session_state.rect_version[img_name] += 1
142
- st.session_state.pending_delete = None
143
- # Force a quick rerun so canvas reflects deletion
144
- st.experimental_rerun()
145
-
146
- # ---------------------------------
147
- # Helper functions
148
- # ---------------------------------
149
@st.cache_data
def load_image(file_content: bytes):
    """Decode raw image bytes and return the picture as an RGB PIL image.

    The function is wrapped in ``st.cache_data``, so the decode is keyed on
    ``file_content``.
    """
    buffer = BytesIO(file_content)
    decoded = Image.open(buffer)
    return decoded.convert("RGB")
152
-
153
@st.cache_data
def get_display_image(image_bytes: bytes, width: int, height: int):
    """Return the image resized to ``width`` x ``height`` and lightly enhanced.

    The bytes are decoded to RGB, resampled with LANCZOS, then sharpened
    (factor 1.2) and given a small contrast boost (factor 1.1). Wrapped in
    ``st.cache_data`` so the same bytes/size combination is not recomputed.
    """
    source = Image.open(BytesIO(image_bytes)).convert("RGB")
    scaled = source.resize((width, height), Image.LANCZOS)
    # Apply the enhancers in the same order as before: sharpness, then contrast.
    enhancement_steps = (
        (ImageEnhance.Sharpness, 1.2),
        (ImageEnhance.Contrast, 1.1),
    )
    for enhancer_cls, factor in enhancement_steps:
        scaled = enhancer_cls(scaled).enhance(factor)
    return scaled
161
-
162
def get_default_zoom(pil_image: Image.Image) -> int:
    """Pick the entry of ``ZOOM_OPTIONS`` closest to a "fit to view" zoom.

    The fit scale is the largest factor (capped at 1.0, i.e. never upscaling)
    at which the image fits inside an 850 x 900 box; it is converted to a
    whole percentage and snapped to the nearest available zoom option.
    """
    max_width, max_height = 850, 900
    fit_scale = min(
        max_width / pil_image.width,
        max_height / pil_image.height,
        1.0,
    )
    target_percent = int(fit_scale * 100)
    return min(ZOOM_OPTIONS, key=lambda option: abs(option - target_percent))
169
-
170
def build_gt_record_for_file(file_name: str) -> dict:
    """Assemble the ground-truth record for one image from session state.

    Header values are grouped by the sections in ``HEADER_GROUPS``; line items
    are read from keys of the form ``"Line <n>: <label>"`` and mapped through
    ``LINE_ITEM_FIELD_KEY_MAP``. A line item is kept only if at least one of
    its fields is non-empty after stripping.

    Returns:
        ``{"file_name": file_name, "gt_parse": {...sections..., "line_items": [...]}}``
    """
    values = st.session_state.field_values.get(file_name, {})
    num_items = st.session_state.num_line_items.get(file_name, 1)

    def cleaned(label: str) -> str:
        # Missing labels become "", everything else is stringified and trimmed.
        return str(values.get(label, "")).strip()

    # Header sections
    gt_parse: dict = {
        section_name: {
            json_key: cleaned(ui_label) for ui_label, json_key in mapping.items()
        }
        for section_name, mapping in HEADER_GROUPS.items()
    }

    # Line items
    line_items = []
    for idx in range(1, num_items + 1):
        row = {
            json_key: cleaned(f"Line {idx}: {ui_label}")
            for ui_label, json_key in LINE_ITEM_FIELD_KEY_MAP.items()
        }
        # Every value is a stripped string, so truthiness means "non-empty".
        if any(row.values()):
            line_items.append(row)

    gt_parse["line_items"] = line_items

    return {"file_name": file_name, "gt_parse": gt_parse}
206
-
207
def has_any_label(fname: str) -> bool:
    """Return True when at least one stored value for ``fname`` is non-blank."""
    stored = st.session_state.field_values.get(fname, {})
    for value in stored.values():
        if str(value).strip():
            return True
    return False
210
-
211
- # ---------------------------------
212
- # Upload
213
- # ---------------------------------
214
- uploaded_files = st.file_uploader(
215
- "Upload remittance images",
216
- type=["png", "jpg", "jpeg"],
217
- accept_multiple_files=True,
218
- label_visibility="collapsed",
219
- )
220
-
221
- if not uploaded_files:
222
- st.info("Upload at least one image to begin.")
223
- st.stop()
224
-
225
- images = []
226
- for f in uploaded_files:
227
- f.seek(0)
228
- content = f.read()
229
- if f.name not in st.session_state.image_data:
230
- st.session_state.image_data[f.name] = content
231
- img = load_image(st.session_state.image_data[f.name])
232
- images.append({"name": f.name, "image": img, "bytes": st.session_state.image_data[f.name]})
233
-
234
- file_names = [img["name"] for img in images]
235
-
236
- selected_name = st.selectbox("Select image", file_names, label_visibility="collapsed")
237
- st.session_state.selected_image = selected_name
238
-
239
- selected_img_data = next(img for img in images if img["name"] == selected_name)
240
- pil_image = selected_img_data["image"]
241
- image_bytes = selected_img_data["bytes"]
242
-
243
- # Init per-image state
244
- if selected_name not in st.session_state.field_values:
245
- st.session_state.field_values[selected_name] = {}
246
- if selected_name not in st.session_state.field_rects_orig:
247
- st.session_state.field_rects_orig[selected_name] = {}
248
- if selected_name not in st.session_state.num_line_items:
249
- st.session_state.num_line_items[selected_name] = 1
250
- if selected_name not in st.session_state.zoom_values:
251
- st.session_state.zoom_values[selected_name] = get_default_zoom(pil_image)
252
- if selected_name not in st.session_state.rect_version:
253
- st.session_state.rect_version[selected_name] = 0
254
-
255
- # ---------------------------------
256
- # Layout columns
257
- # ---------------------------------
258
- col1, col2 = st.columns([3, 2])
259
-
260
- # Defaults for current field
261
- display_field_name = SINGLE_FIELDS[0]
262
- storage_field_name = SINGLE_FIELDS[0]
263
- base_field_for_color = SINGLE_FIELDS[0]
264
- field_color = FIELD_COLORS[base_field_for_color]
265
-
266
- # ---------------------------------
267
- # RHS TOP: Field selection + zoom
268
- # ---------------------------------
269
- with col2:
270
- st.markdown("#### 🎯 Field Selection")
271
-
272
- def add_line_item():
273
- img = st.session_state.selected_image
274
- if img:
275
- st.session_state.num_line_items[img] += 1
276
-
277
- def remove_line_item():
278
- img = st.session_state.selected_image
279
- if img and st.session_state.num_line_items[img] > 1:
280
- last_num = st.session_state.num_line_items[img]
281
- for lif in LINE_ITEM_FIELDS:
282
- key = f"Line {last_num}: {lif}"
283
- st.session_state.field_values[img].pop(key, None)
284
- st.session_state.field_rects_orig[img].pop(key, None)
285
- st.session_state.num_line_items[img] -= 1
286
- st.session_state.rect_version[img] += 1
287
- st.experimental_rerun()
288
-
289
- field_type = st.radio("Type", ["Single", "Line Item"], horizontal=True, label_visibility="collapsed")
290
-
291
- if field_type == "Single":
292
- field_name = st.selectbox("Field", SINGLE_FIELDS, label_visibility="collapsed")
293
- display_field_name = field_name
294
- storage_field_name = field_name
295
- base_field_for_color = field_name
 
 
 
 
 
 
 
296
  else:
297
- num_items = st.session_state.num_line_items[selected_name]
298
-
299
- line_col1, add_col, rem_col = st.columns([2, 1, 1])
300
- with line_col1:
301
- line_item_options = [f"Line {i+1}" for i in range(num_items)]
302
- selected_line_item = st.selectbox("Line", line_item_options, label_visibility="collapsed")
303
- line_item_num = int(selected_line_item.split()[1])
304
-
305
- with add_col:
306
- st.button("➕", key=f"addli_{selected_name}", on_click=add_line_item, help="Add line item")
307
- with rem_col:
308
- if st.session_state.num_line_items[selected_name] > 1:
309
- st.button("➖", key=f"remli_{selected_name}", on_click=remove_line_item, help="Remove line item")
310
-
311
- base_field = st.selectbox("Field", LINE_ITEM_FIELDS, label_visibility="collapsed")
312
- display_field_name = f"{selected_line_item}: {base_field}"
313
- storage_field_name = f"Line {line_item_num}: {base_field}"
314
- base_field_for_color = base_field
315
-
316
- if not storage_field_name:
317
- storage_field_name = display_field_name
318
-
319
- field_color = FIELD_COLORS.get(base_field_for_color or display_field_name, "#FF0000")
320
- st.markdown(
321
- f"**Current:** <span style='color:{field_color}'>●</span> {display_field_name}",
322
- unsafe_allow_html=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  )
324
-
325
- st.markdown("#### 🔍 Zoom")
326
-
327
- current_zoom = st.session_state.zoom_values[selected_name]
328
- zoom_index = ZOOM_OPTIONS.index(current_zoom) if current_zoom in ZOOM_OPTIONS else 0
329
-
330
- def do_zoom_out():
331
- img = st.session_state.selected_image
332
- curr = st.session_state.zoom_values[img]
333
- idx = ZOOM_OPTIONS.index(curr) if curr in ZOOM_OPTIONS else 0
334
- if idx > 0:
335
- st.session_state.zoom_values[img] = ZOOM_OPTIONS[idx - 1]
336
-
337
- def do_zoom_in():
338
- img = st.session_state.selected_image
339
- curr = st.session_state.zoom_values[img]
340
- idx = ZOOM_OPTIONS.index(curr) if curr in ZOOM_OPTIONS else 0
341
- if idx < len(ZOOM_OPTIONS) - 1:
342
- st.session_state.zoom_values[img] = ZOOM_OPTIONS[idx + 1]
343
-
344
- def do_zoom_fit():
345
- img = st.session_state.selected_image
346
- img_bytes = st.session_state.image_data.get(img)
347
- if img_bytes:
348
- pil_img = load_image(img_bytes)
349
- st.session_state.zoom_values[img] = get_default_zoom(pil_img)
350
-
351
- zoom_col1, zoom_col2, zoom_col3, zoom_col4 = st.columns([2, 1, 1, 1])
352
-
353
- with zoom_col1:
354
- zoom = st.selectbox(
355
- "Zoom",
356
- options=ZOOM_OPTIONS,
357
- index=zoom_index,
358
- format_func=lambda x: f"{x}%",
359
- key=f"zoom_select_{selected_name}",
360
- label_visibility="collapsed",
361
- )
362
- st.session_state.zoom_values[selected_name] = zoom
363
-
364
- with zoom_col2:
365
- st.button("➖", key=f"zoom_out_{selected_name}", help="Zoom out", on_click=do_zoom_out)
366
-
367
- with zoom_col3:
368
- st.button("➕", key=f"zoom_in_{selected_name}", help="Zoom in", on_click=do_zoom_in)
369
-
370
- with zoom_col4:
371
- st.button("Fit", key=f"zoom_fit_{selected_name}", help="Fit to screen", on_click=do_zoom_fit)
372
-
373
- st.caption(f"Original: {pil_image.width}×{pil_image.height}")
374
-
375
- # ---------------------------------
376
- # LHS: Canvas / Image
377
- # ---------------------------------
378
- with col1:
379
- zoom = st.session_state.zoom_values[selected_name]
380
- scale = zoom / 100.0
381
- disp_w = int(pil_image.width * scale)
382
- disp_h = int(pil_image.height * scale)
383
-
384
- display_image = get_display_image(image_bytes, disp_w, disp_h)
385
-
386
- def orig_to_display(rect_orig, s):
387
- return {
388
- "type": "rect",
389
- "left": rect_orig["left"] * s,
390
- "top": rect_orig["top"] * s,
391
- "width": rect_orig["width"] * s,
392
- "height": rect_orig["height"] * s,
393
- "fill": "rgba(0,0,0,0)",
394
- "stroke": rect_orig.get("stroke", "#FF0000"),
395
- "strokeWidth": rect_orig.get("strokeWidth", 2),
396
- "scaleX": 1,
397
- "scaleY": 1,
398
- }
399
-
400
- def display_to_orig(rect_display, s):
401
- w = rect_display.get("width", 0) * rect_display.get("scaleX", 1)
402
- h = rect_display.get("height", 0) * rect_display.get("scaleY", 1)
403
- return {
404
- "left": rect_display.get("left", 0) / s,
405
- "top": rect_display.get("top", 0) / s,
406
- "width": w / s,
407
- "height": h / s,
408
- "stroke": rect_display.get("stroke", "#FF0000"),
409
- "strokeWidth": rect_display.get("strokeWidth", 2),
410
- }
411
-
412
- # Build rectangles from state (one per field)
413
- all_display_objects = []
414
- rects_for_image = st.session_state.field_rects_orig[selected_name]
415
- for fld, rect_orig in rects_for_image.items():
416
- disp_rect = orig_to_display(rect_orig, scale)
417
- base = fld.split(": ", 1)[1] if ": " in fld else fld
418
- disp_rect["stroke"] = FIELD_COLORS.get(base, "#FF0000")
419
- disp_rect["strokeWidth"] = 3 if fld == storage_field_name else 2
420
- all_display_objects.append(disp_rect)
421
-
422
- initial_drawing = {"version": "4.4.0", "objects": all_display_objects}
423
- expected_count = len(all_display_objects)
424
-
425
- rect_ver = st.session_state.rect_version[selected_name]
426
- num_rects = len(rects_for_image)
427
- canvas_key = f"canvas_{selected_name}_z{zoom}_rv{rect_ver}_n{num_rects}"
428
-
429
- canvas_result = st_canvas(
430
- background_image=display_image,
431
- height=disp_h,
432
- width=disp_w,
433
- drawing_mode="rect",
434
- stroke_width=3,
435
- stroke_color=field_color,
436
- fill_color="rgba(255,0,0,0.1)",
437
- update_streamlit=True,
438
- initial_drawing=initial_drawing,
439
- key=canvas_key,
440
- )
441
-
442
- # Detect new rectangle
443
- if canvas_result.json_data is not None:
444
- objs = canvas_result.json_data.get("objects", []) or []
445
- if len(objs) > expected_count:
446
- new_rect_display = objs[-1]
447
- new_rect_orig = display_to_orig(new_rect_display, scale)
448
- new_rect_orig["stroke"] = field_color
449
-
450
- # Overwrite previous rect for this field (so old one disappears)
451
- st.session_state.field_rects_orig[selected_name][storage_field_name] = new_rect_orig
452
- st.session_state.rect_version[selected_name] += 1
453
-
454
- # Auto OCR
455
- x1 = max(0, int(new_rect_orig["left"]))
456
- y1 = max(0, int(new_rect_orig["top"]))
457
- x2 = min(pil_image.width, int(new_rect_orig["left"] + new_rect_orig["width"]))
458
- y2 = min(pil_image.height, int(new_rect_orig["top"] + new_rect_orig["height"]))
459
-
460
- if x2 > x1 and y2 > y1:
461
- crop = pil_image.crop((x1, y1, x2, y2))
462
- try:
463
- text = pytesseract.image_to_string(crop, config="--psm 6").strip()
464
- if text:
465
- st.session_state.field_values[selected_name][storage_field_name] = text
466
- value_state_key = f"value_{selected_name}_{storage_field_name}"
467
- st.session_state[value_state_key] = text
468
- st.toast(f"✅ OCR: {text[:50]}{'...' if len(text) > 50 else ''}")
469
- else:
470
- st.toast("✅ Rectangle saved (no text detected)")
471
- except Exception:
472
- st.toast("✅ Rectangle saved")
473
  else:
474
- st.toast("✅ Rectangle saved")
475
-
476
- # Rerun once so canvas remounts with cleaned rectangles (no old ones)
477
- st.experimental_rerun()
478
-
479
- # ---------------------------------
480
- # RHS BOTTOM: OCR value, all values, export
481
- # ---------------------------------
482
- with col2:
483
- st.markdown("#### ✏️ OCR & Value")
484
-
485
- current_rect_orig = st.session_state.field_rects_orig[selected_name].get(storage_field_name)
486
- value_state_key = f"value_{selected_name}_{storage_field_name}"
487
- if value_state_key not in st.session_state:
488
- st.session_state[value_state_key] = st.session_state.field_values[selected_name].get(
489
- storage_field_name, ""
490
- )
491
-
492
- col_btn1, col_btn2, col_btn3 = st.columns(3)
493
-
494
- with col_btn1:
495
- if st.button("💾 Save"):
496
- st.session_state.field_values[selected_name][storage_field_name] = st.session_state[value_state_key]
497
- st.success("Saved!")
498
-
499
- with col_btn2:
500
- if current_rect_orig and st.button("🔄 Re-OCR"):
501
- x1 = max(0, int(current_rect_orig["left"]))
502
- y1 = max(0, int(current_rect_orig["top"]))
503
- x2 = min(pil_image.width, int(current_rect_orig["left"] + current_rect_orig["width"]))
504
- y2 = min(pil_image.height, int(current_rect_orig["top"] + current_rect_orig["height"]))
505
- if x2 > x1 and y2 > y1:
506
- crop = pil_image.crop((x1, y1, x2, y2))
507
- try:
508
- text = pytesseract.image_to_string(crop, config="--psm 6").strip()
509
- if text:
510
- st.session_state.field_values[selected_name][storage_field_name] = text
511
- st.session_state[value_state_key] = text
512
- st.success(f"OCR: {text}")
513
- else:
514
- st.warning("Empty result")
515
- except Exception as e:
516
- st.error(f"OCR failed: {e}")
517
-
518
- with col_btn3:
519
- def delete_rect():
520
- st.session_state.pending_delete = (selected_name, storage_field_name)
521
- if current_rect_orig:
522
- st.button("🗑️ Delete", on_click=delete_rect)
523
-
524
- st.text_area(
525
- "Value (auto-filled by OCR)",
526
- key=value_state_key,
527
- height=80,
528
- label_visibility="collapsed",
529
- placeholder="Value (auto-filled by OCR)",
530
- )
531
-
532
- # All values
533
- with st.expander("📋 All Values"):
534
- for f in SINGLE_FIELDS:
535
- v = st.session_state.field_values[selected_name].get(f, "")
536
- if v.strip():
537
- st.write(f"**{f}:** {v}")
538
- num_items = st.session_state.num_line_items[selected_name]
539
- for i in range(1, num_items + 1):
540
- vals = [
541
- (lif, st.session_state.field_values[selected_name].get(f"Line {i}: {lif}", ""))
542
- for lif in LINE_ITEM_FIELDS
543
- ]
544
- vals = [(lif, v) for lif, v in vals if v.strip()]
545
- if vals:
546
- st.write(f"**Line {i}:**")
547
- for lif, v in vals:
548
- st.write(f" {lif}: {v}")
549
-
550
- # Export
551
- st.markdown("#### 📤 JSONL Export")
552
-
553
- records_all = [
554
- build_gt_record_for_file(img["name"])
555
- for img in images
556
- if has_any_label(img["name"])
557
- ]
558
-
559
- if records_all:
560
- all_jsonl_str = "\n".join(json.dumps(rec, ensure_ascii=False) for rec in records_all)
561
  st.download_button(
562
- "⬇️ Export ALL labeled (JSONL)",
563
- data=all_jsonl_str.encode("utf-8"),
564
- file_name="remittances_ground_truth.jsonl",
565
- mime="application/json",
 
566
  )
567
- else:
568
- st.caption("No labeled remittances yet.")
569
-
570
- current_record = build_gt_record_for_file(selected_name)
571
- with st.expander("Preview CURRENT JSON"):
572
- st.json(current_record)
573
 
574
- current_jsonl_str = json.dumps(current_record, ensure_ascii=False) + "\n"
575
- st.download_button(
576
- "⬇️ Export CURRENT (JSONL)",
577
- data=current_jsonl_str.encode("utf-8"),
578
- file_name=f"{os.path.splitext(selected_name)[0]}_remittance.jsonl",
579
- mime="application/json",
 
 
 
 
 
580
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ from pathlib import Path
3
+
4
# -----------------------------
# Environment hardening (HF Spaces, /.cache issue)
# -----------------------------
# Make sure HOME points at a usable directory and that $HOME/.streamlit exists
# before streamlit is imported below.
_home = os.environ.get("HOME", "")
# An existing but read-only HOME is as unusable as a missing one; a HOME that
# does not exist yet is left alone because mkdir(parents=True) may create it.
_home_unwritable = os.path.isdir(_home) and not os.access(_home, os.W_OK)
if _home in ("", "/") or _home_unwritable:
    repo_dir = os.getcwd()
    safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp"
    os.environ["HOME"] = safe_home
    print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}")

streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
try:
    streamlit_dir.mkdir(parents=True, exist_ok=True)
    print(f"[startup] ensured {streamlit_dir}")
except OSError as e:
    # Best effort: report the failure and continue starting the app.
    print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")
20
 
21
  import streamlit as st
22
+ import json
23
+ import io
24
+ from PIL import Image
25
+ import time
26
+ import pandas as pd
27
  from streamlit_drawable_canvas import st_canvas
28
  import pytesseract
29
+ import numpy as np
30
+
31
# Set Tesseract path - auto-detect based on OS
# On Windows, point pytesseract at the default installer location, but only
# when that executable actually exists; otherwise keep pytesseract's default
# so an install that is reachable through PATH still works.
_WINDOWS_TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.name == 'nt' and os.path.isfile(_WINDOWS_TESSERACT_EXE):  # Windows
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_EXE
# Linux/Mac (HF Spaces uses Linux): with packages.txt, tesseract is in the
# system PATH, so no explicit path is needed.
38
+
39
+ # Page configuration
40
+ st.set_page_config(page_title="Remittance Data Viewer", layout="wide")
41
+
42
+ # Custom CSS to reduce gaps between form fields and style buttons
43
+ st.markdown("""
44
+ <style>
45
+ /* Reduce spacing between form fields */
46
+ .stTextInput > div > div > input,
47
+ .stTextArea > div > div > textarea,
48
+ .stSelectbox > div > div > div {
49
+ margin-bottom: 0px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
+
52
+ div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextInput"]),
53
+ div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextArea"]),
54
+ div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stSelectbox"]) {
55
+ margin-bottom: 4px !important;
56
+ }
57
+
58
+ /* Reduce gap between selectbox and following elements */
59
+ .stSelectbox {
60
+ margin-bottom: 4px !important;
61
+ }
62
+
63
+ /* Style for small buttons */
64
+ .stButton > button {
65
+ padding: 0.25rem 0.5rem !important;
66
+ font-size: 1.2rem !important;
67
+ line-height: 1 !important;
68
+ min-height: 2rem !important;
69
+ height: 2rem !important;
70
+ }
71
+
72
+ /* Reduce padding in form containers */
73
+ [data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] {
74
+ gap: 0.25rem !important;
75
+ }
76
+
77
+ /* REDUCE GAP BETWEEN COLUMNS */
78
+ [data-testid="column"] {
79
+ padding-left: 0.5rem !important;
80
+ padding-right: 0.5rem !important;
81
+ }
82
+
83
+ [data-testid="stHorizontalBlock"] {
84
+ gap: 0.5rem !important;
85
+ }
86
+ </style>
87
+ """, unsafe_allow_html=True)
88
+
89
def load_jsonl(file):
    """Load a JSONL upload and return its records as a list.

    Args:
        file: Object exposing ``getvalue()`` that returns the raw file bytes
            (e.g. a Streamlit upload or ``io.BytesIO``).

    Returns:
        One parsed JSON value per non-blank line, in file order.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
        json.JSONDecodeError: If a line is not valid JSON; the message names
            the offending JSONL line number.
    """
    # utf-8-sig drops a leading byte-order mark (common in files saved on
    # Windows); with plain utf-8 the BOM would reach json.loads and make the
    # first record fail to parse.
    content = file.getvalue().decode('utf-8-sig')

    records = []
    # Split on '\n' only: str.splitlines() would also break on characters
    # such as U+2028 that may legally appear inside a JSON string.
    for line_number, raw_line in enumerate(content.split('\n'), start=1):
        line = raw_line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as exc:
            # Same exception type as before, with the JSONL line added so the
            # user can locate the bad record.
            raise json.JSONDecodeError(
                f"JSONL line {line_number}: {exc.msg}", exc.doc, exc.pos
            ) from exc
    return records
97
+
98
def save_to_jsonl(data):
    """Serialize a list of records as JSONL text.

    Each record becomes one ``json.dumps`` line; lines are joined with
    newlines and no trailing newline is appended.
    """
    serialized_lines = map(json.dumps, data)
    return '\n'.join(serialized_lines)
102
+
103
def perform_ocr(image, bbox):
    """Perform OCR on the selected region of the image.

    Args:
        image: Image object exposing ``width``, ``height`` and ``crop()``
            (a PIL image in this app).
        bbox: ``[x1, y1, x2, y2]`` in image pixel coordinates. The corners may
            be given in either order.

    Returns:
        The recognised text with surrounding whitespace stripped; ``""`` when
        the region has no area inside the image; or ``"OCR Error: <message>"``
        if cropping or Tesseract fails.
    """
    try:
        # Sort each axis so a box drawn right-to-left or bottom-to-top is
        # handled the same as one drawn top-left to bottom-right.
        x1, x2 = sorted((int(bbox[0]), int(bbox[2])))
        y1, y2 = sorted((int(bbox[1]), int(bbox[3])))

        # Ensure coordinates are within image bounds
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(image.width, x2), min(image.height, y2)

        # Nothing to read: a zero-area or fully off-image region would only
        # make crop/Tesseract fail and surface an error string as field text.
        if x2 <= x1 or y2 <= y1:
            return ""

        # Crop the image
        cropped = image.crop((x1, y1, x2, y2))

        # Perform OCR (--psm 6: treat the crop as a single uniform text block)
        text = pytesseract.image_to_string(cropped, config='--psm 6').strip()
        return text
    except Exception as e:
        # Callers receive the failure as text rather than an exception.
        return f"OCR Error: {str(e)}"
121
+
122
def scale_image_to_fixed_size(image, target_width=700, target_height=900):
    """Scale and pad an image to an exact size while keeping its aspect ratio.

    The image is flattened onto white if it carries transparency, resized so
    it fits inside ``target_width`` x ``target_height``, and centred on a
    white canvas of exactly that size.

    Args:
        image: Source PIL image (any mode).
        target_width: Width of the returned canvas in pixels.
        target_height: Height of the returned canvas in pixels.

    Returns:
        ``(final_image, ratio, paste_x, paste_y)`` where ``ratio`` is the
        scale factor applied to the source and ``paste_x``/``paste_y`` are the
        top-left offsets of the resized image inside the canvas.
    """
    # Grayscale+alpha and palette images can carry transparency too; promote
    # them to RGBA so they get the same white background as RGBA input
    # instead of losing their alpha in a direct RGB conversion.
    if image.mode == 'LA' or (image.mode == 'P' and 'transparency' in image.info):
        image = image.convert('RGBA')

    if image.mode == 'RGBA':
        # Create white background for transparent images
        background = Image.new('RGB', image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3])  # Use alpha channel as mask
        image = background
    elif image.mode != 'RGB':
        # Convert remaining modes (L, CMYK, 1, ...) to RGB
        image = image.convert('RGB')

    # Use the smaller of the two ratios so the image fits both constraints
    ratio = min(target_width / image.width, target_height / image.height)

    # Clamp to at least one pixel: for extreme aspect ratios int() would
    # truncate a side to 0, which resize() rejects.
    new_width = max(1, int(image.width * ratio))
    new_height = max(1, int(image.height * ratio))

    # LANCZOS for downscaling and mild upscaling; BICUBIC otherwise
    # (significant upscaling, where it can look sharper).
    if ratio < 1.0 or 1.0 < ratio < 1.5:
        resample = Image.Resampling.LANCZOS
    else:
        resample = Image.Resampling.BICUBIC
    resized_image = image.resize((new_width, new_height), resample)

    # Create a new image with target size and white background
    final_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))

    # Centre the resized image on the canvas
    paste_x = (target_width - new_width) // 2
    paste_y = (target_height - new_height) // 2
    final_image.paste(resized_image, (paste_x, paste_y))

    return final_image, ratio, paste_x, paste_y
163
+
164
# Initialize session state
# Seed each key only when it is missing, leaving any existing value untouched.
_SESSION_DEFAULTS = {
    'data': None,
    'current_index': 0,
    'edited_data': None,
    'page': 'upload',
    'images': {},
    'modified_indices': set(),
    'ocr_active_section': None,
    'ocr_active_field': None,
    'ocr_line_item_row': None,
    'canvas_key': 0,
    'line_items_temp': [],
    'button_clicked': False,
    'save_message': None,
    'save_message_time': None,
    'just_saved': False,
}
for _state_key, _default_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default_value
195
+
196
+ # Auto-save function
197
def auto_save(index):
    """Copy the edited records over the saved ones and flag ``index`` as modified.

    Does nothing when ``st.session_state.edited_data`` is empty or ``None``.
    NOTE(review): ``.copy()`` is presumably a shallow list copy, so the
    individual records would stay shared between ``data`` and
    ``edited_data`` — confirm this is intended.
    """
    edited = st.session_state.edited_data
    if not edited:
        return
    st.session_state.data = edited.copy()
    st.session_state.modified_indices.add(index)
202
+
203
+ # Save button callback
204
def save_changes_callback():
    """Save-button callback: persist the current record and queue a success notice.

    Runs ``auto_save`` for the record at ``current_index``, then stores the
    confirmation text together with the time it was produced.
    """
    state = st.session_state
    auto_save(state.current_index)
    state.save_message = "✅ Changes saved successfully!"
    state.save_message_time = time.time()
209
+
210
# PAGE 1: Upload Page
# Collects the JSONL ground-truth file and the document images, reports how
# many records have an exactly matching image name, and lets the user move
# on to the viewer once records are loaded.
import copy  # used below to keep the editable records independent of the originals

if st.session_state.page == 'upload':
    st.title("📤 Remittance Data Viewer with OCR")
    st.markdown("### Upload your files to begin")

    # Step 1: Upload JSONL
    st.markdown("**Step 1: Upload JSONL File**")
    uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])

    if uploaded_file is not None:
        try:
            data = load_jsonl(uploaded_file)
            st.session_state.data = data
            # Deep copy: a shallow ``data.copy()`` shares every record dict,
            # so edits made in the viewer would silently change the original
            # records that "Download Unmodified" is meant to export.
            st.session_state.edited_data = copy.deepcopy(data)
            st.success(f"✅ Successfully loaded {len(data)} records!")
        except Exception as e:
            # Best effort: report the problem and leave any earlier data alone.
            st.error(f"Error loading file: {str(e)}")

    # Step 2: Upload Images
    st.markdown("**Step 2: Upload Images Folder**")

    uploaded_images = st.file_uploader(
        "Choose image files",
        type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp'],
        accept_multiple_files=True,
        help="Select all images from your folder at once"
    )

    if uploaded_images:
        # Load images into session state, keyed by their uploaded file name.
        images_dict = {}
        for img_file in uploaded_images:
            try:
                images_dict[img_file.name] = Image.open(img_file)
            except Exception as e:
                # One unreadable image must not block the rest of the upload.
                st.warning(f"Could not load image {img_file.name}: {str(e)}")

        st.session_state.images = images_dict

        # Show summary of loaded images and matches with ground truth
        if st.session_state.data is not None:
            gt_file_names = [rec.get('file_name', '') for rec in st.session_state.data]
            named_records = [fname for fname in gt_file_names if fname]

            # CASE SENSITIVE EXACT MATCH ONLY
            matched_images = {fname for fname in named_records if fname in images_dict}
            unmatched_gt_files = [fname for fname in named_records if fname not in matched_images]

            st.success(f"✅ Successfully loaded {len(images_dict)} images!")
            st.info(f"🔎 Exact matches: {len(matched_images)}/{len(named_records)}")

            # Show unmatched files
            if unmatched_gt_files:
                st.warning(f"⚠️ {len(unmatched_gt_files)} file(s) from JSONL not matched to images:")
                with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
                    for fname in unmatched_gt_files:
                        st.text(f" • {fname}")
            else:
                st.success("✅ All JSONL file names matched to images!")
        else:
            st.success(f"✅ Successfully loaded {len(images_dict)} images!")
            st.info("ℹ️ Upload a JSONL file to see how many images match the ground truth 'file_name' field.")

    # Continue Button (only offered once records are available)
    if st.session_state.data is not None:
        col1, col2, col3 = st.columns([1, 1, 1])
        with col2:
            if st.button("Continue to Viewer →", type="primary", use_container_width=True):
                st.session_state.page = 'viewer'
                # A fresh viewing session starts with nothing marked as modified.
                st.session_state.modified_indices = set()
                st.rerun()
296
+
297
+ # PAGE 2: Viewer Page
298
+ elif st.session_state.page == 'viewer':
299
+ # Clear old save messages (after 3 seconds)
300
+ if st.session_state.save_message_time is not None:
301
+ if time.time() - st.session_state.save_message_time > 3:
302
+ st.session_state.save_message = None
303
+ st.session_state.save_message_time = None
304
+
305
+ # Header with back button and download options
306
+ col1, col2, col3, col4 = st.columns([1, 2, 2, 2])
307
+
308
+ with col1:
309
+ if st.button("← Back to Upload"):
310
+ st.session_state.page = 'upload'
311
+ st.session_state.ocr_active_section = None
312
+ st.session_state.ocr_active_field = None
313
+ st.session_state.save_message = None
314
+ st.session_state.save_message_time = None
315
+ st.rerun()
316
+
317
+ # Download modified records and unmodified records separately
318
+ with col2:
319
+ if st.session_state.modified_indices:
320
+ modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)]
321
+ jsonl_modified = save_to_jsonl(modified_data)
322
+ st.download_button(
323
+ label=f"⬇️ Download Modified ({len(modified_data)})",
324
+ data=jsonl_modified,
325
+ file_name="modified_remittance_data.jsonl",
326
+ mime="application/jsonl",
327
+ type="primary",
328
+ use_container_width=True
329
+ )
330
+ else:
331
+ st.button(
332
+ "⬇️ No Modified Records",
333
+ disabled=True,
334
+ use_container_width=True
335
+ )
336
+
337
+ # Download unmodified records (original data excluding modified)
338
+ with col3:
339
+ if st.session_state.modified_indices:
340
+ # Get original unmodified data
341
+ unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
342
+ if i not in st.session_state.modified_indices]
343
+ jsonl_unmodified = save_to_jsonl(unmodified_data)
344
+ st.download_button(
345
+ label=f"⬇️ Download Unmodified ({len(unmodified_data)})",
346
+ data=jsonl_unmodified,
347
+ file_name="unmodified_remittance_data.jsonl",
348
+ mime="application/jsonl",
349
+ use_container_width=True
350
+ )
351
+ else:
352
+ st.button(
353
+ "⬇️ No Unmodified Records",
354
+ disabled=True,
355
+ use_container_width=True
356
+ )
357
+
358
+ # Download all edited data
359
+ with col4:
360
+ jsonl_all = save_to_jsonl(st.session_state.edited_data)
 
 
 
 
 
 
 
 
361
  st.download_button(
362
+ label=f"⬇️ Download All ({len(st.session_state.edited_data)})",
363
+ data=jsonl_all,
364
+ file_name="all_remittance_data.jsonl",
365
+ mime="application/jsonl",
366
+ use_container_width=True
367
  )
 
 
 
 
 
 
368
 
369
+
370
+
371
+
372
+ # File selector dropdown
373
+ file_names = [record.get('file_name', f'Record {i}') for i, record in enumerate(st.session_state.data)]
374
+
375
+ selected_file = st.selectbox(
376
+ "Select a file to view:",
377
+ options=range(len(file_names)),
378
+ format_func=lambda x: f"{'✏️ ' if x in st.session_state.modified_indices else ''}{file_names[x]}",
379
+ index=st.session_state.current_index
380
  )
381
+
382
+ st.session_state.current_index = selected_file
383
+ current_record = st.session_state.edited_data[selected_file]
384
+
385
+
386
+
387
+ # Main layout: LHS (Image) and RHS (Details) - REDUCED GAP
388
+ left_col, right_col = st.columns([1.3, 1], gap="small")
389
+
390
+ # LEFT SIDE: Image Display with OCR Canvas
391
+ with left_col:
392
+ st.markdown("### 🖼️ Document Image")
393
+
394
+ file_name = current_record.get('file_name', '')
395
+
396
+ if file_name:
397
+ st.caption(f"**File:** {file_name}")
398
+
399
+ # Try to find matching image - CASE SENSITIVE EXACT MATCH ONLY
400
+ current_image = None
401
+ if file_name in st.session_state.images:
402
+ current_image = st.session_state.images[file_name]
403
+ else:
404
+ st.error(f"❌ Image '{file_name}' not found in uploaded images")
405
+ st.info("💡 Available images:")
406
+ with st.expander("Show available images"):
407
+ for img_name in list(st.session_state.images.keys())[:20]:
408
+ st.text(f" • {img_name}")
409
+ if len(st.session_state.images) > 20:
410
+ st.text(f" ... and {len(st.session_state.images) - 20} more")
411
+
412
+ if current_image:
413
+ # Scale image to fixed size
414
+ scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
415
+
416
+ # Always show canvas for drawing rectangles
417
+ canvas_result = st_canvas(
418
+ fill_color="rgba(255, 165, 0, 0.3)",
419
+ stroke_width=2,
420
+ stroke_color="#FF0000",
421
+ background_image=scaled_image,
422
+ update_streamlit=True,
423
+ height=scaled_image.height,
424
+ width=scaled_image.width,
425
+ drawing_mode="rect",
426
+ key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
427
+ )
428
+
429
+ # Process OCR when rectangle is drawn and field is selected
430
+ if canvas_result.json_data is not None and st.session_state.ocr_active_field:
431
+ objects = canvas_result.json_data["objects"]
432
+ if len(objects) > 0:
433
+ # Get the last drawn rectangle
434
+ rect = objects[-1]
435
+
436
+ # Adjust coordinates for padding and scale back to original image coordinates
437
+ bbox = [
438
+ (rect["left"] - paste_x) / scale_ratio,
439
+ (rect["top"] - paste_y) / scale_ratio,
440
+ (rect["left"] + rect["width"] - paste_x) / scale_ratio,
441
+ (rect["top"] + rect["height"] - paste_y) / scale_ratio
442
+ ]
443
+
444
+ # Perform OCR on original image
445
+ with st.spinner("Performing OCR..."):
446
+ ocr_text = perform_ocr(current_image, bbox)
447
+
448
+ if ocr_text and not ocr_text.startswith("OCR Error"):
449
+ st.success(f"✅ OCR Result: {ocr_text}")
450
+
451
+ # Update the field value
452
+ gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
453
+
454
+ if st.session_state.ocr_active_section == 'Line_items':
455
+ # Handle line items
456
+ line_items = gt_parse.get('Line_items', [])
457
+ row_idx = st.session_state.ocr_line_item_row
458
+ if row_idx is not None and row_idx < len(line_items):
459
+ line_items[row_idx][st.session_state.ocr_active_field] = ocr_text
460
+ gt_parse['Line_items'] = line_items
461
+ else:
462
+ # Handle other sections
463
+ section = st.session_state.ocr_active_section
464
+ field = st.session_state.ocr_active_field
465
+ if section not in gt_parse:
466
+ gt_parse[section] = {}
467
+ gt_parse[section][field] = ocr_text
468
+
469
+ st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
470
+
471
+ # Clear canvas and reset
472
+ st.session_state.canvas_key += 1
473
+ time.sleep(0.3)
474
+ st.rerun()
475
+ else:
476
+ st.error(ocr_text)
477
+ else:
478
+ st.warning("No file name specified in record")
479
+
480
+ # RIGHT SIDE: Editable Details
481
+ with right_col:
482
+ st.markdown("### 📝 Document Details")
483
+
484
+ gt_parse = current_record.get('gt_parse', {})
485
+
486
+ # Create tabs for each section
487
+ tab1, tab2, tab3, tab4 = st.tabs([
488
+ "📄 Remittance Details",
489
+ "👥 Party Details",
490
+ "🏦 Bank Details",
491
+ "📋 Line Items"
492
+ ])
493
+
494
+ # TAB 1: Remittance Details
495
+ with tab1:
496
+
497
+
498
+ # OCR Field Selector
499
+ remittance_fields = [
500
+ 'Select fields',
501
+ 'Remittance_adv_no',
502
+ 'Remittance_adv_date',
503
+ 'Payment_method',
504
+ 'FCY',
505
+ 'Total_payment_amt_FCY',
506
+ 'Payment_date',
507
+ 'Payment_ref_no'
508
+ ]
509
+
510
+ selected_rem_field = st.selectbox(
511
+ "🔍 Select field to populate via OCR:",
512
+ options=remittance_fields,
513
+ key=f"rem_ocr_select_{selected_file}"
514
+ )
515
+
516
+ if selected_rem_field != 'Select fields':
517
+ st.session_state.ocr_active_section = 'Remittance_details'
518
+ st.session_state.ocr_active_field = selected_rem_field
519
+ st.session_state.ocr_line_item_row = None
520
+ else:
521
+ if st.session_state.ocr_active_section == 'Remittance_details':
522
+ st.session_state.ocr_active_section = None
523
+ st.session_state.ocr_active_field = None
524
+
525
+ remittance = gt_parse.get('Remittance_details', {})
526
+
527
+ remittance['Remittance_adv_no'] = st.text_input(
528
+ "Remittance Advice No",
529
+ value=remittance.get('Remittance_adv_no', ''),
530
+ key=f"rem_adv_no_{selected_file}"
531
+ )
532
+ remittance['Remittance_adv_date'] = st.text_input(
533
+ "Remittance Advice Date",
534
+ value=remittance.get('Remittance_adv_date', ''),
535
+ key=f"rem_adv_date_{selected_file}"
536
+ )
537
+ remittance['Payment_method'] = st.text_input(
538
+ "Payment Method",
539
+ value=remittance.get('Payment_method', ''),
540
+ key=f"payment_method_{selected_file}"
541
+ )
542
+ remittance['FCY'] = st.text_input(
543
+ "FCY (Foreign Currency)",
544
+ value=remittance.get('FCY', ''),
545
+ key=f"fcy_{selected_file}"
546
+ )
547
+ remittance['Total_payment_amt_FCY'] = st.text_input(
548
+ "Total Payment Amount (FCY)",
549
+ value=remittance.get('Total_payment_amt_FCY', ''),
550
+ key=f"total_payment_{selected_file}"
551
+ )
552
+ remittance['Payment_date'] = st.text_input(
553
+ "Payment Date",
554
+ value=remittance.get('Payment_date', ''),
555
+ key=f"payment_date_{selected_file}"
556
+ )
557
+ remittance['Payment_ref_no'] = st.text_input(
558
+ "Payment Reference No",
559
+ value=remittance.get('Payment_ref_no', ''),
560
+ key=f"payment_ref_{selected_file}"
561
+ )
562
+
563
+ gt_parse['Remittance_details'] = remittance
564
+
565
+ # TAB 2: Customer/Supplier Details
566
+ with tab2:
567
+
568
+
569
+ # OCR Field Selector
570
+ customer_fields = [
571
+ 'Select fields',
572
+ 'Customer_name',
573
+ 'Customer_address',
574
+ 'Customer_contact_info',
575
+ 'Supplier_name',
576
+ 'Supplier_address',
577
+ 'Supplier_contact_info'
578
+ ]
579
+
580
+ selected_cust_field = st.selectbox(
581
+ "🔍 Select field to populate via OCR:",
582
+ options=customer_fields,
583
+ key=f"cust_ocr_select_{selected_file}"
584
+ )
585
+
586
+ if selected_cust_field != 'Select fields':
587
+ st.session_state.ocr_active_section = 'Customer_supplier_details'
588
+ st.session_state.ocr_active_field = selected_cust_field
589
+ st.session_state.ocr_line_item_row = None
590
+ else:
591
+ if st.session_state.ocr_active_section == 'Customer_supplier_details':
592
+ st.session_state.ocr_active_section = None
593
+ st.session_state.ocr_active_field = None
594
+
595
+ st.markdown("**Customer Details**")
596
+ customer_supplier = gt_parse.get('Customer_supplier_details', {})
597
+
598
+ customer_supplier['Customer_name'] = st.text_input(
599
+ "Customer Name",
600
+ value=customer_supplier.get('Customer_name', ''),
601
+ key=f"cust_name_{selected_file}"
602
+ )
603
+ customer_supplier['Customer_address'] = st.text_area(
604
+ "Customer Address",
605
+ value=customer_supplier.get('Customer_address', ''),
606
+ key=f"cust_addr_{selected_file}",
607
+ height=60
608
+ )
609
+ customer_supplier['Customer_contact_info'] = st.text_input(
610
+ "Customer Contact Info",
611
+ value=customer_supplier.get('Customer_contact_info', ''),
612
+ key=f"cust_contact_{selected_file}"
613
+ )
614
+
615
+ st.markdown("**Supplier Details**")
616
+ customer_supplier['Supplier_name'] = st.text_input(
617
+ "Supplier Name",
618
+ value=customer_supplier.get('Supplier_name', ''),
619
+ key=f"supp_name_{selected_file}"
620
+ )
621
+ customer_supplier['Supplier_address'] = st.text_area(
622
+ "Supplier Address",
623
+ value=customer_supplier.get('Supplier_address', ''),
624
+ key=f"supp_addr_{selected_file}",
625
+ height=60
626
+ )
627
+ customer_supplier['Supplier_contact_info'] = st.text_input(
628
+ "Supplier Contact Info",
629
+ value=customer_supplier.get('Supplier_contact_info', ''),
630
+ key=f"supp_contact_{selected_file}"
631
+ )
632
+
633
+ gt_parse['Customer_supplier_details'] = customer_supplier
634
+
635
+ # TAB 3: Bank Details
636
+ with tab3:
637
+
638
+
639
+ # OCR Field Selector
640
+ bank_fields = [
641
+ 'Select fields',
642
+ 'Bank_name',
643
+ 'Bank_acc_no',
644
+ 'Bank_routing_no',
645
+ 'Swift_code'
646
+ ]
647
+
648
+ selected_bank_field = st.selectbox(
649
+ "🔍 Select field to populate via OCR:",
650
+ options=bank_fields,
651
+ key=f"bank_ocr_select_{selected_file}"
652
+ )
653
+
654
+ if selected_bank_field != 'Select fields':
655
+ st.session_state.ocr_active_section = 'Bank_details'
656
+ st.session_state.ocr_active_field = selected_bank_field
657
+ st.session_state.ocr_line_item_row = None
658
+ else:
659
+ if st.session_state.ocr_active_section == 'Bank_details':
660
+ st.session_state.ocr_active_section = None
661
+ st.session_state.ocr_active_field = None
662
+
663
+ bank = gt_parse.get('Bank_details', {})
664
+
665
+ bank['Bank_name'] = st.text_input(
666
+ "Bank Name",
667
+ value=bank.get('Bank_name', ''),
668
+ key=f"bank_name_{selected_file}"
669
+ )
670
+ bank['Bank_acc_no'] = st.text_input(
671
+ "Bank Account No",
672
+ value=bank.get('Bank_acc_no', ''),
673
+ key=f"bank_acc_{selected_file}"
674
+ )
675
+ bank['Bank_routing_no'] = st.text_input(
676
+ "Bank Routing No",
677
+ value=bank.get('Bank_routing_no', ''),
678
+ key=f"bank_routing_{selected_file}"
679
+ )
680
+ bank['Swift_code'] = st.text_input(
681
+ "SWIFT Code",
682
+ value=bank.get('Swift_code', ''),
683
+ key=f"swift_{selected_file}"
684
+ )
685
+
686
+ gt_parse['Bank_details'] = bank
687
+
688
+ # TAB 4: Line Items
689
+ with tab4:
690
+
691
+
692
+ # OCR Controls for Line Items - Fixed layout
693
+ line_items = gt_parse.get('Line_items', [])
694
+
695
+ # Adjusted column widths - all controls in single compact line
696
+ col_field, col_row, col_add, col_remove = st.columns([1.5, 0.7, 0.30, 0.30])
697
+
698
+ line_item_fields = [
699
+ 'Select fields',
700
+ 'Po_number',
701
+ 'Invoice_no',
702
+ 'Other_doc_ref_no',
703
+ 'Invoice_date',
704
+ 'Invoice_amount_FCY',
705
+ 'Amount_paid_for_each_invoice',
706
+ 'Outstanding_balance_FCY',
707
+ 'Discounts_taken_FCY',
708
+ 'Adjustments(without_holding_tax)_FCY',
709
+ 'Descriptions'
710
+ ]
711
+
712
+ with col_field:
713
+ selected_line_field = st.selectbox(
714
+ "🔍 Field:",
715
+ options=line_item_fields,
716
+ key=f"line_ocr_field_{selected_file}"
717
+ )
718
+
719
+ with col_row:
720
+ if len(line_items) > 0:
721
+ selected_row = st.selectbox(
722
+ "Row:",
723
+ options=list(range(len(line_items))),
724
+ format_func=lambda x: f"Row {x + 1}",
725
+ key=f"line_ocr_row_{selected_file}"
726
+ )
727
+ else:
728
+ st.selectbox("Row:", options=[], disabled=True, key=f"line_ocr_row_empty_{selected_file}")
729
+ selected_row = None
730
+
731
+ with col_add:
732
+ # Use button with on_click callback to prevent loop
733
+ if st.button("➕", key=f"add_row_{selected_file}", help="Add new row"):
734
+ if not st.session_state.button_clicked:
735
+ st.session_state.button_clicked = True
736
+ new_row = {
737
+ "Po_number": "",
738
+ "Invoice_no": "",
739
+ "Other_doc_ref_no": "",
740
+ "Invoice_date": "",
741
+ "Invoice_amount_FCY": "",
742
+ "Amount_paid_for_each_invoice": "",
743
+ "Outstanding_balance_FCY": "",
744
+ "Discounts_taken_FCY": "",
745
+ "Adjustments(without_holding_tax)_FCY": "",
746
+ "Descriptions": ""
747
+ }
748
+ line_items.append(new_row)
749
+ gt_parse['Line_items'] = line_items
750
+ st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
751
+ st.session_state.modified_indices.add(selected_file)
752
+ st.rerun()
753
+
754
+ with col_remove:
755
+ if st.button("➖", key=f"remove_row_{selected_file}", help="Remove selected row", disabled=(len(line_items) == 0)):
756
+ if not st.session_state.button_clicked and len(line_items) > 0 and selected_row is not None:
757
+ st.session_state.button_clicked = True
758
+ line_items.pop(selected_row)
759
+ gt_parse['Line_items'] = line_items
760
+ st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
761
+ st.session_state.modified_indices.add(selected_file)
762
+ st.rerun()
763
+
764
+ # Reset button clicked flag after processing
765
+ if st.session_state.button_clicked:
766
+ st.session_state.button_clicked = False
767
+
768
+ # Set OCR state for line items
769
+ if selected_line_field != 'Select fields' and selected_row is not None:
770
+ st.session_state.ocr_active_section = 'Line_items'
771
+ st.session_state.ocr_active_field = selected_line_field
772
+ st.session_state.ocr_line_item_row = selected_row
773
+ else:
774
+ if st.session_state.ocr_active_section == 'Line_items':
775
+ st.session_state.ocr_active_section = None
776
+ st.session_state.ocr_active_field = None
777
+ st.session_state.ocr_line_item_row = None
778
+
779
+
780
+
781
+ # Display line items table
782
+ if line_items:
783
+ df = pd.DataFrame(line_items)
784
+
785
+ # Convert amount fields to numeric
786
+ amount_fields = ['Invoice_amount_FCY', 'Amount_paid_for_each_invoice',
787
+ 'Outstanding_balance_FCY', 'Discounts_taken_FCY',
788
+ 'Adjustments(without_holding_tax)_FCY']
789
+
790
+ for field in amount_fields:
791
+ if field in df.columns:
792
+ df[field] = pd.to_numeric(df[field].replace('', None), errors='coerce')
793
+
794
+ column_config = {
795
+ "Po_number": st.column_config.TextColumn("PO Number", width="small"),
796
+ "Invoice_no": st.column_config.TextColumn("Invoice No", width="small"),
797
+ "Other_doc_ref_no": st.column_config.TextColumn("Other Doc Ref No", width="small"),
798
+ "Invoice_date": st.column_config.TextColumn("Invoice Date", width="small"),
799
+ "Invoice_amount_FCY": st.column_config.NumberColumn("Invoice Amt FCY", width="small", format="%.2f"),
800
+ "Amount_paid_for_each_invoice": st.column_config.NumberColumn("Amount Paid", width="small", format="%.2f"),
801
+ "Outstanding_balance_FCY": st.column_config.NumberColumn("Outstanding FCY", width="small", format="%.2f"),
802
+ "Discounts_taken_FCY": st.column_config.NumberColumn("Discounts FCY", width="small", format="%.2f"),
803
+ "Adjustments(without_holding_tax)_FCY": st.column_config.NumberColumn("Adjustments FCY", width="small", format="%.2f"),
804
+ "Descriptions": st.column_config.TextColumn("Descriptions", width="medium"),
805
+ }
806
+
807
+ edited_df = st.data_editor(
808
+ df,
809
+ column_config=column_config,
810
+ num_rows="fixed",
811
+ use_container_width=True,
812
+ key=f"line_items_table_{selected_file}",
813
+ hide_index=False
814
+ )
815
+
816
+ # Convert back to string
817
+ for field in amount_fields:
818
+ if field in edited_df.columns:
819
+ edited_df[field] = edited_df[field].apply(lambda x: str(x) if pd.notna(x) else '')
820
+
821
+ gt_parse['Line_items'] = edited_df.to_dict('records')
822
+ else:
823
+ st.info("No line items. Click ➕ to add a new row.")
824
+
825
+ # Update the edited data
826
+ st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
827
+
828
+ # Save button
829
+ st.markdown("---")
830
+ col1, col2 = st.columns([1, 1])
831
+ with col1:
832
+ if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
833
+ if not st.session_state.just_saved:
834
+ st.session_state.just_saved = True
835
+ auto_save(selected_file)
836
+ st.session_state.save_message = "✅ Changes saved successfully!"
837
+ st.session_state.save_message_time = time.time()
838
+ st.rerun()
839
+
840
+ # Reset the just_saved flag after rerun
841
+ if st.session_state.just_saved:
842
+ st.session_state.just_saved = False
843
+
844
+ # Display save message under the button (appears after rerun)
845
+ if st.session_state.save_message:
846
+ st.success(st.session_state.save_message)