Bhuvi13 committed on
Commit
369d6e9
·
verified ·
1 Parent(s): fcbb889

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +829 -450
src/streamlit_app.py CHANGED
@@ -27,63 +27,111 @@ import pandas as pd
27
  from streamlit_drawable_canvas import st_canvas
28
  import pytesseract
29
  import numpy as np
 
 
30
 
31
  # Set Tesseract path - auto-detect based on OS
32
  if os.name == 'nt': # Windows
33
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
34
  else: # Linux/Mac (HF Spaces uses Linux)
35
- # On HF Spaces with packages.txt, tesseract is in system PATH
36
- # No need to set path explicitly
37
  pass
38
 
39
  # Page configuration
40
  st.set_page_config(page_title="Remittance Data Viewer", layout="wide")
41
 
42
- # Custom CSS to reduce gaps between form fields and style buttons
43
  st.markdown("""
44
  <style>
45
- /* Reduce spacing between form fields */
46
- .stTextInput > div > div > input,
47
- .stTextArea > div > div > textarea,
48
- .stSelectbox > div > div > div {
49
- margin-bottom: 0px !important;
50
- }
51
-
52
- div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextInput"]),
53
- div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextArea"]),
54
- div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stSelectbox"]) {
55
- margin-bottom: 4px !important;
56
- }
57
-
58
- /* Reduce gap between selectbox and following elements */
59
- .stSelectbox {
60
- margin-bottom: 4px !important;
61
- }
62
-
63
- /* Style for small buttons */
64
- .stButton > button {
65
- padding: 0.25rem 0.5rem !important;
66
- font-size: 1.2rem !important;
67
- line-height: 1 !important;
68
- min-height: 2rem !important;
69
- height: 2rem !important;
70
- }
71
-
72
- /* Reduce padding in form containers */
73
- [data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] {
74
- gap: 0.25rem !important;
75
- }
76
-
77
- /* REDUCE GAP BETWEEN COLUMNS */
78
- [data-testid="column"] {
79
- padding-left: 0.5rem !important;
80
- padding-right: 0.5rem !important;
81
- }
82
-
83
- [data-testid="stHorizontalBlock"] {
84
- gap: 0.5rem !important;
85
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  </style>
 
87
  """, unsafe_allow_html=True)
88
 
89
  def load_jsonl(file):
@@ -100,66 +148,88 @@ def save_to_jsonl(data):
100
  jsonl_content = '\n'.join([json.dumps(record) for record in data])
101
  return jsonl_content
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def perform_ocr(image, bbox):
104
  """Perform OCR on the selected region of the image"""
105
  try:
106
- # bbox is [x1, y1, x2, y2]
107
  x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
108
-
109
- # Ensure coordinates are within image bounds
110
  x1, y1 = max(0, x1), max(0, y1)
111
  x2, y2 = min(image.width, x2), min(image.height, y2)
112
-
113
- # Crop the image
114
  cropped = image.crop((x1, y1, x2, y2))
115
-
116
- # Perform OCR
117
  text = pytesseract.image_to_string(cropped, config='--psm 6').strip()
118
  return text
119
  except Exception as e:
120
  return f"OCR Error: {str(e)}"
121
 
122
- def scale_image_to_fixed_size(image, target_width=700, target_height=900):
123
- """Scale and pad image to exact fixed size while maintaining aspect ratio and quality"""
124
- # Convert image to RGB if it's not already (handles RGBA, L, etc.)
125
  if image.mode not in ('RGB', 'RGBA'):
126
  image = image.convert('RGB')
127
  elif image.mode == 'RGBA':
128
- # Create white background for transparent images
129
  background = Image.new('RGB', image.size, (255, 255, 255))
130
- background.paste(image, mask=image.split()[3]) # Use alpha channel as mask
131
  image = background
132
 
133
- # Calculate scaling ratio to fit within target dimensions
134
- width_ratio = target_width / image.width
135
- height_ratio = target_height / image.height
136
-
137
- # Use the smaller ratio to ensure image fits within both constraints
138
  ratio = min(width_ratio, height_ratio)
139
 
140
- # Calculate new dimensions
141
  new_width = int(image.width * ratio)
142
  new_height = int(image.height * ratio)
143
 
144
- # Resize image with high-quality LANCZOS resampling
145
- # Only resize if needed (don't upscale small images too much)
146
- if ratio < 1.0 or (ratio > 1.0 and ratio < 1.5):
147
- resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
148
- else:
149
- # For significant upscaling, use BICUBIC which can be sharper
150
- resized_image = image.resize((new_width, new_height), Image.Resampling.BICUBIC)
 
 
 
151
 
152
- # Create a new image with target size and white background
153
- final_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))
 
 
154
 
155
- # Calculate position to paste resized image (center it)
156
- paste_x = (target_width - new_width) // 2
157
- paste_y = (target_height - new_height) // 2
 
158
 
159
- # Paste resized image onto white background
160
- final_image.paste(resized_image, (paste_x, paste_y))
 
 
161
 
162
- return final_image, ratio, paste_x, paste_y
 
 
 
163
 
164
  # Initialize session state
165
  if 'data' not in st.session_state:
@@ -172,6 +242,10 @@ if 'page' not in st.session_state:
172
  st.session_state.page = 'upload'
173
  if 'images' not in st.session_state:
174
  st.session_state.images = {}
 
 
 
 
175
  if 'modified_indices' not in st.session_state:
176
  st.session_state.modified_indices = set()
177
  if 'ocr_active_section' not in st.session_state:
@@ -192,28 +266,56 @@ if 'save_message_time' not in st.session_state:
192
  st.session_state.save_message_time = None
193
  if 'just_saved' not in st.session_state:
194
  st.session_state.just_saved = False
 
 
 
 
195
 
196
- # Auto-save function
197
  def auto_save(index):
198
  """Automatically save changes to session state and mark as modified"""
199
  if st.session_state.edited_data:
200
  st.session_state.data = st.session_state.edited_data.copy()
201
  st.session_state.modified_indices.add(index)
202
 
203
- # Save button callback
204
- def save_changes_callback():
205
- """Callback function for save button"""
206
- auto_save(st.session_state.current_index)
207
- st.session_state.save_message = "βœ… Changes saved successfully!"
208
- st.session_state.save_message_time = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  # PAGE 1: Upload Page
211
  if st.session_state.page == 'upload':
212
  st.title("πŸ“€ Remittance Data Viewer with OCR")
213
  st.markdown("### Upload your files to begin")
214
 
215
- # Step 1: Upload JSONL
216
-
217
  st.markdown("**Step 1: Upload JSONL File**")
218
  uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])
219
 
@@ -226,65 +328,100 @@ if st.session_state.page == 'upload':
226
  except Exception as e:
227
  st.error(f"Error loading file: {str(e)}")
228
 
229
- # Step 2: Upload Images
230
 
231
- st.markdown("**Step 2: Upload Images Folder**")
232
-
233
-
234
- uploaded_images = st.file_uploader(
235
- "Choose image files",
236
- type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp'],
237
  accept_multiple_files=True,
238
- help="Select all images from your folder at once"
239
  )
240
 
241
- if uploaded_images:
242
- # Load images into session state
243
  images_dict = {}
244
- for img_file in uploaded_images:
 
 
245
  try:
246
- image = Image.open(img_file)
247
- images_dict[img_file.name] = image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  except Exception as e:
249
- st.warning(f"Could not load image {img_file.name}: {str(e)}")
250
 
251
  st.session_state.images = images_dict
252
- # Show summary of loaded images and matches with ground truth
 
 
 
 
 
 
253
  if st.session_state.data is not None:
254
- # gather ground truth file names
255
  gt_file_names = [rec.get('file_name', '') for rec in st.session_state.data]
256
  matched_images = set()
257
  unmatched_gt_files = []
258
 
259
- # Find matched images - CASE SENSITIVE EXACT MATCH ONLY
260
  for fname in gt_file_names:
261
  if not fname:
262
  continue
263
- # Check for exact match in uploaded images
 
264
  if fname in images_dict:
265
  matched_images.add(fname)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
- # Find unmatched ground truth file names
268
  for fname in gt_file_names:
269
  if fname and fname not in matched_images:
270
  unmatched_gt_files.append(fname)
271
 
272
- st.success(f"βœ… Successfully loaded {len(images_dict)} images!")
273
  st.info(f"πŸ”Ž Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}")
274
 
275
- # Show unmatched files
276
  if unmatched_gt_files:
277
- st.warning(f"⚠️ {len(unmatched_gt_files)} file(s) from JSONL not matched to images:")
278
  with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
279
  for fname in unmatched_gt_files:
280
  st.text(f" β€’ {fname}")
281
  else:
282
- st.success("βœ… All JSONL file names matched to images!")
283
  else:
284
- st.success(f"βœ… Successfully loaded {len(images_dict)} images!")
285
- st.info("ℹ️ Upload a JSONL file to see how many images match the ground truth 'file_name' field.")
286
-
287
- # Continue Button
288
 
289
  if st.session_state.data is not None:
290
  col1, col2, col3 = st.columns([1, 1, 1])
@@ -292,17 +429,18 @@ if st.session_state.page == 'upload':
292
  if st.button("Continue to Viewer β†’", type="primary", use_container_width=True):
293
  st.session_state.page = 'viewer'
294
  st.session_state.modified_indices = set()
 
295
  st.rerun()
296
 
297
  # PAGE 2: Viewer Page
298
  elif st.session_state.page == 'viewer':
299
- # Clear old save messages (after 3 seconds)
300
  if st.session_state.save_message_time is not None:
301
  if time.time() - st.session_state.save_message_time > 3:
302
  st.session_state.save_message = None
303
  st.session_state.save_message_time = None
304
 
305
- # Header with back button and download options
 
306
  col1, col2, col3, col4 = st.columns([1, 2, 2, 2])
307
 
308
  with col1:
@@ -312,9 +450,9 @@ elif st.session_state.page == 'viewer':
312
  st.session_state.ocr_active_field = None
313
  st.session_state.save_message = None
314
  st.session_state.save_message_time = None
 
315
  st.rerun()
316
 
317
- # Download modified records and unmodified records separately
318
  with col2:
319
  if st.session_state.modified_indices:
320
  modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)]
@@ -322,98 +460,175 @@ elif st.session_state.page == 'viewer':
322
  st.download_button(
323
  label=f"⬇️ Download Modified ({len(modified_data)})",
324
  data=jsonl_modified,
325
- file_name="modified_remittance_data.jsonl",
326
  mime="application/jsonl",
327
  type="primary",
328
  use_container_width=True
329
  )
330
  else:
331
- st.button(
332
- "⬇️ No Modified Records",
333
- disabled=True,
334
- use_container_width=True
335
- )
336
 
337
- # Download unmodified records (original data excluding modified)
338
  with col3:
339
  if st.session_state.modified_indices:
340
- # Get original unmodified data
341
  unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
342
  if i not in st.session_state.modified_indices]
343
  jsonl_unmodified = save_to_jsonl(unmodified_data)
344
  st.download_button(
345
  label=f"⬇️ Download Unmodified ({len(unmodified_data)})",
346
  data=jsonl_unmodified,
347
- file_name="unmodified_remittance_data.jsonl",
348
  mime="application/jsonl",
349
  use_container_width=True
350
  )
351
  else:
352
- st.button(
353
- "⬇️ No Unmodified Records",
354
- disabled=True,
355
- use_container_width=True
356
- )
357
 
358
- # Download all edited data
359
  with col4:
360
  jsonl_all = save_to_jsonl(st.session_state.edited_data)
361
  st.download_button(
362
  label=f"⬇️ Download All ({len(st.session_state.edited_data)})",
363
  data=jsonl_all,
364
- file_name="all_remittance_data.jsonl",
365
  mime="application/jsonl",
366
  use_container_width=True
367
  )
368
-
369
-
370
-
371
-
372
- # File selector dropdown
373
- file_names = [record.get('file_name', f'Record {i}') for i, record in enumerate(st.session_state.data)]
374
-
375
- selected_file = st.selectbox(
376
- "Select a file to view:",
377
- options=range(len(file_names)),
378
- format_func=lambda x: f"{'✏️ ' if x in st.session_state.modified_indices else ''}{file_names[x]}",
379
- index=st.session_state.current_index
380
- )
381
-
382
- st.session_state.current_index = selected_file
383
- current_record = st.session_state.edited_data[selected_file]
384
-
385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
 
387
- # Main layout: LHS (Image) and RHS (Details) - REDUCED GAP
388
- left_col, right_col = st.columns([1.3, 1], gap="small")
389
 
390
  # LEFT SIDE: Image Display with OCR Canvas
391
  with left_col:
392
- st.markdown("### πŸ–ΌοΈ Document Image")
393
-
394
  file_name = current_record.get('file_name', '')
395
 
396
  if file_name:
397
- st.caption(f"**File:** {file_name}")
398
-
399
- # Try to find matching image - CASE SENSITIVE EXACT MATCH ONLY
400
- current_image = None
401
  if file_name in st.session_state.images:
402
- current_image = st.session_state.images[file_name]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  else:
404
- st.error(f"❌ Image '{file_name}' not found in uploaded images")
405
- st.info("πŸ’‘ Available images:")
406
- with st.expander("Show available images"):
407
  for img_name in list(st.session_state.images.keys())[:20]:
408
  st.text(f" β€’ {img_name}")
409
  if len(st.session_state.images) > 20:
410
  st.text(f" ... and {len(st.session_state.images) - 20} more")
411
 
412
  if current_image:
413
- # Scale image to fixed size
414
  scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
415
 
416
- # Always show canvas for drawing rectangles
417
  canvas_result = st_canvas(
418
  fill_color="rgba(255, 165, 0, 0.3)",
419
  stroke_width=2,
@@ -426,14 +641,12 @@ elif st.session_state.page == 'viewer':
426
  key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
427
  )
428
 
429
- # Process OCR when rectangle is drawn and field is selected
430
  if canvas_result.json_data is not None and st.session_state.ocr_active_field:
431
- objects = canvas_result.json_data["objects"]
432
  if len(objects) > 0:
433
- # Get the last drawn rectangle
434
  rect = objects[-1]
435
 
436
- # Adjust coordinates for padding and scale back to original image coordinates
437
  bbox = [
438
  (rect["left"] - paste_x) / scale_ratio,
439
  (rect["top"] - paste_y) / scale_ratio,
@@ -441,25 +654,25 @@ elif st.session_state.page == 'viewer':
441
  (rect["top"] + rect["height"] - paste_y) / scale_ratio
442
  ]
443
 
444
- # Perform OCR on original image
445
  with st.spinner("Performing OCR..."):
446
  ocr_text = perform_ocr(current_image, bbox)
447
 
448
  if ocr_text and not ocr_text.startswith("OCR Error"):
449
  st.success(f"βœ… OCR Result: {ocr_text}")
450
 
451
- # Update the field value
452
  gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
453
 
454
  if st.session_state.ocr_active_section == 'Line_items':
455
- # Handle line items
456
  line_items = gt_parse.get('Line_items', [])
457
  row_idx = st.session_state.ocr_line_item_row
458
  if row_idx is not None and row_idx < len(line_items):
459
  line_items[row_idx][st.session_state.ocr_active_field] = ocr_text
460
  gt_parse['Line_items'] = line_items
 
 
 
 
461
  else:
462
- # Handle other sections
463
  section = st.session_state.ocr_active_section
464
  field = st.session_state.ocr_active_field
465
  if section not in gt_parse:
@@ -467,10 +680,15 @@ elif st.session_state.page == 'viewer':
467
  gt_parse[section][field] = ocr_text
468
 
469
  st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
 
 
 
 
 
 
470
 
471
- # Clear canvas and reset
472
  st.session_state.canvas_key += 1
473
- time.sleep(0.3)
474
  st.rerun()
475
  else:
476
  st.error(ocr_text)
@@ -481,9 +699,8 @@ elif st.session_state.page == 'viewer':
481
  with right_col:
482
  st.markdown("### πŸ“ Document Details")
483
 
484
- gt_parse = current_record.get('gt_parse', {})
485
 
486
- # Create tabs for each section
487
  tab1, tab2, tab3, tab4 = st.tabs([
488
  "πŸ“„ Remittance Details",
489
  "πŸ‘₯ Party Details",
@@ -493,340 +710,504 @@ elif st.session_state.page == 'viewer':
493
 
494
  # TAB 1: Remittance Details
495
  with tab1:
 
496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
 
498
- # OCR Field Selector
499
- remittance_fields = [
500
- 'Select fields',
501
- 'Remittance_adv_no',
502
- 'Remittance_adv_date',
503
- 'Payment_method',
504
- 'FCY',
505
- 'Total_payment_amt_FCY',
506
- 'Payment_date',
507
- 'Payment_ref_no'
508
- ]
509
-
510
- selected_rem_field = st.selectbox(
511
- "πŸ” Select field to populate via OCR:",
512
- options=remittance_fields,
513
- key=f"rem_ocr_select_{selected_file}"
514
- )
515
 
516
- if selected_rem_field != 'Select fields':
517
- st.session_state.ocr_active_section = 'Remittance_details'
518
- st.session_state.ocr_active_field = selected_rem_field
519
- st.session_state.ocr_line_item_row = None
520
- else:
521
- if st.session_state.ocr_active_section == 'Remittance_details':
522
- st.session_state.ocr_active_section = None
523
- st.session_state.ocr_active_field = None
 
 
 
 
524
 
525
- remittance = gt_parse.get('Remittance_details', {})
 
 
 
 
 
 
 
 
 
 
 
526
 
527
- remittance['Remittance_adv_no'] = st.text_input(
528
- "Remittance Advice No",
529
- value=remittance.get('Remittance_adv_no', ''),
530
- key=f"rem_adv_no_{selected_file}"
531
- )
532
- remittance['Remittance_adv_date'] = st.text_input(
533
- "Remittance Advice Date",
534
- value=remittance.get('Remittance_adv_date', ''),
535
- key=f"rem_adv_date_{selected_file}"
536
- )
537
- remittance['Payment_method'] = st.text_input(
538
- "Payment Method",
539
- value=remittance.get('Payment_method', ''),
540
- key=f"payment_method_{selected_file}"
541
- )
542
- remittance['FCY'] = st.text_input(
543
- "FCY (Foreign Currency)",
544
- value=remittance.get('FCY', ''),
545
- key=f"fcy_{selected_file}"
546
- )
547
- remittance['Total_payment_amt_FCY'] = st.text_input(
548
- "Total Payment Amount (FCY)",
549
- value=remittance.get('Total_payment_amt_FCY', ''),
550
- key=f"total_payment_{selected_file}"
551
- )
552
- remittance['Payment_date'] = st.text_input(
553
- "Payment Date",
554
- value=remittance.get('Payment_date', ''),
555
- key=f"payment_date_{selected_file}"
556
- )
557
- remittance['Payment_ref_no'] = st.text_input(
558
- "Payment Reference No",
559
- value=remittance.get('Payment_ref_no', ''),
560
- key=f"payment_ref_{selected_file}"
561
- )
 
 
 
562
 
563
  gt_parse['Remittance_details'] = remittance
564
 
565
- # TAB 2: Customer/Supplier Details
566
  with tab2:
 
 
 
 
 
 
 
 
 
567
 
568
-
569
- # OCR Field Selector
570
- customer_fields = [
571
- 'Select fields',
572
- 'Customer_name',
573
- 'Customer_address',
574
- 'Customer_contact_info',
575
- 'Supplier_name',
576
- 'Supplier_address',
577
- 'Supplier_contact_info'
578
- ]
579
-
580
- selected_cust_field = st.selectbox(
581
- "πŸ” Select field to populate via OCR:",
582
- options=customer_fields,
583
- key=f"cust_ocr_select_{selected_file}"
584
- )
585
-
586
- if selected_cust_field != 'Select fields':
587
- st.session_state.ocr_active_section = 'Customer_supplier_details'
588
- st.session_state.ocr_active_field = selected_cust_field
589
- st.session_state.ocr_line_item_row = None
590
- else:
591
- if st.session_state.ocr_active_section == 'Customer_supplier_details':
592
- st.session_state.ocr_active_section = None
593
- st.session_state.ocr_active_field = None
594
 
595
  st.markdown("**Customer Details**")
596
  customer_supplier = gt_parse.get('Customer_supplier_details', {})
597
 
598
- customer_supplier['Customer_name'] = st.text_input(
599
- "Customer Name",
600
- value=customer_supplier.get('Customer_name', ''),
601
- key=f"cust_name_{selected_file}"
602
- )
603
- customer_supplier['Customer_address'] = st.text_area(
604
- "Customer Address",
605
- value=customer_supplier.get('Customer_address', ''),
606
- key=f"cust_addr_{selected_file}",
607
- height=60
608
- )
609
- customer_supplier['Customer_contact_info'] = st.text_input(
610
- "Customer Contact Info",
611
- value=customer_supplier.get('Customer_contact_info', ''),
612
- key=f"cust_contact_{selected_file}"
613
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
614
 
615
  st.markdown("**Supplier Details**")
616
- customer_supplier['Supplier_name'] = st.text_input(
617
- "Supplier Name",
618
- value=customer_supplier.get('Supplier_name', ''),
619
- key=f"supp_name_{selected_file}"
620
- )
621
- customer_supplier['Supplier_address'] = st.text_area(
622
- "Supplier Address",
623
- value=customer_supplier.get('Supplier_address', ''),
624
- key=f"supp_addr_{selected_file}",
625
- height=60
626
- )
627
- customer_supplier['Supplier_contact_info'] = st.text_input(
628
- "Supplier Contact Info",
629
- value=customer_supplier.get('Supplier_contact_info', ''),
630
- key=f"supp_contact_{selected_file}"
631
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
632
 
633
  gt_parse['Customer_supplier_details'] = customer_supplier
634
 
635
  # TAB 3: Bank Details
636
  with tab3:
 
637
 
 
 
 
 
 
 
 
 
 
 
 
 
638
 
639
- # OCR Field Selector
640
- bank_fields = [
641
- 'Select fields',
642
- 'Bank_name',
643
- 'Bank_acc_no',
644
- 'Bank_routing_no',
645
- 'Swift_code'
646
- ]
647
-
648
- selected_bank_field = st.selectbox(
649
- "πŸ” Select field to populate via OCR:",
650
- options=bank_fields,
651
- key=f"bank_ocr_select_{selected_file}"
652
- )
653
-
654
- if selected_bank_field != 'Select fields':
655
- st.session_state.ocr_active_section = 'Bank_details'
656
- st.session_state.ocr_active_field = selected_bank_field
657
- st.session_state.ocr_line_item_row = None
658
- else:
659
- if st.session_state.ocr_active_section == 'Bank_details':
660
- st.session_state.ocr_active_section = None
661
- st.session_state.ocr_active_field = None
662
 
663
- bank = gt_parse.get('Bank_details', {})
 
 
 
 
 
 
 
 
 
 
 
664
 
665
- bank['Bank_name'] = st.text_input(
666
- "Bank Name",
667
- value=bank.get('Bank_name', ''),
668
- key=f"bank_name_{selected_file}"
669
- )
670
- bank['Bank_acc_no'] = st.text_input(
671
- "Bank Account No",
672
- value=bank.get('Bank_acc_no', ''),
673
- key=f"bank_acc_{selected_file}"
674
- )
675
- bank['Bank_routing_no'] = st.text_input(
676
- "Bank Routing No",
677
- value=bank.get('Bank_routing_no', ''),
678
- key=f"bank_routing_{selected_file}"
679
- )
680
- bank['Swift_code'] = st.text_input(
681
- "SWIFT Code",
682
- value=bank.get('Swift_code', ''),
683
- key=f"swift_{selected_file}"
684
- )
685
 
686
  gt_parse['Bank_details'] = bank
687
 
688
  # TAB 4: Line Items
689
  with tab4:
 
 
690
 
691
-
692
- # OCR Controls for Line Items - Fixed layout
693
- line_items = gt_parse.get('Line_items', [])
694
-
695
- # Adjusted column widths - all controls in single compact line
696
- col_field, col_row, col_add, col_remove = st.columns([1.5, 0.7, 0.30, 0.30])
697
-
698
- line_item_fields = [
699
- 'Select fields',
700
- 'Po_number',
701
- 'Invoice_no',
702
- 'Other_doc_ref_no',
703
- 'Invoice_date',
704
- 'Invoice_amount_FCY',
705
- 'Amount_paid_for_each_invoice',
706
- 'Outstanding_balance_FCY',
707
- 'Discounts_taken_FCY',
708
- 'Adjustments(without_holding_tax)_FCY',
709
- 'Descriptions'
710
- ]
711
-
712
- with col_field:
713
- selected_line_field = st.selectbox(
714
- "πŸ” Field:",
715
- options=line_item_fields,
716
- key=f"line_ocr_field_{selected_file}"
717
- )
718
-
719
- with col_row:
720
- if len(line_items) > 0:
721
- selected_row = st.selectbox(
722
- "Row:",
723
- options=list(range(len(line_items))),
724
- format_func=lambda x: f"Row {x + 1}",
725
- key=f"line_ocr_row_{selected_file}"
726
- )
727
- else:
728
- st.selectbox("Row:", options=[], disabled=True, key=f"line_ocr_row_empty_{selected_file}")
729
- selected_row = None
730
-
731
  with col_add:
732
- # Use button with on_click callback to prevent loop
733
- if st.button("βž•", key=f"add_row_{selected_file}", help="Add new row"):
734
  if not st.session_state.button_clicked:
735
  st.session_state.button_clicked = True
736
  new_row = {
737
- "Po_number": "",
738
- "Invoice_no": "",
739
- "Other_doc_ref_no": "",
740
- "Invoice_date": "",
741
- "Invoice_amount_FCY": "",
742
- "Amount_paid_for_each_invoice": "",
743
- "Outstanding_balance_FCY": "",
744
- "Discounts_taken_FCY": "",
745
- "Adjustments(without_holding_tax)_FCY": "",
746
  "Descriptions": ""
747
  }
748
- line_items.append(new_row)
749
- gt_parse['Line_items'] = line_items
750
- st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
 
 
751
  st.session_state.modified_indices.add(selected_file)
 
 
 
 
 
 
752
  st.rerun()
753
 
754
  with col_remove:
755
- if st.button("βž–", key=f"remove_row_{selected_file}", help="Remove selected row", disabled=(len(line_items) == 0)):
756
- if not st.session_state.button_clicked and len(line_items) > 0 and selected_row is not None:
 
757
  st.session_state.button_clicked = True
758
- line_items.pop(selected_row)
759
- gt_parse['Line_items'] = line_items
760
- st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
 
 
 
761
  st.session_state.modified_indices.add(selected_file)
 
 
 
 
 
 
 
762
  st.rerun()
763
 
764
- # Reset button clicked flag after processing
765
  if st.session_state.button_clicked:
766
  st.session_state.button_clicked = False
767
 
768
- # Set OCR state for line items
769
- if selected_line_field != 'Select fields' and selected_row is not None:
770
- st.session_state.ocr_active_section = 'Line_items'
771
- st.session_state.ocr_active_field = selected_line_field
772
- st.session_state.ocr_line_item_row = selected_row
773
- else:
774
- if st.session_state.ocr_active_section == 'Line_items':
775
- st.session_state.ocr_active_section = None
776
- st.session_state.ocr_active_field = None
777
- st.session_state.ocr_line_item_row = None
778
-
779
 
780
-
781
- # Display line items table
782
  if line_items:
783
- df = pd.DataFrame(line_items)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
 
785
- # Convert amount fields to numeric
786
- amount_fields = ['Invoice_amount_FCY', 'Amount_paid_for_each_invoice',
787
- 'Outstanding_balance_FCY', 'Discounts_taken_FCY',
788
- 'Adjustments(without_holding_tax)_FCY']
789
 
790
- for field in amount_fields:
791
- if field in df.columns:
792
- df[field] = pd.to_numeric(df[field].replace('', None), errors='coerce')
793
 
794
- column_config = {
795
- "Po_number": st.column_config.TextColumn("PO Number", width="small"),
796
- "Invoice_no": st.column_config.TextColumn("Invoice No", width="small"),
797
- "Other_doc_ref_no": st.column_config.TextColumn("Other Doc Ref No", width="small"),
798
- "Invoice_date": st.column_config.TextColumn("Invoice Date", width="small"),
799
- "Invoice_amount_FCY": st.column_config.NumberColumn("Invoice Amt FCY", width="small", format="%.2f"),
800
- "Amount_paid_for_each_invoice": st.column_config.NumberColumn("Amount Paid", width="small", format="%.2f"),
801
- "Outstanding_balance_FCY": st.column_config.NumberColumn("Outstanding FCY", width="small", format="%.2f"),
802
- "Discounts_taken_FCY": st.column_config.NumberColumn("Discounts FCY", width="small", format="%.2f"),
803
- "Adjustments(without_holding_tax)_FCY": st.column_config.NumberColumn("Adjustments FCY", width="small", format="%.2f"),
804
- "Descriptions": st.column_config.TextColumn("Descriptions", width="medium"),
805
- }
806
 
807
- edited_df = st.data_editor(
808
  df,
809
- column_config=column_config,
810
- num_rows="fixed",
811
  use_container_width=True,
812
- key=f"line_items_table_{selected_file}",
813
- hide_index=False
814
  )
815
-
816
- # Convert back to string
817
- for field in amount_fields:
818
- if field in edited_df.columns:
819
- edited_df[field] = edited_df[field].apply(lambda x: str(x) if pd.notna(x) else '')
820
-
821
- gt_parse['Line_items'] = edited_df.to_dict('records')
822
  else:
823
- st.info("No line items. Click βž• to add a new row.")
824
 
825
- # Update the edited data
826
  st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
827
 
828
  # Save button
829
- st.markdown("---")
830
  col1, col2 = st.columns([1, 1])
831
  with col1:
832
  if st.button("πŸ’Ύ Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
@@ -837,10 +1218,8 @@ elif st.session_state.page == 'viewer':
837
  st.session_state.save_message_time = time.time()
838
  st.rerun()
839
 
840
- # Reset the just_saved flag after rerun
841
  if st.session_state.just_saved:
842
  st.session_state.just_saved = False
843
 
844
- # Display save message under the button (appears after rerun)
845
  if st.session_state.save_message:
846
- st.success(st.session_state.save_message)
 
27
  from streamlit_drawable_canvas import st_canvas
28
  import pytesseract
29
  import numpy as np
30
+ from datetime import datetime
31
+ import fitz # PyMuPDF for PDF handling
32
 
33
  # Set Tesseract path - auto-detect based on OS
34
  if os.name == 'nt': # Windows
35
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
36
  else: # Linux/Mac (HF Spaces uses Linux)
 
 
37
  pass
38
 
39
  # Page configuration
40
  st.set_page_config(page_title="Remittance Data Viewer", layout="wide")
41
 
42
+ # Custom CSS
43
  st.markdown("""
44
  <style>
45
+ /* Reduce spacing between form fields (unchanged) */
46
+ .stTextInput > div > div > input,
47
+ .stTextArea > div > div > textarea,
48
+ .stSelectbox > div > div > div {
49
+ margin-bottom: 0px !important;
50
+ }
51
+ div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextInput"]),
52
+ div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextArea"]),
53
+ div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stSelectbox"]) {
54
+ margin-bottom: 4px !important;
55
+ }
56
+ .stSelectbox { margin-bottom: 4px !important; }
57
+
58
+ /* Button styling (unchanged) */
59
+ .stButton > button {
60
+ padding: 0.25rem 0.5rem !important;
61
+ font-size: 0.85rem !important;
62
+ line-height: 1 !important;
63
+ min-height: 1.8rem !important;
64
+ height: 1.8rem !important;
65
+ }
66
+ .stButton > button[kind="primary"] {
67
+ background-color: #FF0000 !important;
68
+ border-color: #FF0000 !important;
69
+ color: white !important;
70
+ }
71
+ .stButton > button[kind="primary"]:hover {
72
+ background-color: #CC0000 !important;
73
+ border-color: #CC0000 !important;
74
+ }
75
+
76
+ /* Small vertical gaps */
77
+ [data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] { gap: 0.25rem !important; }
78
+ [data-testid="column"] { padding-left: 0.5rem !important; padding-right: 0.5rem !important; }
79
+ [data-testid="stHorizontalBlock"] { gap: 0.5rem !important; }
80
+
81
+ /* Active OCR field highlighting */
82
+ .ocr-active {
83
+ border: 2px solid #ff4b4b !important;
84
+ box-shadow: 0 0 5px rgba(255, 75, 75, 0.5) !important;
85
+ }
86
+
87
+ /* left/right helper classes (still usable if you wrap content in html containers) */
88
+ .left-scroll, .right-scroll {
89
+ max-height: calc(100vh - 160px);
90
+ overflow-y: auto;
91
+ overflow-x: auto;
92
+ padding: 8px;
93
+ border: 1px solid #e6e6e6;
94
+ border-radius: 6px;
95
+ background: #ffffff;
96
+ }
97
+ .left-scroll canvas, .left-scroll img, .right-scroll canvas, .right-scroll img {
98
+ max-width: 100% !important;
99
+ height: auto !important;
100
+ display: block;
101
+ }
102
+
103
+ /* Make column elements create independent scroll areas:
104
+ - give them a fixed-ish max-height relative to viewport
105
+ - force overflow-y: auto so they show their own scrollbar
106
+ */
107
+ div[data-testid="column"] {
108
+ /* room for header + padding; tune the 140px to your UI if needed */
109
+ max-height: calc(100vh - 140px) !important;
110
+ overflow-y: auto !important;
111
+ overflow-x: hidden !important;
112
+ position: relative !important;
113
+ }
114
+
115
+ /* keep canvas/images from forcing expansion */
116
+ div[data-testid="column"] img,
117
+ div[data-testid="column"] canvas {
118
+ max-width: 100% !important;
119
+ height: auto !important;
120
+ display: block !important;
121
+ }
122
+
123
+ /* Thin subtle scrollbars for WebKit */
124
+ div[data-testid="column"]::-webkit-scrollbar { width: 10px; height: 10px; }
125
+ div[data-testid="column"]::-webkit-scrollbar-thumb { border-radius: 8px; background-color: rgba(0,0,0,0.12); }
126
+ div[data-testid="column"]::-webkit-scrollbar-track { background: transparent; }
127
+
128
+ /* IMPORTANT: remove any rule that forcibly hides page scrollbars.
129
+ If you previously set overflow: hidden on the main container it prevents children from scrolling properly.
130
+ DO NOT set `overflow: hidden` on the top-level app container.
131
+ */
132
+ /* removed: [data-testid="stAppViewContainer"] > .main { overflow: hidden !important; height: 100vh !important; } */
133
  </style>
134
+
135
  """, unsafe_allow_html=True)
136
 
137
  def load_jsonl(file):
 
148
  jsonl_content = '\n'.join([json.dumps(record) for record in data])
149
  return jsonl_content
150
 
151
def pdf_to_images(pdf_file):
    """Convert a PDF into a list of PIL images, one per page.

    Args:
        pdf_file: File-like object whose ``read()`` returns the raw PDF bytes.

    Returns:
        A list of PIL images in page order. If the PDF cannot be read, opened
        or rendered, the error is reported with ``st.error`` and an empty list
        is returned.
    """
    try:
        pdf_bytes = pdf_file.read()
        pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            # 3x zoom over PyMuPDF's 72-dpi base resolution (about 216 dpi)
            # so rendered text stays sharp enough for OCR.
            zoom = fitz.Matrix(3, 3)
            images = []
            for page_num in range(pdf_document.page_count):
                page = pdf_document[page_num]
                pix = page.get_pixmap(matrix=zoom, alpha=False)
                img_data = pix.tobytes("png")
                images.append(Image.open(io.BytesIO(img_data)))
            return images
        finally:
            # Release the document even when rendering a page fails.
            pdf_document.close()
    except Exception as e:
        st.error(f"Error converting PDF: {str(e)}")
        return []
172
+
173
def perform_ocr(image, bbox):
    """Run Tesseract OCR on a rectangular region of an image.

    Args:
        image: Image object exposing ``width``, ``height`` and ``crop``.
        bbox: Indexable of four numbers ``(x1, y1, x2, y2)`` in image pixels.

    Returns:
        The recognised text with surrounding whitespace stripped, or a string
        starting with ``"OCR Error: "`` if anything fails.
    """
    try:
        left, top, right, bottom = [int(bbox[i]) for i in range(4)]

        # Clamp the selection so it never extends past the image borders.
        left = max(0, left)
        top = max(0, top)
        right = min(image.width, right)
        bottom = min(image.height, bottom)

        region = image.crop((left, top, right, bottom))
        recognized = pytesseract.image_to_string(region, config='--psm 6')
        return recognized.strip()
    except Exception as e:
        return f"OCR Error: {str(e)}"
184
 
185
def scale_image_to_fixed_size(image, max_width=900, max_height=1200):
    """Scale an image to fit inside a bounding box, keeping its aspect ratio.

    The image is converted to RGB first; RGBA images are flattened onto a
    white background using their alpha channel as the paste mask. The image
    is scaled by the largest factor that fits both limits, so images smaller
    than the box are enlarged. No padding is added.

    Args:
        image: PIL image to scale.
        max_width: Maximum width of the result in pixels.
        max_height: Maximum height of the result in pixels.

    Returns:
        A tuple ``(resized_image, ratio, 0, 0)``: the scaled RGB image, the
        scale factor applied, and x/y offsets that are always 0 because the
        image is not padded.
    """
    if image.mode == 'RGBA':
        background = Image.new('RGB', image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3])
        image = background
    elif image.mode != 'RGB':
        image = image.convert('RGB')

    ratio = min(max_width / image.width, max_height / image.height)

    # Truncation can yield 0 for very elongated images; keep at least one
    # pixel per side so the resize target is never empty.
    new_width = max(1, int(image.width * ratio))
    new_height = max(1, int(image.height * ratio))

    # LANCZOS gives the highest-quality resampling.
    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

    return resized_image, ratio, 0, 0
208
+
209
def swap_customer_supplier_details(index):
    """Exchange the customer and supplier fields of one edited record.

    Swaps name, address and contact info between the ``Customer_*`` and
    ``Supplier_*`` keys of ``gt_parse['Customer_supplier_details']`` for the
    record at ``index`` in ``st.session_state.edited_data``, treating missing
    keys as empty strings, and marks the record as modified.
    """
    field_pairs = (
        ('Customer_name', 'Supplier_name'),
        ('Customer_address', 'Supplier_address'),
        ('Customer_contact_info', 'Supplier_contact_info'),
    )

    record = st.session_state.edited_data[index]
    parsed = record.get('gt_parse', {})
    parties = parsed.get('Customer_supplier_details', {})

    # Snapshot the customer side before it is overwritten.
    old_customer = [parties.get(customer_key, '') for customer_key, _ in field_pairs]

    # Customer <- Supplier
    for customer_key, supplier_key in field_pairs:
        parties[customer_key] = parties.get(supplier_key, '')

    # Supplier <- previous Customer
    for (_, supplier_key), saved_value in zip(field_pairs, old_customer):
        parties[supplier_key] = saved_value

    # Write back in case either level was created by the .get() defaults.
    parsed['Customer_supplier_details'] = parties
    record['gt_parse'] = parsed
    st.session_state.modified_indices.add(index)
233
 
234
  # Initialize session state
235
  if 'data' not in st.session_state:
 
242
  st.session_state.page = 'upload'
243
  if 'images' not in st.session_state:
244
  st.session_state.images = {}
245
+ if 'pdf_metadata' not in st.session_state:
246
+ st.session_state.pdf_metadata = {} # Store {filename: {'pages': [images], 'current_page': 0}}
247
+ if 'current_page_num' not in st.session_state:
248
+ st.session_state.current_page_num = {} # Track current page for each file
249
  if 'modified_indices' not in st.session_state:
250
  st.session_state.modified_indices = set()
251
  if 'ocr_active_section' not in st.session_state:
 
266
  st.session_state.save_message_time = None
267
  if 'just_saved' not in st.session_state:
268
  st.session_state.just_saved = False
269
+ if 'just_swapped' not in st.session_state:
270
+ st.session_state.just_swapped = False
271
+ if 'navigating_page' not in st.session_state:
272
+ st.session_state.navigating_page = False
273
 
 
274
def auto_save(index):
    """Copy the edited records over the stored data and flag one as modified.

    Does nothing when ``st.session_state.edited_data`` is empty or unset.
    The copy is shallow (``list.copy()``).
    """
    state = st.session_state
    if not state.edited_data:
        return
    state.data = state.edited_data.copy()
    state.modified_indices.add(index)
279
 
280
+ def activate_ocr_field(section, field, row_idx=None):
281
+ """Activate OCR for a specific field.
282
+ Toggle behavior: if the same field is already active, deactivate it to avoid repeated activations/looping.
283
+ Also ensures the line-item expander stays expanded when OCR is requested.
284
+ """
285
+ # If the requested field is already active, deactivate it (toggle off)
286
+ if (st.session_state.ocr_active_section == section and
287
+ st.session_state.ocr_active_field == field and
288
+ st.session_state.ocr_line_item_row == row_idx):
289
+ st.session_state.ocr_active_section = None
290
+ st.session_state.ocr_active_field = None
291
+ st.session_state.ocr_line_item_row = None
292
+ else:
293
+ # Activate new OCR target
294
+ st.session_state.ocr_active_section = section
295
+ st.session_state.ocr_active_field = field
296
+ st.session_state.ocr_line_item_row = row_idx
297
+
298
+ # If it's a line-item, mark that expander as expanded so it remains open after rerun
299
+ if section == 'Line_items' and row_idx is not None:
300
+ current_idx = st.session_state.get('current_index', 0)
301
+ expander_key = f"line_item_expander_{current_idx}_{row_idx}"
302
+ st.session_state[expander_key] = True
303
+
304
+ # Bump canvas_key to ensure canvas is refreshed/cleared when toggling OCR
305
+ st.session_state.canvas_key += 1
306
+ st.rerun()
307
+
308
def is_ocr_active(section, field, row_idx=None):
    """Return whether the given field is the currently active OCR target.

    Compares ``section``, ``field`` and ``row_idx`` with the
    ``ocr_active_section``, ``ocr_active_field`` and ``ocr_line_item_row``
    values held in ``st.session_state``.
    """
    state = st.session_state
    if state.ocr_active_section != section:
        return False
    if state.ocr_active_field != field:
        return False
    return state.ocr_line_item_row == row_idx
313
 
314
  # PAGE 1: Upload Page
315
  if st.session_state.page == 'upload':
316
  st.title("πŸ“€ Remittance Data Viewer with OCR")
317
  st.markdown("### Upload your files to begin")
318
 
 
 
319
  st.markdown("**Step 1: Upload JSONL File**")
320
  uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])
321
 
 
328
  except Exception as e:
329
  st.error(f"Error loading file: {str(e)}")
330
 
331
+ st.markdown("**Step 2: Upload Images/PDFs Folder**")
332
 
333
+ uploaded_files = st.file_uploader(
334
+ "Choose image or PDF files",
335
+ type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'],
 
 
 
336
  accept_multiple_files=True,
337
+ help="Select all images and PDFs from your folder at once"
338
  )
339
 
340
+ if uploaded_files:
 
341
  images_dict = {}
342
+ pdf_metadata = {}
343
+
344
+ for file in uploaded_files:
345
  try:
346
+ file_ext = file.name.lower().split('.')[-1]
347
+
348
+ if file_ext == 'pdf':
349
+ # Convert PDF to images
350
+ pdf_images = pdf_to_images(file)
351
+ if pdf_images:
352
+ # Store first page as the main image
353
+ images_dict[file.name] = pdf_images[0]
354
+ # Store all pages in metadata
355
+ pdf_metadata[file.name] = {
356
+ 'pages': pdf_images,
357
+ 'total_pages': len(pdf_images),
358
+ 'current_page': 0
359
+ }
360
+ #st.info(f"πŸ“„ Converted PDF '{file.name}' ({len(pdf_images)} pages)")
361
+ else:
362
+ # Handle regular images
363
+ image = Image.open(file)
364
+ images_dict[file.name] = image
365
+
366
  except Exception as e:
367
+ st.warning(f"Could not load file {file.name}: {str(e)}")
368
 
369
  st.session_state.images = images_dict
370
+ st.session_state.pdf_metadata = pdf_metadata
371
+
372
+ # Initialize current page tracking
373
+ for filename in pdf_metadata.keys():
374
+ if filename not in st.session_state.current_page_num:
375
+ st.session_state.current_page_num[filename] = 0
376
+
377
  if st.session_state.data is not None:
 
378
  gt_file_names = [rec.get('file_name', '') for rec in st.session_state.data]
379
  matched_images = set()
380
  unmatched_gt_files = []
381
 
382
+ # Try to match with and without extensions
383
  for fname in gt_file_names:
384
  if not fname:
385
  continue
386
+
387
+ # Try exact match first
388
  if fname in images_dict:
389
  matched_images.add(fname)
390
+ else:
391
+ # Try adding common extensions
392
+ found = False
393
+ for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
394
+ if fname + ext in images_dict:
395
+ matched_images.add(fname)
396
+ found = True
397
+ break
398
+
399
+ # Try matching filename without extension from uploaded files
400
+ if not found:
401
+ for uploaded_name in images_dict.keys():
402
+ uploaded_base = uploaded_name.rsplit('.', 1)[0]
403
+ if uploaded_base == fname:
404
+ matched_images.add(fname)
405
+ found = True
406
+ break
407
 
 
408
  for fname in gt_file_names:
409
  if fname and fname not in matched_images:
410
  unmatched_gt_files.append(fname)
411
 
412
+ st.success(f"βœ… Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs)!")
413
  st.info(f"πŸ”Ž Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}")
414
 
 
415
  if unmatched_gt_files:
416
+ st.warning(f"⚠️ {len(unmatched_gt_files)} file(s) from JSONL not matched:")
417
  with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
418
  for fname in unmatched_gt_files:
419
  st.text(f" β€’ {fname}")
420
  else:
421
+ st.success("βœ… All JSONL file names matched to files!")
422
  else:
423
+ st.success(f"βœ… Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs)!")
424
+ st.info("ℹ️ Upload a JSONL file to see how many files match the ground truth 'file_name' field.")
 
 
425
 
426
  if st.session_state.data is not None:
427
  col1, col2, col3 = st.columns([1, 1, 1])
 
429
  if st.button("Continue to Viewer β†’", type="primary", use_container_width=True):
430
  st.session_state.page = 'viewer'
431
  st.session_state.modified_indices = set()
432
+ st.session_state.navigating_page = False
433
  st.rerun()
434
 
435
  # PAGE 2: Viewer Page
436
  elif st.session_state.page == 'viewer':
 
437
  if st.session_state.save_message_time is not None:
438
  if time.time() - st.session_state.save_message_time > 3:
439
  st.session_state.save_message = None
440
  st.session_state.save_message_time = None
441
 
442
+ today_date = datetime.now().strftime("%Y-%m-%d")
443
+
444
  col1, col2, col3, col4 = st.columns([1, 2, 2, 2])
445
 
446
  with col1:
 
450
  st.session_state.ocr_active_field = None
451
  st.session_state.save_message = None
452
  st.session_state.save_message_time = None
453
+ st.session_state.navigating_page = False
454
  st.rerun()
455
 
 
456
  with col2:
457
  if st.session_state.modified_indices:
458
  modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)]
 
460
  st.download_button(
461
  label=f"⬇️ Download Modified ({len(modified_data)})",
462
  data=jsonl_modified,
463
+ file_name=f"modified_remittance_data_{today_date}.jsonl",
464
  mime="application/jsonl",
465
  type="primary",
466
  use_container_width=True
467
  )
468
  else:
469
+ st.button("⬇️ No Modified Records", disabled=True, use_container_width=True)
 
 
 
 
470
 
 
471
  with col3:
472
  if st.session_state.modified_indices:
 
473
  unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
474
  if i not in st.session_state.modified_indices]
475
  jsonl_unmodified = save_to_jsonl(unmodified_data)
476
  st.download_button(
477
  label=f"⬇️ Download Unmodified ({len(unmodified_data)})",
478
  data=jsonl_unmodified,
479
+ file_name=f"unmodified_remittance_data_{today_date}.jsonl",
480
  mime="application/jsonl",
481
  use_container_width=True
482
  )
483
  else:
484
+ st.button("⬇️ No Unmodified Records", disabled=True, use_container_width=True)
 
 
 
 
485
 
 
486
  with col4:
487
  jsonl_all = save_to_jsonl(st.session_state.edited_data)
488
  st.download_button(
489
  label=f"⬇️ Download All ({len(st.session_state.edited_data)})",
490
  data=jsonl_all,
491
+ file_name=f"all_remittance_data_{today_date}.jsonl",
492
  mime="application/jsonl",
493
  use_container_width=True
494
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
 
496
+ file_names = [record.get('file_name', f'Record {i}') for i, record in enumerate(st.session_state.data or [])]
497
+
498
+ # Guard: no records at all
499
+ if not file_names:
500
+ st.error("No records loaded. Please upload a JSONL file on the Upload page.")
501
+ if st.button("← Back to Upload"):
502
+ st.session_state.page = 'upload'
503
+ st.rerun()
504
+ else:
505
+ # Build options (list is safer than range for length checks)
506
+ options = list(range(len(file_names)))
507
+
508
+ # Ensure edited_data exists and has consistent length
509
+ if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names):
510
+ # try to sync edited_data to data
511
+ st.session_state.edited_data = (st.session_state.data or []).copy()
512
+
513
+ # Clamp current_index into valid range
514
+ cur_idx = st.session_state.get('current_index', 0)
515
+ try:
516
+ cur_idx = int(cur_idx)
517
+ except Exception:
518
+ cur_idx = 0
519
+ if cur_idx < 0:
520
+ cur_idx = 0
521
+ if cur_idx >= len(options):
522
+ cur_idx = len(options) - 1
523
+
524
+ # Show selectbox with a safe index
525
+ selected_file = st.selectbox(
526
+ "Select a file to view:",
527
+ options=options,
528
+ format_func=lambda x: f"{'✏️ ' if x in st.session_state.modified_indices else ''}{file_names[x]}",
529
+ index=cur_idx
530
+ )
531
+
532
+ # Persist chosen index
533
+ st.session_state.current_index = selected_file
534
+
535
+ # Safe access to the current record
536
+ current_record = st.session_state.edited_data[selected_file]
537
 
538
+ left_col, right_col = st.columns([1.8, 1.0], gap="small")
 
539
 
540
  # LEFT SIDE: Image Display with OCR Canvas
541
  with left_col:
 
 
542
  file_name = current_record.get('file_name', '')
543
 
544
  if file_name:
545
+ # Find the actual file name (handle cases where extension is missing)
546
+ actual_file_name = None
 
 
547
  if file_name in st.session_state.images:
548
+ actual_file_name = file_name
549
+ else:
550
+ # Try adding common extensions
551
+ for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
552
+ if file_name + ext in st.session_state.images:
553
+ actual_file_name = file_name + ext
554
+ break
555
+
556
+ # Try matching without extension
557
+ if not actual_file_name:
558
+ for uploaded_name in st.session_state.images.keys():
559
+ uploaded_base = uploaded_name.rsplit('.', 1)[0]
560
+ if uploaded_base == file_name:
561
+ actual_file_name = uploaded_name
562
+ break
563
+
564
+ if actual_file_name:
565
+ # Check if this is a PDF with multiple pages
566
+ is_pdf = actual_file_name in st.session_state.pdf_metadata
567
+
568
+ if is_pdf:
569
+ pdf_meta = st.session_state.pdf_metadata[actual_file_name]
570
+ total_pages = pdf_meta['total_pages']
571
+ current_page = st.session_state.current_page_num.get(actual_file_name, 0)
572
+
573
+ # PDF Navigation Header
574
+ col_prev, col_info, col_next = st.columns([1, 2, 1])
575
+
576
+ with col_prev:
577
+ prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{actual_file_name}",
578
+ disabled=(current_page == 0), use_container_width=True)
579
+
580
+ with col_info:
581
+ st.markdown(f"<div style='text-align: center; padding: 5px;'><b>πŸ“„ Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True)
582
+
583
+ with col_next:
584
+ next_clicked = st.button("Next ➑️", key=f"next_page_{selected_file}_{actual_file_name}",
585
+ disabled=(current_page >= total_pages - 1), use_container_width=True)
586
+
587
+ # Handle navigation only if not already navigating
588
+ if not st.session_state.navigating_page:
589
+ if prev_clicked:
590
+ st.session_state.navigating_page = True
591
+ st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1)
592
+ st.session_state.canvas_key += 1
593
+ st.session_state.ocr_active_section = None
594
+ st.session_state.ocr_active_field = None
595
+ st.rerun()
596
+ elif next_clicked:
597
+ st.session_state.navigating_page = True
598
+ st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1)
599
+ st.session_state.canvas_key += 1
600
+ st.session_state.ocr_active_section = None
601
+ st.session_state.ocr_active_field = None
602
+ st.rerun()
603
+ else:
604
+ # Reset the flag after rerun
605
+ st.session_state.navigating_page = False
606
+
607
+ if actual_file_name:
608
+ # Determine if PDF and get the appropriate image
609
+ is_pdf = actual_file_name in st.session_state.pdf_metadata
610
+
611
+ if is_pdf:
612
+ # Get the current page image
613
+ current_page = st.session_state.current_page_num.get(actual_file_name, 0)
614
+ pdf_meta = st.session_state.pdf_metadata[actual_file_name]
615
+ current_image = pdf_meta['pages'][current_page]
616
+ else:
617
+ current_image = st.session_state.images[actual_file_name]
618
  else:
619
+ st.error(f"❌ File '{file_name}' not found in uploaded files")
620
+ st.info("πŸ’‘ Available files:")
621
+ with st.expander("Show available files"):
622
  for img_name in list(st.session_state.images.keys())[:20]:
623
  st.text(f" β€’ {img_name}")
624
  if len(st.session_state.images) > 20:
625
  st.text(f" ... and {len(st.session_state.images) - 20} more")
626
 
627
  if current_image:
628
+ # Scale to a reasonable size so canvas doesn't become excessively large
629
  scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
630
 
631
+ # Render the canvas. Its internal canvas will be constrained by the wrapper due to CSS above.
632
  canvas_result = st_canvas(
633
  fill_color="rgba(255, 165, 0, 0.3)",
634
  stroke_width=2,
 
641
  key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
642
  )
643
 
644
+ # Only attempt OCR if there's an active OCR target AND the user has drawn something (objects exist)
645
  if canvas_result.json_data is not None and st.session_state.ocr_active_field:
646
+ objects = canvas_result.json_data.get("objects", [])
647
  if len(objects) > 0:
 
648
  rect = objects[-1]
649
 
 
650
  bbox = [
651
  (rect["left"] - paste_x) / scale_ratio,
652
  (rect["top"] - paste_y) / scale_ratio,
 
654
  (rect["top"] + rect["height"] - paste_y) / scale_ratio
655
  ]
656
 
 
657
  with st.spinner("Performing OCR..."):
658
  ocr_text = perform_ocr(current_image, bbox)
659
 
660
  if ocr_text and not ocr_text.startswith("OCR Error"):
661
  st.success(f"βœ… OCR Result: {ocr_text}")
662
 
 
663
  gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
664
 
665
  if st.session_state.ocr_active_section == 'Line_items':
 
666
  line_items = gt_parse.get('Line_items', [])
667
  row_idx = st.session_state.ocr_line_item_row
668
  if row_idx is not None and row_idx < len(line_items):
669
  line_items[row_idx][st.session_state.ocr_active_field] = ocr_text
670
  gt_parse['Line_items'] = line_items
671
+
672
+ # ensure expander stays open for this row after OCR
673
+ expander_key = f"line_item_expander_{selected_file}_{row_idx}"
674
+ st.session_state[expander_key] = True
675
  else:
 
676
  section = st.session_state.ocr_active_section
677
  field = st.session_state.ocr_active_field
678
  if section not in gt_parse:
 
680
  gt_parse[section][field] = ocr_text
681
 
682
  st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
683
+ st.session_state.modified_indices.add(selected_file)
684
+
685
+ # After successful OCR, deactivate OCR target to prevent repeated triggers/loops
686
+ st.session_state.ocr_active_section = None
687
+ st.session_state.ocr_active_field = None
688
+ st.session_state.ocr_line_item_row = None
689
 
690
+ # Clear canvas for next OCR by bumping canvas_key then rerun
691
  st.session_state.canvas_key += 1
 
692
  st.rerun()
693
  else:
694
  st.error(ocr_text)
 
699
  with right_col:
700
  st.markdown("### πŸ“ Document Details")
701
 
702
+ gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
703
 
 
704
  tab1, tab2, tab3, tab4 = st.tabs([
705
  "πŸ“„ Remittance Details",
706
  "πŸ‘₯ Party Details",
 
710
 
711
  # TAB 1: Remittance Details
712
  with tab1:
713
+ remittance = gt_parse.get('Remittance_details', {})
714
 
715
+ # Each field with OCR button
716
+ col_input, col_btn = st.columns([5, 1])
717
+ with col_input:
718
+ remittance['Remittance_adv_no'] = st.text_input(
719
+ "Remittance Advice No",
720
+ value=remittance.get('Remittance_adv_no', ''),
721
+ key=f"rem_adv_no_{selected_file}"
722
+ )
723
+ with col_btn:
724
+ st.markdown("<br>", unsafe_allow_html=True)
725
+ if st.button("πŸ”", key=f"ocr_rem_adv_no_{selected_file}",
726
+ type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_no') else "secondary"):
727
+ activate_ocr_field('Remittance_details', 'Remittance_adv_no')
728
 
729
+ col_input, col_btn = st.columns([5, 1])
730
+ with col_input:
731
+ remittance['Remittance_adv_date'] = st.text_input(
732
+ "Remittance Advice Date",
733
+ value=remittance.get('Remittance_adv_date', ''),
734
+ key=f"rem_adv_date_{selected_file}"
735
+ )
736
+ with col_btn:
737
+ st.markdown("<br>", unsafe_allow_html=True)
738
+ if st.button("πŸ”", key=f"ocr_rem_adv_date_{selected_file}",
739
+ type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_date') else "secondary"):
740
+ activate_ocr_field('Remittance_details', 'Remittance_adv_date')
 
 
 
 
 
741
 
742
+ col_input, col_btn = st.columns([5, 1])
743
+ with col_input:
744
+ remittance['Payment_method'] = st.text_input(
745
+ "Payment Method",
746
+ value=remittance.get('Payment_method', ''),
747
+ key=f"payment_method_{selected_file}"
748
+ )
749
+ with col_btn:
750
+ st.markdown("<br>", unsafe_allow_html=True)
751
+ if st.button("πŸ”", key=f"ocr_payment_method_{selected_file}",
752
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_method') else "secondary"):
753
+ activate_ocr_field('Remittance_details', 'Payment_method')
754
 
755
+ col_input, col_btn = st.columns([5, 1])
756
+ with col_input:
757
+ remittance['FCY'] = st.text_input(
758
+ "FCY (Foreign Currency)",
759
+ value=remittance.get('FCY', ''),
760
+ key=f"fcy_{selected_file}"
761
+ )
762
+ with col_btn:
763
+ st.markdown("<br>", unsafe_allow_html=True)
764
+ if st.button("πŸ”", key=f"ocr_fcy_{selected_file}",
765
+ type="primary" if is_ocr_active('Remittance_details', 'FCY') else "secondary"):
766
+ activate_ocr_field('Remittance_details', 'FCY')
767
 
768
+ col_input, col_btn = st.columns([5, 1])
769
+ with col_input:
770
+ remittance['Total_payment_amt_FCY'] = st.text_input(
771
+ "Total Payment Amount (FCY)",
772
+ value=remittance.get('Total_payment_amt_FCY', ''),
773
+ key=f"total_payment_{selected_file}"
774
+ )
775
+ with col_btn:
776
+ st.markdown("<br>", unsafe_allow_html=True)
777
+ if st.button("πŸ”", key=f"ocr_total_payment_{selected_file}",
778
+ type="primary" if is_ocr_active('Remittance_details', 'Total_payment_amt_FCY') else "secondary"):
779
+ activate_ocr_field('Remittance_details', 'Total_payment_amt_FCY')
780
+
781
+ col_input, col_btn = st.columns([5, 1])
782
+ with col_input:
783
+ remittance['Payment_date'] = st.text_input(
784
+ "Payment Date",
785
+ value=remittance.get('Payment_date', ''),
786
+ key=f"payment_date_{selected_file}"
787
+ )
788
+ with col_btn:
789
+ st.markdown("<br>", unsafe_allow_html=True)
790
+ if st.button("πŸ”", key=f"ocr_payment_date_{selected_file}",
791
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_date') else "secondary"):
792
+ activate_ocr_field('Remittance_details', 'Payment_date')
793
+
794
+ col_input, col_btn = st.columns([5, 1])
795
+ with col_input:
796
+ remittance['Payment_ref_no'] = st.text_input(
797
+ "Payment Reference No",
798
+ value=remittance.get('Payment_ref_no', ''),
799
+ key=f"payment_ref_{selected_file}"
800
+ )
801
+ with col_btn:
802
+ st.markdown("<br>", unsafe_allow_html=True)
803
+ if st.button("πŸ”", key=f"ocr_payment_ref_{selected_file}",
804
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_ref_no') else "secondary"):
805
+ activate_ocr_field('Remittance_details', 'Payment_ref_no')
806
 
807
  gt_parse['Remittance_details'] = remittance
808
 
809
+ # TAB 2: Customer/Supplier Details with SWAP button
810
  with tab2:
811
+ # SWAP BUTTON - Centered and prominent
812
+ col1, col2, col3 = st.columns([1, 2, 1])
813
+ with col2:
814
+ if st.button("πŸ”„ Swap Customer ↔ Supplier", key=f"swap_btn_{selected_file}",
815
+ type="primary", use_container_width=True):
816
+ if not st.session_state.just_swapped:
817
+ st.session_state.just_swapped = True
818
+ swap_customer_supplier_details(selected_file)
819
+ st.rerun()
820
 
821
+ # Reset the flag after rerun
822
+ if st.session_state.just_swapped:
823
+ st.session_state.just_swapped = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
824
 
825
  st.markdown("**Customer Details**")
826
  customer_supplier = gt_parse.get('Customer_supplier_details', {})
827
 
828
+ col_input, col_btn = st.columns([5, 1])
829
+ with col_input:
830
+ customer_supplier['Customer_name'] = st.text_input(
831
+ "Customer Name",
832
+ value=customer_supplier.get('Customer_name', ''),
833
+ key=f"cust_name_{selected_file}"
834
+ )
835
+ with col_btn:
836
+ st.markdown("<br>", unsafe_allow_html=True)
837
+ if st.button("πŸ”", key=f"ocr_cust_name_{selected_file}",
838
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_name') else "secondary"):
839
+ activate_ocr_field('Customer_supplier_details', 'Customer_name')
840
+
841
+ col_input, col_btn = st.columns([5, 1])
842
+ with col_input:
843
+ customer_supplier['Customer_address'] = st.text_area(
844
+ "Customer Address",
845
+ value=customer_supplier.get('Customer_address', ''),
846
+ key=f"cust_addr_{selected_file}",
847
+ height=60
848
+ )
849
+ with col_btn:
850
+ st.markdown("<br>", unsafe_allow_html=True)
851
+ if st.button("πŸ”", key=f"ocr_cust_addr_{selected_file}",
852
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_address') else "secondary"):
853
+ activate_ocr_field('Customer_supplier_details', 'Customer_address')
854
+
855
+ col_input, col_btn = st.columns([5, 1])
856
+ with col_input:
857
+ customer_supplier['Customer_contact_info'] = st.text_input(
858
+ "Customer Contact Info",
859
+ value=customer_supplier.get('Customer_contact_info', ''),
860
+ key=f"cust_contact_{selected_file}"
861
+ )
862
+ with col_btn:
863
+ st.markdown("<br>", unsafe_allow_html=True)
864
+ if st.button("πŸ”", key=f"ocr_cust_contact_{selected_file}",
865
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_contact_info') else "secondary"):
866
+ activate_ocr_field('Customer_supplier_details', 'Customer_contact_info')
867
 
868
  st.markdown("**Supplier Details**")
869
+
870
+ col_input, col_btn = st.columns([5, 1])
871
+ with col_input:
872
+ customer_supplier['Supplier_name'] = st.text_input(
873
+ "Supplier Name",
874
+ value=customer_supplier.get('Supplier_name', ''),
875
+ key=f"supp_name_{selected_file}"
876
+ )
877
+ with col_btn:
878
+ st.markdown("<br>", unsafe_allow_html=True)
879
+ if st.button("🔍", key=f"ocr_supp_name_{selected_file}",
880
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_name') else "secondary"):
881
+ activate_ocr_field('Customer_supplier_details', 'Supplier_name')
882
+
883
+ col_input, col_btn = st.columns([5, 1])
884
+ with col_input:
885
+ customer_supplier['Supplier_address'] = st.text_area(
886
+ "Supplier Address",
887
+ value=customer_supplier.get('Supplier_address', ''),
888
+ key=f"supp_addr_{selected_file}",
889
+ height=60
890
+ )
891
+ with col_btn:
892
+ st.markdown("<br>", unsafe_allow_html=True)
893
+ if st.button("🔍", key=f"ocr_supp_addr_{selected_file}",
894
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_address') else "secondary"):
895
+ activate_ocr_field('Customer_supplier_details', 'Supplier_address')
896
+
897
+ col_input, col_btn = st.columns([5, 1])
898
+ with col_input:
899
+ customer_supplier['Supplier_contact_info'] = st.text_input(
900
+ "Supplier Contact Info",
901
+ value=customer_supplier.get('Supplier_contact_info', ''),
902
+ key=f"supp_contact_{selected_file}"
903
+ )
904
+ with col_btn:
905
+ st.markdown("<br>", unsafe_allow_html=True)
906
+ if st.button("🔍", key=f"ocr_supp_contact_{selected_file}",
907
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_contact_info') else "secondary"):
908
+ activate_ocr_field('Customer_supplier_details', 'Supplier_contact_info')
909
 
910
  gt_parse['Customer_supplier_details'] = customer_supplier
911
 
912
  # TAB 3: Bank Details
913
  with tab3:
914
+ bank = gt_parse.get('Bank_details', {})
915
 
916
+ col_input, col_btn = st.columns([5, 1])
917
+ with col_input:
918
+ bank['Bank_name'] = st.text_input(
919
+ "Bank Name",
920
+ value=bank.get('Bank_name', ''),
921
+ key=f"bank_name_{selected_file}"
922
+ )
923
+ with col_btn:
924
+ st.markdown("<br>", unsafe_allow_html=True)
925
+ if st.button("🔍", key=f"ocr_bank_name_{selected_file}",
926
+ type="primary" if is_ocr_active('Bank_details', 'Bank_name') else "secondary"):
927
+ activate_ocr_field('Bank_details', 'Bank_name')
928
 
929
+ col_input, col_btn = st.columns([5, 1])
930
+ with col_input:
931
+ bank['Bank_acc_no'] = st.text_input(
932
+ "Bank Account No",
933
+ value=bank.get('Bank_acc_no', ''),
934
+ key=f"bank_acc_{selected_file}"
935
+ )
936
+ with col_btn:
937
+ st.markdown("<br>", unsafe_allow_html=True)
938
+ if st.button("🔍", key=f"ocr_bank_acc_{selected_file}",
939
+ type="primary" if is_ocr_active('Bank_details', 'Bank_acc_no') else "secondary"):
940
+ activate_ocr_field('Bank_details', 'Bank_acc_no')
 
 
 
 
 
 
 
 
 
 
 
941
 
942
+ col_input, col_btn = st.columns([5, 1])
943
+ with col_input:
944
+ bank['Bank_routing_no'] = st.text_input(
945
+ "Bank Routing No",
946
+ value=bank.get('Bank_routing_no', ''),
947
+ key=f"bank_routing_{selected_file}"
948
+ )
949
+ with col_btn:
950
+ st.markdown("<br>", unsafe_allow_html=True)
951
+ if st.button("🔍", key=f"ocr_bank_routing_{selected_file}",
952
+ type="primary" if is_ocr_active('Bank_details', 'Bank_routing_no') else "secondary"):
953
+ activate_ocr_field('Bank_details', 'Bank_routing_no')
954
 
955
+ col_input, col_btn = st.columns([5, 1])
956
+ with col_input:
957
+ bank['Swift_code'] = st.text_input(
958
+ "SWIFT Code",
959
+ value=bank.get('Swift_code', ''),
960
+ key=f"swift_{selected_file}"
961
+ )
962
+ with col_btn:
963
+ st.markdown("<br>", unsafe_allow_html=True)
964
+ if st.button("🔍", key=f"ocr_swift_{selected_file}",
965
+ type="primary" if is_ocr_active('Bank_details', 'Swift_code') else "secondary"):
966
+ activate_ocr_field('Bank_details', 'Swift_code')
 
 
 
 
 
 
 
 
967
 
968
  gt_parse['Bank_details'] = bank
969
 
970
  # TAB 4: Line Items
971
  with tab4:
972
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
973
+ line_items = current_gt_parse.get('Line_items', [])
974
 
975
+ # Add/Remove row buttons
976
+ col_add, col_remove = st.columns([1, 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
977
  with col_add:
978
+ if st.button("➕ Add New Row", key=f"add_row_{selected_file}", use_container_width=True):
 
979
  if not st.session_state.button_clicked:
980
  st.session_state.button_clicked = True
981
  new_row = {
982
+ "Po_number": "", "Invoice_no": "", "Other_doc_ref_no": "",
983
+ "Invoice_date": "", "Invoice_amount_FCY": "",
984
+ "Amount_paid_for_each_invoice": "", "Outstanding_balance_FCY": "",
985
+ "Discounts_taken_FCY": "", "Adjustments(without_holding_tax)_FCY": "",
 
 
 
 
 
986
  "Descriptions": ""
987
  }
988
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
989
+ current_line_items = current_gt_parse.get('Line_items', [])
990
+ current_line_items.append(new_row)
991
+ current_gt_parse['Line_items'] = current_line_items
992
+ st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
993
  st.session_state.modified_indices.add(selected_file)
994
+
995
+ # Ensure the newly added row's expander is open
996
+ new_idx = len(current_line_items) - 1
997
+ expander_key_new = f"line_item_expander_{selected_file}_{new_idx}"
998
+ st.session_state[expander_key_new] = True
999
+
1000
  st.rerun()
1001
 
1002
  with col_remove:
1003
+ if st.button("➖ Remove Last Row", key=f"remove_row_{selected_file}",
1004
+ disabled=(len(line_items) == 0), use_container_width=True):
1005
+ if not st.session_state.button_clicked and len(line_items) > 0:
1006
  st.session_state.button_clicked = True
1007
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
1008
+ current_line_items = current_gt_parse.get('Line_items', [])
1009
+ N = len(current_line_items)
1010
+ current_line_items.pop()
1011
+ current_gt_parse['Line_items'] = current_line_items
1012
+ st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
1013
  st.session_state.modified_indices.add(selected_file)
1014
+
1015
+ # Remove the expander flag for the popped row (if present)
1016
+ popped_idx = N - 1
1017
+ expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
1018
+ if expander_key_popped in st.session_state:
1019
+ del st.session_state[expander_key_popped]
1020
+
1021
  st.rerun()
1022
 
 
1023
  if st.session_state.button_clicked:
1024
  st.session_state.button_clicked = False
1025
 
1026
+ # Display each row as an expander with OCR buttons
1027
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
1028
+ line_items = current_gt_parse.get('Line_items', [])
 
 
 
 
 
 
 
 
1029
 
 
 
1030
  if line_items:
1031
+ for idx, item in enumerate(line_items):
1032
+ # Use a persistent session_state flag so expansion state is preserved across reruns.
1033
+ expander_key = f"line_item_expander_{selected_file}_{idx}"
1034
+ expanded_default = st.session_state.get(expander_key, False)
1035
+
1036
+ # Note: do NOT pass a 'key' arg to st.expander to maintain compatibility; control expanded via session_state flag.
1037
+ with st.expander(f"**Row {idx + 1}** - Invoice: {item.get('Invoice_no', 'N/A')}", expanded=expanded_default):
1038
+ # PO Number
1039
+ col_input, col_btn = st.columns([5, 1])
1040
+ with col_input:
1041
+ item['Po_number'] = st.text_input(
1042
+ "PO Number",
1043
+ value=item.get('Po_number', ''),
1044
+ key=f"po_num_{selected_file}_{idx}"
1045
+ )
1046
+ with col_btn:
1047
+ st.markdown("<br>", unsafe_allow_html=True)
1048
+ if st.button("🔍", key=f"ocr_po_{selected_file}_{idx}",
1049
+ type="primary" if is_ocr_active('Line_items', 'Po_number', idx) else "secondary"):
1050
+ # ensure expander stays open when user explicitly requests OCR
1051
+ st.session_state[expander_key] = True
1052
+ activate_ocr_field('Line_items', 'Po_number', idx)
1053
+
1054
+ # Invoice No
1055
+ col_input, col_btn = st.columns([5, 1])
1056
+ with col_input:
1057
+ item['Invoice_no'] = st.text_input(
1058
+ "Invoice No",
1059
+ value=item.get('Invoice_no', ''),
1060
+ key=f"inv_no_{selected_file}_{idx}"
1061
+ )
1062
+ with col_btn:
1063
+ st.markdown("<br>", unsafe_allow_html=True)
1064
+ if st.button("🔍", key=f"ocr_inv_{selected_file}_{idx}",
1065
+ type="primary" if is_ocr_active('Line_items', 'Invoice_no', idx) else "secondary"):
1066
+ st.session_state[expander_key] = True
1067
+ activate_ocr_field('Line_items', 'Invoice_no', idx)
1068
+
1069
+ # Other Doc Ref No
1070
+ col_input, col_btn = st.columns([5, 1])
1071
+ with col_input:
1072
+ item['Other_doc_ref_no'] = st.text_input(
1073
+ "Other Doc Ref No",
1074
+ value=item.get('Other_doc_ref_no', ''),
1075
+ key=f"other_doc_{selected_file}_{idx}"
1076
+ )
1077
+ with col_btn:
1078
+ st.markdown("<br>", unsafe_allow_html=True)
1079
+ if st.button("🔍", key=f"ocr_other_{selected_file}_{idx}",
1080
+ type="primary" if is_ocr_active('Line_items', 'Other_doc_ref_no', idx) else "secondary"):
1081
+ st.session_state[expander_key] = True
1082
+ activate_ocr_field('Line_items', 'Other_doc_ref_no', idx)
1083
+
1084
+ # Invoice Date
1085
+ col_input, col_btn = st.columns([5, 1])
1086
+ with col_input:
1087
+ item['Invoice_date'] = st.text_input(
1088
+ "Invoice Date",
1089
+ value=item.get('Invoice_date', ''),
1090
+ key=f"inv_date_{selected_file}_{idx}"
1091
+ )
1092
+ with col_btn:
1093
+ st.markdown("<br>", unsafe_allow_html=True)
1094
+ if st.button("🔍", key=f"ocr_inv_date_{selected_file}_{idx}",
1095
+ type="primary" if is_ocr_active('Line_items', 'Invoice_date', idx) else "secondary"):
1096
+ st.session_state[expander_key] = True
1097
+ activate_ocr_field('Line_items', 'Invoice_date', idx)
1098
+
1099
+ # Invoice Amount FCY
1100
+ col_input, col_btn = st.columns([5, 1])
1101
+ with col_input:
1102
+ item['Invoice_amount_FCY'] = st.text_input(
1103
+ "Invoice Amount FCY",
1104
+ value=item.get('Invoice_amount_FCY', ''),
1105
+ key=f"inv_amt_{selected_file}_{idx}"
1106
+ )
1107
+ with col_btn:
1108
+ st.markdown("<br>", unsafe_allow_html=True)
1109
+ if st.button("🔍", key=f"ocr_inv_amt_{selected_file}_{idx}",
1110
+ type="primary" if is_ocr_active('Line_items', 'Invoice_amount_FCY', idx) else "secondary"):
1111
+ st.session_state[expander_key] = True
1112
+ activate_ocr_field('Line_items', 'Invoice_amount_FCY', idx)
1113
+
1114
+ # Amount Paid
1115
+ col_input, col_btn = st.columns([5, 1])
1116
+ with col_input:
1117
+ item['Amount_paid_for_each_invoice'] = st.text_input(
1118
+ "Amount Paid",
1119
+ value=item.get('Amount_paid_for_each_invoice', ''),
1120
+ key=f"amt_paid_{selected_file}_{idx}"
1121
+ )
1122
+ with col_btn:
1123
+ st.markdown("<br>", unsafe_allow_html=True)
1124
+ if st.button("🔍", key=f"ocr_amt_paid_{selected_file}_{idx}",
1125
+ type="primary" if is_ocr_active('Line_items', 'Amount_paid_for_each_invoice', idx) else "secondary"):
1126
+ st.session_state[expander_key] = True
1127
+ activate_ocr_field('Line_items', 'Amount_paid_for_each_invoice', idx)
1128
+
1129
+ # Outstanding Balance
1130
+ col_input, col_btn = st.columns([5, 1])
1131
+ with col_input:
1132
+ item['Outstanding_balance_FCY'] = st.text_input(
1133
+ "Outstanding Balance FCY",
1134
+ value=item.get('Outstanding_balance_FCY', ''),
1135
+ key=f"out_bal_{selected_file}_{idx}"
1136
+ )
1137
+ with col_btn:
1138
+ st.markdown("<br>", unsafe_allow_html=True)
1139
+ if st.button("🔍", key=f"ocr_out_bal_{selected_file}_{idx}",
1140
+ type="primary" if is_ocr_active('Line_items', 'Outstanding_balance_FCY', idx) else "secondary"):
1141
+ st.session_state[expander_key] = True
1142
+ activate_ocr_field('Line_items', 'Outstanding_balance_FCY', idx)
1143
+
1144
+ # Discounts
1145
+ col_input, col_btn = st.columns([5, 1])
1146
+ with col_input:
1147
+ item['Discounts_taken_FCY'] = st.text_input(
1148
+ "Discounts Taken FCY",
1149
+ value=item.get('Discounts_taken_FCY', ''),
1150
+ key=f"disc_{selected_file}_{idx}"
1151
+ )
1152
+ with col_btn:
1153
+ st.markdown("<br>", unsafe_allow_html=True)
1154
+ if st.button("🔍", key=f"ocr_disc_{selected_file}_{idx}",
1155
+ type="primary" if is_ocr_active('Line_items', 'Discounts_taken_FCY', idx) else "secondary"):
1156
+ st.session_state[expander_key] = True
1157
+ activate_ocr_field('Line_items', 'Discounts_taken_FCY', idx)
1158
+
1159
+ # Adjustments
1160
+ col_input, col_btn = st.columns([5, 1])
1161
+ with col_input:
1162
+ item['Adjustments(without_holding_tax)_FCY'] = st.text_input(
1163
+ "Adjustments FCY",
1164
+ value=item.get('Adjustments(without_holding_tax)_FCY', ''),
1165
+ key=f"adj_{selected_file}_{idx}"
1166
+ )
1167
+ with col_btn:
1168
+ st.markdown("<br>", unsafe_allow_html=True)
1169
+ if st.button("🔍", key=f"ocr_adj_{selected_file}_{idx}",
1170
+ type="primary" if is_ocr_active('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) else "secondary"):
1171
+ st.session_state[expander_key] = True
1172
+ activate_ocr_field('Line_items', 'Adjustments(without_holding_tax)_FCY', idx)
1173
+
1174
+ # Descriptions
1175
+ col_input, col_btn = st.columns([5, 1])
1176
+ with col_input:
1177
+ item['Descriptions'] = st.text_area(
1178
+ "Descriptions",
1179
+ value=item.get('Descriptions', ''),
1180
+ key=f"desc_{selected_file}_{idx}",
1181
+ height=60
1182
+ )
1183
+ with col_btn:
1184
+ st.markdown("<br>", unsafe_allow_html=True)
1185
+ if st.button("🔍", key=f"ocr_desc_{selected_file}_{idx}",
1186
+ type="primary" if is_ocr_active('Line_items', 'Descriptions', idx) else "secondary"):
1187
+ st.session_state[expander_key] = True
1188
+ activate_ocr_field('Line_items', 'Descriptions', idx)
1189
 
1190
+ # Update line items back to gt_parse
1191
+ current_gt_parse['Line_items'] = line_items
 
 
1192
 
1193
+ st.markdown("**📊 Line Items Summary Table**")
 
 
1194
 
1195
+ # Display summary table with index starting from 1
1196
+ df = pd.DataFrame(line_items)
1197
+ df.index = df.index + 1 # Start index from 1
1198
+ df.index.name = 'SL No'
 
 
 
 
 
 
 
 
1199
 
1200
+ st.dataframe(
1201
  df,
 
 
1202
  use_container_width=True,
1203
+ height=300
 
1204
  )
 
 
 
 
 
 
 
1205
  else:
1206
+ st.info("No line items. Click '➕ Add New Row' to add a new row.")
1207
 
 
1208
  st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
1209
 
1210
  # Save button
 
1211
  col1, col2 = st.columns([1, 1])
1212
  with col1:
1213
  if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
 
1218
  st.session_state.save_message_time = time.time()
1219
  st.rerun()
1220
 
 
1221
  if st.session_state.just_saved:
1222
  st.session_state.just_saved = False
1223
 
 
1224
  if st.session_state.save_message:
1225
+ st.success(st.session_state.save_message)