Bhuvi13 committed on
Commit
6f3fd48
·
verified ·
1 Parent(s): 406dba2

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +470 -373
src/streamlit_app.py CHANGED
@@ -33,14 +33,12 @@ from datetime import datetime
33
  if os.name == 'nt': # Windows
34
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
35
  else: # Linux/Mac (HF Spaces uses Linux)
36
- # On HF Spaces with packages.txt, tesseract is in system PATH
37
- # No need to set path explicitly
38
  pass
39
 
40
  # Page configuration
41
  st.set_page_config(page_title="Remittance Data Viewer", layout="wide")
42
 
43
- # Custom CSS to reduce gaps between form fields and style buttons
44
  st.markdown("""
45
  <style>
46
  /* Reduce spacing between form fields */
@@ -56,21 +54,31 @@ st.markdown("""
56
  margin-bottom: 4px !important;
57
  }
58
 
59
- /* Reduce gap between selectbox and following elements */
60
  .stSelectbox {
61
  margin-bottom: 4px !important;
62
  }
63
 
64
- /* Style for small buttons */
65
  .stButton > button {
66
  padding: 0.25rem 0.5rem !important;
67
- font-size: 1.2rem !important;
68
  line-height: 1 !important;
69
- min-height: 2rem !important;
70
- height: 2rem !important;
 
 
 
 
 
 
 
 
 
 
 
 
71
  }
72
 
73
- /* Reduce padding in form containers */
74
  [data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] {
75
  gap: 0.25rem !important;
76
  }
@@ -84,6 +92,12 @@ st.markdown("""
84
  [data-testid="stHorizontalBlock"] {
85
  gap: 0.5rem !important;
86
  }
 
 
 
 
 
 
87
  </style>
88
  """, unsafe_allow_html=True)
89
 
@@ -104,17 +118,10 @@ def save_to_jsonl(data):
104
  def perform_ocr(image, bbox):
105
  """Perform OCR on the selected region of the image"""
106
  try:
107
- # bbox is [x1, y1, x2, y2]
108
  x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
109
-
110
- # Ensure coordinates are within image bounds
111
  x1, y1 = max(0, x1), max(0, y1)
112
  x2, y2 = min(image.width, x2), min(image.height, y2)
113
-
114
- # Crop the image
115
  cropped = image.crop((x1, y1, x2, y2))
116
-
117
- # Perform OCR
118
  text = pytesseract.image_to_string(cropped, config='--psm 6').strip()
119
  return text
120
  except Exception as e:
@@ -122,42 +129,28 @@ def perform_ocr(image, bbox):
122
 
123
  def scale_image_to_fixed_size(image, target_width=700, target_height=900):
124
  """Scale and pad image to exact fixed size while maintaining aspect ratio and quality"""
125
- # Convert image to RGB if it's not already (handles RGBA, L, etc.)
126
  if image.mode not in ('RGB', 'RGBA'):
127
  image = image.convert('RGB')
128
  elif image.mode == 'RGBA':
129
- # Create white background for transparent images
130
  background = Image.new('RGB', image.size, (255, 255, 255))
131
- background.paste(image, mask=image.split()[3]) # Use alpha channel as mask
132
  image = background
133
 
134
- # Calculate scaling ratio to fit within target dimensions
135
  width_ratio = target_width / image.width
136
  height_ratio = target_height / image.height
137
-
138
- # Use the smaller ratio to ensure image fits within both constraints
139
  ratio = min(width_ratio, height_ratio)
140
 
141
- # Calculate new dimensions
142
  new_width = int(image.width * ratio)
143
  new_height = int(image.height * ratio)
144
 
145
- # Resize image with high-quality LANCZOS resampling
146
- # Only resize if needed (don't upscale small images too much)
147
  if ratio < 1.0 or (ratio > 1.0 and ratio < 1.5):
148
  resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
149
  else:
150
- # For significant upscaling, use BICUBIC which can be sharper
151
  resized_image = image.resize((new_width, new_height), Image.Resampling.BICUBIC)
152
 
153
- # Create a new image with target size and white background
154
  final_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))
155
-
156
- # Calculate position to paste resized image (center it)
157
  paste_x = (target_width - new_width) // 2
158
  paste_y = (target_height - new_height) // 2
159
-
160
- # Paste resized image onto white background
161
  final_image.paste(resized_image, (paste_x, paste_y))
162
 
163
  return final_image, ratio, paste_x, paste_y
@@ -194,27 +187,30 @@ if 'save_message_time' not in st.session_state:
194
  if 'just_saved' not in st.session_state:
195
  st.session_state.just_saved = False
196
 
197
- # Auto-save function
198
  def auto_save(index):
199
  """Automatically save changes to session state and mark as modified"""
200
  if st.session_state.edited_data:
201
  st.session_state.data = st.session_state.edited_data.copy()
202
  st.session_state.modified_indices.add(index)
203
 
204
- # Save button callback
205
- def save_changes_callback():
206
- """Callback function for save button"""
207
- auto_save(st.session_state.current_index)
208
- st.session_state.save_message = "βœ… Changes saved successfully!"
209
- st.session_state.save_message_time = time.time()
 
 
 
 
 
 
210
 
211
  # PAGE 1: Upload Page
212
  if st.session_state.page == 'upload':
213
  st.title("πŸ“€ Remittance Data Viewer with OCR")
214
  st.markdown("### Upload your files to begin")
215
 
216
- # Step 1: Upload JSONL
217
-
218
  st.markdown("**Step 1: Upload JSONL File**")
219
  uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])
220
 
@@ -227,11 +223,8 @@ if st.session_state.page == 'upload':
227
  except Exception as e:
228
  st.error(f"Error loading file: {str(e)}")
229
 
230
- # Step 2: Upload Images
231
-
232
  st.markdown("**Step 2: Upload Images Folder**")
233
 
234
-
235
  uploaded_images = st.file_uploader(
236
  "Choose image files",
237
  type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp'],
@@ -240,7 +233,6 @@ if st.session_state.page == 'upload':
240
  )
241
 
242
  if uploaded_images:
243
- # Load images into session state
244
  images_dict = {}
245
  for img_file in uploaded_images:
246
  try:
@@ -250,22 +242,18 @@ if st.session_state.page == 'upload':
250
  st.warning(f"Could not load image {img_file.name}: {str(e)}")
251
 
252
  st.session_state.images = images_dict
253
- # Show summary of loaded images and matches with ground truth
254
  if st.session_state.data is not None:
255
- # gather ground truth file names
256
  gt_file_names = [rec.get('file_name', '') for rec in st.session_state.data]
257
  matched_images = set()
258
  unmatched_gt_files = []
259
 
260
- # Find matched images - CASE SENSITIVE EXACT MATCH ONLY
261
  for fname in gt_file_names:
262
  if not fname:
263
  continue
264
- # Check for exact match in uploaded images
265
  if fname in images_dict:
266
  matched_images.add(fname)
267
 
268
- # Find unmatched ground truth file names
269
  for fname in gt_file_names:
270
  if fname and fname not in matched_images:
271
  unmatched_gt_files.append(fname)
@@ -273,7 +261,6 @@ if st.session_state.page == 'upload':
273
  st.success(f"βœ… Successfully loaded {len(images_dict)} images!")
274
  st.info(f"πŸ”Ž Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}")
275
 
276
- # Show unmatched files
277
  if unmatched_gt_files:
278
  st.warning(f"⚠️ {len(unmatched_gt_files)} file(s) from JSONL not matched to images:")
279
  with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
@@ -285,8 +272,6 @@ if st.session_state.page == 'upload':
285
  st.success(f"βœ… Successfully loaded {len(images_dict)} images!")
286
  st.info("ℹ️ Upload a JSONL file to see how many images match the ground truth 'file_name' field.")
287
 
288
- # Continue Button
289
-
290
  if st.session_state.data is not None:
291
  col1, col2, col3 = st.columns([1, 1, 1])
292
  with col2:
@@ -297,16 +282,13 @@ if st.session_state.page == 'upload':
297
 
298
  # PAGE 2: Viewer Page
299
  elif st.session_state.page == 'viewer':
300
- # Clear old save messages (after 3 seconds)
301
  if st.session_state.save_message_time is not None:
302
  if time.time() - st.session_state.save_message_time > 3:
303
  st.session_state.save_message = None
304
  st.session_state.save_message_time = None
305
 
306
- # Get today's date for filename
307
  today_date = datetime.now().strftime("%Y-%m-%d")
308
 
309
- # Header with back button and download options
310
  col1, col2, col3, col4 = st.columns([1, 2, 2, 2])
311
 
312
  with col1:
@@ -318,7 +300,6 @@ elif st.session_state.page == 'viewer':
318
  st.session_state.save_message_time = None
319
  st.rerun()
320
 
321
- # Download modified records and unmodified records separately
322
  with col2:
323
  if st.session_state.modified_indices:
324
  modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)]
@@ -332,16 +313,10 @@ elif st.session_state.page == 'viewer':
332
  use_container_width=True
333
  )
334
  else:
335
- st.button(
336
- "⬇️ No Modified Records",
337
- disabled=True,
338
- use_container_width=True
339
- )
340
 
341
- # Download unmodified records (original data excluding modified)
342
  with col3:
343
  if st.session_state.modified_indices:
344
- # Get original unmodified data
345
  unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
346
  if i not in st.session_state.modified_indices]
347
  jsonl_unmodified = save_to_jsonl(unmodified_data)
@@ -353,13 +328,8 @@ elif st.session_state.page == 'viewer':
353
  use_container_width=True
354
  )
355
  else:
356
- st.button(
357
- "⬇️ No Unmodified Records",
358
- disabled=True,
359
- use_container_width=True
360
- )
361
 
362
- # Download all edited data
363
  with col4:
364
  jsonl_all = save_to_jsonl(st.session_state.edited_data)
365
  st.download_button(
@@ -369,11 +339,7 @@ elif st.session_state.page == 'viewer':
369
  mime="application/jsonl",
370
  use_container_width=True
371
  )
372
-
373
 
374
-
375
-
376
- # File selector dropdown
377
  file_names = [record.get('file_name', f'Record {i}') for i, record in enumerate(st.session_state.data)]
378
 
379
  selected_file = st.selectbox(
@@ -386,9 +352,6 @@ elif st.session_state.page == 'viewer':
386
  st.session_state.current_index = selected_file
387
  current_record = st.session_state.edited_data[selected_file]
388
 
389
-
390
-
391
- # Main layout: LHS (Image) and RHS (Details) - REDUCED GAP
392
  left_col, right_col = st.columns([1.3, 1], gap="small")
393
 
394
  # LEFT SIDE: Image Display with OCR Canvas
@@ -400,7 +363,6 @@ elif st.session_state.page == 'viewer':
400
  if file_name:
401
  st.caption(f"**File:** {file_name}")
402
 
403
- # Try to find matching image - CASE SENSITIVE EXACT MATCH ONLY
404
  current_image = None
405
  if file_name in st.session_state.images:
406
  current_image = st.session_state.images[file_name]
@@ -414,10 +376,8 @@ elif st.session_state.page == 'viewer':
414
  st.text(f" ... and {len(st.session_state.images) - 20} more")
415
 
416
  if current_image:
417
- # Scale image to fixed size
418
  scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
419
 
420
- # Always show canvas for drawing rectangles
421
  canvas_result = st_canvas(
422
  fill_color="rgba(255, 165, 0, 0.3)",
423
  stroke_width=2,
@@ -430,14 +390,11 @@ elif st.session_state.page == 'viewer':
430
  key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
431
  )
432
 
433
- # Process OCR when rectangle is drawn and field is selected
434
  if canvas_result.json_data is not None and st.session_state.ocr_active_field:
435
  objects = canvas_result.json_data["objects"]
436
  if len(objects) > 0:
437
- # Get the last drawn rectangle
438
  rect = objects[-1]
439
 
440
- # Adjust coordinates for padding and scale back to original image coordinates
441
  bbox = [
442
  (rect["left"] - paste_x) / scale_ratio,
443
  (rect["top"] - paste_y) / scale_ratio,
@@ -445,25 +402,21 @@ elif st.session_state.page == 'viewer':
445
  (rect["top"] + rect["height"] - paste_y) / scale_ratio
446
  ]
447
 
448
- # Perform OCR on original image
449
  with st.spinner("Performing OCR..."):
450
  ocr_text = perform_ocr(current_image, bbox)
451
 
452
  if ocr_text and not ocr_text.startswith("OCR Error"):
453
  st.success(f"βœ… OCR Result: {ocr_text}")
454
 
455
- # Update the field value
456
  gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
457
 
458
  if st.session_state.ocr_active_section == 'Line_items':
459
- # Handle line items
460
  line_items = gt_parse.get('Line_items', [])
461
  row_idx = st.session_state.ocr_line_item_row
462
  if row_idx is not None and row_idx < len(line_items):
463
  line_items[row_idx][st.session_state.ocr_active_field] = ocr_text
464
  gt_parse['Line_items'] = line_items
465
  else:
466
- # Handle other sections
467
  section = st.session_state.ocr_active_section
468
  field = st.session_state.ocr_active_field
469
  if section not in gt_parse:
@@ -471,10 +424,10 @@ elif st.session_state.page == 'viewer':
471
  gt_parse[section][field] = ocr_text
472
 
473
  st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
 
474
 
475
- # Clear canvas and reset
476
  st.session_state.canvas_key += 1
477
- time.sleep(0.3)
478
  st.rerun()
479
  else:
480
  st.error(ocr_text)
@@ -485,9 +438,8 @@ elif st.session_state.page == 'viewer':
485
  with right_col:
486
  st.markdown("### πŸ“ Document Details")
487
 
488
- gt_parse = current_record.get('gt_parse', {})
489
 
490
- # Create tabs for each section
491
  tab1, tab2, tab3, tab4 = st.tabs([
492
  "πŸ“„ Remittance Details",
493
  "πŸ‘₯ Party Details",
@@ -497,340 +449,487 @@ elif st.session_state.page == 'viewer':
497
 
498
  # TAB 1: Remittance Details
499
  with tab1:
500
-
501
-
502
- # OCR Field Selector
503
- remittance_fields = [
504
- 'Select fields',
505
- 'Remittance_adv_no',
506
- 'Remittance_adv_date',
507
- 'Payment_method',
508
- 'FCY',
509
- 'Total_payment_amt_FCY',
510
- 'Payment_date',
511
- 'Payment_ref_no'
512
- ]
513
-
514
- selected_rem_field = st.selectbox(
515
- "πŸ” Select field to populate via OCR:",
516
- options=remittance_fields,
517
- key=f"rem_ocr_select_{selected_file}"
518
- )
519
-
520
- if selected_rem_field != 'Select fields':
521
- st.session_state.ocr_active_section = 'Remittance_details'
522
- st.session_state.ocr_active_field = selected_rem_field
523
- st.session_state.ocr_line_item_row = None
524
- else:
525
- if st.session_state.ocr_active_section == 'Remittance_details':
526
- st.session_state.ocr_active_section = None
527
- st.session_state.ocr_active_field = None
528
-
529
  remittance = gt_parse.get('Remittance_details', {})
530
 
531
- remittance['Remittance_adv_no'] = st.text_input(
532
- "Remittance Advice No",
533
- value=remittance.get('Remittance_adv_no', ''),
534
- key=f"rem_adv_no_{selected_file}"
535
- )
536
- remittance['Remittance_adv_date'] = st.text_input(
537
- "Remittance Advice Date",
538
- value=remittance.get('Remittance_adv_date', ''),
539
- key=f"rem_adv_date_{selected_file}"
540
- )
541
- remittance['Payment_method'] = st.text_input(
542
- "Payment Method",
543
- value=remittance.get('Payment_method', ''),
544
- key=f"payment_method_{selected_file}"
545
- )
546
- remittance['FCY'] = st.text_input(
547
- "FCY (Foreign Currency)",
548
- value=remittance.get('FCY', ''),
549
- key=f"fcy_{selected_file}"
550
- )
551
- remittance['Total_payment_amt_FCY'] = st.text_input(
552
- "Total Payment Amount (FCY)",
553
- value=remittance.get('Total_payment_amt_FCY', ''),
554
- key=f"total_payment_{selected_file}"
555
- )
556
- remittance['Payment_date'] = st.text_input(
557
- "Payment Date",
558
- value=remittance.get('Payment_date', ''),
559
- key=f"payment_date_{selected_file}"
560
- )
561
- remittance['Payment_ref_no'] = st.text_input(
562
- "Payment Reference No",
563
- value=remittance.get('Payment_ref_no', ''),
564
- key=f"payment_ref_{selected_file}"
565
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
567
  gt_parse['Remittance_details'] = remittance
568
 
569
  # TAB 2: Customer/Supplier Details
570
  with tab2:
571
-
572
-
573
- # OCR Field Selector
574
- customer_fields = [
575
- 'Select fields',
576
- 'Customer_name',
577
- 'Customer_address',
578
- 'Customer_contact_info',
579
- 'Supplier_name',
580
- 'Supplier_address',
581
- 'Supplier_contact_info'
582
- ]
583
-
584
- selected_cust_field = st.selectbox(
585
- "πŸ” Select field to populate via OCR:",
586
- options=customer_fields,
587
- key=f"cust_ocr_select_{selected_file}"
588
- )
589
-
590
- if selected_cust_field != 'Select fields':
591
- st.session_state.ocr_active_section = 'Customer_supplier_details'
592
- st.session_state.ocr_active_field = selected_cust_field
593
- st.session_state.ocr_line_item_row = None
594
- else:
595
- if st.session_state.ocr_active_section == 'Customer_supplier_details':
596
- st.session_state.ocr_active_section = None
597
- st.session_state.ocr_active_field = None
598
-
599
  st.markdown("**Customer Details**")
600
  customer_supplier = gt_parse.get('Customer_supplier_details', {})
601
 
602
- customer_supplier['Customer_name'] = st.text_input(
603
- "Customer Name",
604
- value=customer_supplier.get('Customer_name', ''),
605
- key=f"cust_name_{selected_file}"
606
- )
607
- customer_supplier['Customer_address'] = st.text_area(
608
- "Customer Address",
609
- value=customer_supplier.get('Customer_address', ''),
610
- key=f"cust_addr_{selected_file}",
611
- height=60
612
- )
613
- customer_supplier['Customer_contact_info'] = st.text_input(
614
- "Customer Contact Info",
615
- value=customer_supplier.get('Customer_contact_info', ''),
616
- key=f"cust_contact_{selected_file}"
617
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
 
619
  st.markdown("**Supplier Details**")
620
- customer_supplier['Supplier_name'] = st.text_input(
621
- "Supplier Name",
622
- value=customer_supplier.get('Supplier_name', ''),
623
- key=f"supp_name_{selected_file}"
624
- )
625
- customer_supplier['Supplier_address'] = st.text_area(
626
- "Supplier Address",
627
- value=customer_supplier.get('Supplier_address', ''),
628
- key=f"supp_addr_{selected_file}",
629
- height=60
630
- )
631
- customer_supplier['Supplier_contact_info'] = st.text_input(
632
- "Supplier Contact Info",
633
- value=customer_supplier.get('Supplier_contact_info', ''),
634
- key=f"supp_contact_{selected_file}"
635
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
 
637
  gt_parse['Customer_supplier_details'] = customer_supplier
638
 
639
  # TAB 3: Bank Details
640
  with tab3:
641
-
642
-
643
- # OCR Field Selector
644
- bank_fields = [
645
- 'Select fields',
646
- 'Bank_name',
647
- 'Bank_acc_no',
648
- 'Bank_routing_no',
649
- 'Swift_code'
650
- ]
651
-
652
- selected_bank_field = st.selectbox(
653
- "πŸ” Select field to populate via OCR:",
654
- options=bank_fields,
655
- key=f"bank_ocr_select_{selected_file}"
656
- )
657
-
658
- if selected_bank_field != 'Select fields':
659
- st.session_state.ocr_active_section = 'Bank_details'
660
- st.session_state.ocr_active_field = selected_bank_field
661
- st.session_state.ocr_line_item_row = None
662
- else:
663
- if st.session_state.ocr_active_section == 'Bank_details':
664
- st.session_state.ocr_active_section = None
665
- st.session_state.ocr_active_field = None
666
-
667
  bank = gt_parse.get('Bank_details', {})
668
 
669
- bank['Bank_name'] = st.text_input(
670
- "Bank Name",
671
- value=bank.get('Bank_name', ''),
672
- key=f"bank_name_{selected_file}"
673
- )
674
- bank['Bank_acc_no'] = st.text_input(
675
- "Bank Account No",
676
- value=bank.get('Bank_acc_no', ''),
677
- key=f"bank_acc_{selected_file}"
678
- )
679
- bank['Bank_routing_no'] = st.text_input(
680
- "Bank Routing No",
681
- value=bank.get('Bank_routing_no', ''),
682
- key=f"bank_routing_{selected_file}"
683
- )
684
- bank['Swift_code'] = st.text_input(
685
- "SWIFT Code",
686
- value=bank.get('Swift_code', ''),
687
- key=f"swift_{selected_file}"
688
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
689
 
690
  gt_parse['Bank_details'] = bank
691
 
692
  # TAB 4: Line Items
693
  with tab4:
 
 
694
 
695
-
696
- # OCR Controls for Line Items - Fixed layout
697
- line_items = gt_parse.get('Line_items', [])
698
-
699
- # Adjusted column widths - all controls in single compact line
700
- col_field, col_row, col_add, col_remove = st.columns([1.5, 0.7, 0.30, 0.30])
701
-
702
- line_item_fields = [
703
- 'Select fields',
704
- 'Po_number',
705
- 'Invoice_no',
706
- 'Other_doc_ref_no',
707
- 'Invoice_date',
708
- 'Invoice_amount_FCY',
709
- 'Amount_paid_for_each_invoice',
710
- 'Outstanding_balance_FCY',
711
- 'Discounts_taken_FCY',
712
- 'Adjustments(without_holding_tax)_FCY',
713
- 'Descriptions'
714
- ]
715
-
716
- with col_field:
717
- selected_line_field = st.selectbox(
718
- "πŸ” Field:",
719
- options=line_item_fields,
720
- key=f"line_ocr_field_{selected_file}"
721
- )
722
-
723
- with col_row:
724
- if len(line_items) > 0:
725
- selected_row = st.selectbox(
726
- "Row:",
727
- options=list(range(len(line_items))),
728
- format_func=lambda x: f"Row {x + 1}",
729
- key=f"line_ocr_row_{selected_file}"
730
- )
731
- else:
732
- st.selectbox("Row:", options=[], disabled=True, key=f"line_ocr_row_empty_{selected_file}")
733
- selected_row = None
734
-
735
  with col_add:
736
- # Use button with on_click callback to prevent loop
737
- if st.button("βž•", key=f"add_row_{selected_file}", help="Add new row"):
738
  if not st.session_state.button_clicked:
739
  st.session_state.button_clicked = True
740
  new_row = {
741
- "Po_number": "",
742
- "Invoice_no": "",
743
- "Other_doc_ref_no": "",
744
- "Invoice_date": "",
745
- "Invoice_amount_FCY": "",
746
- "Amount_paid_for_each_invoice": "",
747
- "Outstanding_balance_FCY": "",
748
- "Discounts_taken_FCY": "",
749
- "Adjustments(without_holding_tax)_FCY": "",
750
  "Descriptions": ""
751
  }
752
- line_items.append(new_row)
753
- gt_parse['Line_items'] = line_items
754
- st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
 
 
755
  st.session_state.modified_indices.add(selected_file)
756
  st.rerun()
757
 
758
  with col_remove:
759
- if st.button("βž–", key=f"remove_row_{selected_file}", help="Remove selected row", disabled=(len(line_items) == 0)):
760
- if not st.session_state.button_clicked and len(line_items) > 0 and selected_row is not None:
 
761
  st.session_state.button_clicked = True
762
- line_items.pop(selected_row)
763
- gt_parse['Line_items'] = line_items
764
- st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
 
 
765
  st.session_state.modified_indices.add(selected_file)
766
  st.rerun()
767
 
768
- # Reset button clicked flag after processing
769
  if st.session_state.button_clicked:
770
  st.session_state.button_clicked = False
771
 
772
- # Set OCR state for line items
773
- if selected_line_field != 'Select fields' and selected_row is not None:
774
- st.session_state.ocr_active_section = 'Line_items'
775
- st.session_state.ocr_active_field = selected_line_field
776
- st.session_state.ocr_line_item_row = selected_row
777
- else:
778
- if st.session_state.ocr_active_section == 'Line_items':
779
- st.session_state.ocr_active_section = None
780
- st.session_state.ocr_active_field = None
781
- st.session_state.ocr_line_item_row = None
782
-
783
 
 
 
 
784
 
785
- # Display line items table
786
  if line_items:
787
- df = pd.DataFrame(line_items)
788
-
789
- # Set index to start from 1 instead of 0
790
- df.index = range(1, len(df) + 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791
 
792
- # Convert amount fields to numeric
793
- amount_fields = ['Invoice_amount_FCY', 'Amount_paid_for_each_invoice',
794
- 'Outstanding_balance_FCY', 'Discounts_taken_FCY',
795
- 'Adjustments(without_holding_tax)_FCY']
796
 
797
- for field in amount_fields:
798
- if field in df.columns:
799
- df[field] = pd.to_numeric(df[field].replace('', None), errors='coerce')
800
 
801
- column_config = {
802
- "Po_number": st.column_config.TextColumn("PO Number", width="medium"),
803
- "Invoice_no": st.column_config.TextColumn("Invoice No", width="medium"),
804
- "Other_doc_ref_no": st.column_config.TextColumn("Other Doc Ref No", width="medium"),
805
- "Invoice_date": st.column_config.TextColumn("Invoice Date", width="medium"),
806
- "Invoice_amount_FCY": st.column_config.NumberColumn("Invoice Amt FCY", width="medium", format="%.2f"),
807
- "Amount_paid_for_each_invoice": st.column_config.NumberColumn("Amount Paid", width="medium", format="%.2f"),
808
- "Outstanding_balance_FCY": st.column_config.NumberColumn("Outstanding FCY", width="medium", format="%.2f"),
809
- "Discounts_taken_FCY": st.column_config.NumberColumn("Discounts FCY", width="medium", format="%.2f"),
810
- "Adjustments(without_holding_tax)_FCY": st.column_config.NumberColumn("Adjustments FCY", width="medium", format="%.2f"),
811
- "Descriptions": st.column_config.TextColumn("Descriptions", width="medium"),
812
- }
813
 
814
- edited_df = st.data_editor(
815
  df,
816
- column_config=column_config,
817
- num_rows="fixed",
818
  use_container_width=True,
819
- key=f"line_items_table_{selected_file}",
820
- hide_index=False
821
  )
822
-
823
- # Convert back to string and reset index to 0-based for storage
824
- edited_df.index = range(len(edited_df))
825
- for field in amount_fields:
826
- if field in edited_df.columns:
827
- edited_df[field] = edited_df[field].apply(lambda x: str(x) if pd.notna(x) else '')
828
-
829
- gt_parse['Line_items'] = edited_df.to_dict('records')
830
  else:
831
- st.info("No line items. Click βž• to add a new row.")
832
 
833
- # Update the edited data
834
  st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
835
 
836
  # Save button
@@ -845,10 +944,8 @@ elif st.session_state.page == 'viewer':
845
  st.session_state.save_message_time = time.time()
846
  st.rerun()
847
 
848
- # Reset the just_saved flag after rerun
849
  if st.session_state.just_saved:
850
  st.session_state.just_saved = False
851
 
852
- # Display save message under the button (appears after rerun)
853
  if st.session_state.save_message:
854
- st.success(st.session_state.save_message)
 
33
  if os.name == 'nt': # Windows
34
  pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
35
  else: # Linux/Mac (HF Spaces uses Linux)
 
 
36
  pass
37
 
38
  # Page configuration
39
  st.set_page_config(page_title="Remittance Data Viewer", layout="wide")
40
 
41
+ # Custom CSS
42
  st.markdown("""
43
  <style>
44
  /* Reduce spacing between form fields */
 
54
  margin-bottom: 4px !important;
55
  }
56
 
 
57
  .stSelectbox {
58
  margin-bottom: 4px !important;
59
  }
60
 
61
+ /* Style for OCR buttons - small and compact */
62
  .stButton > button {
63
  padding: 0.25rem 0.5rem !important;
64
+ font-size: 0.85rem !important;
65
  line-height: 1 !important;
66
+ min-height: 1.8rem !important;
67
+ height: 1.8rem !important;
68
+ }
69
+
70
+ /* Active OCR buttons - RED highlight */
71
+ .stButton > button[kind="primary"] {
72
+ background-color: #FF0000 !important;
73
+ border-color: #FF0000 !important;
74
+ color: white !important;
75
+ }
76
+
77
+ .stButton > button[kind="primary"]:hover {
78
+ background-color: #CC0000 !important;
79
+ border-color: #CC0000 !important;
80
  }
81
 
 
82
  [data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] {
83
  gap: 0.25rem !important;
84
  }
 
92
  [data-testid="stHorizontalBlock"] {
93
  gap: 0.5rem !important;
94
  }
95
+
96
+ /* Active OCR field highlighting */
97
+ .ocr-active {
98
+ border: 2px solid #ff4b4b !important;
99
+ box-shadow: 0 0 5px rgba(255, 75, 75, 0.5) !important;
100
+ }
101
  </style>
102
  """, unsafe_allow_html=True)
103
 
 
118
  def perform_ocr(image, bbox):
119
  """Perform OCR on the selected region of the image"""
120
  try:
 
121
  x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
 
 
122
  x1, y1 = max(0, x1), max(0, y1)
123
  x2, y2 = min(image.width, x2), min(image.height, y2)
 
 
124
  cropped = image.crop((x1, y1, x2, y2))
 
 
125
  text = pytesseract.image_to_string(cropped, config='--psm 6').strip()
126
  return text
127
  except Exception as e:
 
129
 
130
  def scale_image_to_fixed_size(image, target_width=700, target_height=900):
131
  """Scale and pad image to exact fixed size while maintaining aspect ratio and quality"""
 
132
  if image.mode not in ('RGB', 'RGBA'):
133
  image = image.convert('RGB')
134
  elif image.mode == 'RGBA':
 
135
  background = Image.new('RGB', image.size, (255, 255, 255))
136
+ background.paste(image, mask=image.split()[3])
137
  image = background
138
 
 
139
  width_ratio = target_width / image.width
140
  height_ratio = target_height / image.height
 
 
141
  ratio = min(width_ratio, height_ratio)
142
 
 
143
  new_width = int(image.width * ratio)
144
  new_height = int(image.height * ratio)
145
 
 
 
146
  if ratio < 1.0 or (ratio > 1.0 and ratio < 1.5):
147
  resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
148
  else:
 
149
  resized_image = image.resize((new_width, new_height), Image.Resampling.BICUBIC)
150
 
 
151
  final_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))
 
 
152
  paste_x = (target_width - new_width) // 2
153
  paste_y = (target_height - new_height) // 2
 
 
154
  final_image.paste(resized_image, (paste_x, paste_y))
155
 
156
  return final_image, ratio, paste_x, paste_y
 
187
  if 'just_saved' not in st.session_state:
188
  st.session_state.just_saved = False
189
 
 
190
  def auto_save(index):
191
  """Automatically save changes to session state and mark as modified"""
192
  if st.session_state.edited_data:
193
  st.session_state.data = st.session_state.edited_data.copy()
194
  st.session_state.modified_indices.add(index)
195
 
196
def activate_ocr_field(section, field, row_idx=None):
    """Select the field that the next OCR result should be written to.

    Stores ``section``, ``field`` and ``row_idx`` in the session state and
    then triggers a Streamlit rerun.
    """
    state = st.session_state
    state.ocr_active_section = section
    state.ocr_active_field = field
    state.ocr_line_item_row = row_idx
    st.rerun()
202
+
203
def is_ocr_active(section, field, row_idx=None):
    """Return True when (section, field, row_idx) is the active OCR target.

    The three session-state values are compared in order, stopping at the
    first mismatch.
    """
    state = st.session_state
    if not (state.ocr_active_section == section):
        return False
    if not (state.ocr_active_field == field):
        return False
    return state.ocr_line_item_row == row_idx
208
 
209
  # PAGE 1: Upload Page
210
  if st.session_state.page == 'upload':
211
  st.title("πŸ“€ Remittance Data Viewer with OCR")
212
  st.markdown("### Upload your files to begin")
213
 
 
 
214
  st.markdown("**Step 1: Upload JSONL File**")
215
  uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])
216
 
 
223
  except Exception as e:
224
  st.error(f"Error loading file: {str(e)}")
225
 
 
 
226
  st.markdown("**Step 2: Upload Images Folder**")
227
 
 
228
  uploaded_images = st.file_uploader(
229
  "Choose image files",
230
  type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp'],
 
233
  )
234
 
235
  if uploaded_images:
 
236
  images_dict = {}
237
  for img_file in uploaded_images:
238
  try:
 
242
  st.warning(f"Could not load image {img_file.name}: {str(e)}")
243
 
244
  st.session_state.images = images_dict
245
+
246
  if st.session_state.data is not None:
 
247
  gt_file_names = [rec.get('file_name', '') for rec in st.session_state.data]
248
  matched_images = set()
249
  unmatched_gt_files = []
250
 
 
251
  for fname in gt_file_names:
252
  if not fname:
253
  continue
 
254
  if fname in images_dict:
255
  matched_images.add(fname)
256
 
 
257
  for fname in gt_file_names:
258
  if fname and fname not in matched_images:
259
  unmatched_gt_files.append(fname)
 
261
  st.success(f"βœ… Successfully loaded {len(images_dict)} images!")
262
  st.info(f"πŸ”Ž Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}")
263
 
 
264
  if unmatched_gt_files:
265
  st.warning(f"⚠️ {len(unmatched_gt_files)} file(s) from JSONL not matched to images:")
266
  with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
 
272
  st.success(f"βœ… Successfully loaded {len(images_dict)} images!")
273
  st.info("ℹ️ Upload a JSONL file to see how many images match the ground truth 'file_name' field.")
274
 
 
 
275
  if st.session_state.data is not None:
276
  col1, col2, col3 = st.columns([1, 1, 1])
277
  with col2:
 
282
 
283
  # PAGE 2: Viewer Page
284
  elif st.session_state.page == 'viewer':
 
285
  if st.session_state.save_message_time is not None:
286
  if time.time() - st.session_state.save_message_time > 3:
287
  st.session_state.save_message = None
288
  st.session_state.save_message_time = None
289
 
 
290
  today_date = datetime.now().strftime("%Y-%m-%d")
291
 
 
292
  col1, col2, col3, col4 = st.columns([1, 2, 2, 2])
293
 
294
  with col1:
 
300
  st.session_state.save_message_time = None
301
  st.rerun()
302
 
 
303
  with col2:
304
  if st.session_state.modified_indices:
305
  modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)]
 
313
  use_container_width=True
314
  )
315
  else:
316
+ st.button("⬇️ No Modified Records", disabled=True, use_container_width=True)
 
 
 
 
317
 
 
318
  with col3:
319
  if st.session_state.modified_indices:
 
320
  unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
321
  if i not in st.session_state.modified_indices]
322
  jsonl_unmodified = save_to_jsonl(unmodified_data)
 
328
  use_container_width=True
329
  )
330
  else:
331
+ st.button("⬇️ No Unmodified Records", disabled=True, use_container_width=True)
 
 
 
 
332
 
 
333
  with col4:
334
  jsonl_all = save_to_jsonl(st.session_state.edited_data)
335
  st.download_button(
 
339
  mime="application/jsonl",
340
  use_container_width=True
341
  )
 
342
 
 
 
 
343
  file_names = [record.get('file_name', f'Record {i}') for i, record in enumerate(st.session_state.data)]
344
 
345
  selected_file = st.selectbox(
 
352
  st.session_state.current_index = selected_file
353
  current_record = st.session_state.edited_data[selected_file]
354
 
 
 
 
355
  left_col, right_col = st.columns([1.3, 1], gap="small")
356
 
357
  # LEFT SIDE: Image Display with OCR Canvas
 
363
  if file_name:
364
  st.caption(f"**File:** {file_name}")
365
 
 
366
  current_image = None
367
  if file_name in st.session_state.images:
368
  current_image = st.session_state.images[file_name]
 
376
  st.text(f" ... and {len(st.session_state.images) - 20} more")
377
 
378
  if current_image:
 
379
  scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
380
 
 
381
  canvas_result = st_canvas(
382
  fill_color="rgba(255, 165, 0, 0.3)",
383
  stroke_width=2,
 
390
  key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
391
  )
392
 
 
393
  if canvas_result.json_data is not None and st.session_state.ocr_active_field:
394
  objects = canvas_result.json_data["objects"]
395
  if len(objects) > 0:
 
396
  rect = objects[-1]
397
 
 
398
  bbox = [
399
  (rect["left"] - paste_x) / scale_ratio,
400
  (rect["top"] - paste_y) / scale_ratio,
 
402
  (rect["top"] + rect["height"] - paste_y) / scale_ratio
403
  ]
404
 
 
405
  with st.spinner("Performing OCR..."):
406
  ocr_text = perform_ocr(current_image, bbox)
407
 
408
  if ocr_text and not ocr_text.startswith("OCR Error"):
409
  st.success(f"βœ… OCR Result: {ocr_text}")
410
 
 
411
  gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
412
 
413
  if st.session_state.ocr_active_section == 'Line_items':
 
414
  line_items = gt_parse.get('Line_items', [])
415
  row_idx = st.session_state.ocr_line_item_row
416
  if row_idx is not None and row_idx < len(line_items):
417
  line_items[row_idx][st.session_state.ocr_active_field] = ocr_text
418
  gt_parse['Line_items'] = line_items
419
  else:
 
420
  section = st.session_state.ocr_active_section
421
  field = st.session_state.ocr_active_field
422
  if section not in gt_parse:
 
424
  gt_parse[section][field] = ocr_text
425
 
426
  st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
427
+ st.session_state.modified_indices.add(selected_file)
428
 
429
+ # Clear canvas for next OCR
430
  st.session_state.canvas_key += 1
 
431
  st.rerun()
432
  else:
433
  st.error(ocr_text)
 
438
  with right_col:
439
  st.markdown("### πŸ“ Document Details")
440
 
441
+ gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
442
 
 
443
  tab1, tab2, tab3, tab4 = st.tabs([
444
  "πŸ“„ Remittance Details",
445
  "πŸ‘₯ Party Details",
 
449
 
450
  # TAB 1: Remittance Details
451
  with tab1:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  remittance = gt_parse.get('Remittance_details', {})
453
 
454
+ # Each field with OCR button
455
+ col_input, col_btn = st.columns([5, 1])
456
+ with col_input:
457
+ remittance['Remittance_adv_no'] = st.text_input(
458
+ "Remittance Advice No",
459
+ value=remittance.get('Remittance_adv_no', ''),
460
+ key=f"rem_adv_no_{selected_file}"
461
+ )
462
+ with col_btn:
463
+ st.markdown("<br>", unsafe_allow_html=True)
464
+ if st.button("πŸ”", key=f"ocr_rem_adv_no_{selected_file}",
465
+ type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_no') else "secondary",
466
+ help="OCR this field"):
467
+ activate_ocr_field('Remittance_details', 'Remittance_adv_no')
468
+
469
+ col_input, col_btn = st.columns([5, 1])
470
+ with col_input:
471
+ remittance['Remittance_adv_date'] = st.text_input(
472
+ "Remittance Advice Date",
473
+ value=remittance.get('Remittance_adv_date', ''),
474
+ key=f"rem_adv_date_{selected_file}"
475
+ )
476
+ with col_btn:
477
+ st.markdown("<br>", unsafe_allow_html=True)
478
+ if st.button("πŸ”", key=f"ocr_rem_adv_date_{selected_file}",
479
+ type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_date') else "secondary",
480
+ help="OCR this field"):
481
+ activate_ocr_field('Remittance_details', 'Remittance_adv_date')
482
+
483
+ col_input, col_btn = st.columns([5, 1])
484
+ with col_input:
485
+ remittance['Payment_method'] = st.text_input(
486
+ "Payment Method",
487
+ value=remittance.get('Payment_method', ''),
488
+ key=f"payment_method_{selected_file}"
489
+ )
490
+ with col_btn:
491
+ st.markdown("<br>", unsafe_allow_html=True)
492
+ if st.button("πŸ”", key=f"ocr_payment_method_{selected_file}",
493
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_method') else "secondary",
494
+ help="OCR this field"):
495
+ activate_ocr_field('Remittance_details', 'Payment_method')
496
+
497
+ col_input, col_btn = st.columns([5, 1])
498
+ with col_input:
499
+ remittance['FCY'] = st.text_input(
500
+ "FCY (Foreign Currency)",
501
+ value=remittance.get('FCY', ''),
502
+ key=f"fcy_{selected_file}"
503
+ )
504
+ with col_btn:
505
+ st.markdown("<br>", unsafe_allow_html=True)
506
+ if st.button("πŸ”", key=f"ocr_fcy_{selected_file}",
507
+ type="primary" if is_ocr_active('Remittance_details', 'FCY') else "secondary",
508
+ help="OCR this field"):
509
+ activate_ocr_field('Remittance_details', 'FCY')
510
+
511
+ col_input, col_btn = st.columns([5, 1])
512
+ with col_input:
513
+ remittance['Total_payment_amt_FCY'] = st.text_input(
514
+ "Total Payment Amount (FCY)",
515
+ value=remittance.get('Total_payment_amt_FCY', ''),
516
+ key=f"total_payment_{selected_file}"
517
+ )
518
+ with col_btn:
519
+ st.markdown("<br>", unsafe_allow_html=True)
520
+ if st.button("πŸ”", key=f"ocr_total_payment_{selected_file}",
521
+ type="primary" if is_ocr_active('Remittance_details', 'Total_payment_amt_FCY') else "secondary",
522
+ help="OCR this field"):
523
+ activate_ocr_field('Remittance_details', 'Total_payment_amt_FCY')
524
+
525
+ col_input, col_btn = st.columns([5, 1])
526
+ with col_input:
527
+ remittance['Payment_date'] = st.text_input(
528
+ "Payment Date",
529
+ value=remittance.get('Payment_date', ''),
530
+ key=f"payment_date_{selected_file}"
531
+ )
532
+ with col_btn:
533
+ st.markdown("<br>", unsafe_allow_html=True)
534
+ if st.button("πŸ”", key=f"ocr_payment_date_{selected_file}",
535
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_date') else "secondary",
536
+ help="OCR this field"):
537
+ activate_ocr_field('Remittance_details', 'Payment_date')
538
+
539
+ col_input, col_btn = st.columns([5, 1])
540
+ with col_input:
541
+ remittance['Payment_ref_no'] = st.text_input(
542
+ "Payment Reference No",
543
+ value=remittance.get('Payment_ref_no', ''),
544
+ key=f"payment_ref_{selected_file}"
545
+ )
546
+ with col_btn:
547
+ st.markdown("<br>", unsafe_allow_html=True)
548
+ if st.button("πŸ”", key=f"ocr_payment_ref_{selected_file}",
549
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_ref_no') else "secondary",
550
+ help="OCR this field"):
551
+ activate_ocr_field('Remittance_details', 'Payment_ref_no')
552
 
553
  gt_parse['Remittance_details'] = remittance
554
 
555
  # TAB 2: Customer/Supplier Details
556
  with tab2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  st.markdown("**Customer Details**")
558
  customer_supplier = gt_parse.get('Customer_supplier_details', {})
559
 
560
+ col_input, col_btn = st.columns([5, 1])
561
+ with col_input:
562
+ customer_supplier['Customer_name'] = st.text_input(
563
+ "Customer Name",
564
+ value=customer_supplier.get('Customer_name', ''),
565
+ key=f"cust_name_{selected_file}"
566
+ )
567
+ with col_btn:
568
+ st.markdown("<br>", unsafe_allow_html=True)
569
+ if st.button("πŸ”", key=f"ocr_cust_name_{selected_file}",
570
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_name') else "secondary",
571
+ help="OCR this field"):
572
+ activate_ocr_field('Customer_supplier_details', 'Customer_name')
573
+
574
+ col_input, col_btn = st.columns([5, 1])
575
+ with col_input:
576
+ customer_supplier['Customer_address'] = st.text_area(
577
+ "Customer Address",
578
+ value=customer_supplier.get('Customer_address', ''),
579
+ key=f"cust_addr_{selected_file}",
580
+ height=60
581
+ )
582
+ with col_btn:
583
+ st.markdown("<br>", unsafe_allow_html=True)
584
+ if st.button("πŸ”", key=f"ocr_cust_addr_{selected_file}",
585
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_address') else "secondary",
586
+ help="OCR this field"):
587
+ activate_ocr_field('Customer_supplier_details', 'Customer_address')
588
+
589
+ col_input, col_btn = st.columns([5, 1])
590
+ with col_input:
591
+ customer_supplier['Customer_contact_info'] = st.text_input(
592
+ "Customer Contact Info",
593
+ value=customer_supplier.get('Customer_contact_info', ''),
594
+ key=f"cust_contact_{selected_file}"
595
+ )
596
+ with col_btn:
597
+ st.markdown("<br>", unsafe_allow_html=True)
598
+ if st.button("πŸ”", key=f"ocr_cust_contact_{selected_file}",
599
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_contact_info') else "secondary",
600
+ help="OCR this field"):
601
+ activate_ocr_field('Customer_supplier_details', 'Customer_contact_info')
602
 
603
  st.markdown("**Supplier Details**")
604
+
605
+ col_input, col_btn = st.columns([5, 1])
606
+ with col_input:
607
+ customer_supplier['Supplier_name'] = st.text_input(
608
+ "Supplier Name",
609
+ value=customer_supplier.get('Supplier_name', ''),
610
+ key=f"supp_name_{selected_file}"
611
+ )
612
+ with col_btn:
613
+ st.markdown("<br>", unsafe_allow_html=True)
614
+ if st.button("πŸ”", key=f"ocr_supp_name_{selected_file}",
615
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_name') else "secondary",
616
+ help="OCR this field"):
617
+ activate_ocr_field('Customer_supplier_details', 'Supplier_name')
618
+
619
+ col_input, col_btn = st.columns([5, 1])
620
+ with col_input:
621
+ customer_supplier['Supplier_address'] = st.text_area(
622
+ "Supplier Address",
623
+ value=customer_supplier.get('Supplier_address', ''),
624
+ key=f"supp_addr_{selected_file}",
625
+ height=60
626
+ )
627
+ with col_btn:
628
+ st.markdown("<br>", unsafe_allow_html=True)
629
+ if st.button("πŸ”", key=f"ocr_supp_addr_{selected_file}",
630
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_address') else "secondary",
631
+ help="OCR this field"):
632
+ activate_ocr_field('Customer_supplier_details', 'Supplier_address')
633
+
634
+ col_input, col_btn = st.columns([5, 1])
635
+ with col_input:
636
+ customer_supplier['Supplier_contact_info'] = st.text_input(
637
+ "Supplier Contact Info",
638
+ value=customer_supplier.get('Supplier_contact_info', ''),
639
+ key=f"supp_contact_{selected_file}"
640
+ )
641
+ with col_btn:
642
+ st.markdown("<br>", unsafe_allow_html=True)
643
+ if st.button("πŸ”", key=f"ocr_supp_contact_{selected_file}",
644
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_contact_info') else "secondary",
645
+ help="OCR this field"):
646
+ activate_ocr_field('Customer_supplier_details', 'Supplier_contact_info')
647
 
648
  gt_parse['Customer_supplier_details'] = customer_supplier
649
 
650
  # TAB 3: Bank Details
651
  with tab3:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
652
  bank = gt_parse.get('Bank_details', {})
653
 
654
+ col_input, col_btn = st.columns([5, 1])
655
+ with col_input:
656
+ bank['Bank_name'] = st.text_input(
657
+ "Bank Name",
658
+ value=bank.get('Bank_name', ''),
659
+ key=f"bank_name_{selected_file}"
660
+ )
661
+ with col_btn:
662
+ st.markdown("<br>", unsafe_allow_html=True)
663
+ if st.button("πŸ”", key=f"ocr_bank_name_{selected_file}",
664
+ type="primary" if is_ocr_active('Bank_details', 'Bank_name') else "secondary",
665
+ help="OCR this field"):
666
+ activate_ocr_field('Bank_details', 'Bank_name')
667
+
668
+ col_input, col_btn = st.columns([5, 1])
669
+ with col_input:
670
+ bank['Bank_acc_no'] = st.text_input(
671
+ "Bank Account No",
672
+ value=bank.get('Bank_acc_no', ''),
673
+ key=f"bank_acc_{selected_file}"
674
+ )
675
+ with col_btn:
676
+ st.markdown("<br>", unsafe_allow_html=True)
677
+ if st.button("πŸ”", key=f"ocr_bank_acc_{selected_file}",
678
+ type="primary" if is_ocr_active('Bank_details', 'Bank_acc_no') else "secondary",
679
+ help="OCR this field"):
680
+ activate_ocr_field('Bank_details', 'Bank_acc_no')
681
+
682
+ col_input, col_btn = st.columns([5, 1])
683
+ with col_input:
684
+ bank['Bank_routing_no'] = st.text_input(
685
+ "Bank Routing No",
686
+ value=bank.get('Bank_routing_no', ''),
687
+ key=f"bank_routing_{selected_file}"
688
+ )
689
+ with col_btn:
690
+ st.markdown("<br>", unsafe_allow_html=True)
691
+ if st.button("πŸ”", key=f"ocr_bank_routing_{selected_file}",
692
+ type="primary" if is_ocr_active('Bank_details', 'Bank_routing_no') else "secondary",
693
+ help="OCR this field"):
694
+ activate_ocr_field('Bank_details', 'Bank_routing_no')
695
+
696
+ col_input, col_btn = st.columns([5, 1])
697
+ with col_input:
698
+ bank['Swift_code'] = st.text_input(
699
+ "SWIFT Code",
700
+ value=bank.get('Swift_code', ''),
701
+ key=f"swift_{selected_file}"
702
+ )
703
+ with col_btn:
704
+ st.markdown("<br>", unsafe_allow_html=True)
705
+ if st.button("πŸ”", key=f"ocr_swift_{selected_file}",
706
+ type="primary" if is_ocr_active('Bank_details', 'Swift_code') else "secondary",
707
+ help="OCR this field"):
708
+ activate_ocr_field('Bank_details', 'Swift_code')
709
 
710
  gt_parse['Bank_details'] = bank
711
 
712
  # TAB 4: Line Items
713
  with tab4:
714
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
715
+ line_items = current_gt_parse.get('Line_items', [])
716
 
717
+ # Add/Remove row buttons
718
+ col_add, col_remove = st.columns([1, 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  with col_add:
720
+ if st.button("βž• Add New Row", key=f"add_row_{selected_file}", use_container_width=True):
 
721
  if not st.session_state.button_clicked:
722
  st.session_state.button_clicked = True
723
  new_row = {
724
+ "Po_number": "", "Invoice_no": "", "Other_doc_ref_no": "",
725
+ "Invoice_date": "", "Invoice_amount_FCY": "",
726
+ "Amount_paid_for_each_invoice": "", "Outstanding_balance_FCY": "",
727
+ "Discounts_taken_FCY": "", "Adjustments(without_holding_tax)_FCY": "",
 
 
 
 
 
728
  "Descriptions": ""
729
  }
730
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
731
+ current_line_items = current_gt_parse.get('Line_items', [])
732
+ current_line_items.append(new_row)
733
+ current_gt_parse['Line_items'] = current_line_items
734
+ st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
735
  st.session_state.modified_indices.add(selected_file)
736
  st.rerun()
737
 
738
  with col_remove:
739
+ if st.button("βž– Remove Last Row", key=f"remove_row_{selected_file}",
740
+ disabled=(len(line_items) == 0), use_container_width=True):
741
+ if not st.session_state.button_clicked and len(line_items) > 0:
742
  st.session_state.button_clicked = True
743
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
744
+ current_line_items = current_gt_parse.get('Line_items', [])
745
+ current_line_items.pop()
746
+ current_gt_parse['Line_items'] = current_line_items
747
+ st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
748
  st.session_state.modified_indices.add(selected_file)
749
  st.rerun()
750
 
 
751
  if st.session_state.button_clicked:
752
  st.session_state.button_clicked = False
753
 
754
+ st.markdown("---")
 
 
 
 
 
 
 
 
 
 
755
 
756
+ # Display each row as an expander with OCR buttons
757
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
758
+ line_items = current_gt_parse.get('Line_items', [])
759
 
 
760
  if line_items:
761
+ for idx, item in enumerate(line_items):
762
+ with st.expander(f"**Row {idx + 1}** - Invoice: {item.get('Invoice_no', 'N/A')}", expanded=(idx == 0)):
763
+ # PO Number
764
+ col_input, col_btn = st.columns([5, 1])
765
+ with col_input:
766
+ item['Po_number'] = st.text_input(
767
+ "PO Number",
768
+ value=item.get('Po_number', ''),
769
+ key=f"po_num_{selected_file}_{idx}"
770
+ )
771
+ with col_btn:
772
+ st.markdown("<br>", unsafe_allow_html=True)
773
+ if st.button("πŸ”", key=f"ocr_po_{selected_file}_{idx}",
774
+ type="primary" if is_ocr_active('Line_items', 'Po_number', idx) else "secondary",
775
+ help="OCR this field"):
776
+ activate_ocr_field('Line_items', 'Po_number', idx)
777
+
778
+ # Invoice No
779
+ col_input, col_btn = st.columns([5, 1])
780
+ with col_input:
781
+ item['Invoice_no'] = st.text_input(
782
+ "Invoice No",
783
+ value=item.get('Invoice_no', ''),
784
+ key=f"inv_no_{selected_file}_{idx}"
785
+ )
786
+ with col_btn:
787
+ st.markdown("<br>", unsafe_allow_html=True)
788
+ if st.button("πŸ”", key=f"ocr_inv_{selected_file}_{idx}",
789
+ type="primary" if is_ocr_active('Line_items', 'Invoice_no', idx) else "secondary",
790
+ help="OCR this field"):
791
+ activate_ocr_field('Line_items', 'Invoice_no', idx)
792
+
793
+ # Other Doc Ref No
794
+ col_input, col_btn = st.columns([5, 1])
795
+ with col_input:
796
+ item['Other_doc_ref_no'] = st.text_input(
797
+ "Other Doc Ref No",
798
+ value=item.get('Other_doc_ref_no', ''),
799
+ key=f"other_doc_{selected_file}_{idx}"
800
+ )
801
+ with col_btn:
802
+ st.markdown("<br>", unsafe_allow_html=True)
803
+ if st.button("πŸ”", key=f"ocr_other_{selected_file}_{idx}",
804
+ type="primary" if is_ocr_active('Line_items', 'Other_doc_ref_no', idx) else "secondary",
805
+ help="OCR this field"):
806
+ activate_ocr_field('Line_items', 'Other_doc_ref_no', idx)
807
+
808
+ # Invoice Date
809
+ col_input, col_btn = st.columns([5, 1])
810
+ with col_input:
811
+ item['Invoice_date'] = st.text_input(
812
+ "Invoice Date",
813
+ value=item.get('Invoice_date', ''),
814
+ key=f"inv_date_{selected_file}_{idx}"
815
+ )
816
+ with col_btn:
817
+ st.markdown("<br>", unsafe_allow_html=True)
818
+ if st.button("πŸ”", key=f"ocr_inv_date_{selected_file}_{idx}",
819
+ type="primary" if is_ocr_active('Line_items', 'Invoice_date', idx) else "secondary",
820
+ help="OCR this field"):
821
+ activate_ocr_field('Line_items', 'Invoice_date', idx)
822
+
823
+ # Invoice Amount FCY
824
+ col_input, col_btn = st.columns([5, 1])
825
+ with col_input:
826
+ item['Invoice_amount_FCY'] = st.text_input(
827
+ "Invoice Amount FCY",
828
+ value=item.get('Invoice_amount_FCY', ''),
829
+ key=f"inv_amt_{selected_file}_{idx}"
830
+ )
831
+ with col_btn:
832
+ st.markdown("<br>", unsafe_allow_html=True)
833
+ if st.button("πŸ”", key=f"ocr_inv_amt_{selected_file}_{idx}",
834
+ type="primary" if is_ocr_active('Line_items', 'Invoice_amount_FCY', idx) else "secondary",
835
+ help="OCR this field"):
836
+ activate_ocr_field('Line_items', 'Invoice_amount_FCY', idx)
837
+
838
+ # Amount Paid
839
+ col_input, col_btn = st.columns([5, 1])
840
+ with col_input:
841
+ item['Amount_paid_for_each_invoice'] = st.text_input(
842
+ "Amount Paid",
843
+ value=item.get('Amount_paid_for_each_invoice', ''),
844
+ key=f"amt_paid_{selected_file}_{idx}"
845
+ )
846
+ with col_btn:
847
+ st.markdown("<br>", unsafe_allow_html=True)
848
+ if st.button("πŸ”", key=f"ocr_amt_paid_{selected_file}_{idx}",
849
+ type="primary" if is_ocr_active('Line_items', 'Amount_paid_for_each_invoice', idx) else "secondary",
850
+ help="OCR this field"):
851
+ activate_ocr_field('Line_items', 'Amount_paid_for_each_invoice', idx)
852
+
853
+ # Outstanding Balance
854
+ col_input, col_btn = st.columns([5, 1])
855
+ with col_input:
856
+ item['Outstanding_balance_FCY'] = st.text_input(
857
+ "Outstanding Balance FCY",
858
+ value=item.get('Outstanding_balance_FCY', ''),
859
+ key=f"out_bal_{selected_file}_{idx}"
860
+ )
861
+ with col_btn:
862
+ st.markdown("<br>", unsafe_allow_html=True)
863
+ if st.button("πŸ”", key=f"ocr_out_bal_{selected_file}_{idx}",
864
+ type="primary" if is_ocr_active('Line_items', 'Outstanding_balance_FCY', idx) else "secondary",
865
+ help="OCR this field"):
866
+ activate_ocr_field('Line_items', 'Outstanding_balance_FCY', idx)
867
+
868
+ # Discounts
869
+ col_input, col_btn = st.columns([5, 1])
870
+ with col_input:
871
+ item['Discounts_taken_FCY'] = st.text_input(
872
+ "Discounts Taken FCY",
873
+ value=item.get('Discounts_taken_FCY', ''),
874
+ key=f"disc_{selected_file}_{idx}"
875
+ )
876
+ with col_btn:
877
+ st.markdown("<br>", unsafe_allow_html=True)
878
+ if st.button("πŸ”", key=f"ocr_disc_{selected_file}_{idx}",
879
+ type="primary" if is_ocr_active('Line_items', 'Discounts_taken_FCY', idx) else "secondary",
880
+ help="OCR this field"):
881
+ activate_ocr_field('Line_items', 'Discounts_taken_FCY', idx)
882
+
883
+ # Adjustments
884
+ col_input, col_btn = st.columns([5, 1])
885
+ with col_input:
886
+ item['Adjustments(without_holding_tax)_FCY'] = st.text_input(
887
+ "Adjustments FCY",
888
+ value=item.get('Adjustments(without_holding_tax)_FCY', ''),
889
+ key=f"adj_{selected_file}_{idx}"
890
+ )
891
+ with col_btn:
892
+ st.markdown("<br>", unsafe_allow_html=True)
893
+ if st.button("πŸ”", key=f"ocr_adj_{selected_file}_{idx}",
894
+ type="primary" if is_ocr_active('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) else "secondary",
895
+ help="OCR this field"):
896
+ activate_ocr_field('Line_items', 'Adjustments(without_holding_tax)_FCY', idx)
897
+
898
+ # Descriptions
899
+ col_input, col_btn = st.columns([5, 1])
900
+ with col_input:
901
+ item['Descriptions'] = st.text_area(
902
+ "Descriptions",
903
+ value=item.get('Descriptions', ''),
904
+ key=f"desc_{selected_file}_{idx}",
905
+ height=60
906
+ )
907
+ with col_btn:
908
+ st.markdown("<br>", unsafe_allow_html=True)
909
+ if st.button("πŸ”", key=f"ocr_desc_{selected_file}_{idx}",
910
+ type="primary" if is_ocr_active('Line_items', 'Descriptions', idx) else "secondary",
911
+ help="OCR this field"):
912
+ activate_ocr_field('Line_items', 'Descriptions', idx)
913
 
914
+ # Update line items back to gt_parse
915
+ current_gt_parse['Line_items'] = line_items
 
 
916
 
917
+ st.markdown("---")
918
+ st.markdown("**πŸ“Š Line Items Summary Table**")
 
919
 
920
+ # Display summary table with index starting from 1
921
+ df = pd.DataFrame(line_items)
922
+ df.index = df.index + 1 # Start index from 1
923
+ df.index.name = 'SL No'
 
 
 
 
 
 
 
 
924
 
925
+ st.dataframe(
926
  df,
 
 
927
  use_container_width=True,
928
+ height=300
 
929
  )
 
 
 
 
 
 
 
 
930
  else:
931
+ st.info("No line items. Click 'βž• Add New Row' to add a new row.")
932
 
 
933
  st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
934
 
935
  # Save button
 
944
  st.session_state.save_message_time = time.time()
945
  st.rerun()
946
 
 
947
  if st.session_state.just_saved:
948
  st.session_state.just_saved = False
949
 
 
950
  if st.session_state.save_message:
951
+ st.success(st.session_state.save_message)