Bhuvi13 committed on
Commit
52a0874
·
verified ·
1 Parent(s): 5dac2a1

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +989 -676
src/streamlit_app.py CHANGED
@@ -79,7 +79,6 @@ st.markdown("""
79
  [data-testid="stHorizontalBlock"] { gap: 0.5rem !important; }
80
 
81
  /* FIXED: Remove problematic viewport-based heights */
82
- /* Instead use fixed pixel heights that work in iframes */
83
  section[data-testid="stAppViewContainer"] {
84
  overflow: visible !important;
85
  }
@@ -89,45 +88,31 @@ st.markdown("""
89
  padding-bottom: 1rem !important;
90
  }
91
 
92
- /* Make columns scrollable with fixed height */
93
- div[data-testid="column"] {
94
- max-height: 85vh !important;
95
- overflow-y: auto !important;
96
- overflow-x: hidden !important;
97
- position: relative !important;
98
  }
99
-
100
- /* Ensure images don't expand the column */
101
- div[data-testid="column"] img,
102
- div[data-testid="column"] canvas {
103
- max-width: 100% !important;
104
- height: auto !important;
105
- display: block !important;
106
- }
107
-
108
- /* Custom scrollbar styling */
109
- div[data-testid="column"]::-webkit-scrollbar {
110
- width: 10px;
111
- height: 10px;
112
  }
113
- div[data-testid="column"]::-webkit-scrollbar-thumb {
114
- border-radius: 8px;
115
- background-color: rgba(0,0,0,0.3);
116
- border: 2px solid transparent;
117
- background-clip: padding-box;
118
  }
119
- div[data-testid="column"]::-webkit-scrollbar-thumb:hover {
120
- background-color: rgba(0,0,0,0.5);
 
121
  }
122
- div[data-testid="column"]::-webkit-scrollbar-track {
123
- background: rgba(0,0,0,0.05);
124
- border-radius: 8px;
125
  }
126
-
127
- /* Firefox scrollbar */
128
- div[data-testid="column"] {
129
- scrollbar-width: thin;
130
- scrollbar-color: rgba(0,0,0,0.3) rgba(0,0,0,0.05);
131
  }
132
  </style>
133
  """, unsafe_allow_html=True)
@@ -202,7 +187,7 @@ def perform_ocr(image, bbox):
202
  except Exception as e:
203
  return f"OCR Error: {str(e)}"
204
 
205
- def scale_image_to_fixed_size(image, max_width=900, max_height=1100):
206
  """Scale image to fit within max dimensions while maintaining aspect ratio - NO PADDING"""
207
  if image.mode not in ('RGB', 'RGBA'):
208
  image = image.convert('RGB')
@@ -238,7 +223,65 @@ def get_base_filename(record):
238
  return record['file_names']
239
 
240
  # Fall back to file_name (singular)
241
- return record.get('file_name', '')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  def swap_sender_recipient_details(index):
244
  """Swap sender and recipient details"""
@@ -275,6 +318,8 @@ if 'images' not in st.session_state:
275
  st.session_state.images = {}
276
  if 'pdf_metadata' not in st.session_state:
277
  st.session_state.pdf_metadata = {}
 
 
278
  if 'current_page_num' not in st.session_state:
279
  st.session_state.current_page_num = {}
280
  if 'modified_indices' not in st.session_state:
@@ -355,9 +400,35 @@ def auto_save(index):
355
  if 'file_names' in st.session_state.edited_data[index]:
356
  del st.session_state.edited_data[index]['file_names']
357
 
 
 
 
 
 
 
 
 
 
358
  st.session_state.data = st.session_state.edited_data.copy()
359
  st.session_state.modified_indices.add(index)
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  def activate_ocr_field(section, field, row_idx=None):
362
  """Activate OCR for a specific field"""
363
  if (st.session_state.ocr_active_section == section and
@@ -376,9 +447,6 @@ def activate_ocr_field(section, field, row_idx=None):
376
  expander_key = f"line_item_expander_{current_idx}_{row_idx}"
377
  st.session_state[expander_key] = True
378
 
379
- st.session_state.canvas_key += 1
380
- st.rerun()
381
-
382
  def is_ocr_active(section, field, row_idx=None):
383
  """Check if this OCR button is currently active"""
384
  return (st.session_state.ocr_active_section == section and
@@ -438,10 +506,19 @@ if st.session_state.page == 'upload':
438
  st.session_state.images = images_dict
439
  st.session_state.pdf_metadata = pdf_metadata
440
 
 
 
 
 
 
441
  for filename in pdf_metadata.keys():
442
  if filename not in st.session_state.current_page_num:
443
  st.session_state.current_page_num[filename] = 0
444
 
 
 
 
 
445
  if st.session_state.data is not None:
446
  gt_file_names = []
447
  for rec in st.session_state.data:
@@ -456,10 +533,25 @@ if st.session_state.page == 'upload':
456
  if not fname:
457
  continue
458
 
 
 
 
 
 
 
 
 
459
  if fname in images_dict:
460
  matched_images.add(fname)
 
 
 
 
 
 
461
  else:
462
  found = False
 
463
  for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
464
  if fname + ext in images_dict:
465
  matched_images.add(fname)
@@ -467,9 +559,10 @@ if st.session_state.page == 'upload':
467
  break
468
 
469
  if not found:
 
470
  for uploaded_name in images_dict.keys():
471
  uploaded_base = uploaded_name.rsplit('.', 1)[0]
472
- if uploaded_base == fname:
473
  matched_images.add(fname)
474
  found = True
475
  break
@@ -478,7 +571,7 @@ if st.session_state.page == 'upload':
478
  if fname and fname not in matched_images:
479
  unmatched_gt_files.append(fname)
480
 
481
- st.success(f"βœ… Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs)!")
482
  st.info(f"πŸ”Ž Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}")
483
 
484
  if unmatched_gt_files:
@@ -489,7 +582,7 @@ if st.session_state.page == 'upload':
489
  else:
490
  st.success("βœ… All JSONL file names matched to files!")
491
  else:
492
- st.success(f"βœ… Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs)!")
493
  st.info("ℹ️ Upload a JSONL file to see how many files match the ground truth 'file_name' field.")
494
 
495
  if st.session_state.data is not None:
@@ -603,665 +696,885 @@ elif st.session_state.page == 'viewer':
603
 
604
  # LEFT SIDE: Image Display with OCR Canvas
605
  with left_col:
606
- with st.container(height=700, border=False):
607
- # Use helper function to get base file name
608
- file_name = get_base_filename(current_record)
 
 
 
 
 
 
 
609
 
610
- if file_name:
611
- actual_file_name = None
612
- if file_name in st.session_state.images:
613
- actual_file_name = file_name
614
- else:
615
- for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
616
- if file_name + ext in st.session_state.images:
617
- actual_file_name = file_name + ext
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
  break
619
-
620
- if not actual_file_name:
621
- for uploaded_name in st.session_state.images.keys():
622
- uploaded_base = uploaded_name.rsplit('.', 1)[0]
623
- if uploaded_base == file_name:
624
- actual_file_name = uploaded_name
625
- break
626
 
627
- if actual_file_name:
628
- is_pdf = actual_file_name in st.session_state.pdf_metadata
629
-
630
- if is_pdf:
631
- pdf_meta = st.session_state.pdf_metadata[actual_file_name]
632
- total_pages = pdf_meta['total_pages']
633
- current_page = st.session_state.current_page_num.get(actual_file_name, 0)
634
-
635
- col_prev, col_info, col_next = st.columns([1, 2, 1])
636
-
637
- with col_prev:
638
- prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{actual_file_name}",
639
- disabled=(current_page == 0), use_container_width=True)
640
-
641
- with col_info:
642
- st.markdown(f"<div style='text-align: center; padding: 5px;'><b>πŸ“„ Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True)
643
-
644
- with col_next:
645
- next_clicked = st.button("Next ➑️", key=f"next_page_{selected_file}_{actual_file_name}",
646
- disabled=(current_page >= total_pages - 1), use_container_width=True)
647
-
648
- if not st.session_state.navigating_page:
649
- if prev_clicked:
650
- st.session_state.navigating_page = True
651
- st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1)
652
- st.session_state.canvas_key += 1
653
- st.session_state.ocr_active_section = None
654
- st.session_state.ocr_active_field = None
655
- st.rerun()
656
- elif next_clicked:
657
- st.session_state.navigating_page = True
658
- st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1)
659
- st.session_state.canvas_key += 1
660
- st.session_state.ocr_active_section = None
661
- st.session_state.ocr_active_field = None
662
- st.rerun()
663
- else:
664
- st.session_state.navigating_page = False
665
 
666
- if actual_file_name:
667
- is_pdf = actual_file_name in st.session_state.pdf_metadata
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
 
669
- if is_pdf:
670
- current_page = st.session_state.current_page_num.get(actual_file_name, 0)
671
- pdf_meta = st.session_state.pdf_metadata[actual_file_name]
672
- current_image = pdf_meta['pages'][current_page]
 
 
 
 
 
 
 
 
 
 
 
673
  else:
674
- current_image = st.session_state.images[actual_file_name]
675
- else:
676
- st.error(f"❌ File '{file_name}' not found in uploaded files")
677
- st.info("πŸ’‘ Available files:")
678
- with st.expander("Show available files"):
679
- for img_name in list(st.session_state.images.keys())[:20]:
680
- st.text(f" β€’ {img_name}")
681
- if len(st.session_state.images) > 20:
682
- st.text(f" ... and {len(st.session_state.images) - 20} more")
683
- current_image = None
684
 
685
- if current_image:
686
- scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
 
 
687
 
688
- canvas_result = st_canvas(
689
- fill_color="rgba(255, 165, 0, 0.3)",
690
- stroke_width=2,
691
- stroke_color="#FF0000",
692
- background_image=scaled_image,
693
- update_streamlit=True,
694
- height=scaled_image.height,
695
- width=scaled_image.width,
696
- drawing_mode="rect",
697
- key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
698
- )
699
 
700
- if canvas_result.json_data is not None and st.session_state.ocr_active_field:
701
- objects = canvas_result.json_data.get("objects", [])
702
- if len(objects) > 0:
703
- rect = objects[-1]
704
-
705
- bbox = [
706
- (rect["left"] - paste_x) / scale_ratio,
707
- (rect["top"] - paste_y) / scale_ratio,
708
- (rect["left"] + rect["width"] - paste_x) / scale_ratio,
709
- (rect["top"] + rect["height"] - paste_y) / scale_ratio
710
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
 
712
- with st.spinner("Performing OCR..."):
713
- ocr_text = perform_ocr(current_image, bbox)
714
 
715
- if ocr_text and not ocr_text.startswith("OCR Error"):
716
- st.success(f"βœ… OCR Result: {ocr_text}")
717
-
718
- gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
719
-
720
- if st.session_state.ocr_active_section == 'items':
721
- items = gt_parse.get('items', [])
722
- row_idx = st.session_state.ocr_line_item_row
723
- if row_idx is not None and row_idx < len(items):
724
- items[row_idx][st.session_state.ocr_active_field] = ocr_text
725
- gt_parse['items'] = items
726
 
727
- expander_key = f"line_item_expander_{selected_file}_{row_idx}"
728
- st.session_state[expander_key] = True
729
- else:
730
- section = st.session_state.ocr_active_section
731
- field = st.session_state.ocr_active_field
732
- if section not in gt_parse:
733
- gt_parse[section] = {}
734
- gt_parse[section][field] = ocr_text
735
-
736
- st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
737
- st.session_state.modified_indices.add(selected_file)
738
-
739
- st.session_state.canvas_key += 1
740
- st.rerun()
741
  else:
742
- st.error(ocr_text)
743
- else:
744
- st.warning("No file name specified in record")
 
 
 
 
 
 
 
 
 
 
 
 
745
 
746
  # RIGHT SIDE: Editable Details
747
  with right_col:
748
- with st.container(height=700, border=False):
749
- st.markdown("### πŸ“ Invoice Details")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
 
751
- gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
752
 
753
- tab1, tab2, tab3, tab4 = st.tabs([
754
- "πŸ“„ Invoice Details",
755
- "πŸ‘₯ Party Details",
756
- "🏦 Bank Details",
757
- "πŸ“‹ Line Items"
758
- ])
 
 
 
 
 
 
 
 
 
759
 
760
- # TAB 1: Header (includes invoice details + summary fields)
761
- with tab1:
762
- header = gt_parse.get('header', {})
763
- summary = gt_parse.get('summary', {})
764
-
765
- st.markdown("**Invoice Information**")
766
-
767
- # Invoice No
768
- col_input, col_btn = st.columns([5, 1])
769
- with col_input:
770
- header['invoice_no'] = st.text_input(
771
- "Invoice No",
772
- value=header.get('invoice_no', ''),
773
- key=f"invoice_no_{selected_file}"
774
- )
775
- with col_btn:
776
- st.markdown("<br>", unsafe_allow_html=True)
777
- if st.button("πŸ”", key=f"ocr_invoice_no_{selected_file}",
778
- type="primary" if is_ocr_active('header', 'invoice_no') else "secondary"):
779
- activate_ocr_field('header', 'invoice_no')
780
-
781
- # Invoice Date
782
- col_input, col_btn = st.columns([5, 1])
783
- with col_input:
784
- header['invoice_date'] = st.text_input(
785
- "Invoice Date",
786
- value=header.get('invoice_date', ''),
787
- key=f"invoice_date_{selected_file}"
788
- )
789
- with col_btn:
790
- st.markdown("<br>", unsafe_allow_html=True)
791
- if st.button("πŸ”", key=f"ocr_invoice_date_{selected_file}",
792
- type="primary" if is_ocr_active('header', 'invoice_date') else "secondary"):
793
- activate_ocr_field('header', 'invoice_date')
794
-
795
- # Due Date
796
- col_input, col_btn = st.columns([5, 1])
797
- with col_input:
798
- header['due_date'] = st.text_input(
799
- "Due Date",
800
- value=header.get('due_date', ''),
801
- key=f"due_date_{selected_file}"
802
- )
803
- with col_btn:
804
- st.markdown("<br>", unsafe_allow_html=True)
805
- if st.button("πŸ”", key=f"ocr_due_date_{selected_file}",
806
- type="primary" if is_ocr_active('header', 'due_date') else "secondary"):
807
- activate_ocr_field('header', 'due_date')
808
-
809
- st.markdown("**Financial Summary**")
810
-
811
- # Subtotal
812
- col_input, col_btn = st.columns([5, 1])
813
- with col_input:
814
- summary['subtotal'] = st.text_input(
815
- "Subtotal",
816
- value=summary.get('subtotal', ''),
817
- key=f"subtotal_{selected_file}"
818
- )
819
- with col_btn:
820
- st.markdown("<br>", unsafe_allow_html=True)
821
- if st.button("πŸ”", key=f"ocr_subtotal_{selected_file}",
822
- type="primary" if is_ocr_active('summary', 'subtotal') else "secondary"):
823
- activate_ocr_field('summary', 'subtotal')
824
-
825
- # Tax Rate
826
- col_input, col_btn = st.columns([5, 1])
827
- with col_input:
828
- summary['tax_rate'] = st.text_input(
829
- "Tax Rate",
830
- value=summary.get('tax_rate', ''),
831
- key=f"tax_rate_{selected_file}"
832
- )
833
- with col_btn:
834
- st.markdown("<br>", unsafe_allow_html=True)
835
- if st.button("πŸ”", key=f"ocr_tax_rate_{selected_file}",
836
- type="primary" if is_ocr_active('summary', 'tax_rate') else "secondary"):
837
- activate_ocr_field('summary', 'tax_rate')
838
-
839
- # Tax Amount
840
- col_input, col_btn = st.columns([5, 1])
841
- with col_input:
842
- summary['tax_amount'] = st.text_input(
843
- "Tax Amount",
844
- value=summary.get('tax_amount', ''),
845
- key=f"tax_amount_{selected_file}"
846
- )
847
- with col_btn:
848
- st.markdown("<br>", unsafe_allow_html=True)
849
- if st.button("πŸ”", key=f"ocr_tax_amount_{selected_file}",
850
- type="primary" if is_ocr_active('summary', 'tax_amount') else "secondary"):
851
- activate_ocr_field('summary', 'tax_amount')
852
-
853
- # Total Amount
854
- col_input, col_btn = st.columns([5, 1])
855
- with col_input:
856
- summary['total_amount'] = st.text_input(
857
- "Total Amount",
858
- value=summary.get('total_amount', ''),
859
- key=f"total_amount_{selected_file}"
860
- )
861
- with col_btn:
862
- st.markdown("<br>", unsafe_allow_html=True)
863
- if st.button("πŸ”", key=f"ocr_total_amount_{selected_file}",
864
- type="primary" if is_ocr_active('summary', 'total_amount') else "secondary"):
865
- activate_ocr_field('summary', 'total_amount')
866
-
867
- # Currency
868
- col_input, col_btn = st.columns([5, 1])
869
- with col_input:
870
- summary['currency'] = st.text_input(
871
- "Currency",
872
- value=summary.get('currency', ''),
873
- key=f"currency_{selected_file}"
874
- )
875
- with col_btn:
876
- st.markdown("<br>", unsafe_allow_html=True)
877
- if st.button("πŸ”", key=f"ocr_currency_{selected_file}",
878
- type="primary" if is_ocr_active('summary', 'currency') else "secondary"):
879
- activate_ocr_field('summary', 'currency')
880
-
881
- gt_parse['header'] = header
882
- gt_parse['summary'] = summary
883
 
884
- # TAB 2: Party Details (without bank details)
885
- with tab2:
886
- # SWAP BUTTON
887
- col1, col2, col3 = st.columns([1, 2, 1])
888
- with col2:
889
- if st.button("πŸ”„ Swap Sender ↔ Recipient", key=f"swap_btn_{selected_file}",
890
- type="primary", use_container_width=True):
891
- if not st.session_state.just_swapped:
892
- st.session_state.just_swapped = True
893
- swap_sender_recipient_details(selected_file)
894
- st.rerun()
895
-
896
- if st.session_state.just_swapped:
897
- st.session_state.just_swapped = False
898
-
899
- st.markdown("**Sender Details**")
900
- header = gt_parse.get('header', {})
901
-
902
- # Sender Name
903
- col_input, col_btn = st.columns([5, 1])
904
- with col_input:
905
- header['sender_name'] = st.text_input(
906
- "Sender Name",
907
- value=header.get('sender_name', ''),
908
- key=f"sender_name_{selected_file}"
909
- )
910
- with col_btn:
911
- st.markdown("<br>", unsafe_allow_html=True)
912
- if st.button("πŸ”", key=f"ocr_sender_name_{selected_file}",
913
- type="primary" if is_ocr_active('header', 'sender_name') else "secondary"):
914
- activate_ocr_field('header', 'sender_name')
915
-
916
- # Sender Address
917
- col_input, col_btn = st.columns([5, 1])
918
- with col_input:
919
- header['sender_addr'] = st.text_area(
920
- "Sender Address",
921
- value=header.get('sender_addr', ''),
922
- key=f"sender_addr_{selected_file}",
923
- height=60
924
- )
925
- with col_btn:
926
- st.markdown("<br>", unsafe_allow_html=True)
927
- if st.button("πŸ”", key=f"ocr_sender_addr_{selected_file}",
928
- type="primary" if is_ocr_active('header', 'sender_addr') else "secondary"):
929
- activate_ocr_field('header', 'sender_addr')
930
-
931
- st.markdown("**Recipient Details**")
932
-
933
- # Recipient Name
934
- col_input, col_btn = st.columns([5, 1])
935
- with col_input:
936
- header['rcpt_name'] = st.text_input(
937
- "Recipient Name",
938
- value=header.get('rcpt_name', ''),
939
- key=f"rcpt_name_{selected_file}"
940
- )
941
- with col_btn:
942
- st.markdown("<br>", unsafe_allow_html=True)
943
- if st.button("πŸ”", key=f"ocr_rcpt_name_{selected_file}",
944
- type="primary" if is_ocr_active('header', 'rcpt_name') else "secondary"):
945
- activate_ocr_field('header', 'rcpt_name')
946
-
947
- # Recipient Address
948
- col_input, col_btn = st.columns([5, 1])
949
- with col_input:
950
- header['rcpt_addr'] = st.text_area(
951
- "Recipient Address",
952
- value=header.get('rcpt_addr', ''),
953
- key=f"rcpt_addr_{selected_file}",
954
- height=60
955
- )
956
- with col_btn:
957
- st.markdown("<br>", unsafe_allow_html=True)
958
- if st.button("πŸ”", key=f"ocr_rcpt_addr_{selected_file}",
959
- type="primary" if is_ocr_active('header', 'rcpt_addr') else "secondary"):
960
- activate_ocr_field('header', 'rcpt_addr')
961
-
962
- gt_parse['header'] = header
963
 
964
- # TAB 3: Bank Details
965
- with tab3:
966
- header = gt_parse.get('header', {})
967
-
968
- # Bank IBAN
969
- col_input, col_btn = st.columns([5, 1])
970
- with col_input:
971
- header['bank_iban'] = st.text_input(
972
- "Bank IBAN",
973
- value=header.get('bank_iban', ''),
974
- key=f"bank_iban_{selected_file}"
975
- )
976
- with col_btn:
977
- st.markdown("<br>", unsafe_allow_html=True)
978
- if st.button("πŸ”", key=f"ocr_bank_iban_{selected_file}",
979
- type="primary" if is_ocr_active('header', 'bank_iban') else "secondary"):
980
- activate_ocr_field('header', 'bank_iban')
981
-
982
- # Bank Name
983
- col_input, col_btn = st.columns([5, 1])
984
- with col_input:
985
- header['bank_name'] = st.text_input(
986
- "Bank Name",
987
- value=header.get('bank_name', ''),
988
- key=f"bank_name_{selected_file}"
989
- )
990
- with col_btn:
991
- st.markdown("<br>", unsafe_allow_html=True)
992
- if st.button("πŸ”", key=f"ocr_bank_name_{selected_file}",
993
- type="primary" if is_ocr_active('header', 'bank_name') else "secondary"):
994
- activate_ocr_field('header', 'bank_name')
995
-
996
- # Bank Account No
997
- col_input, col_btn = st.columns([5, 1])
998
- with col_input:
999
- header['bank_acc_no'] = st.text_input(
1000
- "Bank Account No",
1001
- value=header.get('bank_acc_no', ''),
1002
- key=f"bank_acc_no_{selected_file}"
1003
- )
1004
- with col_btn:
1005
- st.markdown("<br>", unsafe_allow_html=True)
1006
- if st.button("πŸ”", key=f"ocr_bank_acc_no_{selected_file}",
1007
- type="primary" if is_ocr_active('header', 'bank_acc_no') else "secondary"):
1008
- activate_ocr_field('header', 'bank_acc_no')
1009
-
1010
- # Bank Routing
1011
- col_input, col_btn = st.columns([5, 1])
1012
- with col_input:
1013
- header['bank_routing'] = st.text_input(
1014
- "Bank Routing",
1015
- value=header.get('bank_routing', ''),
1016
- key=f"bank_routing_{selected_file}"
1017
- )
1018
- with col_btn:
1019
- st.markdown("<br>", unsafe_allow_html=True)
1020
- if st.button("πŸ”", key=f"ocr_bank_routing_{selected_file}",
1021
- type="primary" if is_ocr_active('header', 'bank_routing') else "secondary"):
1022
- activate_ocr_field('header', 'bank_routing')
1023
-
1024
- # Bank SWIFT
1025
- col_input, col_btn = st.columns([5, 1])
1026
- with col_input:
1027
- header['bank_swift'] = st.text_input(
1028
- "Bank SWIFT",
1029
- value=header.get('bank_swift', ''),
1030
- key=f"bank_swift_{selected_file}"
1031
- )
1032
- with col_btn:
1033
- st.markdown("<br>", unsafe_allow_html=True)
1034
- if st.button("πŸ”", key=f"ocr_bank_swift_{selected_file}",
1035
- type="primary" if is_ocr_active('header', 'bank_swift') else "secondary"):
1036
- activate_ocr_field('header', 'bank_swift')
1037
-
1038
- # Bank Account Name
1039
- col_input, col_btn = st.columns([5, 1])
1040
- with col_input:
1041
- header['bank_acc_name'] = st.text_input(
1042
- "Bank Account Name",
1043
- value=header.get('bank_acc_name', ''),
1044
- key=f"bank_acc_name_{selected_file}"
1045
- )
1046
- with col_btn:
1047
- st.markdown("<br>", unsafe_allow_html=True)
1048
- if st.button("πŸ”", key=f"ocr_bank_acc_name_{selected_file}",
1049
- type="primary" if is_ocr_active('header', 'bank_acc_name') else "secondary"):
1050
- activate_ocr_field('header', 'bank_acc_name')
1051
-
1052
- # Bank Branch
1053
- col_input, col_btn = st.columns([5, 1])
1054
- with col_input:
1055
- header['bank_branch'] = st.text_input(
1056
- "Bank Branch",
1057
- value=header.get('bank_branch', ''),
1058
- key=f"bank_branch_{selected_file}"
1059
- )
1060
- with col_btn:
1061
- st.markdown("<br>", unsafe_allow_html=True)
1062
- if st.button("πŸ”", key=f"ocr_bank_branch_{selected_file}",
1063
- type="primary" if is_ocr_active('header', 'bank_branch') else "secondary"):
1064
- activate_ocr_field('header', 'bank_branch')
1065
-
1066
- gt_parse['header'] = header
1067
 
1068
- # TAB 4: Items
1069
- with tab4:
1070
- current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
1071
- items = current_gt_parse.get('items', [])
1072
-
1073
- # Add/Remove row buttons
1074
- col_add, col_remove = st.columns([1, 1])
1075
- with col_add:
1076
- if st.button("βž• Add New Item", key=f"add_item_{selected_file}", use_container_width=True):
1077
- if not st.session_state.button_clicked:
1078
- st.session_state.button_clicked = True
1079
- new_item = {
1080
- "descriptions": "", "SKU": "", "quantity": "",
1081
- "unit_price": "", "amount": "", "tax": "", "Line_total": ""
1082
- }
1083
- current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
1084
- current_items = current_gt_parse.get('items', [])
1085
- current_items.append(new_item)
1086
- current_gt_parse['items'] = current_items
1087
- st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
1088
- st.session_state.modified_indices.add(selected_file)
1089
-
1090
- new_idx = len(current_items) - 1
1091
- expander_key_new = f"line_item_expander_{selected_file}_{new_idx}"
1092
- st.session_state[expander_key_new] = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1093
 
1094
- st.rerun()
1095
-
1096
- with col_remove:
1097
- if st.button("βž– Remove Last Item", key=f"remove_item_{selected_file}",
1098
- disabled=(len(items) == 0), use_container_width=True):
1099
- if not st.session_state.button_clicked and len(items) > 0:
1100
- st.session_state.button_clicked = True
1101
- current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
1102
- current_items = current_gt_parse.get('items', [])
1103
- N = len(current_items)
1104
- current_items.pop()
1105
- current_gt_parse['items'] = current_items
1106
- st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
1107
- st.session_state.modified_indices.add(selected_file)
1108
 
1109
- popped_idx = N - 1
1110
- expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
1111
- if expander_key_popped in st.session_state:
1112
- del st.session_state[expander_key_popped]
 
 
 
 
 
 
 
 
 
 
1113
 
1114
- st.rerun()
1115
-
1116
- if st.session_state.button_clicked:
1117
- st.session_state.button_clicked = False
1118
-
1119
- current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
1120
- items = current_gt_parse.get('items', [])
1121
-
1122
- if items:
1123
- for idx, item in enumerate(items):
1124
- expander_key = f"line_item_expander_{selected_file}_{idx}"
1125
- expanded_default = st.session_state.get(expander_key, False)
1126
 
1127
- with st.expander(f"**Item {idx + 1}** - {item.get('descriptions', 'N/A')[:30]}", expanded=expanded_default):
1128
- # Descriptions
1129
- col_input, col_btn = st.columns([5, 1])
1130
- with col_input:
1131
- item['descriptions'] = st.text_area(
1132
- "Descriptions",
1133
- value=item.get('descriptions', ''),
1134
- key=f"desc_{selected_file}_{idx}",
1135
- height=60
1136
- )
1137
- with col_btn:
1138
- st.markdown("<br>", unsafe_allow_html=True)
1139
- if st.button("πŸ”", key=f"ocr_desc_{selected_file}_{idx}",
1140
- type="primary" if is_ocr_active('items', 'descriptions', idx) else "secondary"):
1141
- st.session_state[expander_key] = True
1142
- activate_ocr_field('items', 'descriptions', idx)
1143
-
1144
- # SKU
1145
- col_input, col_btn = st.columns([5, 1])
1146
- with col_input:
1147
- item['SKU'] = st.text_input(
1148
- "SKU",
1149
- value=item.get('SKU', ''),
1150
- key=f"sku_{selected_file}_{idx}"
1151
- )
1152
- with col_btn:
1153
- st.markdown("<br>", unsafe_allow_html=True)
1154
- if st.button("πŸ”", key=f"ocr_sku_{selected_file}_{idx}",
1155
- type="primary" if is_ocr_active('items', 'SKU', idx) else "secondary"):
1156
- st.session_state[expander_key] = True
1157
- activate_ocr_field('items', 'SKU', idx)
1158
-
1159
- # Quantity
1160
- col_input, col_btn = st.columns([5, 1])
1161
- with col_input:
1162
- item['quantity'] = st.text_input(
1163
- "Quantity",
1164
- value=item.get('quantity', ''),
1165
- key=f"qty_{selected_file}_{idx}"
1166
- )
1167
- with col_btn:
1168
- st.markdown("<br>", unsafe_allow_html=True)
1169
- if st.button("πŸ”", key=f"ocr_qty_{selected_file}_{idx}",
1170
- type="primary" if is_ocr_active('items', 'quantity', idx) else "secondary"):
1171
- st.session_state[expander_key] = True
1172
- activate_ocr_field('items', 'quantity', idx)
1173
-
1174
- # Unit Price
1175
- col_input, col_btn = st.columns([5, 1])
1176
- with col_input:
1177
- item['unit_price'] = st.text_input(
1178
- "Unit Price",
1179
- value=item.get('unit_price', ''),
1180
- key=f"unit_price_{selected_file}_{idx}"
1181
- )
1182
- with col_btn:
1183
- st.markdown("<br>", unsafe_allow_html=True)
1184
- if st.button("πŸ”", key=f"ocr_unit_price_{selected_file}_{idx}",
1185
- type="primary" if is_ocr_active('items', 'unit_price', idx) else "secondary"):
1186
- st.session_state[expander_key] = True
1187
- activate_ocr_field('items', 'unit_price', idx)
1188
-
1189
- # Amount
1190
- col_input, col_btn = st.columns([5, 1])
1191
- with col_input:
1192
- item['amount'] = st.text_input(
1193
- "Amount",
1194
- value=item.get('amount', ''),
1195
- key=f"amount_{selected_file}_{idx}"
1196
- )
1197
- with col_btn:
1198
- st.markdown("<br>", unsafe_allow_html=True)
1199
- if st.button("πŸ”", key=f"ocr_amount_{selected_file}_{idx}",
1200
- type="primary" if is_ocr_active('items', 'amount', idx) else "secondary"):
1201
- st.session_state[expander_key] = True
1202
- activate_ocr_field('items', 'amount', idx)
1203
-
1204
- # Tax
1205
- col_input, col_btn = st.columns([5, 1])
1206
- with col_input:
1207
- item['tax'] = st.text_input(
1208
- "Tax",
1209
- value=item.get('tax', ''),
1210
- key=f"tax_{selected_file}_{idx}"
1211
- )
1212
- with col_btn:
1213
- st.markdown("<br>", unsafe_allow_html=True)
1214
- if st.button("πŸ”", key=f"ocr_tax_{selected_file}_{idx}",
1215
- type="primary" if is_ocr_active('items', 'tax', idx) else "secondary"):
1216
- st.session_state[expander_key] = True
1217
- activate_ocr_field('items', 'tax', idx)
1218
-
1219
- # Line Total
1220
- col_input, col_btn = st.columns([5, 1])
1221
- with col_input:
1222
- item['Line_total'] = st.text_input(
1223
- "Line Total",
1224
- value=item.get('Line_total', ''),
1225
- key=f"line_total_{selected_file}_{idx}"
1226
- )
1227
- with col_btn:
1228
- st.markdown("<br>", unsafe_allow_html=True)
1229
- if st.button("πŸ”", key=f"ocr_line_total_{selected_file}_{idx}",
1230
- type="primary" if is_ocr_active('items', 'Line_total', idx) else "secondary"):
1231
- st.session_state[expander_key] = True
1232
- activate_ocr_field('items', 'Line_total', idx)
1233
-
1234
- current_gt_parse['items'] = items
1235
-
1236
- st.markdown("**πŸ“Š Items Summary Table**")
1237
-
1238
- df = pd.DataFrame(items)
1239
- df.index = df.index + 1
1240
- df.index.name = 'SL No'
1241
-
1242
- st.dataframe(
1243
- df,
1244
- use_container_width=True,
1245
- height=300
1246
- )
1247
- else:
1248
- st.info("No items. Click 'βž• Add New Item' to add a new item.")
1249
-
1250
- st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
1251
-
1252
- # Save button
1253
- col1, col2 = st.columns([1, 1])
1254
- with col1:
1255
- if st.button("πŸ’Ύ Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
1256
- if not st.session_state.just_saved:
1257
- st.session_state.just_saved = True
1258
- auto_save(selected_file)
1259
- st.session_state.save_message = "βœ… Changes saved successfully!"
1260
- st.session_state.save_message_time = time.time()
1261
  st.rerun()
1262
 
1263
- if st.session_state.just_saved:
1264
- st.session_state.just_saved = False
 
 
 
1265
 
1266
- if st.session_state.save_message:
1267
- st.success(st.session_state.save_message)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  [data-testid="stHorizontalBlock"] { gap: 0.5rem !important; }
80
 
81
  /* FIXED: Remove problematic viewport-based heights */
 
82
  section[data-testid="stAppViewContainer"] {
83
  overflow: visible !important;
84
  }
 
88
  padding-bottom: 1rem !important;
89
  }
90
 
91
+ /* Force the column containing the canvas to allow horizontal scroll */
92
+ [data-testid="column"]:has(.stCanvas) {
93
+ overflow-x: auto !important;
94
+ overflow-y: hidden !important;
 
 
95
  }
96
+
97
+ /* Ensure canvas doesn't shrink */
98
+ .stCanvas {
99
+ min-width: max-content !important;
 
 
 
 
 
 
 
 
 
100
  }
101
+
102
+ /* Style the scrollbar */
103
+ [data-testid="column"]:has(.stCanvas)::-webkit-scrollbar {
104
+ height: 12px;
 
105
  }
106
+ [data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-track {
107
+ background: #e0e0e0;
108
+ border-radius: 6px;
109
  }
110
+ [data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-thumb {
111
+ background: rgba(0,0,0,0.4);
112
+ border-radius: 6px;
113
  }
114
+ [data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-thumb:hover {
115
+ background: rgba(0,0,0,0.6);
 
 
 
116
  }
117
  </style>
118
  """, unsafe_allow_html=True)
 
187
  except Exception as e:
188
  return f"OCR Error: {str(e)}"
189
 
190
+ def scale_image_to_fixed_size(image, max_width=1400, max_height=1100):
191
  """Scale image to fit within max dimensions while maintaining aspect ratio - NO PADDING"""
192
  if image.mode not in ('RGB', 'RGBA'):
193
  image = image.convert('RGB')
 
223
  return record['file_names']
224
 
225
  # Fall back to file_name (singular)
226
+ file_name = record.get('file_name', '')
227
+
228
+ # Strip PDF extension if present (for cases where PDF was converted to images)
229
+ if file_name.lower().endswith('.pdf'):
230
+ file_name = file_name[:-4] # Remove .pdf
231
+
232
+ # Also strip other image extensions if present
233
+ for ext in ['.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
234
+ if file_name.lower().endswith(ext):
235
+ file_name = file_name[:-(len(ext))]
236
+ break
237
+
238
+ return file_name
239
+
240
def detect_image_groups(images_dict):
    """Detect multi-page image groups among uploaded files.

    Files named ``<base>_page<N>.<ext>`` (for example ``invoice01_page1.png``
    and ``invoice01_page2.png``) are collected under ``<base>``. A base name
    only becomes a group when at least two pages were uploaded for it.

    Args:
        images_dict: Mapping of uploaded filename to the loaded image object.

    Returns:
        A tuple ``(image_groups_metadata, grouped_files)`` where

        * ``image_groups_metadata`` maps each base name to a dict with
          ``'pages'`` (image objects ordered by page number), ``'filenames'``
          (matching filenames in the same order), ``'total_pages'`` and
          ``'current_page'`` (always initialised to 0);
        * ``grouped_files`` is the set of filenames that belong to one of the
          returned groups. A lone ``<base>_page1.png`` is NOT included, so the
          set always agrees with ``image_groups_metadata``.
    """
    import re

    # basename_pageN.extension; IGNORECASE applies to the whole pattern, so
    # both the "_page" marker and the extension match in any letter case.
    page_pattern = re.compile(
        r'^(.+)_page(\d+)\.(png|jpg|jpeg|tiff|tif|bmp)$',
        re.IGNORECASE,
    )

    # base name -> list of (page number, filename) in upload order
    pages_by_base = {}
    for filename in images_dict:
        match = page_pattern.match(filename)
        if match is None:
            continue
        base_name, page_num, _ext = match.groups()
        pages_by_base.setdefault(base_name, []).append((int(page_num), filename))

    image_groups_metadata = {}
    grouped_files = set()
    for base_name, pages in pages_by_base.items():
        # A single matching file is just a regular image, not a group.
        if len(pages) < 2:
            continue

        # Sort on the page number only; the sort is stable, so files that
        # share a page number keep their upload order.
        pages.sort(key=lambda page: page[0])
        filenames = [name for _, name in pages]

        image_groups_metadata[base_name] = {
            'pages': [images_dict[name] for name in filenames],
            'filenames': filenames,
            'total_pages': len(filenames),
            'current_page': 0,
        }
        grouped_files.update(filenames)

    return image_groups_metadata, grouped_files
285
 
286
  def swap_sender_recipient_details(index):
287
  """Swap sender and recipient details"""
 
318
  st.session_state.images = {}
319
  if 'pdf_metadata' not in st.session_state:
320
  st.session_state.pdf_metadata = {}
321
+ if 'image_groups_metadata' not in st.session_state:
322
+ st.session_state.image_groups_metadata = {}
323
  if 'current_page_num' not in st.session_state:
324
  st.session_state.current_page_num = {}
325
  if 'modified_indices' not in st.session_state:
 
400
  if 'file_names' in st.session_state.edited_data[index]:
401
  del st.session_state.edited_data[index]['file_names']
402
 
403
+ # Check if it's an image group and update file_name accordingly
404
+ elif base_file_name in st.session_state.image_groups_metadata:
405
+ # It's a multi-page image group - use file_names array
406
+ img_group_meta = st.session_state.image_groups_metadata[base_file_name]
407
+ st.session_state.edited_data[index]['file_names'] = img_group_meta['filenames']
408
+ # Remove old file_name field if it exists (was likely a .pdf in original JSONL)
409
+ if 'file_name' in st.session_state.edited_data[index]:
410
+ del st.session_state.edited_data[index]['file_name']
411
+
412
  st.session_state.data = st.session_state.edited_data.copy()
413
  st.session_state.modified_indices.add(index)
414
 
415
def sync_field_to_data(index, section, field, value, row_idx=None):
    """Sync a field value from a widget into the edited record immediately.

    Args:
        index: Key of the record inside ``st.session_state.edited_data``.
        section: Name of the ``gt_parse`` section to update. The special
            value ``'items'`` targets one row of the ``items`` list.
        field: Field name inside the section (or inside the item row).
        value: New value to store.
        row_idx: Row position inside ``items``; only used when ``section``
            is ``'items'``.

    The record is added to ``st.session_state.modified_indices`` only when a
    value was actually written. For ``'items'``, a missing, negative or
    past-the-end ``row_idx`` is ignored and leaves the record untouched.
    """
    gt_parse = st.session_state.edited_data[index].get('gt_parse', {})

    if section == 'items':
        items = gt_parse.get('items', [])
        # Negative indices would silently address a row counted from the end,
        # so they are rejected together with out-of-range positions.
        if row_idx is None or not 0 <= row_idx < len(items):
            return
        items[row_idx][field] = value
        gt_parse['items'] = items
    else:
        # Create the section on first write.
        gt_parse.setdefault(section, {})[field] = value

    # Store back explicitly: gt_parse may be a fresh dict when the record had
    # no 'gt_parse' key yet.
    st.session_state.edited_data[index]['gt_parse'] = gt_parse
    st.session_state.modified_indices.add(index)
431
+
432
  def activate_ocr_field(section, field, row_idx=None):
433
  """Activate OCR for a specific field"""
434
  if (st.session_state.ocr_active_section == section and
 
447
  expander_key = f"line_item_expander_{current_idx}_{row_idx}"
448
  st.session_state[expander_key] = True
449
 
 
 
 
450
  def is_ocr_active(section, field, row_idx=None):
451
  """Check if this OCR button is currently active"""
452
  return (st.session_state.ocr_active_section == section and
 
506
  st.session_state.images = images_dict
507
  st.session_state.pdf_metadata = pdf_metadata
508
 
509
+ # Detect multi-page image groups (e.g., invoice01_page1.png, invoice01_page2.png)
510
+ image_groups_metadata, grouped_files = detect_image_groups(images_dict)
511
+ st.session_state.image_groups_metadata = image_groups_metadata
512
+
513
+ # Initialize current page for PDFs and image groups
514
  for filename in pdf_metadata.keys():
515
  if filename not in st.session_state.current_page_num:
516
  st.session_state.current_page_num[filename] = 0
517
 
518
+ for base_name in image_groups_metadata.keys():
519
+ if base_name not in st.session_state.current_page_num:
520
+ st.session_state.current_page_num[base_name] = 0
521
+
522
  if st.session_state.data is not None:
523
  gt_file_names = []
524
  for rec in st.session_state.data:
 
533
  if not fname:
534
  continue
535
 
536
+ # Create a base name by stripping common extensions
537
+ fname_base = fname
538
+ for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
539
+ if fname.lower().endswith(ext.lower()):
540
+ fname_base = fname[:-len(ext)]
541
+ break
542
+
543
+ # Check direct match
544
  if fname in images_dict:
545
  matched_images.add(fname)
546
+ # Check base name in image groups (handles PDF converted to multi-page PNGs)
547
+ elif fname_base in image_groups_metadata:
548
+ matched_images.add(fname)
549
+ # Check full name in image groups
550
+ elif fname in image_groups_metadata:
551
+ matched_images.add(fname)
552
  else:
553
  found = False
554
+ # Try with extensions
555
  for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
556
  if fname + ext in images_dict:
557
  matched_images.add(fname)
 
559
  break
560
 
561
  if not found:
562
+ # Try matching base name in uploaded files
563
  for uploaded_name in images_dict.keys():
564
  uploaded_base = uploaded_name.rsplit('.', 1)[0]
565
+ if uploaded_base == fname or uploaded_base == fname_base:
566
  matched_images.add(fname)
567
  found = True
568
  break
 
571
  if fname and fname not in matched_images:
572
  unmatched_gt_files.append(fname)
573
 
574
+ st.success(f"βœ… Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!")
575
  st.info(f"πŸ”Ž Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}")
576
 
577
  if unmatched_gt_files:
 
582
  else:
583
  st.success("βœ… All JSONL file names matched to files!")
584
  else:
585
+ st.success(f"βœ… Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!")
586
  st.info("ℹ️ Upload a JSONL file to see how many files match the ground truth 'file_name' field.")
587
 
588
  if st.session_state.data is not None:
 
696
 
697
  # LEFT SIDE: Image Display with OCR Canvas
698
  with left_col:
699
+ # Use helper function to get base file name
700
+ file_name = get_base_filename(current_record)
701
+
702
+ if file_name:
703
+ # Create base name by stripping extensions
704
+ file_name_base = file_name
705
+ for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
706
+ if file_name.lower().endswith(ext.lower()):
707
+ file_name_base = file_name[:-len(ext)]
708
+ break
709
 
710
+ actual_file_name = None
711
+ # First check for direct match
712
+ if file_name in st.session_state.images:
713
+ actual_file_name = file_name
714
+ # Check if base name matches an image group (handles PDF converted to images)
715
+ elif file_name_base in st.session_state.image_groups_metadata:
716
+ actual_file_name = file_name_base # Use base name for image groups
717
+ # Check if full name is an image group
718
+ elif file_name in st.session_state.image_groups_metadata:
719
+ actual_file_name = file_name # Use as-is for image groups
720
+ else:
721
+ # Try with extensions
722
+ for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
723
+ if file_name + ext in st.session_state.images:
724
+ actual_file_name = file_name + ext
725
+ break
726
+
727
+ if not actual_file_name:
728
+ # Try matching base name
729
+ for uploaded_name in st.session_state.images.keys():
730
+ uploaded_base = uploaded_name.rsplit('.', 1)[0]
731
+ if uploaded_base == file_name or uploaded_base == file_name_base:
732
+ actual_file_name = uploaded_name
733
  break
734
+
735
+ if actual_file_name:
736
+ is_pdf = actual_file_name in st.session_state.pdf_metadata
737
+ is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata
 
 
 
738
 
739
+ # Determine which key to use for image group
740
+ image_group_key = None
741
+ if is_image_group:
742
+ if actual_file_name in st.session_state.image_groups_metadata:
743
+ image_group_key = actual_file_name
744
+ else:
745
+ image_group_key = file_name_base
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
 
747
+ if is_pdf:
748
+ pdf_meta = st.session_state.pdf_metadata[actual_file_name]
749
+ total_pages = pdf_meta['total_pages']
750
+ current_page = st.session_state.current_page_num.get(actual_file_name, 0)
751
+
752
+ col_prev, col_info, col_next = st.columns([1, 2, 1])
753
+
754
+ with col_prev:
755
+ prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{actual_file_name}",
756
+ disabled=(current_page == 0), use_container_width=True)
757
+
758
+ with col_info:
759
+ st.markdown(f"<div style='text-align: center; padding: 5px;'><b>πŸ“„ Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True)
760
+
761
+ with col_next:
762
+ next_clicked = st.button("Next ➑️", key=f"next_page_{selected_file}_{actual_file_name}",
763
+ disabled=(current_page >= total_pages - 1), use_container_width=True)
764
 
765
+ if not st.session_state.navigating_page:
766
+ if prev_clicked:
767
+ st.session_state.navigating_page = True
768
+ st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1)
769
+ st.session_state.canvas_key += 1
770
+ st.session_state.ocr_active_section = None
771
+ st.session_state.ocr_active_field = None
772
+ st.rerun()
773
+ elif next_clicked:
774
+ st.session_state.navigating_page = True
775
+ st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1)
776
+ st.session_state.canvas_key += 1
777
+ st.session_state.ocr_active_section = None
778
+ st.session_state.ocr_active_field = None
779
+ st.rerun()
780
  else:
781
+ st.session_state.navigating_page = False
 
 
 
 
 
 
 
 
 
782
 
783
+ elif is_image_group and image_group_key:
784
+ img_group_meta = st.session_state.image_groups_metadata[image_group_key]
785
+ total_pages = img_group_meta['total_pages']
786
+ current_page = st.session_state.current_page_num.get(image_group_key, 0)
787
 
788
+ col_prev, col_info, col_next = st.columns([1, 2, 1])
 
 
 
 
 
 
 
 
 
 
789
 
790
+ with col_prev:
791
+ prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{image_group_key}",
792
+ disabled=(current_page == 0), use_container_width=True)
793
+
794
+ with col_info:
795
+ st.markdown(f"<div style='text-align: center; padding: 5px;'><b>πŸ–ΌοΈ Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True)
796
+
797
+ with col_next:
798
+ next_clicked = st.button("Next ➑️", key=f"next_page_{selected_file}_{image_group_key}",
799
+ disabled=(current_page >= total_pages - 1), use_container_width=True)
800
+
801
+ if not st.session_state.navigating_page:
802
+ if prev_clicked:
803
+ st.session_state.navigating_page = True
804
+ st.session_state.current_page_num[image_group_key] = max(0, current_page - 1)
805
+ st.session_state.canvas_key += 1
806
+ st.session_state.ocr_active_section = None
807
+ st.session_state.ocr_active_field = None
808
+ st.rerun()
809
+ elif next_clicked:
810
+ st.session_state.navigating_page = True
811
+ st.session_state.current_page_num[image_group_key] = min(total_pages - 1, current_page + 1)
812
+ st.session_state.canvas_key += 1
813
+ st.session_state.ocr_active_section = None
814
+ st.session_state.ocr_active_field = None
815
+ st.rerun()
816
+ else:
817
+ st.session_state.navigating_page = False
818
+
819
+ if actual_file_name:
820
+ is_pdf = actual_file_name in st.session_state.pdf_metadata
821
+ is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata
822
+
823
+ # Determine which key to use for image group
824
+ image_group_key = None
825
+ if is_image_group:
826
+ if actual_file_name in st.session_state.image_groups_metadata:
827
+ image_group_key = actual_file_name
828
+ else:
829
+ image_group_key = file_name_base
830
+
831
+ if is_pdf:
832
+ current_page = st.session_state.current_page_num.get(actual_file_name, 0)
833
+ pdf_meta = st.session_state.pdf_metadata[actual_file_name]
834
+ current_image = pdf_meta['pages'][current_page]
835
+ elif is_image_group and image_group_key:
836
+ current_page = st.session_state.current_page_num.get(image_group_key, 0)
837
+ img_group_meta = st.session_state.image_groups_metadata[image_group_key]
838
+ current_image = img_group_meta['pages'][current_page]
839
+ else:
840
+ current_image = st.session_state.images[actual_file_name]
841
+ else:
842
+ st.error(f"❌ File '{file_name}' not found in uploaded files")
843
+ st.info("πŸ’‘ Available files:")
844
+ with st.expander("Show available files"):
845
+ for img_name in list(st.session_state.images.keys())[:20]:
846
+ st.text(f" β€’ {img_name}")
847
+ if len(st.session_state.images) > 20:
848
+ st.text(f" ... and {len(st.session_state.images) - 20} more")
849
+ current_image = None
850
+
851
+ if current_image:
852
+ scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image, max_width=900, max_height=1100)
853
+
854
+ # Wrap canvas in scrollable container
855
+ st.markdown(f'<div class="image-scroll-container" style="max-height: {scaled_image.height + 40}px;">', unsafe_allow_html=True)
856
+
857
+ canvas_result = st_canvas(
858
+ fill_color="rgba(255, 165, 0, 0.3)",
859
+ stroke_width=2,
860
+ stroke_color="#FF0000",
861
+ background_image=scaled_image,
862
+ update_streamlit=True,
863
+ height=scaled_image.height,
864
+ width=scaled_image.width,
865
+ drawing_mode="rect",
866
+ key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
867
+ )
868
+
869
+ st.markdown('</div>', unsafe_allow_html=True)
870
+
871
+ if canvas_result.json_data is not None and st.session_state.ocr_active_field:
872
+ objects = canvas_result.json_data.get("objects", [])
873
+ if len(objects) > 0:
874
+ rect = objects[-1]
875
+
876
+ bbox = [
877
+ (rect["left"] - paste_x) / scale_ratio,
878
+ (rect["top"] - paste_y) / scale_ratio,
879
+ (rect["left"] + rect["width"] - paste_x) / scale_ratio,
880
+ (rect["top"] + rect["height"] - paste_y) / scale_ratio
881
+ ]
882
+
883
+ with st.spinner("Performing OCR..."):
884
+ ocr_text = perform_ocr(current_image, bbox)
885
+
886
+ if ocr_text and not ocr_text.startswith("OCR Error"):
887
+ st.success(f"βœ… OCR Result: {ocr_text}")
888
 
889
+ gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
 
890
 
891
+ if st.session_state.ocr_active_section == 'items':
892
+ items = gt_parse.get('items', [])
893
+ row_idx = st.session_state.ocr_line_item_row
894
+ if row_idx is not None and row_idx < len(items):
895
+ items[row_idx][st.session_state.ocr_active_field] = ocr_text
896
+ gt_parse['items'] = items
 
 
 
 
 
897
 
898
+ expander_key = f"line_item_expander_{selected_file}_{row_idx}"
899
+ st.session_state[expander_key] = True
 
 
 
 
 
 
 
 
 
 
 
 
900
  else:
901
+ section = st.session_state.ocr_active_section
902
+ field = st.session_state.ocr_active_field
903
+ if section not in gt_parse:
904
+ gt_parse[section] = {}
905
+ gt_parse[section][field] = ocr_text
906
+
907
+ st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
908
+ st.session_state.modified_indices.add(selected_file)
909
+
910
+ st.session_state.canvas_key += 1
911
+ st.rerun()
912
+ else:
913
+ st.error(ocr_text)
914
+ else:
915
+ st.warning("No file name specified in record")
916
 
917
  # RIGHT SIDE: Editable Details
918
  with right_col:
919
+ # Create scrollable container for form fields
920
+ st.markdown('<div style="max-height: 85vh; overflow-y: auto; overflow-x: hidden; padding-right: 10px;">', unsafe_allow_html=True)
921
+
922
+ st.markdown("### πŸ“ Invoice Details")
923
+
924
+ gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
925
+
926
+ tab1, tab2, tab3, tab4 = st.tabs([
927
+ "πŸ“„ Invoice Details",
928
+ "πŸ‘₯ Party Details",
929
+ "🏦 Bank Details",
930
+ "πŸ“‹ Line Items"
931
+ ])
932
+
933
+ # TAB 1: Header (includes invoice details + summary fields)
934
+ with tab1:
935
+ header = gt_parse.get('header', {})
936
+ summary = gt_parse.get('summary', {})
937
 
938
+ # Invoice No
939
+ col_input, col_btn = st.columns([5, 1])
940
+ with col_input:
941
+ new_value = st.text_input(
942
+ "Invoice No",
943
+ value=header.get('invoice_no', ''),
944
+ key=f"invoice_no_{selected_file}",
945
+ on_change=lambda: sync_field_to_data(selected_file, 'header', 'invoice_no',
946
+ st.session_state[f"invoice_no_{selected_file}"])
947
+ )
948
+ with col_btn:
949
+ st.markdown("<br>", unsafe_allow_html=True)
950
+ if st.button("πŸ”", key=f"ocr_invoice_no_{selected_file}",
951
+ type="primary" if is_ocr_active('header', 'invoice_no') else "secondary"):
952
+ activate_ocr_field('header', 'invoice_no')
953
 
954
+ # Invoice Date
955
+ col_input, col_btn = st.columns([5, 1])
956
+ with col_input:
957
+ new_value = st.text_input(
958
+ "Invoice Date",
959
+ value=header.get('invoice_date', ''),
960
+ key=f"invoice_date_{selected_file}",
961
+ on_change=lambda: sync_field_to_data(selected_file, 'header', 'invoice_date',
962
+ st.session_state[f"invoice_date_{selected_file}"])
963
+ )
964
+ with col_btn:
965
+ st.markdown("<br>", unsafe_allow_html=True)
966
+ if st.button("πŸ”", key=f"ocr_invoice_date_{selected_file}",
967
+ type="primary" if is_ocr_active('header', 'invoice_date') else "secondary"):
968
+ activate_ocr_field('header', 'invoice_date')
969
 
970
+ # Payment Terms
971
+ col_input, col_btn = st.columns([5, 1])
972
+ with col_input:
973
+ new_value = st.text_input(
974
+ "Payment Terms",
975
+ value=header.get('payment_terms', ''),
976
+ key=f"payment_terms_{selected_file}",
977
+ on_change=lambda: sync_field_to_data(selected_file, 'header', 'payment_terms',
978
+ st.session_state[f"payment_terms_{selected_file}"])
979
+ )
980
+ with col_btn:
981
+ st.markdown("<br>", unsafe_allow_html=True)
982
+ if st.button("πŸ”", key=f"ocr_payment_terms_{selected_file}",
983
+ type="primary" if is_ocr_active('header', 'payment_terms') else "secondary"):
984
+ activate_ocr_field('header', 'payment_terms')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
985
 
986
+ # Due Date
987
+ col_input, col_btn = st.columns([5, 1])
988
+ with col_input:
989
+ new_value = st.text_input(
990
+ "Due Date",
991
+ value=header.get('due_date', ''),
992
+ key=f"due_date_{selected_file}",
993
+ on_change=lambda: sync_field_to_data(selected_file, 'header', 'due_date',
994
+ st.session_state[f"due_date_{selected_file}"])
995
+ )
996
+ with col_btn:
997
+ st.markdown("<br>", unsafe_allow_html=True)
998
+ if st.button("πŸ”", key=f"ocr_due_date_{selected_file}",
999
+ type="primary" if is_ocr_active('header', 'due_date') else "secondary"):
1000
+ activate_ocr_field('header', 'due_date')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1001
 
1002
+ # Subtotal
1003
+ col_input, col_btn = st.columns([5, 1])
1004
+ with col_input:
1005
+ new_value = st.text_input(
1006
+ "Subtotal",
1007
+ value=summary.get('subtotal', ''),
1008
+ key=f"subtotal_{selected_file}",
1009
+ on_change=lambda: sync_field_to_data(selected_file, 'summary', 'subtotal',
1010
+ st.session_state[f"subtotal_{selected_file}"])
1011
+ )
1012
+ with col_btn:
1013
+ st.markdown("<br>", unsafe_allow_html=True)
1014
+ if st.button("πŸ”", key=f"ocr_subtotal_{selected_file}",
1015
+ type="primary" if is_ocr_active('summary', 'subtotal') else "secondary"):
1016
+ activate_ocr_field('summary', 'subtotal')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1017
 
1018
+ # Tax Rate
1019
+ col_input, col_btn = st.columns([5, 1])
1020
+ with col_input:
1021
+ new_value = st.text_input(
1022
+ "Tax Rate",
1023
+ value=summary.get('tax_rate', ''),
1024
+ key=f"tax_rate_{selected_file}",
1025
+ on_change=lambda: sync_field_to_data(selected_file, 'summary', 'tax_rate',
1026
+ st.session_state[f"tax_rate_{selected_file}"])
1027
+ )
1028
+ with col_btn:
1029
+ st.markdown("<br>", unsafe_allow_html=True)
1030
+ if st.button("πŸ”", key=f"ocr_tax_rate_{selected_file}",
1031
+ type="primary" if is_ocr_active('summary', 'tax_rate') else "secondary"):
1032
+ activate_ocr_field('summary', 'tax_rate')
1033
+
1034
+ # Tax Amount
1035
+ col_input, col_btn = st.columns([5, 1])
1036
+ with col_input:
1037
+ new_value = st.text_input(
1038
+ "Tax Amount",
1039
+ value=summary.get('tax_amount', ''),
1040
+ key=f"tax_amount_{selected_file}",
1041
+ on_change=lambda: sync_field_to_data(selected_file, 'summary', 'tax_amount',
1042
+ st.session_state[f"tax_amount_{selected_file}"])
1043
+ )
1044
+ with col_btn:
1045
+ st.markdown("<br>", unsafe_allow_html=True)
1046
+ if st.button("πŸ”", key=f"ocr_tax_amount_{selected_file}",
1047
+ type="primary" if is_ocr_active('summary', 'tax_amount') else "secondary"):
1048
+ activate_ocr_field('summary', 'tax_amount')
1049
+
1050
+ # Discount Rate
1051
+ col_input, col_btn = st.columns([5, 1])
1052
+ with col_input:
1053
+ new_value = st.text_input(
1054
+ "Discount Rate",
1055
+ value=summary.get('discount_rate', ''),
1056
+ key=f"discount_rate_{selected_file}",
1057
+ on_change=lambda: sync_field_to_data(selected_file, 'summary', 'discount_rate',
1058
+ st.session_state[f"discount_rate_{selected_file}"])
1059
+ )
1060
+ with col_btn:
1061
+ st.markdown("<br>", unsafe_allow_html=True)
1062
+ if st.button("πŸ”", key=f"ocr_discount_rate_{selected_file}",
1063
+ type="primary" if is_ocr_active('summary', 'discount_rate') else "secondary"):
1064
+ activate_ocr_field('summary', 'discount_rate')
1065
+
1066
+ # Total Discount Amount
1067
+ col_input, col_btn = st.columns([5, 1])
1068
+ with col_input:
1069
+ new_value = st.text_input(
1070
+ "Total Discount Amount",
1071
+ value=summary.get('total_discount_amount', ''),
1072
+ key=f"total_discount_amount_{selected_file}",
1073
+ on_change=lambda: sync_field_to_data(selected_file, 'summary', 'total_discount_amount',
1074
+ st.session_state[f"total_discount_amount_{selected_file}"])
1075
+ )
1076
+ with col_btn:
1077
+ st.markdown("<br>", unsafe_allow_html=True)
1078
+ if st.button("πŸ”", key=f"ocr_total_discount_amount_{selected_file}",
1079
+ type="primary" if is_ocr_active('summary', 'total_discount_amount') else "secondary"):
1080
+ activate_ocr_field('summary', 'total_discount_amount')
1081
+
1082
+ # Total Amount
1083
+ col_input, col_btn = st.columns([5, 1])
1084
+ with col_input:
1085
+ new_value = st.text_input(
1086
+ "Total Amount",
1087
+ value=summary.get('total_amount', ''),
1088
+ key=f"total_amount_{selected_file}",
1089
+ on_change=lambda: sync_field_to_data(selected_file, 'summary', 'total_amount',
1090
+ st.session_state[f"total_amount_{selected_file}"])
1091
+ )
1092
+ with col_btn:
1093
+ st.markdown("<br>", unsafe_allow_html=True)
1094
+ if st.button("πŸ”", key=f"ocr_total_amount_{selected_file}",
1095
+ type="primary" if is_ocr_active('summary', 'total_amount') else "secondary"):
1096
+ activate_ocr_field('summary', 'total_amount')
1097
+
1098
+ # Currency
1099
+ col_input, col_btn = st.columns([5, 1])
1100
+ with col_input:
1101
+ new_value = st.text_input(
1102
+ "Currency",
1103
+ value=summary.get('currency', ''),
1104
+ key=f"currency_{selected_file}",
1105
+ on_change=lambda: sync_field_to_data(selected_file, 'summary', 'currency',
1106
+ st.session_state[f"currency_{selected_file}"])
1107
+ )
1108
+ with col_btn:
1109
+ st.markdown("<br>", unsafe_allow_html=True)
1110
+ if st.button("πŸ”", key=f"ocr_currency_{selected_file}",
1111
+ type="primary" if is_ocr_active('summary', 'currency') else "secondary"):
1112
+ activate_ocr_field('summary', 'currency')
1113
+
1114
+ # TAB 2: Party Details (without bank details)
1115
+ with tab2:
1116
+ # SWAP BUTTON
1117
+ col1, col2, col3 = st.columns([1, 2, 1])
1118
+ with col2:
1119
+ if st.button("πŸ”„ Swap Sender ↔ Recipient", key=f"swap_btn_{selected_file}",
1120
+ type="primary", use_container_width=True):
1121
+ if not st.session_state.just_swapped:
1122
+ st.session_state.just_swapped = True
1123
+ swap_sender_recipient_details(selected_file)
1124
+ st.rerun()
1125
+
1126
+ if st.session_state.just_swapped:
1127
+ st.session_state.just_swapped = False
1128
+
1129
+ st.markdown("**Sender Details**")
1130
+ header = gt_parse.get('header', {})
1131
+
1132
+ # Sender Name
1133
+ col_input, col_btn = st.columns([5, 1])
1134
+ with col_input:
1135
+ new_value = st.text_input(
1136
+ "Sender Name",
1137
+ value=header.get('sender_name', ''),
1138
+ key=f"sender_name_{selected_file}",
1139
+ on_change=lambda: sync_field_to_data(selected_file, 'header', 'sender_name',
1140
+ st.session_state[f"sender_name_{selected_file}"])
1141
+ )
1142
+ with col_btn:
1143
+ st.markdown("<br>", unsafe_allow_html=True)
1144
+ if st.button("πŸ”", key=f"ocr_sender_name_{selected_file}",
1145
+ type="primary" if is_ocr_active('header', 'sender_name') else "secondary"):
1146
+ activate_ocr_field('header', 'sender_name')
1147
+
1148
+ # Sender Address
1149
+ col_input, col_btn = st.columns([5, 1])
1150
+ with col_input:
1151
+ new_value = st.text_area(
1152
+ "Sender Address",
1153
+ value=header.get('sender_addr', ''),
1154
+ key=f"sender_addr_{selected_file}",
1155
+ height=60,
1156
+ on_change=lambda: sync_field_to_data(selected_file, 'header', 'sender_addr',
1157
+ st.session_state[f"sender_addr_{selected_file}"])
1158
+ )
1159
+ with col_btn:
1160
+ st.markdown("<br>", unsafe_allow_html=True)
1161
+ if st.button("πŸ”", key=f"ocr_sender_addr_{selected_file}",
1162
+ type="primary" if is_ocr_active('header', 'sender_addr') else "secondary"):
1163
+ activate_ocr_field('header', 'sender_addr')
1164
+
1165
+ st.markdown("**Recipient Details**")
1166
+
1167
+ # Recipient Name
1168
+ col_input, col_btn = st.columns([5, 1])
1169
+ with col_input:
1170
+ new_value = st.text_input(
1171
+ "Recipient Name",
1172
+ value=header.get('rcpt_name', ''),
1173
+ key=f"rcpt_name_{selected_file}",
1174
+ on_change=lambda: sync_field_to_data(selected_file, 'header', 'rcpt_name',
1175
+ st.session_state[f"rcpt_name_{selected_file}"])
1176
+ )
1177
+ with col_btn:
1178
+ st.markdown("<br>", unsafe_allow_html=True)
1179
+ if st.button("πŸ”", key=f"ocr_rcpt_name_{selected_file}",
1180
+ type="primary" if is_ocr_active('header', 'rcpt_name') else "secondary"):
1181
+ activate_ocr_field('header', 'rcpt_name')
1182
+
1183
+ # Recipient Address
1184
+ col_input, col_btn = st.columns([5, 1])
1185
+ with col_input:
1186
+ new_value = st.text_area(
1187
+ "Recipient Address",
1188
+ value=header.get('rcpt_addr', ''),
1189
+ key=f"rcpt_addr_{selected_file}",
1190
+ height=60,
1191
+ on_change=lambda: sync_field_to_data(selected_file, 'header', 'rcpt_addr',
1192
+ st.session_state[f"rcpt_addr_{selected_file}"])
1193
+ )
1194
+ with col_btn:
1195
+ st.markdown("<br>", unsafe_allow_html=True)
1196
+ if st.button("πŸ”", key=f"ocr_rcpt_addr_{selected_file}",
1197
+ type="primary" if is_ocr_active('header', 'rcpt_addr') else "secondary"):
1198
+ activate_ocr_field('header', 'rcpt_addr')
1199
+
1200
+ # TAB 3: Bank Details
1201
with tab3:
    # Bank-detail editor: every field is stored under gt_parse['header'] and is
    # rendered as one row holding a text input and an OCR trigger button.
    # NOTE(review): nesting was reconstructed from a flattened source; confirm
    # the indentation level against the surrounding file.
    header = gt_parse.get('header', {})

    # (key inside gt_parse['header'], label shown on the input).
    # The tuple order is the on-screen order.
    bank_fields = (
        ('bank_iban', "Bank IBAN"),
        ('bank_name', "Bank Name"),
        ('bank_acc_no', "Bank Account No"),
        ('bank_routing', "Bank Routing"),
        ('bank_swift', "Bank SWIFT"),
        ('bank_acc_name', "Bank Account Name"),
        ('bank_branch', "Bank Branch"),
    )

    for field, label in bank_fields:
        widget_key = f"{field}_{selected_file}"
        col_input, col_btn = st.columns([5, 1])

        with col_input:
            st.text_input(
                label,
                value=header.get(field, ''),
                key=widget_key,
                # field/widget_key are bound as defaults so each callback keeps
                # its own values rather than the loop's last ones. The widget
                # value is read from session state when the callback fires.
                on_change=lambda field=field, widget_key=widget_key: sync_field_to_data(
                    selected_file, 'header', field, st.session_state[widget_key]),
            )

        with col_btn:
            # Presumably a vertical spacer so the button sits beside the input
            # box rather than beside its label.
            st.markdown("<br>", unsafe_allow_html=True)
            # The button is drawn as "primary" while this field is the active
            # OCR target.
            if st.button("🔍", key=f"ocr_{field}_{selected_file}",
                         type="primary" if is_ocr_active('header', field) else "secondary"):
                activate_ocr_field('header', field)
1315
+
1316
+ # TAB 4: Items
1317
with tab4:
    # Line-item editor: add/remove controls, one expander per item with an
    # input + OCR button for every item field, then a read-only summary table.
    # NOTE(review): nesting was reconstructed from a flattened source; confirm
    # the indentation level (and the placement of st.rerun()) against the file.
    current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
    items = current_gt_parse.get('items', [])

    # Add/Remove row buttons
    col_add, col_remove = st.columns([1, 1])

    with col_add:
        if st.button("➕ Add New Item", key=f"add_item_{selected_file}", use_container_width=True):
            # button_clicked is a guard flag; it is cleared again further down
            # on the first run that gets past the buttons.
            if not st.session_state.button_clicked:
                st.session_state.button_clicked = True
                new_item = {
                    "descriptions": "", "SKU": "", "quantity": "",
                    "unit_price": "", "amount": "", "discount_rate_per_item": "",
                    "discount_amount_per_item": "", "tax_rate_per_item": "",
                    "tax_amount_per_item": "", "Line_total": ""
                }
                current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
                current_items = current_gt_parse.get('items', [])
                current_items.append(new_item)
                current_gt_parse['items'] = current_items
                st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
                st.session_state.modified_indices.add(selected_file)

                # Mark the freshly added item's expander as open.
                new_idx = len(current_items) - 1
                expander_key_new = f"line_item_expander_{selected_file}_{new_idx}"
                st.session_state[expander_key_new] = True

                st.rerun()

    with col_remove:
        if st.button("➖ Remove Last Item", key=f"remove_item_{selected_file}",
                     disabled=(len(items) == 0), use_container_width=True):
            if not st.session_state.button_clicked and len(items) > 0:
                st.session_state.button_clicked = True
                current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
                current_items = current_gt_parse.get('items', [])
                item_count = len(current_items)
                current_items.pop()
                current_gt_parse['items'] = current_items
                st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
                st.session_state.modified_indices.add(selected_file)

                # Drop the expander flag that belonged to the removed item so a
                # later item at the same index does not inherit it.
                popped_idx = item_count - 1
                expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
                if expander_key_popped in st.session_state:
                    del st.session_state[expander_key_popped]

                st.rerun()

    if st.session_state.button_clicked:
        st.session_state.button_clicked = False

    # Re-read the items so the rendering below reflects any add/remove above.
    current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
    items = current_gt_parse.get('items', [])

    # (key inside each item dict, widget-key prefix, input label, multiline?).
    # The prefixes differ from the field names for some fields and must stay
    # as-is because they are part of the Streamlit widget keys.
    item_fields = (
        ('descriptions', 'desc', "Descriptions", True),
        ('SKU', 'sku', "SKU", False),
        ('quantity', 'qty', "Quantity", False),
        ('unit_price', 'unit_price', "Unit Price", False),
        ('amount', 'amount', "Amount", False),
        ('discount_rate_per_item', 'discount_rate_per_item', "Discount Rate Per Item", False),
        ('discount_amount_per_item', 'discount_amount_per_item', "Discount Amount Per Item", False),
        ('tax_rate_per_item', 'tax_rate_per_item', "Tax Rate Per Item", False),
        ('tax_amount_per_item', 'tax_amount_per_item', "Tax Amount Per Item", False),
        ('Line_total', 'line_total', "Line Total", False),
    )

    if items:
        for idx, item in enumerate(items):
            expander_key = f"line_item_expander_{selected_file}_{idx}"
            expanded_default = st.session_state.get(expander_key, False)

            # Fall back to 'N/A' for a missing, None or empty description and
            # coerce to str so slicing cannot fail on non-string values.
            title_text = str(item.get('descriptions') or 'N/A')[:30]

            with st.expander(f"**Item {idx + 1}** - {title_text}", expanded=expanded_default):
                for field, key_prefix, label, multiline in item_fields:
                    widget_key = f"{key_prefix}_{selected_file}_{idx}"

                    # Loop variables are bound as lambda defaults so each
                    # widget's callback keeps its own index/field/key. The
                    # widget value is read from session state at call time.
                    def_sync = (
                        lambda i=idx, field=field, widget_key=widget_key: sync_field_to_data(
                            selected_file, 'items', field, st.session_state[widget_key], i)
                    )

                    col_input, col_btn = st.columns([5, 1])
                    with col_input:
                        if multiline:
                            st.text_area(
                                label,
                                value=item.get(field, ''),
                                key=widget_key,
                                height=60,
                                on_change=def_sync,
                            )
                        else:
                            st.text_input(
                                label,
                                value=item.get(field, ''),
                                key=widget_key,
                                on_change=def_sync,
                            )

                    with col_btn:
                        # Presumably a vertical spacer to align the button with
                        # the input box.
                        st.markdown("<br>", unsafe_allow_html=True)
                        if st.button("🔍", key=f"ocr_{key_prefix}_{selected_file}_{idx}",
                                     type="primary" if is_ocr_active('items', field, idx) else "secondary"):
                            # Record that this item's expander should be open
                            # before handing over to the OCR activation.
                            st.session_state[expander_key] = True
                            activate_ocr_field('items', field, idx)

        st.markdown("**📊 Items Summary Table**")

        # Read-only overview of all items, numbered from 1.
        df = pd.DataFrame(items)
        df.index = df.index + 1
        df.index.name = 'SL No'

        st.dataframe(
            df,
            use_container_width=True,
            height=300
        )
    else:
        st.info("No items. Click '➕ Add New Item' to add a new item.")
1562
+
1563
+ # Save button
1564
# Save controls: a "Save Changes" button that persists the current file's
# edits, followed by the success banner and the closing tag of the wrapper div.
# NOTE(review): nesting was reconstructed from a flattened source; confirm
# which statements belong inside `with col1:` against the file.
col1, col2 = st.columns([1, 1])
with col1:
    if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
        # just_saved is a guard flag; it is cleared again right below on the
        # first run that gets past the button.
        if not st.session_state.just_saved:
            st.session_state.just_saved = True
            auto_save(selected_file)
            st.session_state.save_message = "✅ Changes saved successfully!"
            # Timestamp of the save; not read in this section.
            st.session_state.save_message_time = time.time()
            st.rerun()

if st.session_state.just_saved:
    st.session_state.just_saved = False

# Show the stored message, if any, as a success banner.
if st.session_state.save_message:
    st.success(st.session_state.save_message)

st.markdown('</div>', unsafe_allow_html=True)  # Close scrollable container