diff --git "a/src/streamlit_app.py" "b/src/streamlit_app.py"
--- "a/src/streamlit_app.py"
+++ "b/src/streamlit_app.py"
@@ -1,5 +1,16 @@
import os
from pathlib import Path
+import streamlit as st
+import json
+import io
+from PIL import Image
+import time
+import pandas as pd
+from streamlit_drawable_canvas import st_canvas
+import pytesseract
+import numpy as np
+from datetime import datetime
+import fitz # PyMuPDF for PDF handling
# -----------------------------
# Environment hardening (HF Spaces, /.cache issue)
@@ -18,18 +29,6 @@ try:
except Exception as e:
print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")
-import streamlit as st
-import json
-import io
-from PIL import Image
-import time
-import pandas as pd
-from streamlit_drawable_canvas import st_canvas
-import pytesseract
-import numpy as np
-from datetime import datetime
-import fitz # PyMuPDF for PDF handling
-
# Set Tesseract path - auto-detect based on OS
if os.name == 'nt': # Windows
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
@@ -42,93 +41,85 @@ st.set_page_config(page_title="Remittance Data Viewer", layout="wide")
# Custom CSS
st.markdown("""
""", unsafe_allow_html=True)
@@ -141,8 +132,80 @@ def load_jsonl(file):
data.append(json.loads(line))
return data
-def save_to_jsonl(data):
+def get_file_names_from_record(record, actual_file_name):
+    """Return the exported image name(s) for a record.
+
+    Args:
+        record: A JSONL record (dict) that may carry ``file_name`` and/or
+            ``file_names``.
+        actual_file_name: Name of the matching uploaded file, or a falsy
+            value when no uploaded file was found for the record.
+
+    Returns:
+        A list of ``<base>_page<N>.png`` names when the uploaded file is a
+        PDF with more than one page, otherwise a single ``.png`` name.
+        Without an uploaded file the record's existing name(s) are returned
+        unchanged.
+    """
+    if not actual_file_name:
+        # Nothing to derive names from: keep what the record already has.
+        # Prefer a non-empty file_name, then a non-empty file_names, so an
+        # empty file_name does not hide a populated file_names.
+        for key in ('file_name', 'file_names'):
+            if record.get(key):
+                return record[key]
+        # Neither field is usable: same result as the plain nested lookup.
+        return record.get('file_name', record.get('file_names', []))
+
+    base_name = os.path.splitext(actual_file_name)[0]
+
+    # A name present in pdf_metadata is treated as an uploaded PDF.
+    if actual_file_name in st.session_state.pdf_metadata:
+        total_pages = st.session_state.pdf_metadata[actual_file_name]['total_pages']
+        if total_pages > 1:
+            # One 1-based page image name per PDF page
+            return [f"{base_name}_page{page}.png" for page in range(1, total_pages + 1)]
+        # Single page PDF
+        return f"{base_name}.png"
+
+    # Regular image file: a name already ending in .png (any case) is kept
+    # verbatim, every other extension is swapped for .png.
+    if actual_file_name.lower().endswith('.png'):
+        return actual_file_name
+    return f"{base_name}.png"
+
+
+def save_to_jsonl(data, transform_filenames=False):
    """Convert data list to JSONL format"""
+    # data: list of JSON-serialisable record dicts.
+    # transform_filenames: when True, each record is re-emitted with its
+    #   file_name / file_names rewritten from the matching uploaded file and
+    #   placed as the first key; the input records are not modified.
+    # Returns one JSON document per line ('' for an empty list).
+    if transform_filenames:
+        transformed_data = []
+        for record in data:
+            # Name used to look up the uploaded file: file_name if set,
+            # otherwise the first entry of file_names. file_names may be a
+            # list or a bare string; indexing a string with [0] would yield
+            # only its first character, so the two shapes are handled apart.
+            original_file_name = record.get('file_name') or ''
+            if not original_file_name:
+                listed_names = record.get('file_names')
+                if isinstance(listed_names, list):
+                    original_file_name = listed_names[0] if listed_names else ''
+                elif listed_names:
+                    original_file_name = listed_names
+
+            # Shared lookup, so export resolves names the same way the
+            # viewer and auto_save do (including pages of multi-page PDFs).
+            actual_file_name = find_actual_file(original_file_name)
+
+            # Get the transformed file name(s)
+            new_file_names = get_file_names_from_record(record, actual_file_name)
+
+            # file_name or file_names goes in as the FIRST field
+            name_key = 'file_names' if isinstance(new_file_names, list) else 'file_name'
+            new_record = {name_key: new_file_names}
+
+            # Then every other field, in its original order
+            for key, value in record.items():
+                if key not in ('file_name', 'file_names'):
+                    new_record[key] = value
+
+            transformed_data.append(new_record)
+        data = transformed_data
+
    jsonl_content = '\n'.join([json.dumps(record) for record in data])
    return jsonl_content
@@ -152,7 +215,6 @@ def pdf_to_images(pdf_file):
pdf_bytes = pdf_file.read()
pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
images = []
-
for page_num in range(pdf_document.page_count):
page = pdf_document[page_num]
# Render page to an image (higher DPI for better quality)
@@ -161,7 +223,6 @@ def pdf_to_images(pdf_file):
img_data = pix.tobytes("png")
img = Image.open(io.BytesIO(img_data))
images.append(img)
-
pdf_document.close()
return images
except Exception as e:
@@ -194,7 +255,6 @@ def scale_image_to_fixed_size(image, max_width=900, max_height=1100):
width_ratio = max_width / image.width
height_ratio = max_height / image.height
ratio = min(width_ratio, height_ratio)
-
new_width = int(image.width * ratio)
new_height = int(image.height * ratio)
@@ -229,6 +289,47 @@ def swap_customer_supplier_details(index):
st.session_state.edited_data[index]['gt_parse'] = gt_parse
st.session_state.modified_indices.add(index)
+def get_display_name(record):
+    """Get display name from record, handling both file_name and file_names.
+
+    A non-empty ``file_name`` wins. Otherwise the first entry of a non-empty
+    ``file_names`` list (or ``file_names`` itself when it is not a list) is
+    used, so an empty ``file_name`` does not hide a populated ``file_names``.
+
+    Returns:
+        The chosen name; the stored (empty) ``file_name`` value when that key
+        exists but nothing usable was found, so callers can still detect a
+        record without a name; ``'Unnamed'`` when the record has no usable
+        name field at all.
+    """
+    file_name = record.get('file_name')
+    if file_name:
+        return file_name
+
+    file_names = record.get('file_names')
+    if file_names:
+        if isinstance(file_names, list):
+            return file_names[0]
+        return file_names
+
+    if 'file_name' in record:
+        # Key present but empty: hand the empty value back unchanged
+        return file_name
+    return 'Unnamed'
+
+def find_actual_file(file_name):
+    """Find the actual uploaded file matching the given file_name.
+
+    Candidates are the keys of ``st.session_state.images``. Matching is tried
+    in this order and the first hit wins:
+
+    1. the exact name;
+    2. the name plus one of the supported extensions;
+    3. an upload whose extension-less name equals ``file_name``;
+    4. an upload whose extension-less name equals ``file_name`` without its
+       own extension (``scan.png`` -> ``scan.jpg`` or ``scan.pdf``);
+    5. a page image name ``<base>_page<N>.png`` -> ``<base>.pdf``.
+
+    Returns:
+        The uploaded file name, or None when ``file_name`` is empty or no
+        uploaded file matches.
+    """
+    if not file_name:
+        return None
+
+    images = st.session_state.images
+
+    # 1. Exact match
+    if file_name in images:
+        return file_name
+
+    # 2. Record name stored without its extension
+    for ext in ('.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp'):
+        candidate = file_name + ext
+        if candidate in images:
+            return candidate
+
+    # 3. Uploaded name without extension equals the record name
+    for uploaded_name in images:
+        if os.path.splitext(uploaded_name)[0] == file_name:
+            return uploaded_name
+
+    # 4. Same base name, different extension. Exported records carry .png
+    #    names even when the upload was a .jpg or a single-page .pdf, and the
+    #    upload page already treats such names as matched.
+    stem = os.path.splitext(file_name)[0]
+    if stem != file_name:
+        for uploaded_name in images:
+            if os.path.splitext(uploaded_name)[0] == stem:
+                return uploaded_name
+
+    # 5. Page image of a multi-page PDF. Split on the LAST "_page" and insist
+    #    on a numeric page suffix, so "my_pages_page3.png" maps to
+    #    "my_pages.pdf" and "doc_pageant.png" is not mistaken for a page.
+    if file_name.endswith('.png'):
+        pdf_base, marker, page_number = stem.rpartition('_page')
+        if marker and page_number.isdigit():
+            pdf_name = pdf_base + '.pdf'
+            if pdf_name in images:
+                return pdf_name
+
+    return None
+
+
# Initialize session state
if 'data' not in st.session_state:
st.session_state.data = None
@@ -272,17 +373,50 @@ if 'navigating_page' not in st.session_state:
def auto_save(index):
    """Automatically save changes to session state and mark as modified"""
    if st.session_state.edited_data:
-        st.session_state.data = st.session_state.edited_data.copy()
+        # Record being saved; it is replaced below, not edited in place
+        current_record = st.session_state.edited_data[index]
+
+        # Name used to look up the uploaded file: file_name if set, otherwise
+        # the first entry of file_names. file_names may be a list or a bare
+        # string; indexing a string with [0] would yield only its first
+        # character, so the two shapes are handled separately.
+        original_file_name = current_record.get('file_name') or ''
+        if not original_file_name:
+            listed_names = current_record.get('file_names')
+            if isinstance(listed_names, list):
+                original_file_name = listed_names[0] if listed_names else ''
+            elif listed_names:
+                original_file_name = listed_names
+
+        # Find actual file
+        actual_file_name = find_actual_file(original_file_name)
+
+        # Name(s) the record should carry from now on
+        new_file_names = get_file_names_from_record(current_record, actual_file_name)
+
+        # New record with file_name/file_names first, then every other field
+        # in its original order (the old name keys are dropped)
+        name_key = 'file_names' if isinstance(new_file_names, list) else 'file_name'
+        new_record = {name_key: new_file_names}
+        for key, value in current_record.items():
+            if key not in ('file_name', 'file_names'):
+                new_record[key] = value
+
+        # Update the record in both the edited list and the data list.
+        # NOTE(review): .copy() is shallow, so nested values such as gt_parse
+        # stay shared between the two lists - confirm that is intended.
+        st.session_state.edited_data[index] = new_record
+        st.session_state.data[index] = new_record.copy()
        st.session_state.modified_indices.add(index)
def activate_ocr_field(section, field, row_idx=None):
"""Activate OCR for a specific field.
- Toggle behavior: if the same field is already active, deactivate it to avoid repeated activations/looping.
- Also ensures the line-item expander stays expanded when OCR is requested.
+ Toggle behavior: if the same field is already active, deactivate it to avoid repeated activations/looping.
+ Also ensures the line-item expander stays expanded when OCR is requested.
"""
# If the requested field is already active, deactivate it (toggle off)
- if (st.session_state.ocr_active_section == section and
- st.session_state.ocr_active_field == field and
+ if (st.session_state.ocr_active_section == section and
+ st.session_state.ocr_active_field == field and
st.session_state.ocr_line_item_row == row_idx):
st.session_state.ocr_active_section = None
st.session_state.ocr_active_field = None
@@ -292,21 +426,21 @@ def activate_ocr_field(section, field, row_idx=None):
st.session_state.ocr_active_section = section
st.session_state.ocr_active_field = field
st.session_state.ocr_line_item_row = row_idx
-
+
# If it's a line-item, mark that expander as expanded so it remains open after rerun
if section == 'Line_items' and row_idx is not None:
current_idx = st.session_state.get('current_index', 0)
expander_key = f"line_item_expander_{current_idx}_{row_idx}"
st.session_state[expander_key] = True
-
+
# Bump canvas_key to ensure canvas is refreshed/cleared when toggling OCR
st.session_state.canvas_key += 1
st.rerun()
def is_ocr_active(section, field, row_idx=None):
"""Check if this OCR button is currently active"""
+ # True only when section, field and line-item row ALL equal the OCR target
+ # currently stored in st.session_state; with row_idx=None the stored row
+ # must be None as well.
- return (st.session_state.ocr_active_section == section and
- st.session_state.ocr_active_field == field and
+ return (st.session_state.ocr_active_section == section and
+ st.session_state.ocr_active_field == field and
st.session_state.ocr_line_item_row == row_idx)
# PAGE 1: Upload Page
@@ -327,9 +461,8 @@ if st.session_state.page == 'upload':
st.error(f"Error loading file: {str(e)}")
st.markdown("**Step 2: Upload Images/PDFs Folder**")
-
uploaded_files = st.file_uploader(
- "Choose image or PDF files",
+ "Choose image or PDF files",
type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'],
accept_multiple_files=True,
help="Select all images and PDFs from your folder at once"
@@ -338,11 +471,9 @@ if st.session_state.page == 'upload':
if uploaded_files:
images_dict = {}
pdf_metadata = {}
-
for file in uploaded_files:
try:
file_ext = file.name.lower().split('.')[-1]
-
if file_ext == 'pdf':
# Convert PDF to images
pdf_images = pdf_to_images(file)
@@ -355,12 +486,10 @@ if st.session_state.page == 'upload':
'total_pages': len(pdf_images),
'current_page': 0
}
- #st.info(f"📄 Converted PDF '{file.name}' ({len(pdf_images)} pages)")
else:
# Handle regular images
image = Image.open(file)
images_dict[file.name] = image
-
except Exception as e:
st.warning(f"Could not load file {file.name}: {str(e)}")
@@ -373,7 +502,17 @@ if st.session_state.page == 'upload':
st.session_state.current_page_num[filename] = 0
if st.session_state.data is not None:
- gt_file_names = [rec.get('file_name', '') for rec in st.session_state.data]
+ # Get all file names from records (handle both file_name and file_names)
+ gt_file_names = []
+ for rec in st.session_state.data:
+ if 'file_name' in rec and rec['file_name']:
+ gt_file_names.append(rec['file_name'])
+ elif 'file_names' in rec and rec['file_names']:
+ if isinstance(rec['file_names'], list):
+ gt_file_names.extend(rec['file_names'])
+ else:
+ gt_file_names.append(rec['file_names'])
+
matched_images = set()
unmatched_gt_files = []
@@ -381,7 +520,6 @@ if st.session_state.page == 'upload':
for fname in gt_file_names:
if not fname:
continue
-
# Try exact match first
if fname in images_dict:
matched_images.add(fname)
@@ -393,12 +531,11 @@ if st.session_state.page == 'upload':
matched_images.add(fname)
found = True
break
-
# Try matching filename without extension from uploaded files
if not found:
for uploaded_name in images_dict.keys():
uploaded_base = uploaded_name.rsplit('.', 1)[0]
- if uploaded_base == fname:
+ if uploaded_base == fname or uploaded_base == os.path.splitext(fname)[0]:
matched_images.add(fname)
found = True
break
@@ -454,7 +591,7 @@ elif st.session_state.page == 'viewer':
with col2:
if st.session_state.modified_indices:
modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)]
- jsonl_modified = save_to_jsonl(modified_data)
+ jsonl_modified = save_to_jsonl(modified_data, transform_filenames=True)
st.download_button(
label=f"⬇️ Download Modified ({len(modified_data)})",
data=jsonl_modified,
@@ -468,9 +605,9 @@ elif st.session_state.page == 'viewer':
with col3:
if st.session_state.modified_indices:
- unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
- if i not in st.session_state.modified_indices]
- jsonl_unmodified = save_to_jsonl(unmodified_data)
+ unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
+ if i not in st.session_state.modified_indices]
+ jsonl_unmodified = save_to_jsonl(unmodified_data, transform_filenames=True)
st.download_button(
label=f"⬇️ Download Unmodified ({len(unmodified_data)})",
data=jsonl_unmodified,
@@ -482,7 +619,7 @@ elif st.session_state.page == 'viewer':
st.button("⬇️ No Unmodified Records", disabled=True, use_container_width=True)
with col4:
- jsonl_all = save_to_jsonl(st.session_state.edited_data)
+ jsonl_all = save_to_jsonl(st.session_state.edited_data, transform_filenames=True)
st.download_button(
label=f"⬇️ Download All ({len(st.session_state.edited_data)})",
data=jsonl_all,
@@ -491,8 +628,9 @@ elif st.session_state.page == 'viewer':
use_container_width=True
)
- file_names = [record.get('file_name', f'Record {i}') for i, record in enumerate(st.session_state.data or [])]
-
+ # Get display names for all records
+ file_names = [get_display_name(record) for record in st.session_state.data or []]
+
# Guard: no records at all
if not file_names:
st.error("No records loaded. Please upload a JSONL file on the Upload page.")
@@ -502,12 +640,12 @@ elif st.session_state.page == 'viewer':
else:
# Build options (list is safer than range for length checks)
options = list(range(len(file_names)))
-
+
# Ensure edited_data exists and has consistent length
if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names):
# try to sync edited_data to data
st.session_state.edited_data = (st.session_state.data or []).copy()
-
+
# Clamp current_index into valid range
cur_idx = st.session_state.get('current_index', 0)
try:
@@ -518,7 +656,7 @@ elif st.session_state.page == 'viewer':
cur_idx = 0
if cur_idx >= len(options):
cur_idx = len(options) - 1
-
+
# Show selectbox with a safe index
selected_file = st.selectbox(
"Select a file to view:",
@@ -526,698 +664,661 @@ elif st.session_state.page == 'viewer':
format_func=lambda x: f"{'✏️ ' if x in st.session_state.modified_indices else ''}{file_names[x]}",
index=cur_idx
)
-
+
# Persist chosen index
st.session_state.current_index = selected_file
-
+
# Safe access to the current record
current_record = st.session_state.edited_data[selected_file]
-
- left_col, right_col = st.columns([1.6, 1.0], gap="small")
-
- # LEFT SIDE: Image Display with OCR Canvas
- with left_col:
- with st.container(height=700, border=False):
- file_name = current_record.get('file_name', '')
-
- if file_name:
- # Find the actual file name (handle cases where extension is missing)
- actual_file_name = None
- if file_name in st.session_state.images:
- actual_file_name = file_name
- else:
- # Try adding common extensions
- for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
- if file_name + ext in st.session_state.images:
- actual_file_name = file_name + ext
- break
-
- # Try matching without extension
- if not actual_file_name:
- for uploaded_name in st.session_state.images.keys():
- uploaded_base = uploaded_name.rsplit('.', 1)[0]
- if uploaded_base == file_name:
- actual_file_name = uploaded_name
- break
+ left_col, right_col = st.columns([1.6, 1.0], gap="small")
+
+ # LEFT SIDE: Image Display with OCR Canvas
+ with left_col:
+ with st.container(height=700, border=False):
+ # Get file name from record
+ original_file_name = get_display_name(current_record)
- if actual_file_name:
- # Check if this is a PDF with multiple pages
- is_pdf = actual_file_name in st.session_state.pdf_metadata
+ if original_file_name:
+ # Find the actual file name
+ actual_file_name = find_actual_file(original_file_name)
- if is_pdf:
- pdf_meta = st.session_state.pdf_metadata[actual_file_name]
- total_pages = pdf_meta['total_pages']
- current_page = st.session_state.current_page_num.get(actual_file_name, 0)
-
- # PDF Navigation Header
- col_prev, col_info, col_next = st.columns([1, 2, 1])
-
- with col_prev:
- prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{actual_file_name}",
- disabled=(current_page == 0), use_container_width=True)
-
- with col_info:
- st.markdown(f"
📄 Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True)
+ if actual_file_name:
+ # Check if this is a PDF with multiple pages
+ is_pdf = actual_file_name in st.session_state.pdf_metadata
+ if is_pdf:
+ pdf_meta = st.session_state.pdf_metadata[actual_file_name]
+ total_pages = pdf_meta['total_pages']
+ current_page = st.session_state.current_page_num.get(actual_file_name, 0)
+
+ # PDF Navigation Header
+ col_prev, col_info, col_next = st.columns([1, 2, 1])
+ with col_prev:
+ prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{actual_file_name}",
+ disabled=(current_page == 0), use_container_width=True)
+ with col_info:
+ st.markdown(f"📄 Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True)
+ with col_next:
+ next_clicked = st.button("Next ➡️", key=f"next_page_{selected_file}_{actual_file_name}",
+ disabled=(current_page >= total_pages - 1), use_container_width=True)
+
+ # Handle navigation only if not already navigating
+ if not st.session_state.navigating_page:
+ if prev_clicked:
+ st.session_state.navigating_page = True
+ st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1)
+ st.session_state.canvas_key += 1
+ st.session_state.ocr_active_section = None
+ st.session_state.ocr_active_field = None
+ st.rerun()
+ elif next_clicked:
+ st.session_state.navigating_page = True
+ st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1)
+ st.session_state.canvas_key += 1
+ st.session_state.ocr_active_section = None
+ st.session_state.ocr_active_field = None
+ st.rerun()
+ else:
+ # Reset the flag after rerun
+ st.session_state.navigating_page = False
- with col_next:
- next_clicked = st.button("Next ➡️", key=f"next_page_{selected_file}_{actual_file_name}",
- disabled=(current_page >= total_pages - 1), use_container_width=True)
+ # Determine if PDF and get the appropriate image
+ is_pdf = actual_file_name in st.session_state.pdf_metadata
+ if is_pdf:
+ # Get the current page image
+ current_page = st.session_state.current_page_num.get(actual_file_name, 0)
+ pdf_meta = st.session_state.pdf_metadata[actual_file_name]
+ current_image = pdf_meta['pages'][current_page]
+ else:
+ current_image = st.session_state.images[actual_file_name]
- # Handle navigation only if not already navigating
- if not st.session_state.navigating_page:
- if prev_clicked:
- st.session_state.navigating_page = True
- st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1)
- st.session_state.canvas_key += 1
- st.session_state.ocr_active_section = None
- st.session_state.ocr_active_field = None
- st.rerun()
- elif next_clicked:
- st.session_state.navigating_page = True
- st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1)
- st.session_state.canvas_key += 1
- st.session_state.ocr_active_section = None
- st.session_state.ocr_active_field = None
+ if current_image:
+ # Scale to a reasonable size so canvas doesn't become excessively large
+ scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
+
+ # Render the canvas. Its internal canvas will be constrained by the wrapper due to CSS above.
+ canvas_result = st_canvas(
+ fill_color="rgba(255, 165, 0, 0.3)",
+ stroke_width=2,
+ stroke_color="#FF0000",
+ background_image=scaled_image,
+ update_streamlit=True,
+ height=scaled_image.height,
+ width=scaled_image.width,
+ drawing_mode="rect",
+ key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
+ )
+
+ # Only attempt OCR if there's an active OCR target AND the user has drawn something (objects exist)
+ if canvas_result.json_data is not None and st.session_state.ocr_active_field:
+ objects = canvas_result.json_data.get("objects", [])
+ if len(objects) > 0:
+ rect = objects[-1]
+ bbox = [
+ (rect["left"] - paste_x) / scale_ratio,
+ (rect["top"] - paste_y) / scale_ratio,
+ (rect["left"] + rect["width"] - paste_x) / scale_ratio,
+ (rect["top"] + rect["height"] - paste_y) / scale_ratio
+ ]
+
+ with st.spinner("Performing OCR..."):
+ ocr_text = perform_ocr(current_image, bbox)
+ if ocr_text and not ocr_text.startswith("OCR Error"):
+ st.success(f"✅ OCR Result: {ocr_text}")
+ gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
+ if st.session_state.ocr_active_section == 'Line_items':
+ line_items = gt_parse.get('Line_items', [])
+ row_idx = st.session_state.ocr_line_item_row
+ if row_idx is not None and row_idx < len(line_items):
+ line_items[row_idx][st.session_state.ocr_active_field] = ocr_text
+ gt_parse['Line_items'] = line_items
+ # ensure expander stays open for this row after OCR
+ expander_key = f"line_item_expander_{selected_file}_{row_idx}"
+ st.session_state[expander_key] = True
+ else:
+ section = st.session_state.ocr_active_section
+ field = st.session_state.ocr_active_field
+ if section not in gt_parse:
+ gt_parse[section] = {}
+ gt_parse[section][field] = ocr_text
+
+ st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
+ st.session_state.modified_indices.add(selected_file)
+
+ # Keep the OCR field active so user can draw multiple rectangles for the same field
+ # Field will only change when user clicks a different OCR button
+ # Clear canvas for next OCR by bumping canvas_key then rerun
+ st.session_state.canvas_key += 1
+ st.rerun()
+ else:
+ st.error(ocr_text)
+ else:
+ st.error(f"❌ File '{original_file_name}' not found in uploaded files")
+ st.info("💡 Available files:")
+ with st.expander("Show available files"):
+ for img_name in list(st.session_state.images.keys())[:20]:
+ st.text(f" • {img_name}")
+ if len(st.session_state.images) > 20:
+ st.text(f" ... and {len(st.session_state.images) - 20} more")
+ else:
+ st.warning("No file name specified in record")
+
+ # RIGHT SIDE: Editable Details
+ with right_col:
+ with st.container(height=700, border=False):
+ st.markdown("### 📝 Document Details")
+ gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
+
+ tab1, tab2, tab3, tab4 = st.tabs([
+ "📄 Remittance Details",
+ "👥 Party Details",
+ "🏦 Bank Details",
+ "📋 Line Items"
+ ])
+
+ # TAB 1: Remittance Details
+ with tab1:
+ remittance = gt_parse.get('Remittance_details', {})
+
+ # Each field with OCR button
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ remittance['Remittance_adv_no'] = st.text_input(
+ "Remittance Advice No",
+ value=remittance.get('Remittance_adv_no', ''),
+ key=f"rem_adv_no_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_rem_adv_no_{selected_file}",
+ type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_no') else "secondary"):
+ activate_ocr_field('Remittance_details', 'Remittance_adv_no')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ remittance['Remittance_adv_date'] = st.text_input(
+ "Remittance Advice Date",
+ value=remittance.get('Remittance_adv_date', ''),
+ key=f"rem_adv_date_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_rem_adv_date_{selected_file}",
+ type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_date') else "secondary"):
+ activate_ocr_field('Remittance_details', 'Remittance_adv_date')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ remittance['Payment_method'] = st.text_input(
+ "Payment Method",
+ value=remittance.get('Payment_method', ''),
+ key=f"payment_method_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_payment_method_{selected_file}",
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_method') else "secondary"):
+ activate_ocr_field('Remittance_details', 'Payment_method')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ remittance['FCY'] = st.text_input(
+ "FCY (Foreign Currency)",
+ value=remittance.get('FCY', ''),
+ key=f"fcy_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_fcy_{selected_file}",
+ type="primary" if is_ocr_active('Remittance_details', 'FCY') else "secondary"):
+ activate_ocr_field('Remittance_details', 'FCY')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ remittance['Total_payment_amt_FCY'] = st.text_input(
+ "Total Payment Amount (FCY)",
+ value=remittance.get('Total_payment_amt_FCY', ''),
+ key=f"total_payment_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_total_payment_{selected_file}",
+ type="primary" if is_ocr_active('Remittance_details', 'Total_payment_amt_FCY') else "secondary"):
+ activate_ocr_field('Remittance_details', 'Total_payment_amt_FCY')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ remittance['Payment_date'] = st.text_input(
+ "Payment Date",
+ value=remittance.get('Payment_date', ''),
+ key=f"payment_date_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_payment_date_{selected_file}",
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_date') else "secondary"):
+ activate_ocr_field('Remittance_details', 'Payment_date')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ remittance['Payment_ref_no'] = st.text_input(
+ "Payment Reference No",
+ value=remittance.get('Payment_ref_no', ''),
+ key=f"payment_ref_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_payment_ref_{selected_file}",
+ type="primary" if is_ocr_active('Remittance_details', 'Payment_ref_no') else "secondary"):
+ activate_ocr_field('Remittance_details', 'Payment_ref_no')
+
+ gt_parse['Remittance_details'] = remittance
+
+ # TAB 2: Customer/Supplier Details with SWAP button
+ with tab2:
+ # SWAP BUTTON - Centered and prominent
+ col1, col2, col3 = st.columns([1, 2, 1])
+ with col2:
+ if st.button("🔄 Swap Customer ↔ Supplier", key=f"swap_btn_{selected_file}",
+ type="primary", use_container_width=True):
+ if not st.session_state.just_swapped:
+ st.session_state.just_swapped = True
+ swap_customer_supplier_details(selected_file)
st.rerun()
- else:
- # Reset the flag after rerun
- st.session_state.navigating_page = False
+
+ # Reset the flag after rerun
+ if st.session_state.just_swapped:
+ st.session_state.just_swapped = False
+
+ st.markdown("**Customer Details**")
+ customer_supplier = gt_parse.get('Customer_supplier_details', {})
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ customer_supplier['Customer_name'] = st.text_input(
+ "Customer Name",
+ value=customer_supplier.get('Customer_name', ''),
+ key=f"cust_name_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_cust_name_{selected_file}",
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_name') else "secondary"):
+ activate_ocr_field('Customer_supplier_details', 'Customer_name')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ customer_supplier['Customer_address'] = st.text_area(
+ "Customer Address",
+ value=customer_supplier.get('Customer_address', ''),
+ key=f"cust_addr_{selected_file}",
+ height=60
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_cust_addr_{selected_file}",
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_address') else "secondary"):
+ activate_ocr_field('Customer_supplier_details', 'Customer_address')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ customer_supplier['Customer_contact_info'] = st.text_input(
+ "Customer Contact Info",
+ value=customer_supplier.get('Customer_contact_info', ''),
+ key=f"cust_contact_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_cust_contact_{selected_file}",
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_contact_info') else "secondary"):
+ activate_ocr_field('Customer_supplier_details', 'Customer_contact_info')
+
+ st.markdown("**Supplier Details**")
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ customer_supplier['Supplier_name'] = st.text_input(
+ "Supplier Name",
+ value=customer_supplier.get('Supplier_name', ''),
+ key=f"supp_name_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_supp_name_{selected_file}",
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_name') else "secondary"):
+ activate_ocr_field('Customer_supplier_details', 'Supplier_name')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ customer_supplier['Supplier_address'] = st.text_area(
+ "Supplier Address",
+ value=customer_supplier.get('Supplier_address', ''),
+ key=f"supp_addr_{selected_file}",
+ height=60
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_supp_addr_{selected_file}",
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_address') else "secondary"):
+ activate_ocr_field('Customer_supplier_details', 'Supplier_address')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ customer_supplier['Supplier_contact_info'] = st.text_input(
+ "Supplier Contact Info",
+ value=customer_supplier.get('Supplier_contact_info', ''),
+ key=f"supp_contact_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_supp_contact_{selected_file}",
+ type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_contact_info') else "secondary"):
+ activate_ocr_field('Customer_supplier_details', 'Supplier_contact_info')
+
+ gt_parse['Customer_supplier_details'] = customer_supplier
- if actual_file_name:
- # Determine if PDF and get the appropriate image
- is_pdf = actual_file_name in st.session_state.pdf_metadata
+ # TAB 3: Bank Details
+ with tab3:
+ bank = gt_parse.get('Bank_details', {})
- if is_pdf:
- # Get the current page image
- current_page = st.session_state.current_page_num.get(actual_file_name, 0)
- pdf_meta = st.session_state.pdf_metadata[actual_file_name]
- current_image = pdf_meta['pages'][current_page]
- else:
- current_image = st.session_state.images[actual_file_name]
- else:
- st.error(f"❌ File '{file_name}' not found in uploaded files")
- st.info("💡 Available files:")
- with st.expander("Show available files"):
- for img_name in list(st.session_state.images.keys())[:20]:
- st.text(f" • {img_name}")
- if len(st.session_state.images) > 20:
- st.text(f" ... and {len(st.session_state.images) - 20} more")
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ bank['Bank_name'] = st.text_input(
+ "Bank Name",
+ value=bank.get('Bank_name', ''),
+ key=f"bank_name_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_bank_name_{selected_file}",
+ type="primary" if is_ocr_active('Bank_details', 'Bank_name') else "secondary"):
+ activate_ocr_field('Bank_details', 'Bank_name')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ bank['Bank_acc_no'] = st.text_input(
+ "Bank Account No",
+ value=bank.get('Bank_acc_no', ''),
+ key=f"bank_acc_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_bank_acc_{selected_file}",
+ type="primary" if is_ocr_active('Bank_details', 'Bank_acc_no') else "secondary"):
+ activate_ocr_field('Bank_details', 'Bank_acc_no')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ bank['Bank_routing_no'] = st.text_input(
+ "Bank Routing No",
+ value=bank.get('Bank_routing_no', ''),
+ key=f"bank_routing_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_bank_routing_{selected_file}",
+ type="primary" if is_ocr_active('Bank_details', 'Bank_routing_no') else "secondary"):
+ activate_ocr_field('Bank_details', 'Bank_routing_no')
+
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ bank['Swift_code'] = st.text_input(
+ "SWIFT Code",
+ value=bank.get('Swift_code', ''),
+ key=f"swift_{selected_file}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_swift_{selected_file}",
+ type="primary" if is_ocr_active('Bank_details', 'Swift_code') else "secondary"):
+ activate_ocr_field('Bank_details', 'Swift_code')
+
+ gt_parse['Bank_details'] = bank
- if current_image:
- # Scale to a reasonable size so canvas doesn't become excessively large
- scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
+ # TAB 4: Line Items
+ with tab4:
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
+ line_items = current_gt_parse.get('Line_items', [])
- # Render the canvas. Its internal canvas will be constrained by the wrapper due to CSS above.
- canvas_result = st_canvas(
- fill_color="rgba(255, 165, 0, 0.3)",
- stroke_width=2,
- stroke_color="#FF0000",
- background_image=scaled_image,
- update_streamlit=True,
- height=scaled_image.height,
- width=scaled_image.width,
- drawing_mode="rect",
- key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
- )
+ # Add/Remove row buttons
+ col_add, col_remove = st.columns([1, 1])
+ with col_add:
+ if st.button("➕ Add New Row", key=f"add_row_{selected_file}", use_container_width=True):
+ if not st.session_state.button_clicked:
+ st.session_state.button_clicked = True
+ new_row = {
+ "Po_number": "", "Invoice_no": "", "Other_doc_ref_no": "",
+ "Invoice_date": "", "Invoice_amount_FCY": "",
+ "Amount_paid_for_each_invoice": "", "Outstanding_balance_FCY": "",
+ "Discounts_taken_FCY": "", "Adjustments(without_holding_tax)_FCY": "",
+ "Descriptions": ""
+ }
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
+ current_line_items = current_gt_parse.get('Line_items', [])
+ current_line_items.append(new_row)
+ current_gt_parse['Line_items'] = current_line_items
+ st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
+ st.session_state.modified_indices.add(selected_file)
+ # Ensure the newly added row's expander is open
+ new_idx = len(current_line_items) - 1
+ expander_key_new = f"line_item_expander_{selected_file}_{new_idx}"
+ st.session_state[expander_key_new] = True
+ st.rerun()
+ with col_remove:
+ if st.button("➖ Remove Last Row", key=f"remove_row_{selected_file}",
+ disabled=(len(line_items) == 0), use_container_width=True):
+ if not st.session_state.button_clicked and len(line_items) > 0:
+ st.session_state.button_clicked = True
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
+ current_line_items = current_gt_parse.get('Line_items', [])
+ N = len(current_line_items)
+ current_line_items.pop()
+ current_gt_parse['Line_items'] = current_line_items
+ st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
+ st.session_state.modified_indices.add(selected_file)
+ # Remove the expander flag for the popped row (if present)
+ popped_idx = N - 1
+ expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
+ if expander_key_popped in st.session_state:
+ del st.session_state[expander_key_popped]
+ st.rerun()
- # Only attempt OCR if there's an active OCR target AND the user has drawn something (objects exist)
- if canvas_result.json_data is not None and st.session_state.ocr_active_field:
- objects = canvas_result.json_data.get("objects", [])
- if len(objects) > 0:
- rect = objects[-1]
-
- bbox = [
- (rect["left"] - paste_x) / scale_ratio,
- (rect["top"] - paste_y) / scale_ratio,
- (rect["left"] + rect["width"] - paste_x) / scale_ratio,
- (rect["top"] + rect["height"] - paste_y) / scale_ratio
- ]
-
- with st.spinner("Performing OCR..."):
- ocr_text = perform_ocr(current_image, bbox)
+ if st.session_state.button_clicked:
+ st.session_state.button_clicked = False
+
+ # Display each row as an expander with OCR buttons
+ current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
+ line_items = current_gt_parse.get('Line_items', [])
+ if line_items:
+ for idx, item in enumerate(line_items):
+ # Use a persistent session_state flag so expansion state is preserved across reruns.
+ expander_key = f"line_item_expander_{selected_file}_{idx}"
+ expanded_default = st.session_state.get(expander_key, False)
- if ocr_text and not ocr_text.startswith("OCR Error"):
- st.success(f"✅ OCR Result: {ocr_text}")
+ # Note: do NOT pass a 'key' arg to st.expander to maintain compatibility; control expanded via session_state flag.
+ with st.expander(f"**Row {idx + 1}** - Invoice: {item.get('Invoice_no', 'N/A')}", expanded=expanded_default):
+ # PO Number
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Po_number'] = st.text_input(
+ "PO Number",
+ value=item.get('Po_number', ''),
+ key=f"po_num_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_po_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Po_number', idx) else "secondary"):
+ # ensure expander stays open when user explicitly requests OCR
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Po_number', idx)
- gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
+ # Invoice No
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Invoice_no'] = st.text_input(
+ "Invoice No",
+ value=item.get('Invoice_no', ''),
+ key=f"inv_no_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_inv_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Invoice_no', idx) else "secondary"):
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Invoice_no', idx)
- if st.session_state.ocr_active_section == 'Line_items':
- line_items = gt_parse.get('Line_items', [])
- row_idx = st.session_state.ocr_line_item_row
- if row_idx is not None and row_idx < len(line_items):
- line_items[row_idx][st.session_state.ocr_active_field] = ocr_text
- gt_parse['Line_items'] = line_items
-
- # ensure expander stays open for this row after OCR
- expander_key = f"line_item_expander_{selected_file}_{row_idx}"
+ # Other Doc Ref No
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Other_doc_ref_no'] = st.text_input(
+ "Other Doc Ref No",
+ value=item.get('Other_doc_ref_no', ''),
+ key=f"other_doc_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_other_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Other_doc_ref_no', idx) else "secondary"):
st.session_state[expander_key] = True
- else:
- section = st.session_state.ocr_active_section
- field = st.session_state.ocr_active_field
- if section not in gt_parse:
- gt_parse[section] = {}
- gt_parse[section][field] = ocr_text
+ activate_ocr_field('Line_items', 'Other_doc_ref_no', idx)
- st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
- st.session_state.modified_indices.add(selected_file)
+ # Invoice Date
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Invoice_date'] = st.text_input(
+ "Invoice Date",
+ value=item.get('Invoice_date', ''),
+ key=f"inv_date_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_inv_date_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Invoice_date', idx) else "secondary"):
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Invoice_date', idx)
- # Keep the OCR field active so user can draw multiple rectangles for the same field
- # Field will only change when user clicks a different OCR button
+ # Invoice Amount FCY
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Invoice_amount_FCY'] = st.text_input(
+ "Invoice Amount FCY",
+ value=item.get('Invoice_amount_FCY', ''),
+ key=f"inv_amt_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_inv_amt_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Invoice_amount_FCY', idx) else "secondary"):
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Invoice_amount_FCY', idx)
- # Clear canvas for next OCR by bumping canvas_key then rerun
- st.session_state.canvas_key += 1
- st.rerun()
- else:
- st.error(ocr_text)
- else:
- st.warning("No file name specified in record")
-
- # RIGHT SIDE: Editable Details
- with right_col:
- with st.container(height=700, border=False):
- st.markdown("### 📝 Document Details")
-
- gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
-
- tab1, tab2, tab3, tab4 = st.tabs([
- "📄 Remittance Details",
- "👥 Party Details",
- "🏦 Bank Details",
- "📋 Line Items"
- ])
-
- # TAB 1: Remittance Details
- with tab1:
- remittance = gt_parse.get('Remittance_details', {})
-
- # Each field with OCR button
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- remittance['Remittance_adv_no'] = st.text_input(
- "Remittance Advice No",
- value=remittance.get('Remittance_adv_no', ''),
- key=f"rem_adv_no_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_rem_adv_no_{selected_file}",
- type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_no') else "secondary"):
- activate_ocr_field('Remittance_details', 'Remittance_adv_no')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- remittance['Remittance_adv_date'] = st.text_input(
- "Remittance Advice Date",
- value=remittance.get('Remittance_adv_date', ''),
- key=f"rem_adv_date_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_rem_adv_date_{selected_file}",
- type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_date') else "secondary"):
- activate_ocr_field('Remittance_details', 'Remittance_adv_date')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- remittance['Payment_method'] = st.text_input(
- "Payment Method",
- value=remittance.get('Payment_method', ''),
- key=f"payment_method_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_payment_method_{selected_file}",
- type="primary" if is_ocr_active('Remittance_details', 'Payment_method') else "secondary"):
- activate_ocr_field('Remittance_details', 'Payment_method')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- remittance['FCY'] = st.text_input(
- "FCY (Foreign Currency)",
- value=remittance.get('FCY', ''),
- key=f"fcy_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_fcy_{selected_file}",
- type="primary" if is_ocr_active('Remittance_details', 'FCY') else "secondary"):
- activate_ocr_field('Remittance_details', 'FCY')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- remittance['Total_payment_amt_FCY'] = st.text_input(
- "Total Payment Amount (FCY)",
- value=remittance.get('Total_payment_amt_FCY', ''),
- key=f"total_payment_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_total_payment_{selected_file}",
- type="primary" if is_ocr_active('Remittance_details', 'Total_payment_amt_FCY') else "secondary"):
- activate_ocr_field('Remittance_details', 'Total_payment_amt_FCY')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- remittance['Payment_date'] = st.text_input(
- "Payment Date",
- value=remittance.get('Payment_date', ''),
- key=f"payment_date_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_payment_date_{selected_file}",
- type="primary" if is_ocr_active('Remittance_details', 'Payment_date') else "secondary"):
- activate_ocr_field('Remittance_details', 'Payment_date')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- remittance['Payment_ref_no'] = st.text_input(
- "Payment Reference No",
- value=remittance.get('Payment_ref_no', ''),
- key=f"payment_ref_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_payment_ref_{selected_file}",
- type="primary" if is_ocr_active('Remittance_details', 'Payment_ref_no') else "secondary"):
- activate_ocr_field('Remittance_details', 'Payment_ref_no')
-
- gt_parse['Remittance_details'] = remittance
-
- # TAB 2: Customer/Supplier Details with SWAP button
- with tab2:
- # SWAP BUTTON - Centered and prominent
- col1, col2, col3 = st.columns([1, 2, 1])
- with col2:
- if st.button("🔄 Swap Customer ↔ Supplier", key=f"swap_btn_{selected_file}",
- type="primary", use_container_width=True):
- if not st.session_state.just_swapped:
- st.session_state.just_swapped = True
- swap_customer_supplier_details(selected_file)
- st.rerun()
-
- # Reset the flag after rerun
- if st.session_state.just_swapped:
- st.session_state.just_swapped = False
-
- st.markdown("**Customer Details**")
- customer_supplier = gt_parse.get('Customer_supplier_details', {})
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- customer_supplier['Customer_name'] = st.text_input(
- "Customer Name",
- value=customer_supplier.get('Customer_name', ''),
- key=f"cust_name_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_cust_name_{selected_file}",
- type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_name') else "secondary"):
- activate_ocr_field('Customer_supplier_details', 'Customer_name')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- customer_supplier['Customer_address'] = st.text_area(
- "Customer Address",
- value=customer_supplier.get('Customer_address', ''),
- key=f"cust_addr_{selected_file}",
- height=60
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_cust_addr_{selected_file}",
- type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_address') else "secondary"):
- activate_ocr_field('Customer_supplier_details', 'Customer_address')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- customer_supplier['Customer_contact_info'] = st.text_input(
- "Customer Contact Info",
- value=customer_supplier.get('Customer_contact_info', ''),
- key=f"cust_contact_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_cust_contact_{selected_file}",
- type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_contact_info') else "secondary"):
- activate_ocr_field('Customer_supplier_details', 'Customer_contact_info')
-
- st.markdown("**Supplier Details**")
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- customer_supplier['Supplier_name'] = st.text_input(
- "Supplier Name",
- value=customer_supplier.get('Supplier_name', ''),
- key=f"supp_name_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_supp_name_{selected_file}",
- type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_name') else "secondary"):
- activate_ocr_field('Customer_supplier_details', 'Supplier_name')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- customer_supplier['Supplier_address'] = st.text_area(
- "Supplier Address",
- value=customer_supplier.get('Supplier_address', ''),
- key=f"supp_addr_{selected_file}",
- height=60
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_supp_addr_{selected_file}",
- type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_address') else "secondary"):
- activate_ocr_field('Customer_supplier_details', 'Supplier_address')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- customer_supplier['Supplier_contact_info'] = st.text_input(
- "Supplier Contact Info",
- value=customer_supplier.get('Supplier_contact_info', ''),
- key=f"supp_contact_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_supp_contact_{selected_file}",
- type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_contact_info') else "secondary"):
- activate_ocr_field('Customer_supplier_details', 'Supplier_contact_info')
-
- gt_parse['Customer_supplier_details'] = customer_supplier
-
- # TAB 3: Bank Details
- with tab3:
- bank = gt_parse.get('Bank_details', {})
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- bank['Bank_name'] = st.text_input(
- "Bank Name",
- value=bank.get('Bank_name', ''),
- key=f"bank_name_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_bank_name_{selected_file}",
- type="primary" if is_ocr_active('Bank_details', 'Bank_name') else "secondary"):
- activate_ocr_field('Bank_details', 'Bank_name')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- bank['Bank_acc_no'] = st.text_input(
- "Bank Account No",
- value=bank.get('Bank_acc_no', ''),
- key=f"bank_acc_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_bank_acc_{selected_file}",
- type="primary" if is_ocr_active('Bank_details', 'Bank_acc_no') else "secondary"):
- activate_ocr_field('Bank_details', 'Bank_acc_no')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- bank['Bank_routing_no'] = st.text_input(
- "Bank Routing No",
- value=bank.get('Bank_routing_no', ''),
- key=f"bank_routing_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_bank_routing_{selected_file}",
- type="primary" if is_ocr_active('Bank_details', 'Bank_routing_no') else "secondary"):
- activate_ocr_field('Bank_details', 'Bank_routing_no')
-
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- bank['Swift_code'] = st.text_input(
- "SWIFT Code",
- value=bank.get('Swift_code', ''),
- key=f"swift_{selected_file}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_swift_{selected_file}",
- type="primary" if is_ocr_active('Bank_details', 'Swift_code') else "secondary"):
- activate_ocr_field('Bank_details', 'Swift_code')
-
- gt_parse['Bank_details'] = bank
-
- # TAB 4: Line Items
- with tab4:
- current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
- line_items = current_gt_parse.get('Line_items', [])
+ # Amount Paid
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Amount_paid_for_each_invoice'] = st.text_input(
+ "Amount Paid",
+ value=item.get('Amount_paid_for_each_invoice', ''),
+ key=f"amt_paid_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_amt_paid_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Amount_paid_for_each_invoice', idx) else "secondary"):
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Amount_paid_for_each_invoice', idx)
+
+ # Outstanding Balance
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Outstanding_balance_FCY'] = st.text_input(
+ "Outstanding Balance FCY",
+ value=item.get('Outstanding_balance_FCY', ''),
+ key=f"out_bal_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_out_bal_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Outstanding_balance_FCY', idx) else "secondary"):
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Outstanding_balance_FCY', idx)
+
+ # Discounts
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Discounts_taken_FCY'] = st.text_input(
+ "Discounts Taken FCY",
+ value=item.get('Discounts_taken_FCY', ''),
+ key=f"disc_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_disc_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Discounts_taken_FCY', idx) else "secondary"):
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Discounts_taken_FCY', idx)
+
+ # Adjustments
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Adjustments(without_holding_tax)_FCY'] = st.text_input(
+ "Adjustments FCY",
+ value=item.get('Adjustments(without_holding_tax)_FCY', ''),
+ key=f"adj_{selected_file}_{idx}"
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_adj_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) else "secondary"):
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Adjustments(without_holding_tax)_FCY', idx)
+
+ # Descriptions
+ col_input, col_btn = st.columns([5, 1])
+ with col_input:
+ item['Descriptions'] = st.text_area(
+ "Descriptions",
+ value=item.get('Descriptions', ''),
+ key=f"desc_{selected_file}_{idx}",
+ height=60
+ )
+ with col_btn:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("🔍", key=f"ocr_desc_{selected_file}_{idx}",
+ type="primary" if is_ocr_active('Line_items', 'Descriptions', idx) else "secondary"):
+ st.session_state[expander_key] = True
+ activate_ocr_field('Line_items', 'Descriptions', idx)
+
+ # Update line items back to gt_parse
+ current_gt_parse['Line_items'] = line_items
+ st.markdown("**📊 Line Items Summary Table**")
+
+ # Display summary table with index starting from 1
+ df = pd.DataFrame(line_items)
+ df.index = df.index + 1 # Start index from 1
+ df.index.name = 'SL No'
+ st.dataframe(
+ df,
+ use_container_width=True,
+ height=300
+ )
+ else:
+ st.info("No line items. Click '➕ Add New Row' to add a new row.")
- # Add/Remove row buttons
- col_add, col_remove = st.columns([1, 1])
- with col_add:
- if st.button("➕ Add New Row", key=f"add_row_{selected_file}", use_container_width=True):
- if not st.session_state.button_clicked:
- st.session_state.button_clicked = True
- new_row = {
- "Po_number": "", "Invoice_no": "", "Other_doc_ref_no": "",
- "Invoice_date": "", "Invoice_amount_FCY": "",
- "Amount_paid_for_each_invoice": "", "Outstanding_balance_FCY": "",
- "Discounts_taken_FCY": "", "Adjustments(without_holding_tax)_FCY": "",
- "Descriptions": ""
- }
- current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
- current_line_items = current_gt_parse.get('Line_items', [])
- current_line_items.append(new_row)
- current_gt_parse['Line_items'] = current_line_items
- st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
- st.session_state.modified_indices.add(selected_file)
-
- # Ensure the newly added row's expander is open
- new_idx = len(current_line_items) - 1
- expander_key_new = f"line_item_expander_{selected_file}_{new_idx}"
- st.session_state[expander_key_new] = True
-
- st.rerun()
+ st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
- with col_remove:
- if st.button("➖ Remove Last Row", key=f"remove_row_{selected_file}",
- disabled=(len(line_items) == 0), use_container_width=True):
- if not st.session_state.button_clicked and len(line_items) > 0:
- st.session_state.button_clicked = True
- current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
- current_line_items = current_gt_parse.get('Line_items', [])
- N = len(current_line_items)
- current_line_items.pop()
- current_gt_parse['Line_items'] = current_line_items
- st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
- st.session_state.modified_indices.add(selected_file)
-
- # Remove the expander flag for the popped row (if present)
- popped_idx = N - 1
- expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
- if expander_key_popped in st.session_state:
- del st.session_state[expander_key_popped]
-
+ # Save button
+ col1, col2 = st.columns([1, 1])
+ with col1:
+ if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
+ if not st.session_state.just_saved:
+ st.session_state.just_saved = True
+ auto_save(selected_file)
+ st.session_state.save_message = "✅ Changes saved successfully!"
+ st.session_state.save_message_time = time.time()
st.rerun()
- if st.session_state.button_clicked:
- st.session_state.button_clicked = False
-
- # Display each row as an expander with OCR buttons
- current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
- line_items = current_gt_parse.get('Line_items', [])
+ if st.session_state.just_saved:
+ st.session_state.just_saved = False
- if line_items:
- for idx, item in enumerate(line_items):
- # Use a persistent session_state flag so expansion state is preserved across reruns.
- expander_key = f"line_item_expander_{selected_file}_{idx}"
- expanded_default = st.session_state.get(expander_key, False)
-
- # Note: do NOT pass a 'key' arg to st.expander to maintain compatibility; control expanded via session_state flag.
- with st.expander(f"**Row {idx + 1}** - Invoice: {item.get('Invoice_no', 'N/A')}", expanded=expanded_default):
- # PO Number
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Po_number'] = st.text_input(
- "PO Number",
- value=item.get('Po_number', ''),
- key=f"po_num_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_po_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Po_number', idx) else "secondary"):
- # ensure expander stays open when user explicitly requests OCR
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Po_number', idx)
-
- # Invoice No
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Invoice_no'] = st.text_input(
- "Invoice No",
- value=item.get('Invoice_no', ''),
- key=f"inv_no_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_inv_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Invoice_no', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Invoice_no', idx)
-
- # Other Doc Ref No
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Other_doc_ref_no'] = st.text_input(
- "Other Doc Ref No",
- value=item.get('Other_doc_ref_no', ''),
- key=f"other_doc_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_other_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Other_doc_ref_no', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Other_doc_ref_no', idx)
-
- # Invoice Date
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Invoice_date'] = st.text_input(
- "Invoice Date",
- value=item.get('Invoice_date', ''),
- key=f"inv_date_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_inv_date_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Invoice_date', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Invoice_date', idx)
-
- # Invoice Amount FCY
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Invoice_amount_FCY'] = st.text_input(
- "Invoice Amount FCY",
- value=item.get('Invoice_amount_FCY', ''),
- key=f"inv_amt_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_inv_amt_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Invoice_amount_FCY', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Invoice_amount_FCY', idx)
-
- # Amount Paid
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Amount_paid_for_each_invoice'] = st.text_input(
- "Amount Paid",
- value=item.get('Amount_paid_for_each_invoice', ''),
- key=f"amt_paid_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_amt_paid_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Amount_paid_for_each_invoice', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Amount_paid_for_each_invoice', idx)
-
- # Outstanding Balance
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Outstanding_balance_FCY'] = st.text_input(
- "Outstanding Balance FCY",
- value=item.get('Outstanding_balance_FCY', ''),
- key=f"out_bal_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_out_bal_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Outstanding_balance_FCY', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Outstanding_balance_FCY', idx)
-
- # Discounts
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Discounts_taken_FCY'] = st.text_input(
- "Discounts Taken FCY",
- value=item.get('Discounts_taken_FCY', ''),
- key=f"disc_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_disc_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Discounts_taken_FCY', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Discounts_taken_FCY', idx)
-
- # Adjustments
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Adjustments(without_holding_tax)_FCY'] = st.text_input(
- "Adjustments FCY",
- value=item.get('Adjustments(without_holding_tax)_FCY', ''),
- key=f"adj_{selected_file}_{idx}"
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_adj_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Adjustments(without_holding_tax)_FCY', idx)
-
- # Descriptions
- col_input, col_btn = st.columns([5, 1])
- with col_input:
- item['Descriptions'] = st.text_area(
- "Descriptions",
- value=item.get('Descriptions', ''),
- key=f"desc_{selected_file}_{idx}",
- height=60
- )
- with col_btn:
- st.markdown("
", unsafe_allow_html=True)
- if st.button("🔍", key=f"ocr_desc_{selected_file}_{idx}",
- type="primary" if is_ocr_active('Line_items', 'Descriptions', idx) else "secondary"):
- st.session_state[expander_key] = True
- activate_ocr_field('Line_items', 'Descriptions', idx)
-
- # Update line items back to gt_parse
- current_gt_parse['Line_items'] = line_items
-
- st.markdown("**📊 Line Items Summary Table**")
-
- # Display summary table with index starting from 1
- df = pd.DataFrame(line_items)
- df.index = df.index + 1 # Start index from 1
- df.index.name = 'SL No'
-
- st.dataframe(
- df,
- use_container_width=True,
- height=300
- )
- else:
- st.info("No line items. Click '➕ Add New Row' to add a new row.")
-
- st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
-
- # Save button
- col1, col2 = st.columns([1, 1])
- with col1:
- if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
- if not st.session_state.just_saved:
- st.session_state.just_saved = True
- auto_save(selected_file)
- st.session_state.save_message = "✅ Changes saved successfully!"
- st.session_state.save_message_time = time.time()
- st.rerun()
-
- if st.session_state.just_saved:
- st.session_state.just_saved = False
-
- if st.session_state.save_message:
- st.success(st.session_state.save_message)
+ if st.session_state.save_message:
+ st.success(st.session_state.save_message)