import os
from pathlib import Path
# -----------------------------
# Environment hardening (HF Spaces, /.cache issue)
# -----------------------------
_home = os.environ.get("HOME", "")
# Libraries that write under ~/.cache or ~/.streamlit fail when HOME is unset,
# is the filesystem root, or points at an existing directory we cannot write
# to. A HOME that does not exist yet is left alone: the mkdir below may still
# be able to create it.
_home_unusable = _home in ("", "/") or (
    os.path.isdir(_home) and not os.access(_home, os.W_OK)
)
if _home_unusable:
    repo_dir = os.getcwd()
    # Prefer the working directory; fall back to /tmp when it is read-only.
    safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp"
    os.environ["HOME"] = safe_home
    print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}")

streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
try:
    streamlit_dir.mkdir(parents=True, exist_ok=True)
    print(f"[startup] ensured {streamlit_dir}")
except OSError as e:
    # Best effort only: startup continues even if the directory is missing.
    print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")
import streamlit as st
import json
import io
from PIL import Image
import time
import pandas as pd
from streamlit_drawable_canvas import st_canvas
import pytesseract
import numpy as np
from datetime import datetime
import fitz # PyMuPDF for PDF handling
# Tesseract binary location: only Windows needs an explicit path here.
# On Linux/macOS (HF Spaces runs Linux) pytesseract's default is left as is.
if os.name == 'nt':
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Page configuration (must be issued before other Streamlit output).
st.set_page_config(page_title="Invoice Data Viewer", layout="wide")

# Custom CSS hook.
# NOTE(review): the markup string is effectively empty in this copy of the
# file; presumably a <style> block was stripped — confirm against the original.
st.markdown("\n", unsafe_allow_html=True)
def load_jsonl(file):
    """Parse an uploaded JSONL file into a list of records.

    Args:
        file: Object exposing ``getvalue()`` that returns the raw file bytes
            (for example ``io.BytesIO`` or a Streamlit uploaded file).

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # 'utf-8-sig' drops a leading byte-order mark; with plain 'utf-8' the BOM
    # stays in the text and json.loads rejects the first record.
    content = file.getvalue().decode('utf-8-sig')
    # Split on '\n' only. str.splitlines() would also break on characters such
    # as U+2028, which may legally appear unescaped inside a JSON string.
    return [json.loads(line) for line in content.split('\n') if line.strip()]
def reorder_record_fields(record):
    """Return a new dict with the record's keys in export order.

    ``file_name`` comes first, then ``file_names``, then ``gt_parse`` (each
    only if present); every other key follows in its original order. The
    input dict is not modified.
    """
    leading_keys = ('file_name', 'file_names', 'gt_parse')
    ordered_record = {key: record[key] for key in leading_keys if key in record}
    for key, value in record.items():
        # setdefault keeps the entries already placed at the front.
        ordered_record.setdefault(key, value)
    return ordered_record
def save_to_jsonl(data):
    """Serialise records to JSONL text.

    Each record is passed through ``reorder_record_fields`` and dumped with
    ``json.dumps``; the lines are joined with ``'\\n'`` and no trailing
    newline is added.
    """
    return '\n'.join(
        json.dumps(reorder_record_fields(entry)) for entry in data
    )
def pdf_to_images(pdf_file):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_file: File-like object holding the PDF. ``getvalue()`` is used
            when available, otherwise ``read()``.

    Returns:
        A list of PIL images, one per page in page order. On any failure an
        error is shown with ``st.error`` and an empty list is returned.
    """
    try:
        # getvalue() returns the whole buffer regardless of the current stream
        # position, so a file already read once still converts.
        if hasattr(pdf_file, "getvalue"):
            pdf_bytes = pdf_file.getvalue()
        else:
            pdf_bytes = pdf_file.read()
        pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            images = []
            for page_num in range(pdf_document.page_count):
                page = pdf_document[page_num]
                # 3x zoom in both directions for a higher-resolution render.
                pix = page.get_pixmap(matrix=fitz.Matrix(3, 3), alpha=False)
                images.append(Image.open(io.BytesIO(pix.tobytes("png"))))
            return images
        finally:
            # Close the document even when rendering a page raises.
            pdf_document.close()
    except Exception as e:
        st.error(f"Error converting PDF: {str(e)}")
        return []
def perform_ocr(image, bbox):
    """Run Tesseract on a rectangular region of an image.

    Args:
        image: Image object exposing ``width``, ``height`` and ``crop``.
        bbox: Sequence whose first four items are the x1, y1, x2, y2 corners
            of the region in image pixel coordinates.

    Returns:
        The recognised text with surrounding whitespace stripped, or a string
        starting with ``"OCR Error"`` when the region is empty or OCR fails.
    """
    try:
        x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
        # Accept corners given in either order.
        x1, x2 = min(x1, x2), max(x1, x2)
        y1, y2 = min(y1, y2), max(y1, y2)
        # Clamp the box to the image bounds.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(image.width, x2), min(image.height, y2)
        if x2 <= x1 or y2 <= y1:
            # Nothing to crop: report it rather than hand a degenerate box on.
            return "OCR Error: selected region is empty or outside the image"
        cropped = image.crop((x1, y1, x2, y2))
        return pytesseract.image_to_string(cropped, config='--psm 6').strip()
    except Exception as e:
        return f"OCR Error: {str(e)}"
def scale_image_to_fixed_size(image, max_width=1100, max_height=1100):
    """Resize an image to fit a bounding box, keeping its aspect ratio.

    The image is converted to RGB first; RGBA input is flattened onto a white
    background using its alpha channel as the paste mask. The same scale
    factor is applied to both axes, so an image smaller than the box is
    enlarged. No padding is added.

    Args:
        image: PIL image to scale.
        max_width: Width of the bounding box in pixels.
        max_height: Height of the bounding box in pixels.

    Returns:
        A tuple ``(resized_image, ratio, 0, 0)``. The two zeros are paste
        offsets, always zero because no padding is applied.
    """
    if image.mode == 'RGBA':
        background = Image.new('RGB', image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3])
        image = background
    elif image.mode != 'RGB':
        image = image.convert('RGB')

    ratio = min(max_width / image.width, max_height / image.height)
    # int() truncation can produce 0 for very thin images, which resize()
    # rejects; keep each side at least one pixel.
    new_width = max(1, int(image.width * ratio))
    new_height = max(1, int(image.height * ratio))
    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    return resized_image, ratio, 0, 0
def get_base_filename(record):
    """Return the extension-less base name a record refers to.

    Lookup order:

    1. ``file_names`` (plural): when it is a non-empty list, the first entry
       is used. Its image extension and a trailing ``_page<N>`` suffix are
       removed, both case-insensitively. A truthy non-list value is returned
       unchanged.
    2. ``file_name`` (singular): a trailing ``.pdf`` is removed, then one
       trailing image extension, both case-insensitively.

    Args:
        record: Mapping that may contain ``file_names`` and/or ``file_name``.

    Returns:
        The base name, or ``''`` when the record has no usable file name.
    """
    import re

    image_exts = ('.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp')

    def _strip_one_ext(name, exts):
        """Remove the first matching extension from the end of name."""
        lowered = name.lower()
        for ext in exts:
            if lowered.endswith(ext):
                return name[:-len(ext)]
        return name

    file_names = record.get('file_names')
    if file_names:
        if isinstance(file_names, list):
            base = _strip_one_ext(file_names[0], image_exts)
            # Only a real page suffix is dropped; a name that merely contains
            # "_page" (e.g. "home_pageant") is left intact.
            return re.sub(r'_page\d+$', '', base, flags=re.IGNORECASE)
        return file_names

    # "or ''" also covers an explicit None value.
    file_name = record.get('file_name') or ''
    # A PDF converted to images may still be listed under its .pdf name.
    file_name = _strip_one_ext(file_name, ('.pdf',))
    return _strip_one_ext(file_name, image_exts)
def detect_image_groups(images_dict):
    """Find multi-page image sets among uploaded files.

    File names of the form ``<base>_page<N>.<ext>`` (image extensions,
    matched case-insensitively) are collected per ``<base>``. Bases with two
    or more pages become groups, with their pages sorted by page number.

    Args:
        images_dict: Mapping of uploaded file name to image object.

    Returns:
        A tuple ``(image_groups_metadata, grouped_files)``. The first maps
        each group base name to a dict with ``pages``, ``filenames``,
        ``total_pages`` and ``current_page`` (always 0). The second is the
        set of every file name that matched the page pattern, including
        names whose base has only a single page.
    """
    import re

    page_name_re = re.compile(
        r'^(.+)_page(\d+)\.(png|jpg|jpeg|tiff|tif|bmp)$', re.IGNORECASE
    )

    numbered_by_base = {}
    grouped_files = set()
    for filename in images_dict:
        found = page_name_re.match(filename)
        if found is None:
            continue
        numbered_by_base.setdefault(found.group(1), []).append(
            (int(found.group(2)), filename)
        )
        grouped_files.add(filename)

    image_groups_metadata = {}
    for base_name, numbered in numbered_by_base.items():
        if len(numbered) < 2:
            # A lone "_pageN" file is not treated as a group.
            continue
        # Stable sort on the page number only, so ties keep upload order.
        in_order = sorted(numbered, key=lambda entry: entry[0])
        image_groups_metadata[base_name] = {
            'pages': [images_dict[name] for _, name in in_order],
            'filenames': [name for _, name in in_order],
            'total_pages': len(in_order),
            'current_page': 0,
        }
    return image_groups_metadata, grouped_files
def swap_sender_recipient_details(index):
    """Exchange the sender and recipient name/address of one record.

    Works on ``gt_parse['header']`` of ``st.session_state.edited_data[index]``;
    missing fields are treated as empty strings. The record is marked as
    modified.
    """
    record = st.session_state.edited_data[index]
    gt_parse = record.get('gt_parse', {})
    header = gt_parse.get('header', {})

    # Read both sides before writing either, so nothing is overwritten early.
    from_recipient = (header.get('rcpt_name', ''), header.get('rcpt_addr', ''))
    from_sender = (header.get('sender_name', ''), header.get('sender_addr', ''))
    header['sender_name'], header['sender_addr'] = from_recipient
    header['rcpt_name'], header['rcpt_addr'] = from_sender

    # Write back in case header/gt_parse were freshly created defaults.
    gt_parse['header'] = header
    record['gt_parse'] = gt_parse
    st.session_state.modified_indices.add(index)
# Initialize session state: each key is created once and survives reruns.
for _state_key, _state_default in (
    ('data', None),
    ('current_index', 0),
    ('edited_data', None),
    ('page', 'upload'),
    ('images', {}),
    ('pdf_metadata', {}),
    ('image_groups_metadata', {}),
    ('current_page_num', {}),
    ('modified_indices', set()),
    ('ocr_active_section', None),
    ('ocr_active_field', None),
    ('ocr_line_item_row', None),
    ('canvas_key', 0),
    ('button_clicked', False),
    ('save_message', None),
    ('save_message_time', None),
    ('just_saved', False),
    ('just_swapped', False),
    ('navigating_page', False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def auto_save(index):
    """Commit the edited record at ``index`` and normalise its file fields.

    Does nothing when there is no edited data. Warns and returns when the
    record has no file name. Otherwise the record's base name is resolved to
    an uploaded file, and:

    * for an uploaded PDF with several pages, ``file_names`` becomes
      ``<stem>_page<N>.png`` for every page and ``file_name`` is removed;
    * for a single-page PDF, ``file_name`` becomes ``<stem>.png`` and
      ``file_names`` is removed;
    * for a detected image group, ``file_names`` becomes the group's file
      names and ``file_name`` is removed.

    Finally ``st.session_state.data`` is replaced by a shallow copy of the
    edited list and the index is marked as modified.
    """
    if not st.session_state.edited_data:
        return

    record = st.session_state.edited_data[index]
    base_file_name = get_base_filename(record)
    if not base_file_name:
        st.warning("Cannot save: No file name found in record")
        return

    uploaded = st.session_state.images

    # Resolve the uploaded file: exact name, then name + known extension,
    # then any upload whose stem equals the base name.
    actual_file_name = None
    if base_file_name in uploaded:
        actual_file_name = base_file_name
    else:
        for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
            candidate = base_file_name + ext
            if candidate in uploaded:
                actual_file_name = candidate
                break
        if not actual_file_name:
            actual_file_name = next(
                (
                    uploaded_name
                    for uploaded_name in uploaded.keys()
                    if uploaded_name.rsplit('.', 1)[0] == base_file_name
                ),
                None,
            )

    if actual_file_name and actual_file_name in st.session_state.pdf_metadata:
        total_pages = st.session_state.pdf_metadata[actual_file_name]['total_pages']
        stem = actual_file_name.rsplit('.', 1)[0]
        if total_pages > 1:
            # Multi-page PDF: one generated PNG name per page.
            record['file_names'] = [
                f"{stem}_page{page_no}.png" for page_no in range(1, total_pages + 1)
            ]
            record.pop('file_name', None)
        else:
            # Single-page PDF: a single PNG name.
            record['file_name'] = f"{stem}.png"
            record.pop('file_names', None)
    elif base_file_name in st.session_state.image_groups_metadata:
        # Multi-page image group: reuse the uploaded page file names.
        group_meta = st.session_state.image_groups_metadata[base_file_name]
        record['file_names'] = group_meta['filenames']
        record.pop('file_name', None)

    st.session_state.data = st.session_state.edited_data.copy()
    st.session_state.modified_indices.add(index)
def sync_field_to_data(index, section, field, value, row_idx=None):
    """Write one widget value into the edited record at ``index``.

    For ``section == 'items'`` the value goes to ``items[row_idx][field]``,
    and only when ``row_idx`` is given and within range. For any other
    section it goes to ``gt_parse[section][field]``, creating the section
    dict if needed. The record is always marked as modified.
    """
    record = st.session_state.edited_data[index]
    gt_parse = record.get('gt_parse', {})

    if section != 'items':
        gt_parse.setdefault(section, {})[field] = value
    else:
        line_items = gt_parse.get('items', [])
        if row_idx is not None and row_idx < len(line_items):
            line_items[row_idx][field] = value
            gt_parse['items'] = line_items

    record['gt_parse'] = gt_parse
    st.session_state.modified_indices.add(index)
def activate_ocr_field(section, field, row_idx=None):
    """Toggle which field receives the next OCR result.

    Calling it for the field that is already active clears the selection.
    Otherwise the field becomes active, and for a line-item field the
    session-state flag of that row's expander is set to True.
    """
    state = st.session_state
    requested = (section, field, row_idx)
    active = (state.ocr_active_section, state.ocr_active_field, state.ocr_line_item_row)

    if active == requested:
        # Second click on the same button: switch OCR targeting off.
        state.ocr_active_section = None
        state.ocr_active_field = None
        state.ocr_line_item_row = None
        return

    state.ocr_active_section = section
    state.ocr_active_field = field
    state.ocr_line_item_row = row_idx
    if section == 'items' and row_idx is not None:
        current_idx = state.get('current_index', 0)
        state[f"line_item_expander_{current_idx}_{row_idx}"] = True
def is_ocr_active(section, field, row_idx=None):
    """Return True when the given section/field/row is the active OCR target."""
    state = st.session_state
    if state.ocr_active_section != section:
        return False
    if state.ocr_active_field != field:
        return False
    return state.ocr_line_item_row == row_idx
# PAGE 1: Upload Page
# Collects the JSONL ground truth and the image/PDF files, reports how many
# JSONL file names can be resolved to an upload, then offers the viewer.
if st.session_state.page == 'upload':
    st.title("📤 Invoice Data Viewer with OCR")
    st.markdown("### Upload your files to begin")

    st.markdown("**Step 1: Upload JSONL File**")
    uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])
    if uploaded_file is not None:
        try:
            data = load_jsonl(uploaded_file)
        except Exception as e:
            st.error(f"Error loading file: {str(e)}")
        else:
            st.session_state.data = data
            # Shallow copy: the list is new, the record dicts are shared.
            st.session_state.edited_data = data.copy()
            st.success(f"✅ Successfully loaded {len(data)} records!")

    st.markdown("**Step 2: Upload Images/PDFs Folder**")
    uploaded_files = st.file_uploader(
        "Choose image or PDF files",
        type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'],
        accept_multiple_files=True,
        help="Select all images and PDFs from your folder at once"
    )
    if uploaded_files:
        images_dict = {}
        pdf_metadata = {}
        for file in uploaded_files:
            try:
                file_ext = file.name.lower().split('.')[-1]
                if file_ext == 'pdf':
                    pdf_images = pdf_to_images(file)
                    if pdf_images:
                        # The first page stands in for the PDF in images_dict;
                        # all pages are kept in pdf_metadata.
                        images_dict[file.name] = pdf_images[0]
                        pdf_metadata[file.name] = {
                            'pages': pdf_images,
                            'total_pages': len(pdf_images),
                            'current_page': 0
                        }
                else:
                    images_dict[file.name] = Image.open(file)
            except Exception as e:
                st.warning(f"Could not load file {file.name}: {str(e)}")

        st.session_state.images = images_dict
        st.session_state.pdf_metadata = pdf_metadata

        # Detect multi-page image groups (e.g., invoice01_page1.png, invoice01_page2.png)
        image_groups_metadata, grouped_files = detect_image_groups(images_dict)
        st.session_state.image_groups_metadata = image_groups_metadata

        # Start every PDF and image group on its first page, keeping any page
        # position remembered from an earlier run.
        for filename in pdf_metadata:
            st.session_state.current_page_num.setdefault(filename, 0)
        for base_name in image_groups_metadata:
            st.session_state.current_page_num.setdefault(base_name, 0)

        st.success(
            f"✅ Successfully loaded {len(images_dict)} files "
            f"({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!"
        )

        if st.session_state.data is not None:
            known_exts = ('.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp')
            # Stems of all uploads, built once instead of rescanning per record.
            uploaded_stems = {name.rsplit('.', 1)[0] for name in images_dict}

            def _is_matched(fname):
                """Tell whether a JSONL base name resolves to an uploaded file."""
                fname_base = fname
                lowered = fname.lower()
                for ext in known_exts:
                    if lowered.endswith(ext):
                        fname_base = fname[:-len(ext)]
                        break
                return (
                    fname in images_dict
                    # Base name of an image group (PDF delivered as page PNGs).
                    or fname_base in image_groups_metadata
                    or fname in image_groups_metadata
                    or any(fname + ext in images_dict for ext in known_exts)
                    or fname in uploaded_stems
                    or fname_base in uploaded_stems
                )

            gt_file_names = [
                base_fname
                for base_fname in (get_base_filename(rec) for rec in st.session_state.data)
                if base_fname
            ]
            matched_images = {fname for fname in gt_file_names if _is_matched(fname)}
            unmatched_gt_files = [fname for fname in gt_file_names if fname not in matched_images]
            # Count records, not unique names, so the numerator and the
            # denominator agree when several records share a file name.
            matched_count = len(gt_file_names) - len(unmatched_gt_files)

            st.info(f"📊 Exact matches: {matched_count}/{len(gt_file_names)}")
            if unmatched_gt_files:
                st.warning(f"⚠️ {len(unmatched_gt_files)} file(s) from JSONL not matched:")
                with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
                    for fname in unmatched_gt_files:
                        st.text(f" • {fname}")
            else:
                st.success("✅ All JSONL file names matched to files!")
        else:
            st.info("ℹ️ Upload a JSONL file to see how many files match the ground truth 'file_name' field.")

    if st.session_state.data is not None:
        col1, col2, col3 = st.columns([1, 1, 1])
        with col2:
            if st.button("Continue to Viewer →", type="primary", use_container_width=True):
                st.session_state.page = 'viewer'
                st.session_state.modified_indices = set()
                st.session_state.navigating_page = False
                st.rerun()
# PAGE 2: Viewer Page
elif st.session_state.page == 'viewer':
# Expire the transient save message three seconds after it was set.
if st.session_state.save_message_time is not None:
    if time.time() - st.session_state.save_message_time > 3:
        st.session_state.save_message = None
        st.session_state.save_message_time = None

today_date = datetime.now().strftime("%Y-%m-%d")

# Toolbar: back button plus three JSONL downloads (modified / unmodified / all).
col1, col2, col3, col4 = st.columns([1, 2, 2, 2])
with col1:
    # Explicit key: a second button with the same label is rendered further
    # down when no records are loaded, and two keyless widgets with identical
    # parameters would collide.
    if st.button("← Back to Upload", key="back_to_upload_toolbar"):
        st.session_state.page = 'upload'
        # Drop the whole OCR target, including the line-item row.
        st.session_state.ocr_active_section = None
        st.session_state.ocr_active_field = None
        st.session_state.ocr_line_item_row = None
        st.session_state.save_message = None
        st.session_state.save_message_time = None
        st.session_state.navigating_page = False
        st.rerun()
with col2:
    if st.session_state.modified_indices:
        modified_data = [
            st.session_state.edited_data[i]
            for i in sorted(st.session_state.modified_indices)
        ]
        jsonl_modified = save_to_jsonl(modified_data)
        st.download_button(
            label=f"⬇️ Download Modified ({len(modified_data)})",
            data=jsonl_modified,
            file_name=f"modified_invoice_data_{today_date}.jsonl",
            mime="application/jsonl",
            type="primary",
            use_container_width=True
        )
    else:
        st.button("⬇️ No Modified Records", disabled=True, use_container_width=True)
with col3:
    if st.session_state.modified_indices:
        unmodified_data = [
            record
            for i, record in enumerate(st.session_state.data)
            if i not in st.session_state.modified_indices
        ]
        jsonl_unmodified = save_to_jsonl(unmodified_data)
        st.download_button(
            label=f"⬇️ Download Unmodified ({len(unmodified_data)})",
            data=jsonl_unmodified,
            file_name=f"unmodified_invoice_data_{today_date}.jsonl",
            mime="application/jsonl",
            use_container_width=True
        )
    else:
        st.button("⬇️ No Unmodified Records", disabled=True, use_container_width=True)
with col4:
    # "or []" keeps the toolbar rendering when no edited data exists yet.
    all_records = st.session_state.edited_data or []
    jsonl_all = save_to_jsonl(all_records)
    st.download_button(
        label=f"⬇️ Download All ({len(all_records)})",
        data=jsonl_all,
        file_name=f"all_invoice_data_{today_date}.jsonl",
        mime="application/jsonl",
        use_container_width=True
    )

# Labels for the record selector: base file name, or a positional fallback.
file_names = [
    get_base_filename(record) or f'Record {i}'
    for i, record in enumerate(st.session_state.data or [])
]
if not file_names:
st.error("No records loaded. Please upload a JSONL file on the Upload page.")
if st.button("â Back to Upload"):
st.session_state.page = 'upload'
st.rerun()
else:
options = list(range(len(file_names)))
if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names):
st.session_state.edited_data = (st.session_state.data or []).copy()
cur_idx = st.session_state.get('current_index', 0)
try:
cur_idx = int(cur_idx)
except Exception:
cur_idx = 0
if cur_idx < 0:
cur_idx = 0
if cur_idx >= len(options):
cur_idx = len(options) - 1
selected_file = st.selectbox(
"Select a file to view:",
options=options,
format_func=lambda x: f"{'âī¸ ' if x in st.session_state.modified_indices else ''}{file_names[x]}",
index=cur_idx
)
st.session_state.current_index = selected_file
current_record = st.session_state.edited_data[selected_file]
left_col, right_col = st.columns([1.6, 1.0], gap="small")
# LEFT SIDE: Image Display with OCR Canvas
with left_col:
# Use helper function to get base file name
file_name = get_base_filename(current_record)
if file_name:
# Create base name by stripping extensions
file_name_base = file_name
for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
if file_name.lower().endswith(ext.lower()):
file_name_base = file_name[:-len(ext)]
break
actual_file_name = None
# First check for direct match
if file_name in st.session_state.images:
actual_file_name = file_name
# Check if base name matches an image group (handles PDF converted to images)
elif file_name_base in st.session_state.image_groups_metadata:
actual_file_name = file_name_base # Use base name for image groups
# Check if full name is an image group
elif file_name in st.session_state.image_groups_metadata:
actual_file_name = file_name # Use as-is for image groups
else:
# Try with extensions
for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
if file_name + ext in st.session_state.images:
actual_file_name = file_name + ext
break
if not actual_file_name:
# Try matching base name
for uploaded_name in st.session_state.images.keys():
uploaded_base = uploaded_name.rsplit('.', 1)[0]
if uploaded_base == file_name or uploaded_base == file_name_base:
actual_file_name = uploaded_name
break
if actual_file_name:
is_pdf = actual_file_name in st.session_state.pdf_metadata
is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata
# Determine which key to use for image group
image_group_key = None
if is_image_group:
if actual_file_name in st.session_state.image_groups_metadata:
image_group_key = actual_file_name
else:
image_group_key = file_name_base
if is_pdf:
pdf_meta = st.session_state.pdf_metadata[actual_file_name]
total_pages = pdf_meta['total_pages']
current_page = st.session_state.current_page_num.get(actual_file_name, 0)
col_prev, col_info, col_next = st.columns([1, 2, 1])
with col_prev:
prev_clicked = st.button("âŦ
ī¸ Previous", key=f"prev_page_{selected_file}_{actual_file_name}",
disabled=(current_page == 0), use_container_width=True)
with col_info:
st.markdown(f"
đ Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True)
with col_next:
next_clicked = st.button("Next âĄī¸", key=f"next_page_{selected_file}_{actual_file_name}",
disabled=(current_page >= total_pages - 1), use_container_width=True)
if not st.session_state.navigating_page:
if prev_clicked:
st.session_state.navigating_page = True
st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1)
st.session_state.canvas_key += 1
st.session_state.ocr_active_section = None
st.session_state.ocr_active_field = None
st.rerun()
elif next_clicked:
st.session_state.navigating_page = True
st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1)
st.session_state.canvas_key += 1
st.session_state.ocr_active_section = None
st.session_state.ocr_active_field = None
st.rerun()
else:
st.session_state.navigating_page = False
elif is_image_group and image_group_key:
img_group_meta = st.session_state.image_groups_metadata[image_group_key]
total_pages = img_group_meta['total_pages']
current_page = st.session_state.current_page_num.get(image_group_key, 0)
col_prev, col_info, col_next = st.columns([1, 2, 1])
with col_prev:
prev_clicked = st.button("âŦ
ī¸ Previous", key=f"prev_page_{selected_file}_{image_group_key}",
disabled=(current_page == 0), use_container_width=True)
with col_info:
st.markdown(f"đŧī¸ Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True)
with col_next:
next_clicked = st.button("Next âĄī¸", key=f"next_page_{selected_file}_{image_group_key}",
disabled=(current_page >= total_pages - 1), use_container_width=True)
if not st.session_state.navigating_page:
if prev_clicked:
st.session_state.navigating_page = True
st.session_state.current_page_num[image_group_key] = max(0, current_page - 1)
st.session_state.canvas_key += 1
st.session_state.ocr_active_section = None
st.session_state.ocr_active_field = None
st.rerun()
elif next_clicked:
st.session_state.navigating_page = True
st.session_state.current_page_num[image_group_key] = min(total_pages - 1, current_page + 1)
st.session_state.canvas_key += 1
st.session_state.ocr_active_section = None
st.session_state.ocr_active_field = None
st.rerun()
else:
st.session_state.navigating_page = False
if actual_file_name:
is_pdf = actual_file_name in st.session_state.pdf_metadata
is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata
# Determine which key to use for image group
image_group_key = None
if is_image_group:
if actual_file_name in st.session_state.image_groups_metadata:
image_group_key = actual_file_name
else:
image_group_key = file_name_base
if is_pdf:
current_page = st.session_state.current_page_num.get(actual_file_name, 0)
pdf_meta = st.session_state.pdf_metadata[actual_file_name]
current_image = pdf_meta['pages'][current_page]
elif is_image_group and image_group_key:
current_page = st.session_state.current_page_num.get(image_group_key, 0)
img_group_meta = st.session_state.image_groups_metadata[image_group_key]
current_image = img_group_meta['pages'][current_page]
else:
current_image = st.session_state.images[actual_file_name]
else:
st.error(f"â File '{file_name}' not found in uploaded files")
st.info("đĄ Available files:")
with st.expander("Show available files"):
for img_name in list(st.session_state.images.keys())[:20]:
st.text(f" âĸ {img_name}")
if len(st.session_state.images) > 20:
st.text(f" ... and {len(st.session_state.images) - 20} more")
current_image = None
if current_image:
scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image, max_width=700, max_height=1000)
# Wrap canvas in scrollable container
st.markdown(f'', unsafe_allow_html=True)
canvas_result = st_canvas(
fill_color="rgba(255, 165, 0, 0.3)",
stroke_width=2,
stroke_color="#FF0000",
background_image=scaled_image,
update_streamlit=True,
height=scaled_image.height,
width=scaled_image.width,
drawing_mode="rect",
key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
)
st.markdown('
', unsafe_allow_html=True)
if canvas_result.json_data is not None and st.session_state.ocr_active_field:
objects = canvas_result.json_data.get("objects", [])
if len(objects) > 0:
rect = objects[-1]
bbox = [
(rect["left"] - paste_x) / scale_ratio,
(rect["top"] - paste_y) / scale_ratio,
(rect["left"] + rect["width"] - paste_x) / scale_ratio,
(rect["top"] + rect["height"] - paste_y) / scale_ratio
]
with st.spinner("Performing OCR..."):
ocr_text = perform_ocr(current_image, bbox)
if ocr_text and not ocr_text.startswith("OCR Error"):
st.success(f"â
OCR Result: {ocr_text}")
gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
if st.session_state.ocr_active_section == 'items':
items = gt_parse.get('items', [])
row_idx = st.session_state.ocr_line_item_row
if row_idx is not None and row_idx < len(items):
items[row_idx][st.session_state.ocr_active_field] = ocr_text
gt_parse['items'] = items
expander_key = f"line_item_expander_{selected_file}_{row_idx}"
st.session_state[expander_key] = True
else:
section = st.session_state.ocr_active_section
field = st.session_state.ocr_active_field
if section not in gt_parse:
gt_parse[section] = {}
gt_parse[section][field] = ocr_text
st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
st.session_state.modified_indices.add(selected_file)
st.session_state.canvas_key += 1
st.rerun()
else:
st.error(ocr_text)
else:
st.warning("No file name specified in record")
# RIGHT SIDE: Editable Details
with right_col:
# Create scrollable container for form fields
st.markdown('', unsafe_allow_html=True)
st.markdown("### đ Invoice Details")
gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
tab1, tab2, tab3, tab4 = st.tabs([
"đ Invoice Details",
"đĨ Party Details",
"đĻ Bank Details",
"đ Line Items"
])
# TAB 1: Header (includes invoice details + summary fields)
with tab1:
header = gt_parse.get('header', {})
summary = gt_parse.get('summary', {})
# Invoice No
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Invoice No",
value=header.get('invoice_no', ''),
key=f"invoice_no_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'invoice_no',
st.session_state[f"invoice_no_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_invoice_no_{selected_file}",
type="primary" if is_ocr_active('header', 'invoice_no') else "secondary"):
activate_ocr_field('header', 'invoice_no')
# Invoice Date
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Invoice Date",
value=header.get('invoice_date', ''),
key=f"invoice_date_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'invoice_date',
st.session_state[f"invoice_date_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_invoice_date_{selected_file}",
type="primary" if is_ocr_active('header', 'invoice_date') else "secondary"):
activate_ocr_field('header', 'invoice_date')
# Payment Terms
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Payment Terms",
value=header.get('payment_terms', ''),
key=f"payment_terms_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'payment_terms',
st.session_state[f"payment_terms_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_payment_terms_{selected_file}",
type="primary" if is_ocr_active('header', 'payment_terms') else "secondary"):
activate_ocr_field('header', 'payment_terms')
# Due Date
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Due Date",
value=header.get('due_date', ''),
key=f"due_date_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'due_date',
st.session_state[f"due_date_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_due_date_{selected_file}",
type="primary" if is_ocr_active('header', 'due_date') else "secondary"):
activate_ocr_field('header', 'due_date')
# ---- Summary fields ----------------------------------------------------
# Every field in this section follows the same three-step pattern:
#   1. st.columns([5, 1]) creates a wide input column and a narrow button
#      column.
#   2. A text input is pre-filled from `summary` and keyed by
#      "<field>_<selected_file>". Its on_change callback reads the widget's
#      current value from st.session_state and passes it to
#      sync_field_to_data(selected_file, 'summary', <field>, value).
#   3. A button keyed "ocr_<field>_<selected_file>" is drawn as "primary"
#      while is_ocr_active('summary', <field>) is true, otherwise
#      "secondary"; clicking it calls activate_ocr_field('summary', <field>).
# `new_value` is reassigned by each field and never read in this section;
# edits are propagated through the on_change callbacks.
# NOTE(review): `summary` is defined above this section - presumably the
# 'summary' part of gt_parse; confirm.
# NOTE(review): in this copy of the file the st.markdown(...) literal in each
# button column is split over two lines and the button label looks like a
# mis-decoded emoji - verify both against the original source and encoding.
# Subtotal
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Subtotal",
value=summary.get('subtotal', ''),
key=f"subtotal_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'summary', 'subtotal',
st.session_state[f"subtotal_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_subtotal_{selected_file}",
type="primary" if is_ocr_active('summary', 'subtotal') else "secondary"):
activate_ocr_field('summary', 'subtotal')
# Tax Rate
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Tax Rate",
value=summary.get('tax_rate', ''),
key=f"tax_rate_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'summary', 'tax_rate',
st.session_state[f"tax_rate_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_tax_rate_{selected_file}",
type="primary" if is_ocr_active('summary', 'tax_rate') else "secondary"):
activate_ocr_field('summary', 'tax_rate')
# Tax Amount
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Tax Amount",
value=summary.get('tax_amount', ''),
key=f"tax_amount_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'summary', 'tax_amount',
st.session_state[f"tax_amount_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_tax_amount_{selected_file}",
type="primary" if is_ocr_active('summary', 'tax_amount') else "secondary"):
activate_ocr_field('summary', 'tax_amount')
# Discount Rate
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Discount Rate",
value=summary.get('discount_rate', ''),
key=f"discount_rate_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'summary', 'discount_rate',
st.session_state[f"discount_rate_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_discount_rate_{selected_file}",
type="primary" if is_ocr_active('summary', 'discount_rate') else "secondary"):
activate_ocr_field('summary', 'discount_rate')
# Total Discount Amount
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Total Discount Amount",
value=summary.get('total_discount_amount', ''),
key=f"total_discount_amount_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'summary', 'total_discount_amount',
st.session_state[f"total_discount_amount_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_total_discount_amount_{selected_file}",
type="primary" if is_ocr_active('summary', 'total_discount_amount') else "secondary"):
activate_ocr_field('summary', 'total_discount_amount')
# Total Amount
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Total Amount",
value=summary.get('total_amount', ''),
key=f"total_amount_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'summary', 'total_amount',
st.session_state[f"total_amount_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_total_amount_{selected_file}",
type="primary" if is_ocr_active('summary', 'total_amount') else "secondary"):
activate_ocr_field('summary', 'total_amount')
# Currency
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Currency",
value=summary.get('currency', ''),
key=f"currency_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'summary', 'currency',
st.session_state[f"currency_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_currency_{selected_file}",
type="primary" if is_ocr_active('summary', 'currency') else "secondary"):
activate_ocr_field('summary', 'currency')
# TAB 2: Party Details (without bank details)
# Renders a sender/recipient swap button followed by editors for the sender
# and recipient name and address. Each editor pairs an input widget with an
# OCR button: the input is pre-filled from gt_parse['header'], its on_change
# callback forwards the widget's session-state value to
# sync_field_to_data(selected_file, 'header', <field>, value), and the button
# calls activate_ocr_field('header', <field>) when clicked.
# `new_value` is reassigned by each field and not read in this section.
with tab2:
# SWAP BUTTON
# The button sits in the middle (width 2) column of a 1:2:1 layout; col1 and
# col3 are not used in the visible lines.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
if st.button("đ Swap Sender â Recipient", key=f"swap_btn_{selected_file}",
type="primary", use_container_width=True):
# Act only when the just_swapped flag is not already set; the flag is raised
# before the swap is performed and the script is rerun.
# NOTE(review): presumably this guards against handling one click twice
# across the rerun - confirm.
if not st.session_state.just_swapped:
st.session_state.just_swapped = True
swap_sender_recipient_details(selected_file)
st.rerun()
# Clear the flag on the pass that follows a swap.
if st.session_state.just_swapped:
st.session_state.just_swapped = False
st.markdown("**Sender Details**")
# Missing 'header' falls back to an empty dict so the .get(...) calls below
# yield '' for every field.
header = gt_parse.get('header', {})
# Sender Name
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Sender Name",
value=header.get('sender_name', ''),
key=f"sender_name_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'sender_name',
st.session_state[f"sender_name_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_sender_name_{selected_file}",
type="primary" if is_ocr_active('header', 'sender_name') else "secondary"):
activate_ocr_field('header', 'sender_name')
# Sender Address
# Addresses use a multi-line text_area (height=60) instead of text_input.
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_area(
"Sender Address",
value=header.get('sender_addr', ''),
key=f"sender_addr_{selected_file}",
height=60,
on_change=lambda: sync_field_to_data(selected_file, 'header', 'sender_addr',
st.session_state[f"sender_addr_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_sender_addr_{selected_file}",
type="primary" if is_ocr_active('header', 'sender_addr') else "secondary"):
activate_ocr_field('header', 'sender_addr')
st.markdown("**Recipient Details**")
# Recipient Name
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Recipient Name",
value=header.get('rcpt_name', ''),
key=f"rcpt_name_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'rcpt_name',
st.session_state[f"rcpt_name_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_rcpt_name_{selected_file}",
type="primary" if is_ocr_active('header', 'rcpt_name') else "secondary"):
activate_ocr_field('header', 'rcpt_name')
# Recipient Address
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_area(
"Recipient Address",
value=header.get('rcpt_addr', ''),
key=f"rcpt_addr_{selected_file}",
height=60,
on_change=lambda: sync_field_to_data(selected_file, 'header', 'rcpt_addr',
st.session_state[f"rcpt_addr_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_rcpt_addr_{selected_file}",
type="primary" if is_ocr_active('header', 'rcpt_addr') else "secondary"):
activate_ocr_field('header', 'rcpt_addr')
# TAB 3: Bank Details
# Seven bank-related editors (IBAN, name, account number, routing, SWIFT,
# account name, branch). Each one:
#   * lays out a 5:1 column pair (input, button);
#   * shows a text input pre-filled from gt_parse['header'] and keyed
#     "<field>_<selected_file>", whose on_change callback forwards the
#     widget's session-state value to
#     sync_field_to_data(selected_file, 'header', <field>, value);
#   * shows a button keyed "ocr_<field>_<selected_file>" that is "primary"
#     while is_ocr_active('header', <field>) is true and calls
#     activate_ocr_field('header', <field>) on click.
# `new_value` is reassigned by each field and not read in this section.
with tab3:
# Missing 'header' falls back to an empty dict, so every field defaults to ''.
header = gt_parse.get('header', {})
# Bank IBAN
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Bank IBAN",
value=header.get('bank_iban', ''),
key=f"bank_iban_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_iban',
st.session_state[f"bank_iban_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_bank_iban_{selected_file}",
type="primary" if is_ocr_active('header', 'bank_iban') else "secondary"):
activate_ocr_field('header', 'bank_iban')
# Bank Name
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Bank Name",
value=header.get('bank_name', ''),
key=f"bank_name_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_name',
st.session_state[f"bank_name_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_bank_name_{selected_file}",
type="primary" if is_ocr_active('header', 'bank_name') else "secondary"):
activate_ocr_field('header', 'bank_name')
# Bank Account No
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Bank Account No",
value=header.get('bank_acc_no', ''),
key=f"bank_acc_no_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_acc_no',
st.session_state[f"bank_acc_no_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_bank_acc_no_{selected_file}",
type="primary" if is_ocr_active('header', 'bank_acc_no') else "secondary"):
activate_ocr_field('header', 'bank_acc_no')
# Bank Routing
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Bank Routing",
value=header.get('bank_routing', ''),
key=f"bank_routing_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_routing',
st.session_state[f"bank_routing_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_bank_routing_{selected_file}",
type="primary" if is_ocr_active('header', 'bank_routing') else "secondary"):
activate_ocr_field('header', 'bank_routing')
# Bank SWIFT
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Bank SWIFT",
value=header.get('bank_swift', ''),
key=f"bank_swift_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_swift',
st.session_state[f"bank_swift_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_bank_swift_{selected_file}",
type="primary" if is_ocr_active('header', 'bank_swift') else "secondary"):
activate_ocr_field('header', 'bank_swift')
# Bank Account Name
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Bank Account Name",
value=header.get('bank_acc_name', ''),
key=f"bank_acc_name_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_acc_name',
st.session_state[f"bank_acc_name_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_bank_acc_name_{selected_file}",
type="primary" if is_ocr_active('header', 'bank_acc_name') else "secondary"):
activate_ocr_field('header', 'bank_acc_name')
# Bank Branch
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Bank Branch",
value=header.get('bank_branch', ''),
key=f"bank_branch_{selected_file}",
on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_branch',
st.session_state[f"bank_branch_{selected_file}"])
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_bank_branch_{selected_file}",
type="primary" if is_ocr_active('header', 'bank_branch') else "secondary"):
activate_ocr_field('header', 'bank_branch')
# TAB 4: Items
# Line-item editor. It has three parts:
#   1. "Add" / "Remove last" buttons that mutate the 'items' list stored in
#      st.session_state.edited_data[selected_file]['gt_parse'] and rerun.
#   2. One expander per item with ten field editors, each paired with an OCR
#      button (same input + button pattern as the other tabs, but with the
#      item index passed as an extra argument).
#   3. A read-only DataFrame view of all items.
with tab4:
# Items are read from the edited copy held in session state, not from the
# `gt_parse` variable used by the other tabs.
current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
items = current_gt_parse.get('items', [])
# Add/Remove row buttons
col_add, col_remove = st.columns([1, 1])
with col_add:
if st.button("â Add New Item", key=f"add_item_{selected_file}", use_container_width=True):
# button_clicked is raised before mutating and rerunning, and cleared further
# down on the following pass.
# NOTE(review): presumably this guards against handling one click twice
# across the rerun - confirm.
if not st.session_state.button_clicked:
st.session_state.button_clicked = True
# Blank item: all ten fields start as empty strings.
new_item = {
"descriptions": "", "SKU": "", "quantity": "",
"unit_price": "", "amount": "", "discount_rate_per_item": "",
"discount_amount_per_item": "", "tax_rate_per_item": "",
"tax_amount_per_item": "", "Line_total": ""
}
current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
current_items = current_gt_parse.get('items', [])
current_items.append(new_item)
# Written back explicitly so the change is stored even when 'items' or
# 'gt_parse' was missing and the .get(...) defaults above were used.
current_gt_parse['items'] = current_items
st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
st.session_state.modified_indices.add(selected_file)
# Record the new item's expander as open so it is expanded after the rerun.
new_idx = len(current_items) - 1
expander_key_new = f"line_item_expander_{selected_file}_{new_idx}"
st.session_state[expander_key_new] = True
st.rerun()
with col_remove:
# The button is disabled when the list read at the top of this tab is empty;
# the handler re-checks len(items) > 0 as well.
if st.button("â Remove Last Item", key=f"remove_item_{selected_file}",
disabled=(len(items) == 0), use_container_width=True):
if not st.session_state.button_clicked and len(items) > 0:
st.session_state.button_clicked = True
current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
current_items = current_gt_parse.get('items', [])
# Length is captured before the pop so popped_idx is the removed item's index.
N = len(current_items)
current_items.pop()
current_gt_parse['items'] = current_items
st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
st.session_state.modified_indices.add(selected_file)
# Drop the stored expander state of the removed item, if any.
popped_idx = N - 1
expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
if expander_key_popped in st.session_state:
del st.session_state[expander_key_popped]
st.rerun()
# Clear the click guard on the pass that follows an add/remove.
if st.session_state.button_clicked:
st.session_state.button_clicked = False
# Re-read the items before rendering the editors.
current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
items = current_gt_parse.get('items', [])
if items:
for idx, item in enumerate(items):
# The expander's initial state comes from session state (default: collapsed).
expander_key = f"line_item_expander_{selected_file}_{idx}"
expanded_default = st.session_state.get(expander_key, False)
# Title shows the 1-based item number and the first 30 characters of the
# description.
# NOTE(review): the [:30] slice assumes 'descriptions' is a string (or
# absent); a None or numeric value in the record would raise here - confirm
# the upstream data is normalised.
with st.expander(f"**Item {idx + 1}** - {item.get('descriptions', 'N/A')[:30]}", expanded=expanded_default):
# Field pattern inside the expander:
#   * widget keys are "<prefix>_<selected_file>_<idx>";
#   * on_change lambdas bind the loop index as a default argument (i=idx) so
#     each callback keeps its own item index, and pass it as the last
#     argument to sync_field_to_data(selected_file, 'items', <field>, value, i);
#   * the OCR button stores True under expander_key before calling
#     activate_ocr_field('items', <field>, idx), so the expander is recorded
#     as open.
# `new_value` is reassigned by each field and not read in this section.
# Descriptions
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_area(
"Descriptions",
value=item.get('descriptions', ''),
key=f"desc_{selected_file}_{idx}",
height=60,
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'descriptions',
st.session_state[f"desc_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_desc_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'descriptions', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'descriptions', idx)
# SKU
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"SKU",
value=item.get('SKU', ''),
key=f"sku_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'SKU',
st.session_state[f"sku_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_sku_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'SKU', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'SKU', idx)
# Quantity
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Quantity",
value=item.get('quantity', ''),
key=f"qty_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'quantity',
st.session_state[f"qty_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_qty_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'quantity', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'quantity', idx)
# Unit Price
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Unit Price",
value=item.get('unit_price', ''),
key=f"unit_price_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'unit_price',
st.session_state[f"unit_price_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_unit_price_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'unit_price', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'unit_price', idx)
# Amount
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Amount",
value=item.get('amount', ''),
key=f"amount_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'amount',
st.session_state[f"amount_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_amount_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'amount', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'amount', idx)
# Discount Rate Per Item
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Discount Rate Per Item",
value=item.get('discount_rate_per_item', ''),
key=f"discount_rate_per_item_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'discount_rate_per_item',
st.session_state[f"discount_rate_per_item_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_discount_rate_per_item_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'discount_rate_per_item', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'discount_rate_per_item', idx)
# Discount Amount Per Item
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Discount Amount Per Item",
value=item.get('discount_amount_per_item', ''),
key=f"discount_amount_per_item_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'discount_amount_per_item',
st.session_state[f"discount_amount_per_item_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_discount_amount_per_item_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'discount_amount_per_item', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'discount_amount_per_item', idx)
# Tax Rate Per Item (NEW FIELD)
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Tax Rate Per Item",
value=item.get('tax_rate_per_item', ''),
key=f"tax_rate_per_item_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'tax_rate_per_item',
st.session_state[f"tax_rate_per_item_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_tax_rate_per_item_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'tax_rate_per_item', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'tax_rate_per_item', idx)
# Tax Amount Per Item (RENAMED from "Tax")
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Tax Amount Per Item",
value=item.get('tax_amount_per_item', ''),
key=f"tax_amount_per_item_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'tax_amount_per_item',
st.session_state[f"tax_amount_per_item_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_tax_amount_per_item_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'tax_amount_per_item', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'tax_amount_per_item', idx)
# Line Total
# The data key is 'Line_total' (capital L) while the widget keys use
# "line_total"; keep both spellings as they are.
col_input, col_btn = st.columns([5, 1])
with col_input:
new_value = st.text_input(
"Line Total",
value=item.get('Line_total', ''),
key=f"line_total_{selected_file}_{idx}",
on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'Line_total',
st.session_state[f"line_total_{selected_file}_{i}"], i)
)
with col_btn:
st.markdown("
", unsafe_allow_html=True)
if st.button("đ", key=f"ocr_line_total_{selected_file}_{idx}",
type="primary" if is_ocr_active('items', 'Line_total', idx) else "secondary"):
st.session_state[expander_key] = True
activate_ocr_field('items', 'Line_total', idx)
# Read-only overview of all items; the index is shifted to start at 1 and
# labelled 'SL No'.
st.markdown("**đ Items Summary Table**")
df = pd.DataFrame(items)
df.index = df.index + 1
df.index.name = 'SL No'
st.dataframe(
df,
use_container_width=True,
height=300
)
# Fallback message for the case where there are no items to edit.
else:
st.info("No items. Click 'â Add New Item' to add a new item.")
# Save button
# Renders the "Save Changes" button in the first of two equal columns (col2 is
# not used in the visible lines). On click, and only when the just_saved flag
# is not already set, it raises the flag, calls auto_save(selected_file),
# stores a success message together with the current time in session state,
# and reruns the script.
# NOTE(review): presumably the just_saved flag guards against handling one
# click twice across the rerun - confirm.
# NOTE(review): the save_message literal and the final st.markdown(...)
# literal are each split over two lines in this copy of the file, and the
# labels look like mis-decoded emoji - verify against the original
# source and encoding.
col1, col2 = st.columns([1, 1])
with col1:
if st.button("đž Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
if not st.session_state.just_saved:
st.session_state.just_saved = True
auto_save(selected_file)
st.session_state.save_message = "â
Changes saved successfully!"
# Timestamp of the save; it is only written here, not read in the visible
# lines.
st.session_state.save_message_time = time.time()
st.rerun()
# Clear the flag on the pass that follows a save.
if st.session_state.just_saved:
st.session_state.just_saved = False
# Show the stored message whenever one is set.
if st.session_state.save_message:
st.success(st.session_state.save_message)
st.markdown('
', unsafe_allow_html=True) # Close scrollable container