invoice-annotator

Sleeping

App Files Files Community

invoice-annotator / src /streamlit_app.py

Bhuvi13

Update src/streamlit_app.py

78e5961 verified 9 days ago

raw

history blame contribute delete

77.8 kB

	import os
	from pathlib import Path

	# -----------------------------
	# Environment hardening (HF Spaces, /.cache issue)
	# -----------------------------
	# When HOME is empty or "/", redirect it to the current working directory
	# if that is writable, otherwise to /tmp, and then make sure
	# $HOME/.streamlit exists. This runs before `import streamlit` further down.
	# NOTE(review): presumably Streamlit needs a writable ~/.streamlit — confirm.
	_home = os.environ.get("HOME", "")
	if _home in ("", "/"):
	    repo_dir = os.getcwd()
	    safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp"
	    os.environ["HOME"] = safe_home
	    print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}")

	streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
	try:
	    streamlit_dir.mkdir(parents=True, exist_ok=True)
	    print(f"[startup] ensured {streamlit_dir}")
	except OSError as e:
	    # Best effort only: a failure here is logged and startup continues.
	    print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")

	import streamlit as st
	import json
	import io
	from PIL import Image
	import time
	import pandas as pd
	from streamlit_drawable_canvas import st_canvas
	import pytesseract
	import numpy as np
	from datetime import datetime
	import fitz # PyMuPDF for PDF handling

	# Tesseract binary location: only Windows gets an explicit path; on every
	# other OS pytesseract's own default command is left untouched.
	if os.name == 'nt':
	    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

	# Page configuration
	st.set_page_config(
	    page_title="Invoice Data Viewer",
	    layout="wide",
	)

	# Custom CSS, injected as raw HTML through st.markdown(unsafe_allow_html=True).
	# The rules below tighten the spacing around text inputs, text areas and
	# select boxes, shrink buttons and color primary buttons red, reduce gaps
	# between blocks/columns, and let the column that holds the drawable canvas
	# scroll horizontally with a styled scrollbar.
	st.markdown("""
	<style>
	/* Reduce spacing between form fields */
	.stTextInput > div > div > input,
	.stTextArea > div > div > textarea,
	.stSelectbox > div > div > div {
	margin-bottom: 0px !important;
	}
	div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextInput"]),
	div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextArea"]),
	div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stSelectbox"]) {
	margin-bottom: 4px !important;
	}
	.stSelectbox { margin-bottom: 4px !important; }

	/* Button styling */
	.stButton > button {
	padding: 0.25rem 0.5rem !important;
	font-size: 0.85rem !important;
	line-height: 1 !important;
	min-height: 1.8rem !important;
	height: 1.8rem !important;
	}
	.stButton > button[kind="primary"] {
	background-color: #FF0000 !important;
	border-color: #FF0000 !important;
	color: white !important;
	}
	.stButton > button[kind="primary"]:hover {
	background-color: #CC0000 !important;
	border-color: #CC0000 !important;
	}

	/* Small vertical gaps */
	[data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] { gap: 0.25rem !important; }
	[data-testid="column"] { padding-left: 0.5rem !important; padding-right: 0.5rem !important; }
	[data-testid="stHorizontalBlock"] { gap: 0.5rem !important; }

	/* FIXED: Remove problematic viewport-based heights */
	section[data-testid="stAppViewContainer"] {
	overflow: visible !important;
	}

	.main .block-container {
	overflow: visible !important;
	padding-bottom: 1rem !important;
	}

	/* Force the column containing the canvas to allow horizontal scroll */
	[data-testid="column"]:has(.stCanvas) {
	overflow-x: auto !important;
	overflow-y: hidden !important;
	}

	/* Ensure canvas doesn't shrink */
	.stCanvas {
	min-width: max-content !important;
	}

	/* Style the scrollbar */
	[data-testid="column"]:has(.stCanvas)::-webkit-scrollbar {
	height: 12px;
	}
	[data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-track {
	background: #e0e0e0;
	border-radius: 6px;
	}
	[data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-thumb {
	background: rgba(0,0,0,0.4);
	border-radius: 6px;
	}
	[data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-thumb:hover {
	background: rgba(0,0,0,0.6);
	}
	</style>
	""", unsafe_allow_html=True)

	def load_jsonl(file):
	    """Parse an uploaded JSONL file into a list of records.

	    Args:
	        file: Object whose ``getvalue()`` returns the raw file bytes
	            (for example ``io.BytesIO`` or the object returned by
	            ``st.file_uploader``).

	    Returns:
	        list: One decoded JSON value per non-blank line, in file order.

	    Raises:
	        UnicodeDecodeError: If the bytes are not valid UTF-8.
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	    """
	    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order
	    # mark, which json.loads would otherwise reject on the first record.
	    content = file.getvalue().decode('utf-8-sig')
	    # Split on "\n" only: str.splitlines() would also break on characters
	    # that may legally appear inside JSON strings (e.g. U+2028).
	    return [
	        json.loads(line)
	        for line in content.strip().split('\n')
	        if line.strip()
	    ]

	def reorder_record_fields(record):
	    """Return a new dict holding *record*'s entries in export order.

	    ``file_name``, ``file_names`` and ``gt_parse`` come first (each only
	    when present), followed by every other key in its original order.
	    """
	    leading_keys = ('file_name', 'file_names', 'gt_parse')
	    reordered = {key: record[key] for key in leading_keys if key in record}
	    reordered.update(
	        (key, value)
	        for key, value in record.items()
	        if key not in leading_keys
	    )
	    return reordered

	def save_to_jsonl(data):
	    """Serialize *data* to JSONL text, one JSON document per line.

	    Every record goes through ``reorder_record_fields`` before being dumped.
	    Lines are joined with ``"\\n"``; no trailing newline is appended.
	    """
	    return '\n'.join(
	        json.dumps(reorder_record_fields(entry)) for entry in data
	    )

	def pdf_to_images(pdf_file):
	    """Render every page of a PDF to a PIL image.

	    Args:
	        pdf_file: File-like object whose ``read()`` returns the PDF bytes.

	    Returns:
	        list: One PIL image per page, in page order. If anything fails the
	        error is shown with ``st.error`` and an empty list is returned.
	    """
	    try:
	        pdf_document = fitz.open(stream=pdf_file.read(), filetype="pdf")
	        try:
	            images = []
	            for page_num in range(pdf_document.page_count):
	                # Matrix(3, 3) scales the page by 3 on both axes before rasterizing.
	                pix = pdf_document[page_num].get_pixmap(matrix=fitz.Matrix(3, 3), alpha=False)
	                images.append(Image.open(io.BytesIO(pix.tobytes("png"))))
	            return images
	        finally:
	            # Release the document even when rendering a page raises.
	            pdf_document.close()
	    except Exception as e:
	        st.error(f"Error converting PDF: {str(e)}")
	        return []

	def perform_ocr(image, bbox):
	    """Run Tesseract OCR on a rectangular region of *image*.

	    Args:
	        image: Image object providing ``width``, ``height`` and ``crop()``.
	        bbox: Indexable ``(x1, y1, x2, y2)`` in image pixel coordinates.
	            Corners may be given in any order; values are truncated to int
	            and clamped to the image bounds.

	    Returns:
	        str: The recognized text with surrounding whitespace removed. On any
	        failure (including a selection with no area inside the image) a
	        message starting with ``"OCR Error:"`` is returned instead of raising.
	    """
	    try:
	        x_a, y_a, x_b, y_b = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])

	        # Normalize so (left, top) is always the upper-left corner.
	        left, right = min(x_a, x_b), max(x_a, x_b)
	        top, bottom = min(y_a, y_b), max(y_a, y_b)

	        # Clamp to the image.
	        left, top = max(0, left), max(0, top)
	        right, bottom = min(image.width, right), min(image.height, bottom)

	        # A click without a drag, or a box entirely off the image, leaves
	        # nothing to crop; report that explicitly rather than via a
	        # lower-level crop/Tesseract failure.
	        if right <= left or bottom <= top:
	            return "OCR Error: empty selection region"

	        cropped = image.crop((left, top, right, bottom))
	        return pytesseract.image_to_string(cropped, config='--psm 6').strip()
	    except Exception as e:
	        # Callers detect failure by the "OCR Error" prefix, so never raise.
	        return f"OCR Error: {str(e)}"

	def scale_image_to_fixed_size(image, max_width=1100, max_height=1100):
	    """Resize *image* to fit inside ``max_width`` x ``max_height``, keeping aspect ratio.

	    RGBA images are flattened onto a white background; any other non-RGB
	    mode is converted to RGB. The scale factor is not capped at 1, so images
	    smaller than the box are enlarged. No padding is added.

	    Args:
	        image: PIL image to scale.
	        max_width: Maximum width of the result in pixels.
	        max_height: Maximum height of the result in pixels.

	    Returns:
	        tuple: ``(resized_image, ratio, 0, 0)``. ``ratio`` is the applied
	        scale factor; the last two values are always 0.
	    """
	    if image.mode == 'RGBA':
	        # Use the alpha band as paste mask so transparent areas become white.
	        background = Image.new('RGB', image.size, (255, 255, 255))
	        background.paste(image, mask=image.split()[3])
	        image = background
	    elif image.mode != 'RGB':
	        image = image.convert('RGB')

	    ratio = min(max_width / image.width, max_height / image.height)

	    # int() truncates; for extreme aspect ratios that could yield 0, which
	    # resize() rejects, so keep each side at least one pixel.
	    new_width = max(1, int(image.width * ratio))
	    new_height = max(1, int(image.height * ratio))

	    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

	    return resized_image, ratio, 0, 0

	def get_base_filename(record):
	    """Return the extension-less base file name for a record.

	    ``file_names`` (plural) takes precedence when it is non-empty:
	      * a list: the first entry is used, with a known image extension and a
	        trailing ``_page<N>`` suffix removed;
	      * any other truthy value: returned unchanged.
	    Otherwise ``file_name`` (singular) is used, with a ``.pdf`` extension and
	    then one known image extension removed. Extension checks ignore case.

	    Args:
	        record: Dict that may contain ``file_names`` and/or ``file_name``.

	    Returns:
	        The base name, or ``''`` when the record has no usable file name
	        (including ``file_name`` being ``None``).
	    """
	    import re  # function-local import, as in detect_image_groups

	    image_exts = ('.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp')

	    def strip_image_ext(name):
	        # Remove at most one known image extension, case-insensitively.
	        lowered = name.lower()
	        for ext in image_exts:
	            if lowered.endswith(ext):
	                return name[:-len(ext)]
	        return name

	    file_names = record.get('file_names')
	    if file_names:
	        if isinstance(file_names, list):
	            base = strip_image_ext(file_names[0])
	            # Only a trailing "_page<digits>" is a page marker; "_page"
	            # elsewhere in the name is part of the base name.
	            return re.sub(r'_page\d+$', '', base, flags=re.IGNORECASE)
	        return file_names

	    # `or ''` also covers an explicit null file_name in the JSONL.
	    file_name = record.get('file_name') or ''

	    # A record may still name the source PDF even if images were uploaded.
	    if file_name.lower().endswith('.pdf'):
	        file_name = file_name[:-4]

	    return strip_image_ext(file_name)

	def detect_image_groups(images_dict):
	    """Find multi-page image groups among uploaded files.

	    Files named ``<base>_page<N>.<ext>`` (ext one of png, jpg, jpeg, tiff,
	    tif, bmp; case-insensitive) are collected per ``<base>``. A base with
	    two or more pages becomes a group, e.g. ``invoice01_page1.png`` and
	    ``invoice01_page2.png`` form the group ``invoice01``.

	    Args:
	        images_dict: Mapping of uploaded file name to its loaded image.

	    Returns:
	        tuple: ``(image_groups_metadata, grouped_files)`` where
	        ``image_groups_metadata`` maps each base name to a dict with
	        ``pages`` (images sorted by page number), ``filenames`` (same
	        order), ``total_pages`` and ``current_page`` (0), and
	        ``grouped_files`` is the set of every file name that matched the
	        pattern, including names whose base has only a single page.
	    """
	    import re

	    # Plain "|" alternation: an escaped "\|" would match a literal pipe and
	    # no real file name would ever be recognized.
	    page_pattern = re.compile(
	        r'^(.+)_page(\d+)\.(png|jpg|jpeg|tiff|tif|bmp)$',
	        re.IGNORECASE,
	    )

	    pages_by_base = {}
	    grouped_files = set()

	    for filename in images_dict:
	        match = page_pattern.match(filename)
	        if not match:
	            continue
	        base_name, page_digits, ext = match.groups()
	        pages_by_base.setdefault(base_name, []).append({
	            'filename': filename,
	            'page_num': int(page_digits),
	            'ext': ext,
	        })
	        grouped_files.add(filename)

	    image_groups_metadata = {}
	    for base_name, pages in pages_by_base.items():
	        # A lone "<base>_page<N>" file is not treated as a group.
	        if len(pages) < 2:
	            continue
	        pages.sort(key=lambda page: page['page_num'])
	        image_groups_metadata[base_name] = {
	            'pages': [images_dict[page['filename']] for page in pages],
	            'filenames': [page['filename'] for page in pages],
	            'total_pages': len(pages),
	            'current_page': 0,
	        }

	    return image_groups_metadata, grouped_files

	def swap_sender_recipient_details(index):
	    """Exchange sender and recipient name/address in the header of record *index*.

	    Fields missing from the header are treated as empty strings. The
	    updated header is written back to ``st.session_state.edited_data`` and
	    the record index is added to ``st.session_state.modified_indices``.
	    """
	    record = st.session_state.edited_data[index]
	    gt_parse = record.get('gt_parse', {})
	    header = gt_parse.get('header', {})

	    # Read both sides before writing anything so neither value is lost.
	    new_sender = (header.get('rcpt_name', ''), header.get('rcpt_addr', ''))
	    new_rcpt = (header.get('sender_name', ''), header.get('sender_addr', ''))

	    header['sender_name'], header['sender_addr'] = new_sender
	    header['rcpt_name'], header['rcpt_addr'] = new_rcpt

	    # Write back explicitly: header/gt_parse may be fresh dicts when the
	    # record did not contain them yet.
	    gt_parse['header'] = header
	    record['gt_parse'] = gt_parse
	    st.session_state.modified_indices.add(index)

	# Initialize session state
	# Every key is created only when it is not already present, so values that
	# are already stored in st.session_state are never overwritten here.
	_session_defaults = {
	    'data': None,
	    'current_index': 0,
	    'edited_data': None,
	    'page': 'upload',
	    'images': {},
	    'pdf_metadata': {},
	    'image_groups_metadata': {},
	    'current_page_num': {},
	    'modified_indices': set(),
	    'ocr_active_section': None,
	    'ocr_active_field': None,
	    'ocr_line_item_row': None,
	    'canvas_key': 0,
	    'button_clicked': False,
	    'save_message': None,
	    'save_message_time': None,
	    'just_saved': False,
	    'just_swapped': False,
	    'navigating_page': False,
	}
	for _state_key, _state_default in _session_defaults.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default

	def auto_save(index):
	    """Normalize the file-name field(s) of record *index* and mark it modified.

	    Does nothing when ``st.session_state.edited_data`` is empty. When the
	    record has no base file name a warning is shown and nothing is saved.
	    Otherwise the record is matched to an uploaded file and:
	      * multi-page PDF  -> ``file_names`` = ``<stem>_page<N>.png`` list,
	        ``file_name`` removed;
	      * single-page PDF -> ``file_name`` = ``<stem>.png``, ``file_names`` removed;
	      * image group     -> ``file_names`` = the group's file names,
	        ``file_name`` removed.
	    Finally ``st.session_state.data`` is replaced by a shallow copy of
	    ``edited_data`` and *index* is added to ``modified_indices``.
	    """
	    edited = st.session_state.edited_data
	    if not edited:
	        return

	    record = edited[index]

	    base_file_name = get_base_filename(record)
	    if not base_file_name:
	        st.warning("Cannot save: No file name found in record")
	        return

	    uploaded = st.session_state.images

	    # Resolve the uploaded file: exact key first, then key + known
	    # extension, then any upload whose name minus its last extension
	    # equals the base name.
	    actual_file_name = None
	    if base_file_name in uploaded:
	        actual_file_name = base_file_name
	    else:
	        for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
	            candidate = base_file_name + ext
	            if candidate in uploaded:
	                actual_file_name = candidate
	                break

	        if not actual_file_name:
	            for uploaded_name in uploaded.keys():
	                if uploaded_name.rsplit('.', 1)[0] == base_file_name:
	                    actual_file_name = uploaded_name
	                    break

	    if actual_file_name and actual_file_name in st.session_state.pdf_metadata:
	        # PDF upload: records reference the per-page PNG names instead.
	        page_total = st.session_state.pdf_metadata[actual_file_name]['total_pages']
	        stem = actual_file_name.rsplit('.', 1)[0]

	        if page_total > 1:
	            record['file_names'] = [f"{stem}_page{page + 1}.png" for page in range(page_total)]
	            record.pop('file_name', None)
	        else:
	            record['file_name'] = f"{stem}.png"
	            record.pop('file_names', None)
	    elif base_file_name in st.session_state.image_groups_metadata:
	        # Multi-page image group: reference the group's own file names.
	        group_meta = st.session_state.image_groups_metadata[base_file_name]
	        record['file_names'] = group_meta['filenames']
	        record.pop('file_name', None)

	    st.session_state.data = edited.copy()
	    st.session_state.modified_indices.add(index)

	def sync_field_to_data(index, section, field, value, row_idx=None):
	"""Write one edited field value into record *index* and mark it modified.

	For section == 'items' the value is stored in items[row_idx][field], and
	only when row_idx is not None and below len(items). For any other section
	the value is stored in gt_parse[section][field], creating the section
	dict first when it is missing. The (possibly new) gt_parse dict is then
	written back to st.session_state.edited_data[index] and index is added to
	st.session_state.modified_indices.

	NOTE(review): block indentation is missing in this copy of the file, so
	the exact nesting of the ``gt_parse['items'] = items`` write-back should
	be confirmed against the original source.
	"""
	# Falls back to a fresh dict when the record has no 'gt_parse' yet.
	gt_parse = st.session_state.edited_data[index].get('gt_parse', {})

	if section == 'items':
	# Line items are a list of dicts addressed by row index.
	items = gt_parse.get('items', [])
	if row_idx is not None and row_idx < len(items):
	items[row_idx][field] = value
	gt_parse['items'] = items
	else:
	# Header/summary-style sections are plain dicts keyed by field name.
	if section not in gt_parse:
	gt_parse[section] = {}
	gt_parse[section][field] = value

	st.session_state.edited_data[index]['gt_parse'] = gt_parse
	st.session_state.modified_indices.add(index)

	def activate_ocr_field(section, field, row_idx=None):
	"""Toggle which form field receives the next OCR result.

	If (section, field, row_idx) is already the active target, the three
	ocr_* session-state values are reset to None; otherwise they are set to
	the given target. For section == 'items' with a row index, the session
	key "line_item_expander_<current_index>_<row_idx>" is set to True.

	NOTE(review): block indentation is missing in this copy of the file, so
	whether the expander flag is also set when a field is toggled off should
	be confirmed against the original source.
	"""
	# Same target clicked again -> deactivate.
	if (st.session_state.ocr_active_section == section and
	st.session_state.ocr_active_field == field and
	st.session_state.ocr_line_item_row == row_idx):
	st.session_state.ocr_active_section = None
	st.session_state.ocr_active_field = None
	st.session_state.ocr_line_item_row = None
	else:
	# Different target -> make it the active one.
	st.session_state.ocr_active_section = section
	st.session_state.ocr_active_field = field
	st.session_state.ocr_line_item_row = row_idx

	if section == 'items' and row_idx is not None:
	# The key is built from the currently selected record and the row.
	current_idx = st.session_state.get('current_index', 0)
	expander_key = f"line_item_expander_{current_idx}_{row_idx}"
	st.session_state[expander_key] = True

	def is_ocr_active(section, field, row_idx=None):
	    """Return True when (section, field, row_idx) is the OCR target stored in session state."""
	    state = st.session_state
	    active_target = (
	        state.ocr_active_section,
	        state.ocr_active_field,
	        state.ocr_line_item_row,
	    )
	    return active_target == (section, field, row_idx)

	# PAGE 1: Upload Page
	# Rendered while st.session_state.page == 'upload'. Step 1 loads the JSONL
	# records, step 2 loads images/PDFs, then the JSONL file names are matched
	# against the uploads and a button switches to the viewer page.
	# NOTE(review): block indentation is missing in this copy of the file; the
	# nesting of the statements below should be confirmed against the original.
	if st.session_state.page == 'upload':
	st.title("📤 Invoice Data Viewer with OCR")
	st.markdown("### Upload your files to begin")

	st.markdown("Step 1: Upload JSONL File")
	uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])

	if uploaded_file is not None:
	try:
	data = load_jsonl(uploaded_file)
	st.session_state.data = data
	# list.copy() is shallow: both lists reference the same record dicts.
	st.session_state.edited_data = data.copy()
	st.success(f"✅ Successfully loaded {len(data)} records!")
	except Exception as e:
	st.error(f"Error loading file: {str(e)}")

	st.markdown("Step 2: Upload Images/PDFs Folder")

	uploaded_files = st.file_uploader(
	"Choose image or PDF files",
	type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'],
	accept_multiple_files=True,
	help="Select all images and PDFs from your folder at once"
	)

	if uploaded_files:
	# images_dict: file name -> image (first page for PDFs).
	# pdf_metadata: PDF file name -> all rendered pages plus page counters.
	images_dict = {}
	pdf_metadata = {}

	for file in uploaded_files:
	try:
	# Text after the last dot, lower-cased, decides PDF vs. image.
	file_ext = file.name.lower().split('.')[-1]

	if file_ext == 'pdf':
	pdf_images = pdf_to_images(file)
	# pdf_to_images returns [] on failure; such PDFs are skipped.
	if pdf_images:
	images_dict[file.name] = pdf_images[0]
	pdf_metadata[file.name] = {
	'pages': pdf_images,
	'total_pages': len(pdf_images),
	'current_page': 0
	}
	else:
	image = Image.open(file)
	images_dict[file.name] = image

	except Exception as e:
	st.warning(f"Could not load file {file.name}: {str(e)}")

	st.session_state.images = images_dict
	st.session_state.pdf_metadata = pdf_metadata

	# Detect multi-page image groups (e.g., invoice01_page1.png, invoice01_page2.png)
	image_groups_metadata, grouped_files = detect_image_groups(images_dict)
	st.session_state.image_groups_metadata = image_groups_metadata

	# Initialize current page for PDFs and image groups
	# Existing entries are kept, so a page position already stored is not reset.
	for filename in pdf_metadata.keys():
	if filename not in st.session_state.current_page_num:
	st.session_state.current_page_num[filename] = 0

	for base_name in image_groups_metadata.keys():
	if base_name not in st.session_state.current_page_num:
	st.session_state.current_page_num[base_name] = 0

	if st.session_state.data is not None:
	# Collect the non-empty base file names referenced by the JSONL records.
	gt_file_names = []
	for rec in st.session_state.data:
	base_fname = get_base_filename(rec)
	if base_fname:
	gt_file_names.append(base_fname)

	matched_images = set()
	unmatched_gt_files = []

	# A JSONL name counts as matched if any of the strategies below finds
	# a corresponding upload or image group.
	for fname in gt_file_names:
	if not fname:
	continue

	# Create a base name by stripping common extensions
	fname_base = fname
	for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
	if fname.lower().endswith(ext.lower()):
	fname_base = fname[:-len(ext)]
	break

	# Check direct match
	if fname in images_dict:
	matched_images.add(fname)
	# Check base name in image groups (handles PDF converted to multi-page PNGs)
	elif fname_base in image_groups_metadata:
	matched_images.add(fname)
	# Check full name in image groups
	elif fname in image_groups_metadata:
	matched_images.add(fname)
	else:
	found = False
	# Try with extensions
	for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
	if fname + ext in images_dict:
	matched_images.add(fname)
	found = True
	break

	if not found:
	# Try matching base name in uploaded files
	for uploaded_name in images_dict.keys():
	uploaded_base = uploaded_name.rsplit('.', 1)[0]
	if uploaded_base == fname or uploaded_base == fname_base:
	matched_images.add(fname)
	found = True
	break

	for fname in gt_file_names:
	if fname and fname not in matched_images:
	unmatched_gt_files.append(fname)

	st.success(f"✅ Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!")
	# The count covers names matched by any strategy above, not only
	# byte-for-byte equal names. matched_images is a set while
	# gt_file_names is a list, so duplicate names are counted once on the
	# left and once per record on the right.
	st.info(f"🔎 Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}")

	if unmatched_gt_files:
	st.warning(f"⚠️ {len(unmatched_gt_files)} file(s) from JSONL not matched:")
	with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
	for fname in unmatched_gt_files:
	st.text(f" • {fname}")
	else:
	st.success("✅ All JSONL file names matched to files!")
	else:
	st.success(f"✅ Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!")
	st.info("ℹ️ Upload a JSONL file to see how many files match the ground truth 'file_name' field.")

	# The viewer can only be entered once JSONL records have been loaded.
	if st.session_state.data is not None:
	col1, col2, col3 = st.columns([1, 1, 1])
	with col2:
	if st.button("Continue to Viewer →", type="primary", use_container_width=True):
	st.session_state.page = 'viewer'
	# Entering the viewer starts with an empty set of modified records.
	st.session_state.modified_indices = set()
	st.session_state.navigating_page = False
	st.rerun()

	# PAGE 2: Viewer Page
	elif st.session_state.page == 'viewer':
	if st.session_state.save_message_time is not None:
	if time.time() - st.session_state.save_message_time > 3:
	st.session_state.save_message = None
	st.session_state.save_message_time = None

	today_date = datetime.now().strftime("%Y-%m-%d")

	col1, col2, col3, col4 = st.columns([1, 2, 2, 2])

	with col1:
	if st.button("← Back to Upload"):
	st.session_state.page = 'upload'
	st.session_state.ocr_active_section = None
	st.session_state.ocr_active_field = None
	st.session_state.save_message = None
	st.session_state.save_message_time = None
	st.session_state.navigating_page = False
	st.rerun()

	with col2:
	if st.session_state.modified_indices:
	modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)]
	jsonl_modified = save_to_jsonl(modified_data)
	st.download_button(
	label=f"⬇️ Download Modified ({len(modified_data)})",
	data=jsonl_modified,
	file_name=f"modified_invoice_data_{today_date}.jsonl",
	mime="application/jsonl",
	type="primary",
	use_container_width=True
	)
	else:
	st.button("⬇️ No Modified Records", disabled=True, use_container_width=True)

	with col3:
	if st.session_state.modified_indices:
	unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
	if i not in st.session_state.modified_indices]
	jsonl_unmodified = save_to_jsonl(unmodified_data)
	st.download_button(
	label=f"⬇️ Download Unmodified ({len(unmodified_data)})",
	data=jsonl_unmodified,
	file_name=f"unmodified_invoice_data_{today_date}.jsonl",
	mime="application/jsonl",
	use_container_width=True
	)
	else:
	st.button("⬇️ No Unmodified Records", disabled=True, use_container_width=True)

	with col4:
	jsonl_all = save_to_jsonl(st.session_state.edited_data)
	st.download_button(
	label=f"⬇️ Download All ({len(st.session_state.edited_data)})",
	data=jsonl_all,
	file_name=f"all_invoice_data_{today_date}.jsonl",
	mime="application/jsonl",
	use_container_width=True
	)

	# Build file names list for dropdown using helper function
	file_names = []
	for i, record in enumerate(st.session_state.data or []):
	base_name = get_base_filename(record)
	file_names.append(base_name if base_name else f'Record {i}')

	if not file_names:
	st.error("No records loaded. Please upload a JSONL file on the Upload page.")
	if st.button("← Back to Upload"):
	st.session_state.page = 'upload'
	st.rerun()
	else:
	options = list(range(len(file_names)))

	if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names):
	st.session_state.edited_data = (st.session_state.data or []).copy()

	cur_idx = st.session_state.get('current_index', 0)
	try:
	cur_idx = int(cur_idx)
	except Exception:
	cur_idx = 0
	if cur_idx < 0:
	cur_idx = 0
	if cur_idx >= len(options):
	cur_idx = len(options) - 1

	selected_file = st.selectbox(
	"Select a file to view:",
	options=options,
	format_func=lambda x: f"{'✏️ ' if x in st.session_state.modified_indices else ''}{file_names[x]}",
	index=cur_idx
	)

	st.session_state.current_index = selected_file
	current_record = st.session_state.edited_data[selected_file]

	left_col, right_col = st.columns([1.6, 1.0], gap="small")

	# LEFT SIDE: Image Display with OCR Canvas
	with left_col:
	# Use helper function to get base file name
	file_name = get_base_filename(current_record)

	if file_name:
	# Create base name by stripping extensions
	file_name_base = file_name
	for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
	if file_name.lower().endswith(ext.lower()):
	file_name_base = file_name[:-len(ext)]
	break

	actual_file_name = None
	# First check for direct match
	if file_name in st.session_state.images:
	actual_file_name = file_name
	# Check if base name matches an image group (handles PDF converted to images)
	elif file_name_base in st.session_state.image_groups_metadata:
	actual_file_name = file_name_base # Use base name for image groups
	# Check if full name is an image group
	elif file_name in st.session_state.image_groups_metadata:
	actual_file_name = file_name # Use as-is for image groups
	else:
	# Try with extensions
	for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
	if file_name + ext in st.session_state.images:
	actual_file_name = file_name + ext
	break

	if not actual_file_name:
	# Try matching base name
	for uploaded_name in st.session_state.images.keys():
	uploaded_base = uploaded_name.rsplit('.', 1)[0]
	if uploaded_base == file_name or uploaded_base == file_name_base:
	actual_file_name = uploaded_name
	break

	if actual_file_name:
	is_pdf = actual_file_name in st.session_state.pdf_metadata
	is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata

	# Determine which key to use for image group
	image_group_key = None
	if is_image_group:
	if actual_file_name in st.session_state.image_groups_metadata:
	image_group_key = actual_file_name
	else:
	image_group_key = file_name_base

	if is_pdf:
	pdf_meta = st.session_state.pdf_metadata[actual_file_name]
	total_pages = pdf_meta['total_pages']
	current_page = st.session_state.current_page_num.get(actual_file_name, 0)

	col_prev, col_info, col_next = st.columns([1, 2, 1])

	with col_prev:
	prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{actual_file_name}",
	disabled=(current_page == 0), use_container_width=True)

	with col_info:
	st.markdown(f"<div style='text-align: center; padding: 5px;'><b>📄 Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True)

	with col_next:
	next_clicked = st.button("Next ➡️", key=f"next_page_{selected_file}_{actual_file_name}",
	disabled=(current_page >= total_pages - 1), use_container_width=True)

	if not st.session_state.navigating_page:
	if prev_clicked:
	st.session_state.navigating_page = True
	st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1)
	st.session_state.canvas_key += 1
	st.session_state.ocr_active_section = None
	st.session_state.ocr_active_field = None
	st.rerun()
	elif next_clicked:
	st.session_state.navigating_page = True
	st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1)
	st.session_state.canvas_key += 1
	st.session_state.ocr_active_section = None
	st.session_state.ocr_active_field = None
	st.rerun()
	else:
	st.session_state.navigating_page = False

	elif is_image_group and image_group_key:
	img_group_meta = st.session_state.image_groups_metadata[image_group_key]
	total_pages = img_group_meta['total_pages']
	current_page = st.session_state.current_page_num.get(image_group_key, 0)

	col_prev, col_info, col_next = st.columns([1, 2, 1])

	with col_prev:
	prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{image_group_key}",
	disabled=(current_page == 0), use_container_width=True)

	with col_info:
	st.markdown(f"<div style='text-align: center; padding: 5px;'><b>🖼️ Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True)

	with col_next:
	next_clicked = st.button("Next ➡️", key=f"next_page_{selected_file}_{image_group_key}",
	disabled=(current_page >= total_pages - 1), use_container_width=True)

	if not st.session_state.navigating_page:
	if prev_clicked:
	st.session_state.navigating_page = True
	st.session_state.current_page_num[image_group_key] = max(0, current_page - 1)
	st.session_state.canvas_key += 1
	st.session_state.ocr_active_section = None
	st.session_state.ocr_active_field = None
	st.rerun()
	elif next_clicked:
	st.session_state.navigating_page = True
	st.session_state.current_page_num[image_group_key] = min(total_pages - 1, current_page + 1)
	st.session_state.canvas_key += 1
	st.session_state.ocr_active_section = None
	st.session_state.ocr_active_field = None
	st.rerun()
	else:
	st.session_state.navigating_page = False

	if actual_file_name:
	is_pdf = actual_file_name in st.session_state.pdf_metadata
	is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata

	# Determine which key to use for image group
	image_group_key = None
	if is_image_group:
	if actual_file_name in st.session_state.image_groups_metadata:
	image_group_key = actual_file_name
	else:
	image_group_key = file_name_base

	if is_pdf:
	current_page = st.session_state.current_page_num.get(actual_file_name, 0)
	pdf_meta = st.session_state.pdf_metadata[actual_file_name]
	current_image = pdf_meta['pages'][current_page]
	elif is_image_group and image_group_key:
	current_page = st.session_state.current_page_num.get(image_group_key, 0)
	img_group_meta = st.session_state.image_groups_metadata[image_group_key]
	current_image = img_group_meta['pages'][current_page]
	else:
	current_image = st.session_state.images[actual_file_name]
	else:
	st.error(f"❌ File '{file_name}' not found in uploaded files")
	st.info("💡 Available files:")
	with st.expander("Show available files"):
	for img_name in list(st.session_state.images.keys())[:20]:
	st.text(f" • {img_name}")
	if len(st.session_state.images) > 20:
	st.text(f" ... and {len(st.session_state.images) - 20} more")
	current_image = None

	if current_image:
	scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image, max_width=700, max_height=1000)

	# Wrap canvas in scrollable container
	st.markdown(f'<div class="image-scroll-container" style="max-height: {scaled_image.height + 40}px;">', unsafe_allow_html=True)

	canvas_result = st_canvas(
	fill_color="rgba(255, 165, 0, 0.3)",
	stroke_width=2,
	stroke_color="#FF0000",
	background_image=scaled_image,
	update_streamlit=True,
	height=scaled_image.height,
	width=scaled_image.width,
	drawing_mode="rect",
	key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
	)

	st.markdown('</div>', unsafe_allow_html=True)

	if canvas_result.json_data is not None and st.session_state.ocr_active_field:
	objects = canvas_result.json_data.get("objects", [])
	if len(objects) > 0:
	rect = objects[-1]

	bbox = [
	(rect["left"] - paste_x) / scale_ratio,
	(rect["top"] - paste_y) / scale_ratio,
	(rect["left"] + rect["width"] - paste_x) / scale_ratio,
	(rect["top"] + rect["height"] - paste_y) / scale_ratio
	]

	with st.spinner("Performing OCR..."):
	ocr_text = perform_ocr(current_image, bbox)

	if ocr_text and not ocr_text.startswith("OCR Error"):
	st.success(f"✅ OCR Result: {ocr_text}")

	gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})

	if st.session_state.ocr_active_section == 'items':
	items = gt_parse.get('items', [])
	row_idx = st.session_state.ocr_line_item_row
	if row_idx is not None and row_idx < len(items):
	items[row_idx][st.session_state.ocr_active_field] = ocr_text
	gt_parse['items'] = items

	expander_key = f"line_item_expander_{selected_file}_{row_idx}"
	st.session_state[expander_key] = True
	else:
	section = st.session_state.ocr_active_section
	field = st.session_state.ocr_active_field
	if section not in gt_parse:
	gt_parse[section] = {}
	gt_parse[section][field] = ocr_text

	st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
	st.session_state.modified_indices.add(selected_file)

	st.session_state.canvas_key += 1
	st.rerun()
	else:
	st.error(ocr_text)
	else:
	st.warning("No file name specified in record")

	# RIGHT SIDE: Editable Details
	with right_col:
	# Create scrollable container for form fields
	st.markdown('<div style="max-height: 85vh; overflow-y: auto; overflow-x: hidden; padding-right: 10px;">', unsafe_allow_html=True)

	st.markdown("### 📝 Invoice Details")

	gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})

	tab1, tab2, tab3, tab4 = st.tabs([
	"📄 Invoice Details",
	"👥 Party Details",
	"🏦 Bank Details",
	"📋 Line Items"
	])

	# TAB 1: Header (includes invoice details + summary fields)
	with tab1:
	    header = gt_parse.get('header', {})
	    summary = gt_parse.get('summary', {})

	    # Every field in this tab is rendered identically: a text input whose
	    # on_change callback syncs the edit back into the record, plus an OCR
	    # button that selects the field as the OCR target.
	    # Each entry is (label, gt_parse section, field name). The field name
	    # is also the prefix of the widget key, so keys match the previous
	    # hand-written widgets exactly.
	    tab1_fields = [
	        ("Invoice No", 'header', 'invoice_no'),
	        ("Invoice Date", 'header', 'invoice_date'),
	        ("Payment Terms", 'header', 'payment_terms'),
	        ("Due Date", 'header', 'due_date'),
	        ("Subtotal", 'summary', 'subtotal'),
	        ("Tax Rate", 'summary', 'tax_rate'),
	        ("Tax Amount", 'summary', 'tax_amount'),
	        ("Discount Rate", 'summary', 'discount_rate'),
	        ("Total Discount Amount", 'summary', 'total_discount_amount'),
	        ("Total Amount", 'summary', 'total_amount'),
	        ("Currency", 'summary', 'currency'),
	    ]
	    section_values = {'header': header, 'summary': summary}

	    for field_label, field_section, field_name in tab1_fields:
	        col_input, col_btn = st.columns([5, 1])
	        with col_input:
	            st.text_input(
	                field_label,
	                value=section_values[field_section].get(field_name, ''),
	                key=f"{field_name}_{selected_file}",
	                # Bind the loop values as lambda defaults: a plain closure
	                # would see only the last field when the callback fires.
	                on_change=lambda s=field_section, f=field_name: sync_field_to_data(
	                    selected_file, s, f,
	                    st.session_state[f"{f}_{selected_file}"]
	                ),
	            )
	        with col_btn:
	            # Spacer so the button lines up with the input box, not its label.
	            st.markdown("<br>", unsafe_allow_html=True)
	            if st.button(
	                "🔍",
	                key=f"ocr_{field_name}_{selected_file}",
	                type="primary" if is_ocr_active(field_section, field_name) else "secondary",
	            ):
	                activate_ocr_field(field_section, field_name)

	# TAB 2: Party Details (without bank details)
	with tab2:
	    # SWAP BUTTON (centred by placing it in the middle of three columns)
	    col1, col2, col3 = st.columns([1, 2, 1])
	    with col2:
	        if st.button("🔄 Swap Sender ↔ Recipient", key=f"swap_btn_{selected_file}",
	                     type="primary", use_container_width=True):
	            # just_swapped guards against handling the same click twice.
	            if not st.session_state.just_swapped:
	                st.session_state.just_swapped = True
	                swap_sender_recipient_details(selected_file)
	                st.rerun()

	    # Clear the guard on the run that follows a swap.
	    if st.session_state.just_swapped:
	        st.session_state.just_swapped = False

	    header = gt_parse.get('header', {})

	    # All party fields live in the 'header' section and share one layout.
	    # Each group is (heading, [(label, field name, multiline?), ...]).
	    # The field name is also the widget-key prefix, so keys are unchanged.
	    party_groups = [
	        ("Sender Details", [
	            ("Sender Name", 'sender_name', False),
	            ("Sender Address", 'sender_addr', True),
	        ]),
	        ("Recipient Details", [
	            ("Recipient Name", 'rcpt_name', False),
	            ("Recipient Address", 'rcpt_addr', True),
	        ]),
	    ]

	    for group_title, group_fields in party_groups:
	        st.markdown(group_title)

	        for field_label, field_name, is_multiline in group_fields:
	            col_input, col_btn = st.columns([5, 1])
	            with col_input:
	                # Addresses use a 60px text area; names a single-line input.
	                render_widget = st.text_area if is_multiline else st.text_input
	                extra_kwargs = {'height': 60} if is_multiline else {}
	                render_widget(
	                    field_label,
	                    value=header.get(field_name, ''),
	                    key=f"{field_name}_{selected_file}",
	                    # Bind the loop value as a lambda default: a plain
	                    # closure would see only the last field at call time.
	                    on_change=lambda f=field_name: sync_field_to_data(
	                        selected_file, 'header', f,
	                        st.session_state[f"{f}_{selected_file}"]
	                    ),
	                    **extra_kwargs,
	                )
	            with col_btn:
	                # Spacer so the button lines up with the input, not its label.
	                st.markdown("<br>", unsafe_allow_html=True)
	                if st.button(
	                    "🔍",
	                    key=f"ocr_{field_name}_{selected_file}",
	                    type="primary" if is_ocr_active('header', field_name) else "secondary",
	                ):
	                    activate_ocr_field('header', field_name)

	# TAB 3: Bank Details
	with tab3:
	    header = gt_parse.get('header', {})

	    # All bank fields live in the 'header' section and share one layout:
	    # a text input synced back on change, plus an OCR target button.
	    # Each entry is (label, field name). The field name is also the
	    # widget-key prefix, so keys match the previous hand-written widgets.
	    bank_fields = [
	        ("Bank IBAN", 'bank_iban'),
	        ("Bank Name", 'bank_name'),
	        ("Bank Account No", 'bank_acc_no'),
	        ("Bank Routing", 'bank_routing'),
	        ("Bank SWIFT", 'bank_swift'),
	        ("Bank Account Name", 'bank_acc_name'),
	        ("Bank Branch", 'bank_branch'),
	    ]

	    for field_label, field_name in bank_fields:
	        col_input, col_btn = st.columns([5, 1])
	        with col_input:
	            st.text_input(
	                field_label,
	                value=header.get(field_name, ''),
	                key=f"{field_name}_{selected_file}",
	                # Bind the loop value as a lambda default: a plain closure
	                # would see only the last field when the callback fires.
	                on_change=lambda f=field_name: sync_field_to_data(
	                    selected_file, 'header', f,
	                    st.session_state[f"{f}_{selected_file}"]
	                ),
	            )
	        with col_btn:
	            # Spacer so the button lines up with the input box, not its label.
	            st.markdown("<br>", unsafe_allow_html=True)
	            if st.button(
	                "🔍",
	                key=f"ocr_{field_name}_{selected_file}",
	                type="primary" if is_ocr_active('header', field_name) else "secondary",
	            ):
	                activate_ocr_field('header', field_name)

	# TAB 4: Items
	with tab4:
	    current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
	    items = current_gt_parse.get('items', [])

	    # Add/Remove row buttons
	    col_add, col_remove = st.columns([1, 1])
	    with col_add:
	        if st.button("➕ Add New Item", key=f"add_item_{selected_file}", use_container_width=True):
	            # button_clicked guards against handling the same click twice.
	            if not st.session_state.button_clicked:
	                st.session_state.button_clicked = True
	                new_item = {
	                    "descriptions": "", "SKU": "", "quantity": "",
	                    "unit_price": "", "amount": "", "discount_rate_per_item": "",
	                    "discount_amount_per_item": "", "tax_rate_per_item": "",
	                    "tax_amount_per_item": "", "Line_total": ""
	                }
	                current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
	                current_items = current_gt_parse.get('items', [])
	                current_items.append(new_item)
	                current_gt_parse['items'] = current_items
	                st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
	                st.session_state.modified_indices.add(selected_file)

	                # Open the expander of the freshly added row on the next run.
	                new_idx = len(current_items) - 1
	                expander_key_new = f"line_item_expander_{selected_file}_{new_idx}"
	                st.session_state[expander_key_new] = True

	                st.rerun()

	    with col_remove:
	        if st.button("➖ Remove Last Item", key=f"remove_item_{selected_file}",
	                     disabled=(len(items) == 0), use_container_width=True):
	            if not st.session_state.button_clicked and len(items) > 0:
	                st.session_state.button_clicked = True
	                current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
	                current_items = current_gt_parse.get('items', [])
	                popped_idx = len(current_items) - 1
	                current_items.pop()
	                current_gt_parse['items'] = current_items
	                st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
	                st.session_state.modified_indices.add(selected_file)

	                # Drop the stored expander flag of the removed row.
	                expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
	                if expander_key_popped in st.session_state:
	                    del st.session_state[expander_key_popped]

	                st.rerun()

	    # Clear the guard on the run that follows an add/remove.
	    if st.session_state.button_clicked:
	        st.session_state.button_clicked = False

	    # Re-read the items so the list below reflects the stored state.
	    current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
	    items = current_gt_parse.get('items', [])

	    # Every per-item field is rendered identically, so drive them from a table.
	    # Each entry is (label, item dict key, widget-key prefix, multiline?).
	    # The prefixes reproduce the widget keys of the previous hand-written
	    # widgets (e.g. 'desc', 'sku', 'qty', 'line_total').
	    item_fields = [
	        ("Descriptions", 'descriptions', 'desc', True),
	        ("SKU", 'SKU', 'sku', False),
	        ("Quantity", 'quantity', 'qty', False),
	        ("Unit Price", 'unit_price', 'unit_price', False),
	        ("Amount", 'amount', 'amount', False),
	        ("Discount Rate Per Item", 'discount_rate_per_item', 'discount_rate_per_item', False),
	        ("Discount Amount Per Item", 'discount_amount_per_item', 'discount_amount_per_item', False),
	        ("Tax Rate Per Item", 'tax_rate_per_item', 'tax_rate_per_item', False),
	        ("Tax Amount Per Item", 'tax_amount_per_item', 'tax_amount_per_item', False),
	        ("Line Total", 'Line_total', 'line_total', False),
	    ]

	    if items:
	        for idx, item in enumerate(items):
	            expander_key = f"line_item_expander_{selected_file}_{idx}"
	            expanded_default = st.session_state.get(expander_key, False)

	            # The stored description may be None (JSON null) or a non-string
	            # value; slicing it directly would raise TypeError and break the
	            # whole tab, so normalise it before building the preview.
	            raw_description = item.get('descriptions', 'N/A')
	            if raw_description is None:
	                description_preview = 'N/A'
	            else:
	                description_preview = str(raw_description)[:30]

	            with st.expander(f"Item {idx + 1} - {description_preview}", expanded=expanded_default):
	                for field_label, field_name, key_prefix, is_multiline in item_fields:
	                    col_input, col_btn = st.columns([5, 1])
	                    with col_input:
	                        # Descriptions use a 60px text area; the rest are
	                        # single-line inputs.
	                        render_widget = st.text_area if is_multiline else st.text_input
	                        extra_kwargs = {'height': 60} if is_multiline else {}
	                        render_widget(
	                            field_label,
	                            value=item.get(field_name, ''),
	                            key=f"{key_prefix}_{selected_file}_{idx}",
	                            # Bind the loop values as lambda defaults: a plain
	                            # closure would see only the last row and field.
	                            on_change=lambda i=idx, f=field_name, p=key_prefix: sync_field_to_data(
	                                selected_file, 'items', f,
	                                st.session_state[f"{p}_{selected_file}_{i}"], i
	                            ),
	                            **extra_kwargs,
	                        )
	                    with col_btn:
	                        # Spacer so the button lines up with the input box.
	                        st.markdown("<br>", unsafe_allow_html=True)
	                        if st.button(
	                            "🔍",
	                            key=f"ocr_{key_prefix}_{selected_file}_{idx}",
	                            type="primary" if is_ocr_active('items', field_name, idx) else "secondary",
	                        ):
	                            # Keep this row's expander open while OCR targets it.
	                            st.session_state[expander_key] = True
	                            activate_ocr_field('items', field_name, idx)

	        st.markdown("📊 Items Summary Table")

	        # Read-only overview of all items, numbered from 1.
	        df = pd.DataFrame(items)
	        df.index = df.index + 1
	        df.index.name = 'SL No'

	        st.dataframe(
	            df,
	            use_container_width=True,
	            height=300
	        )
	    else:
	        st.info("No items. Click '➕ Add New Item' to add a new item.")

	# Save button
	# Two equal columns; only the first is used in the lines visible here.
	col1, col2 = st.columns([1, 1])
	with col1:
	if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
	# just_saved guards against handling the same click twice.
	if not st.session_state.just_saved:
	st.session_state.just_saved = True
	auto_save(selected_file)
	# Record the confirmation text and when it was set, then rerun.
	# NOTE(review): save_message_time is presumably used elsewhere to expire
	# the message — confirm against the rest of the file.
	st.session_state.save_message = "✅ Changes saved successfully!"
	st.session_state.save_message_time = time.time()
	st.rerun()

	# Clear the guard on the run that follows a save.
	if st.session_state.just_saved:
	st.session_state.just_saved = False

	# Show the stored confirmation message, if any.
	if st.session_state.save_message:
	st.success(st.session_state.save_message)

	st.markdown('</div>', unsafe_allow_html=True) # Close scrollable container