import os from pathlib import Path # ----------------------------- # Environment hardening (HF Spaces, /.cache issue) # ----------------------------- _home = os.environ.get("HOME", "") if _home in ("", "/", None): repo_dir = os.getcwd() safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp" os.environ["HOME"] = safe_home print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}") streamlit_dir = Path(os.environ["HOME"]) / ".streamlit" try: streamlit_dir.mkdir(parents=True, exist_ok=True) print(f"[startup] ensured {streamlit_dir}") except Exception as e: print(f"[startup] WARNING: could not create {streamlit_dir}: {e}") import streamlit as st import json import io from PIL import Image import time import pandas as pd from streamlit_drawable_canvas import st_canvas import pytesseract import numpy as np from datetime import datetime import fitz # PyMuPDF for PDF handling # Set Tesseract path - auto-detect based on OS if os.name == 'nt': # Windows pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" else: # Linux/Mac (HF Spaces uses Linux) pass # Page configuration st.set_page_config(page_title="Invoice Data Viewer", layout="wide") # Custom CSS st.markdown(""" """, unsafe_allow_html=True) def load_jsonl(file): """Load JSONL file and return list of records""" data = [] content = file.getvalue().decode('utf-8') for line in content.strip().split('\n'): if line.strip(): data.append(json.loads(line)) return data def reorder_record_fields(record): """Reorder record fields to put file_name/file_names first, then gt_parse, then others""" ordered_record = {} # First: Add file_name or file_names if 'file_name' in record: ordered_record['file_name'] = record['file_name'] if 'file_names' in record: ordered_record['file_names'] = record['file_names'] # Second: Add gt_parse if 'gt_parse' in record: ordered_record['gt_parse'] = record['gt_parse'] # Third: Add any remaining fields for key, value in record.items(): if key not in ordered_record: ordered_record[key] = value return ordered_record def save_to_jsonl(data): """Convert data list to JSONL format with proper field ordering""" jsonl_lines = [] for record in data: ordered_record = reorder_record_fields(record) jsonl_lines.append(json.dumps(ordered_record)) return '\n'.join(jsonl_lines) def pdf_to_images(pdf_file): """Convert PDF to list of PIL Images (one per page)""" try: pdf_bytes = pdf_file.read() pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf") images = [] for page_num in range(pdf_document.page_count): page = pdf_document[page_num] pix = page.get_pixmap(matrix=fitz.Matrix(3, 3), alpha=False) img_data = pix.tobytes("png") img = Image.open(io.BytesIO(img_data)) images.append(img) pdf_document.close() return images except Exception as e: st.error(f"Error converting PDF: {str(e)}") return [] def perform_ocr(image, bbox): """Perform OCR on the selected region of the image""" try: x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]) x1, y1 = max(0, x1), max(0, y1) x2, y2 = min(image.width, x2), min(image.height, y2) cropped = image.crop((x1, y1, x2, y2)) text = pytesseract.image_to_string(cropped, config='--psm 6').strip() return text except Exception as e: return f"OCR Error: {str(e)}" def scale_image_to_fixed_size(image, max_width=1100, max_height=1100): """Scale image to fit within max dimensions while maintaining aspect ratio - NO PADDING""" if image.mode not in ('RGB', 'RGBA'): image = image.convert('RGB') elif image.mode == 'RGBA': background = Image.new('RGB', image.size, (255, 255, 255)) background.paste(image, mask=image.split()[3]) image = background width_ratio = max_width / image.width height_ratio = max_height / image.height ratio = min(width_ratio, height_ratio) new_width = int(image.width * ratio) new_height = int(image.height * ratio) resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) return resized_image, ratio, 0, 0 def get_base_filename(record): """Get base filename from record, handling both file_name and file_names""" # Check for file_names (plural) first if 'file_names' in record and record['file_names']: if isinstance(record['file_names'], list) and len(record['file_names']) > 0: # Extract base name from first file (remove _pageN.png suffix) first_file = record['file_names'][0] # Remove .png extension base = first_file.rsplit('.png', 1)[0] # Remove _pageN suffix if exists if '_page' in base: base = base.rsplit('_page', 1)[0] return base return record['file_names'] # Fall back to file_name (singular) file_name = record.get('file_name', '') # Strip PDF extension if present (for cases where PDF was converted to images) if file_name.lower().endswith('.pdf'): file_name = file_name[:-4] # Remove .pdf # Also strip other image extensions if present for ext in ['.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if file_name.lower().endswith(ext): file_name = file_name[:-(len(ext))] break return file_name def detect_image_groups(images_dict): """Detect multi-page image groups from uploaded files (e.g., invoice01_page1.png, invoice01_page2.png)""" import re image_groups = {} grouped_files = set() # Pattern to match: basename_pageN.extension pattern = r'^(.+)_page(\d+)\.(png|jpg|jpeg|tiff|tif|bmp)$' for filename in images_dict.keys(): match = re.match(pattern, filename, re.IGNORECASE) if match: base_name = match.group(1) page_num = int(match.group(2)) ext = match.group(3) if base_name not in image_groups: image_groups[base_name] = [] image_groups[base_name].append({ 'filename': filename, 'page_num': page_num, 'ext': ext }) grouped_files.add(filename) # Sort pages for each group and create metadata image_groups_metadata = {} for base_name, pages in image_groups.items(): # Sort by page number pages.sort(key=lambda x: x['page_num']) # Only consider it a group if there are multiple pages if len(pages) > 1: image_list = [images_dict[p['filename']] for p in pages] image_groups_metadata[base_name] = { 'pages': image_list, 'filenames': [p['filename'] for p in pages], 'total_pages': len(pages), 'current_page': 0 } return image_groups_metadata, grouped_files def swap_sender_recipient_details(index): """Swap sender and recipient details""" gt_parse = st.session_state.edited_data[index].get('gt_parse', {}) header = gt_parse.get('header', {}) # Store sender values temp_sender_name = header.get('sender_name', '') temp_sender_addr = header.get('sender_addr', '') # Swap: Sender ← Recipient header['sender_name'] = header.get('rcpt_name', '') header['sender_addr'] = header.get('rcpt_addr', '') # Swap: Recipient ← Sender (from temp) header['rcpt_name'] = temp_sender_name header['rcpt_addr'] = temp_sender_addr # Update session state gt_parse['header'] = header st.session_state.edited_data[index]['gt_parse'] = gt_parse st.session_state.modified_indices.add(index) # Initialize session state if 'data' not in st.session_state: st.session_state.data = None if 'current_index' not in st.session_state: st.session_state.current_index = 0 if 'edited_data' not in st.session_state: st.session_state.edited_data = None if 'page' not in st.session_state: st.session_state.page = 'upload' if 'images' not in st.session_state: st.session_state.images = {} if 'pdf_metadata' not in st.session_state: st.session_state.pdf_metadata = {} if 'image_groups_metadata' not in st.session_state: st.session_state.image_groups_metadata = {} if 'current_page_num' not in st.session_state: st.session_state.current_page_num = {} if 'modified_indices' not in st.session_state: st.session_state.modified_indices = set() if 'ocr_active_section' not in st.session_state: st.session_state.ocr_active_section = None if 'ocr_active_field' not in st.session_state: st.session_state.ocr_active_field = None if 'ocr_line_item_row' not in st.session_state: st.session_state.ocr_line_item_row = None if 'canvas_key' not in st.session_state: st.session_state.canvas_key = 0 if 'button_clicked' not in st.session_state: st.session_state.button_clicked = False if 'save_message' not in st.session_state: st.session_state.save_message = None if 'save_message_time' not in st.session_state: st.session_state.save_message_time = None if 'just_saved' not in st.session_state: st.session_state.just_saved = False if 'just_swapped' not in st.session_state: st.session_state.just_swapped = False if 'navigating_page' not in st.session_state: st.session_state.navigating_page = False def auto_save(index): """Automatically save changes to session state and mark as modified""" if st.session_state.edited_data: # Get current record current_record = st.session_state.edited_data[index] # Get base filename using the helper function base_file_name = get_base_filename(current_record) if not base_file_name: st.warning("Cannot save: No file name found in record") return # Find the actual file name in uploaded files actual_file_name = None if base_file_name in st.session_state.images: actual_file_name = base_file_name else: # Try with extensions for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if base_file_name + ext in st.session_state.images: actual_file_name = base_file_name + ext break # Try matching base name if not actual_file_name: for uploaded_name in st.session_state.images.keys(): uploaded_base = uploaded_name.rsplit('.', 1)[0] if uploaded_base == base_file_name: actual_file_name = uploaded_name break # Check if it's a PDF and update file_name accordingly if actual_file_name and actual_file_name in st.session_state.pdf_metadata: # It's a PDF - get page count pdf_meta = st.session_state.pdf_metadata[actual_file_name] total_pages = pdf_meta['total_pages'] # Get base name without extension base_name = actual_file_name.rsplit('.', 1)[0] if total_pages > 1: # Multi-page PDF: use file_names array file_names_array = [f"{base_name}_page{i+1}.png" for i in range(total_pages)] st.session_state.edited_data[index]['file_names'] = file_names_array # Remove old file_name field if it exists if 'file_name' in st.session_state.edited_data[index]: del st.session_state.edited_data[index]['file_name'] else: # Single-page PDF: use file_name string st.session_state.edited_data[index]['file_name'] = f"{base_name}.png" # Remove old file_names field if it exists if 'file_names' in st.session_state.edited_data[index]: del st.session_state.edited_data[index]['file_names'] # Check if it's an image group and update file_name accordingly elif base_file_name in st.session_state.image_groups_metadata: # It's a multi-page image group - use file_names array img_group_meta = st.session_state.image_groups_metadata[base_file_name] st.session_state.edited_data[index]['file_names'] = img_group_meta['filenames'] # Remove old file_name field if it exists (was likely a .pdf in original JSONL) if 'file_name' in st.session_state.edited_data[index]: del st.session_state.edited_data[index]['file_name'] st.session_state.data = st.session_state.edited_data.copy() st.session_state.modified_indices.add(index) def sync_field_to_data(index, section, field, value, row_idx=None): """Sync a field value from widget to data structure immediately""" gt_parse = st.session_state.edited_data[index].get('gt_parse', {}) if section == 'items': items = gt_parse.get('items', []) if row_idx is not None and row_idx < len(items): items[row_idx][field] = value gt_parse['items'] = items else: if section not in gt_parse: gt_parse[section] = {} gt_parse[section][field] = value st.session_state.edited_data[index]['gt_parse'] = gt_parse st.session_state.modified_indices.add(index) def activate_ocr_field(section, field, row_idx=None): """Activate OCR for a specific field""" if (st.session_state.ocr_active_section == section and st.session_state.ocr_active_field == field and st.session_state.ocr_line_item_row == row_idx): st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.session_state.ocr_line_item_row = None else: st.session_state.ocr_active_section = section st.session_state.ocr_active_field = field st.session_state.ocr_line_item_row = row_idx if section == 'items' and row_idx is not None: current_idx = st.session_state.get('current_index', 0) expander_key = f"line_item_expander_{current_idx}_{row_idx}" st.session_state[expander_key] = True def is_ocr_active(section, field, row_idx=None): """Check if this OCR button is currently active""" return (st.session_state.ocr_active_section == section and st.session_state.ocr_active_field == field and st.session_state.ocr_line_item_row == row_idx) # PAGE 1: Upload Page if st.session_state.page == 'upload': st.title("📤 Invoice Data Viewer with OCR") st.markdown("### Upload your files to begin") st.markdown("**Step 1: Upload JSONL File**") uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json']) if uploaded_file is not None: try: data = load_jsonl(uploaded_file) st.session_state.data = data st.session_state.edited_data = data.copy() st.success(f"✅ Successfully loaded {len(data)} records!") except Exception as e: st.error(f"Error loading file: {str(e)}") st.markdown("**Step 2: Upload Images/PDFs Folder**") uploaded_files = st.file_uploader( "Choose image or PDF files", type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'], accept_multiple_files=True, help="Select all images and PDFs from your folder at once" ) if uploaded_files: images_dict = {} pdf_metadata = {} for file in uploaded_files: try: file_ext = file.name.lower().split('.')[-1] if file_ext == 'pdf': pdf_images = pdf_to_images(file) if pdf_images: images_dict[file.name] = pdf_images[0] pdf_metadata[file.name] = { 'pages': pdf_images, 'total_pages': len(pdf_images), 'current_page': 0 } else: image = Image.open(file) images_dict[file.name] = image except Exception as e: st.warning(f"Could not load file {file.name}: {str(e)}") st.session_state.images = images_dict st.session_state.pdf_metadata = pdf_metadata # Detect multi-page image groups (e.g., invoice01_page1.png, invoice01_page2.png) image_groups_metadata, grouped_files = detect_image_groups(images_dict) st.session_state.image_groups_metadata = image_groups_metadata # Initialize current page for PDFs and image groups for filename in pdf_metadata.keys(): if filename not in st.session_state.current_page_num: st.session_state.current_page_num[filename] = 0 for base_name in image_groups_metadata.keys(): if base_name not in st.session_state.current_page_num: st.session_state.current_page_num[base_name] = 0 if st.session_state.data is not None: gt_file_names = [] for rec in st.session_state.data: base_fname = get_base_filename(rec) if base_fname: gt_file_names.append(base_fname) matched_images = set() unmatched_gt_files = [] for fname in gt_file_names: if not fname: continue # Create a base name by stripping common extensions fname_base = fname for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if fname.lower().endswith(ext.lower()): fname_base = fname[:-len(ext)] break # Check direct match if fname in images_dict: matched_images.add(fname) # Check base name in image groups (handles PDF converted to multi-page PNGs) elif fname_base in image_groups_metadata: matched_images.add(fname) # Check full name in image groups elif fname in image_groups_metadata: matched_images.add(fname) else: found = False # Try with extensions for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if fname + ext in images_dict: matched_images.add(fname) found = True break if not found: # Try matching base name in uploaded files for uploaded_name in images_dict.keys(): uploaded_base = uploaded_name.rsplit('.', 1)[0] if uploaded_base == fname or uploaded_base == fname_base: matched_images.add(fname) found = True break for fname in gt_file_names: if fname and fname not in matched_images: unmatched_gt_files.append(fname) st.success(f"✅ Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!") st.info(f"🔎 Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}") if unmatched_gt_files: st.warning(f"âš ī¸ {len(unmatched_gt_files)} file(s) from JSONL not matched:") with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"): for fname in unmatched_gt_files: st.text(f" â€ĸ {fname}") else: st.success("✅ All JSONL file names matched to files!") else: st.success(f"✅ Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!") st.info("â„šī¸ Upload a JSONL file to see how many files match the ground truth 'file_name' field.") if st.session_state.data is not None: col1, col2, col3 = st.columns([1, 1, 1]) with col2: if st.button("Continue to Viewer →", type="primary", use_container_width=True): st.session_state.page = 'viewer' st.session_state.modified_indices = set() st.session_state.navigating_page = False st.rerun() # PAGE 2: Viewer Page elif st.session_state.page == 'viewer': if st.session_state.save_message_time is not None: if time.time() - st.session_state.save_message_time > 3: st.session_state.save_message = None st.session_state.save_message_time = None today_date = datetime.now().strftime("%Y-%m-%d") col1, col2, col3, col4 = st.columns([1, 2, 2, 2]) with col1: if st.button("← Back to Upload"): st.session_state.page = 'upload' st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.session_state.save_message = None st.session_state.save_message_time = None st.session_state.navigating_page = False st.rerun() with col2: if st.session_state.modified_indices: modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)] jsonl_modified = save_to_jsonl(modified_data) st.download_button( label=f"âŦ‡ī¸ Download Modified ({len(modified_data)})", data=jsonl_modified, file_name=f"modified_invoice_data_{today_date}.jsonl", mime="application/jsonl", type="primary", use_container_width=True ) else: st.button("âŦ‡ī¸ No Modified Records", disabled=True, use_container_width=True) with col3: if st.session_state.modified_indices: unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data)) if i not in st.session_state.modified_indices] jsonl_unmodified = save_to_jsonl(unmodified_data) st.download_button( label=f"âŦ‡ī¸ Download Unmodified ({len(unmodified_data)})", data=jsonl_unmodified, file_name=f"unmodified_invoice_data_{today_date}.jsonl", mime="application/jsonl", use_container_width=True ) else: st.button("âŦ‡ī¸ No Unmodified Records", disabled=True, use_container_width=True) with col4: jsonl_all = save_to_jsonl(st.session_state.edited_data) st.download_button( label=f"âŦ‡ī¸ Download All ({len(st.session_state.edited_data)})", data=jsonl_all, file_name=f"all_invoice_data_{today_date}.jsonl", mime="application/jsonl", use_container_width=True ) # Build file names list for dropdown using helper function file_names = [] for i, record in enumerate(st.session_state.data or []): base_name = get_base_filename(record) file_names.append(base_name if base_name else f'Record {i}') if not file_names: st.error("No records loaded. Please upload a JSONL file on the Upload page.") if st.button("← Back to Upload"): st.session_state.page = 'upload' st.rerun() else: options = list(range(len(file_names))) if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names): st.session_state.edited_data = (st.session_state.data or []).copy() cur_idx = st.session_state.get('current_index', 0) try: cur_idx = int(cur_idx) except Exception: cur_idx = 0 if cur_idx < 0: cur_idx = 0 if cur_idx >= len(options): cur_idx = len(options) - 1 selected_file = st.selectbox( "Select a file to view:", options=options, format_func=lambda x: f"{'âœī¸ ' if x in st.session_state.modified_indices else ''}{file_names[x]}", index=cur_idx ) st.session_state.current_index = selected_file current_record = st.session_state.edited_data[selected_file] left_col, right_col = st.columns([1.6, 1.0], gap="small") # LEFT SIDE: Image Display with OCR Canvas with left_col: # Use helper function to get base file name file_name = get_base_filename(current_record) if file_name: # Create base name by stripping extensions file_name_base = file_name for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if file_name.lower().endswith(ext.lower()): file_name_base = file_name[:-len(ext)] break actual_file_name = None # First check for direct match if file_name in st.session_state.images: actual_file_name = file_name # Check if base name matches an image group (handles PDF converted to images) elif file_name_base in st.session_state.image_groups_metadata: actual_file_name = file_name_base # Use base name for image groups # Check if full name is an image group elif file_name in st.session_state.image_groups_metadata: actual_file_name = file_name # Use as-is for image groups else: # Try with extensions for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if file_name + ext in st.session_state.images: actual_file_name = file_name + ext break if not actual_file_name: # Try matching base name for uploaded_name in st.session_state.images.keys(): uploaded_base = uploaded_name.rsplit('.', 1)[0] if uploaded_base == file_name or uploaded_base == file_name_base: actual_file_name = uploaded_name break if actual_file_name: is_pdf = actual_file_name in st.session_state.pdf_metadata is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata # Determine which key to use for image group image_group_key = None if is_image_group: if actual_file_name in st.session_state.image_groups_metadata: image_group_key = actual_file_name else: image_group_key = file_name_base if is_pdf: pdf_meta = st.session_state.pdf_metadata[actual_file_name] total_pages = pdf_meta['total_pages'] current_page = st.session_state.current_page_num.get(actual_file_name, 0) col_prev, col_info, col_next = st.columns([1, 2, 1]) with col_prev: prev_clicked = st.button("âŦ…ī¸ Previous", key=f"prev_page_{selected_file}_{actual_file_name}", disabled=(current_page == 0), use_container_width=True) with col_info: st.markdown(f"
📄 Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True) with col_next: next_clicked = st.button("Next âžĄī¸", key=f"next_page_{selected_file}_{actual_file_name}", disabled=(current_page >= total_pages - 1), use_container_width=True) if not st.session_state.navigating_page: if prev_clicked: st.session_state.navigating_page = True st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1) st.session_state.canvas_key += 1 st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.rerun() elif next_clicked: st.session_state.navigating_page = True st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1) st.session_state.canvas_key += 1 st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.rerun() else: st.session_state.navigating_page = False elif is_image_group and image_group_key: img_group_meta = st.session_state.image_groups_metadata[image_group_key] total_pages = img_group_meta['total_pages'] current_page = st.session_state.current_page_num.get(image_group_key, 0) col_prev, col_info, col_next = st.columns([1, 2, 1]) with col_prev: prev_clicked = st.button("âŦ…ī¸ Previous", key=f"prev_page_{selected_file}_{image_group_key}", disabled=(current_page == 0), use_container_width=True) with col_info: st.markdown(f"
đŸ–ŧī¸ Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True) with col_next: next_clicked = st.button("Next âžĄī¸", key=f"next_page_{selected_file}_{image_group_key}", disabled=(current_page >= total_pages - 1), use_container_width=True) if not st.session_state.navigating_page: if prev_clicked: st.session_state.navigating_page = True st.session_state.current_page_num[image_group_key] = max(0, current_page - 1) st.session_state.canvas_key += 1 st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.rerun() elif next_clicked: st.session_state.navigating_page = True st.session_state.current_page_num[image_group_key] = min(total_pages - 1, current_page + 1) st.session_state.canvas_key += 1 st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.rerun() else: st.session_state.navigating_page = False if actual_file_name: is_pdf = actual_file_name in st.session_state.pdf_metadata is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata # Determine which key to use for image group image_group_key = None if is_image_group: if actual_file_name in st.session_state.image_groups_metadata: image_group_key = actual_file_name else: image_group_key = file_name_base if is_pdf: current_page = st.session_state.current_page_num.get(actual_file_name, 0) pdf_meta = st.session_state.pdf_metadata[actual_file_name] current_image = pdf_meta['pages'][current_page] elif is_image_group and image_group_key: current_page = st.session_state.current_page_num.get(image_group_key, 0) img_group_meta = st.session_state.image_groups_metadata[image_group_key] current_image = img_group_meta['pages'][current_page] else: current_image = st.session_state.images[actual_file_name] else: st.error(f"❌ File '{file_name}' not found in uploaded files") st.info("💡 Available files:") with st.expander("Show available files"): for img_name in list(st.session_state.images.keys())[:20]: st.text(f" â€ĸ {img_name}") if len(st.session_state.images) > 20: st.text(f" ... and {len(st.session_state.images) - 20} more") current_image = None if current_image: scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image, max_width=700, max_height=1000) # Wrap canvas in scrollable container st.markdown(f'
', unsafe_allow_html=True) canvas_result = st_canvas( fill_color="rgba(255, 165, 0, 0.3)", stroke_width=2, stroke_color="#FF0000", background_image=scaled_image, update_streamlit=True, height=scaled_image.height, width=scaled_image.width, drawing_mode="rect", key=f"canvas_{selected_file}_{st.session_state.canvas_key}", ) st.markdown('
', unsafe_allow_html=True) if canvas_result.json_data is not None and st.session_state.ocr_active_field: objects = canvas_result.json_data.get("objects", []) if len(objects) > 0: rect = objects[-1] bbox = [ (rect["left"] - paste_x) / scale_ratio, (rect["top"] - paste_y) / scale_ratio, (rect["left"] + rect["width"] - paste_x) / scale_ratio, (rect["top"] + rect["height"] - paste_y) / scale_ratio ] with st.spinner("Performing OCR..."): ocr_text = perform_ocr(current_image, bbox) if ocr_text and not ocr_text.startswith("OCR Error"): st.success(f"✅ OCR Result: {ocr_text}") gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) if st.session_state.ocr_active_section == 'items': items = gt_parse.get('items', []) row_idx = st.session_state.ocr_line_item_row if row_idx is not None and row_idx < len(items): items[row_idx][st.session_state.ocr_active_field] = ocr_text gt_parse['items'] = items expander_key = f"line_item_expander_{selected_file}_{row_idx}" st.session_state[expander_key] = True else: section = st.session_state.ocr_active_section field = st.session_state.ocr_active_field if section not in gt_parse: gt_parse[section] = {} gt_parse[section][field] = ocr_text st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse st.session_state.modified_indices.add(selected_file) st.session_state.canvas_key += 1 st.rerun() else: st.error(ocr_text) else: st.warning("No file name specified in record") # RIGHT SIDE: Editable Details with right_col: # Create scrollable container for form fields st.markdown('
', unsafe_allow_html=True) st.markdown("### 📝 Invoice Details") gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) tab1, tab2, tab3, tab4 = st.tabs([ "📄 Invoice Details", "đŸ‘Ĩ Party Details", "đŸĻ Bank Details", "📋 Line Items" ]) # TAB 1: Header (includes invoice details + summary fields) with tab1: header = gt_parse.get('header', {}) summary = gt_parse.get('summary', {}) # Invoice No col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Invoice No", value=header.get('invoice_no', ''), key=f"invoice_no_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'invoice_no', st.session_state[f"invoice_no_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_invoice_no_{selected_file}", type="primary" if is_ocr_active('header', 'invoice_no') else "secondary"): activate_ocr_field('header', 'invoice_no') # Invoice Date col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Invoice Date", value=header.get('invoice_date', ''), key=f"invoice_date_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'invoice_date', st.session_state[f"invoice_date_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_invoice_date_{selected_file}", type="primary" if is_ocr_active('header', 'invoice_date') else "secondary"): activate_ocr_field('header', 'invoice_date') # Payment Terms col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Payment Terms", value=header.get('payment_terms', ''), key=f"payment_terms_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'payment_terms', st.session_state[f"payment_terms_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_payment_terms_{selected_file}", type="primary" if is_ocr_active('header', 'payment_terms') else "secondary"): activate_ocr_field('header', 'payment_terms') # Due Date col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Due Date", value=header.get('due_date', ''), key=f"due_date_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'due_date', st.session_state[f"due_date_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_due_date_{selected_file}", type="primary" if is_ocr_active('header', 'due_date') else "secondary"): activate_ocr_field('header', 'due_date') # Subtotal col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Subtotal", value=summary.get('subtotal', ''), key=f"subtotal_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'summary', 'subtotal', st.session_state[f"subtotal_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_subtotal_{selected_file}", type="primary" if is_ocr_active('summary', 'subtotal') else "secondary"): activate_ocr_field('summary', 'subtotal') # Tax Rate col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Tax Rate", value=summary.get('tax_rate', ''), key=f"tax_rate_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'summary', 'tax_rate', st.session_state[f"tax_rate_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_tax_rate_{selected_file}", type="primary" if is_ocr_active('summary', 'tax_rate') else "secondary"): activate_ocr_field('summary', 'tax_rate') # Tax Amount col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Tax Amount", value=summary.get('tax_amount', ''), key=f"tax_amount_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'summary', 'tax_amount', st.session_state[f"tax_amount_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_tax_amount_{selected_file}", type="primary" if is_ocr_active('summary', 'tax_amount') else "secondary"): activate_ocr_field('summary', 'tax_amount') # Discount Rate col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Discount Rate", value=summary.get('discount_rate', ''), key=f"discount_rate_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'summary', 'discount_rate', st.session_state[f"discount_rate_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_discount_rate_{selected_file}", type="primary" if is_ocr_active('summary', 'discount_rate') else "secondary"): activate_ocr_field('summary', 'discount_rate') # Total Discount Amount col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Total Discount Amount", value=summary.get('total_discount_amount', ''), key=f"total_discount_amount_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'summary', 'total_discount_amount', st.session_state[f"total_discount_amount_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_total_discount_amount_{selected_file}", type="primary" if is_ocr_active('summary', 'total_discount_amount') else "secondary"): activate_ocr_field('summary', 'total_discount_amount') # Total Amount col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Total Amount", value=summary.get('total_amount', ''), key=f"total_amount_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'summary', 'total_amount', st.session_state[f"total_amount_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_total_amount_{selected_file}", type="primary" if is_ocr_active('summary', 'total_amount') else "secondary"): activate_ocr_field('summary', 'total_amount') # Currency col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Currency", value=summary.get('currency', ''), key=f"currency_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'summary', 'currency', st.session_state[f"currency_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_currency_{selected_file}", type="primary" if is_ocr_active('summary', 'currency') else "secondary"): activate_ocr_field('summary', 'currency') # TAB 2: Party Details (without bank details) with tab2: # SWAP BUTTON col1, col2, col3 = st.columns([1, 2, 1]) with col2: if st.button("🔄 Swap Sender ↔ Recipient", key=f"swap_btn_{selected_file}", type="primary", use_container_width=True): if not st.session_state.just_swapped: st.session_state.just_swapped = True swap_sender_recipient_details(selected_file) st.rerun() if st.session_state.just_swapped: st.session_state.just_swapped = False st.markdown("**Sender Details**") header = gt_parse.get('header', {}) # Sender Name col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Sender Name", value=header.get('sender_name', ''), key=f"sender_name_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'sender_name', st.session_state[f"sender_name_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_sender_name_{selected_file}", type="primary" if is_ocr_active('header', 'sender_name') else "secondary"): activate_ocr_field('header', 'sender_name') # Sender Address col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_area( "Sender Address", value=header.get('sender_addr', ''), key=f"sender_addr_{selected_file}", height=60, on_change=lambda: sync_field_to_data(selected_file, 'header', 'sender_addr', st.session_state[f"sender_addr_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_sender_addr_{selected_file}", type="primary" if is_ocr_active('header', 'sender_addr') else "secondary"): activate_ocr_field('header', 'sender_addr') st.markdown("**Recipient Details**") # Recipient Name col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Recipient Name", value=header.get('rcpt_name', ''), key=f"rcpt_name_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'rcpt_name', st.session_state[f"rcpt_name_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_rcpt_name_{selected_file}", type="primary" if is_ocr_active('header', 'rcpt_name') else "secondary"): activate_ocr_field('header', 'rcpt_name') # Recipient Address col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_area( "Recipient Address", value=header.get('rcpt_addr', ''), key=f"rcpt_addr_{selected_file}", height=60, on_change=lambda: sync_field_to_data(selected_file, 'header', 'rcpt_addr', st.session_state[f"rcpt_addr_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_rcpt_addr_{selected_file}", type="primary" if is_ocr_active('header', 'rcpt_addr') else "secondary"): activate_ocr_field('header', 'rcpt_addr') # TAB 3: Bank Details with tab3: header = gt_parse.get('header', {}) # Bank IBAN col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Bank IBAN", value=header.get('bank_iban', ''), key=f"bank_iban_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_iban', st.session_state[f"bank_iban_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_iban_{selected_file}", type="primary" if is_ocr_active('header', 'bank_iban') else "secondary"): activate_ocr_field('header', 'bank_iban') # Bank Name col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Bank Name", value=header.get('bank_name', ''), key=f"bank_name_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_name', st.session_state[f"bank_name_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_name_{selected_file}", type="primary" if is_ocr_active('header', 'bank_name') else "secondary"): activate_ocr_field('header', 'bank_name') # Bank Account No col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Bank Account No", value=header.get('bank_acc_no', ''), key=f"bank_acc_no_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_acc_no', st.session_state[f"bank_acc_no_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_acc_no_{selected_file}", type="primary" if is_ocr_active('header', 'bank_acc_no') else "secondary"): activate_ocr_field('header', 'bank_acc_no') # Bank Routing col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Bank Routing", value=header.get('bank_routing', ''), key=f"bank_routing_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_routing', st.session_state[f"bank_routing_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_routing_{selected_file}", type="primary" if is_ocr_active('header', 'bank_routing') else "secondary"): activate_ocr_field('header', 'bank_routing') # Bank SWIFT col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Bank SWIFT", value=header.get('bank_swift', ''), key=f"bank_swift_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_swift', st.session_state[f"bank_swift_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_swift_{selected_file}", type="primary" if is_ocr_active('header', 'bank_swift') else "secondary"): activate_ocr_field('header', 'bank_swift') # Bank Account Name col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Bank Account Name", value=header.get('bank_acc_name', ''), key=f"bank_acc_name_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_acc_name', st.session_state[f"bank_acc_name_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_acc_name_{selected_file}", type="primary" if is_ocr_active('header', 'bank_acc_name') else "secondary"): activate_ocr_field('header', 'bank_acc_name') # Bank Branch col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Bank Branch", value=header.get('bank_branch', ''), key=f"bank_branch_{selected_file}", on_change=lambda: sync_field_to_data(selected_file, 'header', 'bank_branch', st.session_state[f"bank_branch_{selected_file}"]) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_branch_{selected_file}", type="primary" if is_ocr_active('header', 'bank_branch') else "secondary"): activate_ocr_field('header', 'bank_branch') # TAB 4: Items with tab4: current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) items = current_gt_parse.get('items', []) # Add/Remove row buttons col_add, col_remove = st.columns([1, 1]) with col_add: if st.button("➕ Add New Item", key=f"add_item_{selected_file}", use_container_width=True): if not st.session_state.button_clicked: st.session_state.button_clicked = True new_item = { "descriptions": "", "SKU": "", "quantity": "", "unit_price": "", "amount": "", "discount_rate_per_item": "", "discount_amount_per_item": "", "tax_rate_per_item": "", "tax_amount_per_item": "", "Line_total": "" } current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) current_items = current_gt_parse.get('items', []) current_items.append(new_item) current_gt_parse['items'] = current_items st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse st.session_state.modified_indices.add(selected_file) new_idx = len(current_items) - 1 expander_key_new = f"line_item_expander_{selected_file}_{new_idx}" st.session_state[expander_key_new] = True st.rerun() with col_remove: if st.button("➖ Remove Last Item", key=f"remove_item_{selected_file}", disabled=(len(items) == 0), use_container_width=True): if not st.session_state.button_clicked and len(items) > 0: st.session_state.button_clicked = True current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) current_items = current_gt_parse.get('items', []) N = len(current_items) current_items.pop() current_gt_parse['items'] = current_items st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse st.session_state.modified_indices.add(selected_file) popped_idx = N - 1 expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}" if expander_key_popped in st.session_state: del st.session_state[expander_key_popped] st.rerun() if st.session_state.button_clicked: st.session_state.button_clicked = False current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) items = current_gt_parse.get('items', []) if items: for idx, item in enumerate(items): expander_key = f"line_item_expander_{selected_file}_{idx}" expanded_default = st.session_state.get(expander_key, False) with st.expander(f"**Item {idx + 1}** - {item.get('descriptions', 'N/A')[:30]}", expanded=expanded_default): # Descriptions col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_area( "Descriptions", value=item.get('descriptions', ''), key=f"desc_{selected_file}_{idx}", height=60, on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'descriptions', st.session_state[f"desc_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_desc_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'descriptions', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'descriptions', idx) # SKU col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "SKU", value=item.get('SKU', ''), key=f"sku_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'SKU', st.session_state[f"sku_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_sku_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'SKU', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'SKU', idx) # Quantity col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Quantity", value=item.get('quantity', ''), key=f"qty_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'quantity', st.session_state[f"qty_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_qty_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'quantity', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'quantity', idx) # Unit Price col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Unit Price", value=item.get('unit_price', ''), key=f"unit_price_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'unit_price', st.session_state[f"unit_price_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_unit_price_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'unit_price', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'unit_price', idx) # Amount col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Amount", value=item.get('amount', ''), key=f"amount_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'amount', st.session_state[f"amount_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_amount_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'amount', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'amount', idx) # Discount Rate Per Item col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Discount Rate Per Item", value=item.get('discount_rate_per_item', ''), key=f"discount_rate_per_item_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'discount_rate_per_item', st.session_state[f"discount_rate_per_item_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_discount_rate_per_item_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'discount_rate_per_item', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'discount_rate_per_item', idx) # Discount Amount Per Item col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Discount Amount Per Item", value=item.get('discount_amount_per_item', ''), key=f"discount_amount_per_item_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'discount_amount_per_item', st.session_state[f"discount_amount_per_item_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_discount_amount_per_item_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'discount_amount_per_item', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'discount_amount_per_item', idx) # Tax Rate Per Item (NEW FIELD) col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Tax Rate Per Item", value=item.get('tax_rate_per_item', ''), key=f"tax_rate_per_item_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'tax_rate_per_item', st.session_state[f"tax_rate_per_item_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_tax_rate_per_item_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'tax_rate_per_item', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'tax_rate_per_item', idx) # Tax Amount Per Item (RENAMED from "Tax") col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Tax Amount Per Item", value=item.get('tax_amount_per_item', ''), key=f"tax_amount_per_item_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'tax_amount_per_item', st.session_state[f"tax_amount_per_item_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_tax_amount_per_item_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'tax_amount_per_item', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'tax_amount_per_item', idx) # Line Total col_input, col_btn = st.columns([5, 1]) with col_input: new_value = st.text_input( "Line Total", value=item.get('Line_total', ''), key=f"line_total_{selected_file}_{idx}", on_change=lambda i=idx: sync_field_to_data(selected_file, 'items', 'Line_total', st.session_state[f"line_total_{selected_file}_{i}"], i) ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_line_total_{selected_file}_{idx}", type="primary" if is_ocr_active('items', 'Line_total', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('items', 'Line_total', idx) st.markdown("**📊 Items Summary Table**") df = pd.DataFrame(items) df.index = df.index + 1 df.index.name = 'SL No' st.dataframe( df, use_container_width=True, height=300 ) else: st.info("No items. Click '➕ Add New Item' to add a new item.") # Save button col1, col2 = st.columns([1, 1]) with col1: if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"): if not st.session_state.just_saved: st.session_state.just_saved = True auto_save(selected_file) st.session_state.save_message = "✅ Changes saved successfully!" st.session_state.save_message_time = time.time() st.rerun() if st.session_state.just_saved: st.session_state.just_saved = False if st.session_state.save_message: st.success(st.session_state.save_message) st.markdown('
', unsafe_allow_html=True) # Close scrollable container