Spaces:
Sleeping
Sleeping
| import os | |
| from pathlib import Path | |
# -----------------------------
# Environment hardening (HF Spaces, /.cache issue)
# -----------------------------
# When HOME is empty or "/", anything derived from it (such as the
# ~/.streamlit directory created below) would land at the filesystem root.
# Redirect HOME to the working directory if it is writable, otherwise /tmp.
# os.environ.get("HOME", "") can never return None, so only "" and "/" are
# checked.
_home = os.environ.get("HOME", "")
if _home in ("", "/"):
    repo_dir = os.getcwd()
    safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp"
    os.environ["HOME"] = safe_home
    print(f"[startup] HOME not set or unwritable -> setting HOME={safe_home}")

# Best effort: make sure the Streamlit config directory exists. A failure is
# only logged so that startup continues.
streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
try:
    streamlit_dir.mkdir(parents=True, exist_ok=True)
    print(f"[startup] ensured {streamlit_dir}")
except OSError as e:
    # Path.mkdir reports filesystem problems (permissions, read-only FS,
    # path component is a file) as OSError subclasses.
    print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")
| import streamlit as st | |
| import json | |
| import io | |
| from PIL import Image | |
| import time | |
| import pandas as pd | |
| from streamlit_drawable_canvas import st_canvas | |
| import pytesseract | |
| import numpy as np | |
| from datetime import datetime | |
| import fitz # PyMuPDF for PDF handling | |
# Tesseract binary location. Only Windows gets an explicit executable path;
# on Linux/macOS (HF Spaces uses Linux) pytesseract's own setting is left
# untouched.
if os.name == 'nt':
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Page configuration: browser tab title and wide layout.
st.set_page_config(page_title="Invoice Data Viewer", layout="wide")
# Custom CSS injected as raw HTML. It tightens the spacing around form
# widgets, shrinks buttons and colours primary buttons red, reduces gaps
# between blocks/columns, and lets the column that holds the drawing canvas
# scroll horizontally with a styled scrollbar.
# NOTE(review): the selectors depend on Streamlit's generated data-testid
# attributes and class names — re-check them after a Streamlit upgrade.
st.markdown("""
<style>
/* Reduce spacing between form fields */
.stTextInput > div > div > input,
.stTextArea > div > div > textarea,
.stSelectbox > div > div > div {
margin-bottom: 0px !important;
}
div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextInput"]),
div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextArea"]),
div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stSelectbox"]) {
margin-bottom: 4px !important;
}
.stSelectbox { margin-bottom: 4px !important; }
/* Button styling */
.stButton > button {
padding: 0.25rem 0.5rem !important;
font-size: 0.85rem !important;
line-height: 1 !important;
min-height: 1.8rem !important;
height: 1.8rem !important;
}
.stButton > button[kind="primary"] {
background-color: #FF0000 !important;
border-color: #FF0000 !important;
color: white !important;
}
.stButton > button[kind="primary"]:hover {
background-color: #CC0000 !important;
border-color: #CC0000 !important;
}
/* Small vertical gaps */
[data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] { gap: 0.25rem !important; }
[data-testid="column"] { padding-left: 0.5rem !important; padding-right: 0.5rem !important; }
[data-testid="stHorizontalBlock"] { gap: 0.5rem !important; }
/* FIXED: Remove problematic viewport-based heights */
section[data-testid="stAppViewContainer"] {
overflow: visible !important;
}
.main .block-container {
overflow: visible !important;
padding-bottom: 1rem !important;
}
/* Force the column containing the canvas to allow horizontal scroll */
[data-testid="column"]:has(.stCanvas) {
overflow-x: auto !important;
overflow-y: hidden !important;
}
/* Ensure canvas doesn't shrink */
.stCanvas {
min-width: max-content !important;
}
/* Style the scrollbar */
[data-testid="column"]:has(.stCanvas)::-webkit-scrollbar {
height: 12px;
}
[data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-track {
background: #e0e0e0;
border-radius: 6px;
}
[data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-thumb {
background: rgba(0,0,0,0.4);
border-radius: 6px;
}
[data-testid="column"]:has(.stCanvas)::-webkit-scrollbar-thumb:hover {
background: rgba(0,0,0,0.6);
}
</style>
""", unsafe_allow_html=True)
def load_jsonl(file):
    """Parse an uploaded JSONL file into a list of records.

    Args:
        file: Object exposing ``getvalue()`` that returns the file content as
            UTF-8 encoded bytes (e.g. a Streamlit upload or ``io.BytesIO``).

    Returns:
        list: One decoded JSON value per non-blank line, in file order.

    Raises:
        UnicodeDecodeError: If the content is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # 'utf-8-sig' decodes plain UTF-8 unchanged but also drops a leading
    # byte-order mark, which json.loads would otherwise reject on the first
    # record.
    content = file.getvalue().decode('utf-8-sig')
    # Split on '\n' only: str.splitlines() would also break on characters
    # such as U+2028, which may legally appear inside a JSON string.
    # json.loads ignores surrounding whitespace, so a trailing '\r' is fine.
    return [json.loads(line) for line in content.split('\n') if line.strip()]
def reorder_record_fields(record):
    """Return a copy of ``record`` with its keys in export order.

    ``file_name``, ``file_names`` and ``gt_parse`` come first (each only if
    present), followed by every other key in its original order. The input
    mapping is not modified; values are shared, not copied.
    """
    leading_keys = ('file_name', 'file_names', 'gt_parse')
    ordered = {key: record[key] for key in leading_keys if key in record}
    # setdefault leaves the leading keys where they are and appends the rest.
    for key, value in record.items():
        ordered.setdefault(key, value)
    return ordered
def save_to_jsonl(data):
    """Serialize records to JSONL text.

    Each record is passed through ``reorder_record_fields`` and dumped as one
    JSON document; the documents are joined with newlines, with no trailing
    newline. An empty ``data`` yields an empty string.
    """
    return '\n'.join(json.dumps(reorder_record_fields(entry)) for entry in data)
def pdf_to_images(pdf_file):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_file: File-like object whose ``read()`` returns the PDF bytes.

    Returns:
        list: One PIL ``Image`` per page, in page order. On any failure the
        error is shown with ``st.error`` and an empty list is returned.
    """
    try:
        pdf_bytes = pdf_file.read()
        pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            images = []
            for page_num in range(pdf_document.page_count):
                page = pdf_document[page_num]
                # Matrix(3, 3) scales the rendering 3x on both axes for a
                # sharper image; alpha=False drops the transparency channel.
                pix = page.get_pixmap(matrix=fitz.Matrix(3, 3), alpha=False)
                # Round-trip through PNG bytes so each image is backed by its
                # own in-memory buffer, independent of the document.
                img_data = pix.tobytes("png")
                images.append(Image.open(io.BytesIO(img_data)))
            return images
        finally:
            # Release the document even when a page fails to render.
            pdf_document.close()
    except Exception as e:
        st.error(f"Error converting PDF: {str(e)}")
        return []
def perform_ocr(image, bbox):
    """Run Tesseract on a rectangular region of ``image``.

    ``bbox`` holds ``(x1, y1, x2, y2)`` in image pixels; values are truncated
    to int, the top-left corner is clamped to 0 and the bottom-right corner to
    the image size. Returns the recognised text with surrounding whitespace
    removed, or a string starting with ``"OCR Error: "`` if anything raises.
    """
    try:
        # Convert all four coordinates before touching the image.
        left, top, right, bottom = [int(bbox[i]) for i in range(4)]
        region = (
            max(0, left),
            max(0, top),
            min(image.width, right),
            min(image.height, bottom),
        )
        snippet = image.crop(region)
        # --psm 6 selects Tesseract's page segmentation mode 6.
        recognised = pytesseract.image_to_string(snippet, config='--psm 6')
        return recognised.strip()
    except Exception as e:
        return f"OCR Error: {str(e)}"
def scale_image_to_fixed_size(image, max_width=1100, max_height=1100):
    """Scale an image to fit inside ``max_width`` x ``max_height``, no padding.

    The aspect ratio is kept. Because the ratio is simply the tighter of the
    two fits, images smaller than the box are enlarged. RGBA input is
    flattened onto a white background; any other non-RGB mode is converted to
    RGB.

    Args:
        image: PIL image to scale.
        max_width: Maximum width of the result in pixels.
        max_height: Maximum height of the result in pixels.

    Returns:
        tuple: ``(resized_image, ratio, 0, 0)``. ``ratio`` is the scale factor
        applied. The two zeros are offsets that callers unpack as
        ``paste_x, paste_y``; they are always 0 because nothing is padded.
    """
    if image.mode not in ('RGB', 'RGBA'):
        image = image.convert('RGB')
    elif image.mode == 'RGBA':
        # Composite over white, using the alpha band as the paste mask.
        background = Image.new('RGB', image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3])
        image = background
    width_ratio = max_width / image.width
    height_ratio = max_height / image.height
    ratio = min(width_ratio, height_ratio)
    # int() truncation can give 0 for a very long, thin image, and resize()
    # rejects a zero-sized target; keep each side at least one pixel.
    new_width = max(1, int(image.width * ratio))
    new_height = max(1, int(image.height * ratio))
    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    return resized_image, ratio, 0, 0
def get_base_filename(record):
    """Return the extension-less base name of the document a record refers to.

    ``file_names`` (plural) takes precedence. If it is a non-empty list, the
    first entry is used with its image extension and a trailing ``_page<N>``
    suffix removed, e.g. ``["inv_page1.png", "inv_page2.png"]`` -> ``"inv"``.
    A non-empty ``file_names`` that is not a list is returned unchanged.

    Otherwise ``file_name`` (singular) is used with a trailing ``.pdf`` and
    then a trailing image extension removed. A missing, empty or ``None``
    ``file_name`` yields ``""``.

    Extension and page-suffix matching is case-insensitive, in line with the
    pattern ``detect_image_groups`` uses for uploaded files.
    """
    import re  # local import, same style as detect_image_groups

    image_exts = ('.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp')

    def _strip_image_ext(name):
        # Remove at most one known image extension, ignoring case.
        lowered = name.lower()
        for ext in image_exts:
            if lowered.endswith(ext):
                return name[:-len(ext)]
        return name

    file_names = record.get('file_names')
    if file_names:
        if isinstance(file_names, list):
            base = _strip_image_ext(file_names[0])
            # Remove only a real "_page<digits>" suffix. Splitting on the
            # bare text "_page" would also truncate a name like
            # "inv_pageant".
            return re.sub(r'_page\d+$', '', base, flags=re.IGNORECASE)
        return file_names

    # `or ''` also covers an explicit JSON null.
    file_name = record.get('file_name') or ''
    # Strip the PDF extension first (PDFs that were converted to images),
    # then any image extension that remains.
    if file_name.lower().endswith('.pdf'):
        file_name = file_name[:-4]
    return _strip_image_ext(file_name)
def detect_image_groups(images_dict):
    """Find multi-page image sets among uploaded files.

    A file belongs to a candidate set when its name has the form
    ``<base>_page<N>.<ext>`` with an image extension (case-insensitive),
    e.g. ``invoice01_page1.png`` and ``invoice01_page2.png``.

    Returns:
        tuple: ``(image_groups_metadata, grouped_files)``.
        ``image_groups_metadata`` maps each base name that has more than one
        page to a dict with ``pages`` (values from ``images_dict`` ordered by
        page number), ``filenames`` (same order), ``total_pages`` and
        ``current_page`` (always 0). ``grouped_files`` is the set of every
        file name that matched the pattern, including names whose base ended
        up with a single page.
    """
    import re

    page_name_re = re.compile(
        r'^(.+)_page(\d+)\.(png|jpg|jpeg|tiff|tif|bmp)$', re.IGNORECASE
    )

    candidates = {}
    grouped_files = set()
    for name in images_dict:
        hit = page_name_re.match(name)
        if hit is None:
            continue
        stem, page_no = hit.group(1), int(hit.group(2))
        candidates.setdefault(stem, []).append((page_no, name))
        grouped_files.add(name)

    image_groups_metadata = {}
    for stem, entries in candidates.items():
        # A single matching file is not treated as a multi-page group.
        if len(entries) < 2:
            continue
        # Stable sort on the page number only.
        ordered_names = [name for _, name in sorted(entries, key=lambda e: e[0])]
        image_groups_metadata[stem] = {
            'pages': [images_dict[name] for name in ordered_names],
            'filenames': ordered_names,
            'total_pages': len(ordered_names),
            'current_page': 0,
        }
    return image_groups_metadata, grouped_files
def swap_sender_recipient_details(index):
    """Exchange sender and recipient name/address of the record at ``index``.

    Works on ``gt_parse['header']`` of ``st.session_state.edited_data[index]``
    (missing dicts or fields are treated as empty) and marks the record as
    modified.
    """
    record = st.session_state.edited_data[index]
    gt_parse = record.get('gt_parse', {})
    header = gt_parse.get('header', {})

    # Read both sides before writing anything.
    new_sender = (header.get('rcpt_name', ''), header.get('rcpt_addr', ''))
    new_rcpt = (header.get('sender_name', ''), header.get('sender_addr', ''))

    # Recipient -> sender, then the saved sender -> recipient.
    header['sender_name'], header['sender_addr'] = new_sender
    header['rcpt_name'], header['rcpt_addr'] = new_rcpt

    # Write back so that newly created dicts are attached to the record.
    gt_parse['header'] = header
    record['gt_parse'] = gt_parse
    st.session_state.modified_indices.add(index)
# Initialize session state: each key gets its default only when it is not
# already present, so existing values are left alone on later runs.
_session_defaults = {
    'data': None,
    'current_index': 0,
    'edited_data': None,
    'page': 'upload',
    'images': {},
    'pdf_metadata': {},
    'image_groups_metadata': {},
    'current_page_num': {},
    'modified_indices': set(),
    'ocr_active_section': None,
    'ocr_active_field': None,
    'ocr_line_item_row': None,
    'canvas_key': 0,
    'button_clicked': False,
    'save_message': None,
    'save_message_time': None,
    'just_saved': False,
    'just_swapped': False,
    'navigating_page': False,
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def auto_save(index):
    """Normalise the file reference of a record and mark it as modified.

    Does nothing when there is no edited data. Warns and returns when the
    record has no usable file name. Otherwise the record's base name is
    matched against the uploaded files, and:

    * a matching multi-page PDF sets ``file_names`` to
      ``<base>_page<N>.png`` per page and removes ``file_name``;
    * a matching single-page PDF sets ``file_name`` to ``<base>.png`` and
      removes ``file_names``;
    * otherwise, a base name that is a known image group sets ``file_names``
      to that group's file names and removes ``file_name``.

    Finally ``st.session_state.data`` is replaced by a shallow copy of the
    edited list and ``index`` is added to ``modified_indices``.
    """
    state = st.session_state
    if not state.edited_data:
        return

    record = state.edited_data[index]
    stem = get_base_filename(record)
    if not stem:
        st.warning("Cannot save: No file name found in record")
        return

    # Resolve the uploaded file this record refers to: exact key first, then
    # base name plus a known extension, then any upload whose name without
    # its last extension equals the base name.
    uploaded = state.images
    matched_name = None
    if stem in uploaded:
        matched_name = stem
    else:
        for suffix in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']:
            candidate = stem + suffix
            if candidate in uploaded:
                matched_name = candidate
                break
        if not matched_name:
            for uploaded_name in uploaded.keys():
                if uploaded_name.rsplit('.', 1)[0] == stem:
                    matched_name = uploaded_name
                    break

    if matched_name and matched_name in state.pdf_metadata:
        # PDF upload: the record is rewritten to reference per-page PNG names.
        page_total = state.pdf_metadata[matched_name]['total_pages']
        pdf_stem = matched_name.rsplit('.', 1)[0]
        if page_total > 1:
            record['file_names'] = [
                f"{pdf_stem}_page{page_no}.png" for page_no in range(1, page_total + 1)
            ]
            record.pop('file_name', None)
        else:
            record['file_name'] = f"{pdf_stem}.png"
            record.pop('file_names', None)
    elif stem in state.image_groups_metadata:
        # Multi-page image group: reference the group's own file names.
        record['file_names'] = state.image_groups_metadata[stem]['filenames']
        record.pop('file_name', None)

    state.data = state.edited_data.copy()
    state.modified_indices.add(index)
def sync_field_to_data(index, section, field, value, row_idx=None):
    """Write one widget value into the edited record and mark it modified.

    For ``section == 'items'`` the value goes to ``items[row_idx][field]``,
    and only when ``row_idx`` is given and below the number of items. For any
    other section it goes to ``gt_parse[section][field]``, creating the
    section dict if needed.
    """
    record = st.session_state.edited_data[index]
    gt_parse = record.get('gt_parse', {})

    if section != 'items':
        gt_parse.setdefault(section, {})[field] = value
    else:
        rows = gt_parse.get('items', [])
        # NOTE(review): nesting reconstructed from a whitespace-stripped
        # source — confirm that the write-back belongs inside this check.
        if row_idx is not None and row_idx < len(rows):
            rows[row_idx][field] = value
            gt_parse['items'] = rows

    record['gt_parse'] = gt_parse
    st.session_state.modified_indices.add(index)
def activate_ocr_field(section, field, row_idx=None):
    """Toggle which field receives the next OCR result.

    If the given (section, field, row) is already the active target, the
    target is cleared. Otherwise it becomes the active target and, for a line
    item row, the session key ``line_item_expander_<record>_<row>`` is set to
    True.
    """
    state = st.session_state
    already_active = (
        state.ocr_active_section == section
        and state.ocr_active_field == field
        and state.ocr_line_item_row == row_idx
    )
    if already_active:
        state.ocr_active_section = None
        state.ocr_active_field = None
        state.ocr_line_item_row = None
        return

    state.ocr_active_section = section
    state.ocr_active_field = field
    state.ocr_line_item_row = row_idx
    # NOTE(review): nesting reconstructed from a whitespace-stripped source —
    # confirm the expander flag is meant to be set on activation only.
    if section == 'items' and row_idx is not None:
        current_idx = state.get('current_index', 0)
        expander_key = f"line_item_expander_{current_idx}_{row_idx}"
        state[expander_key] = True
def is_ocr_active(section, field, row_idx=None):
    """Return True when (section, field, row_idx) is the active OCR target."""
    state = st.session_state
    active_target = (
        state.ocr_active_section,
        state.ocr_active_field,
        state.ocr_line_item_row,
    )
    return active_target == (section, field, row_idx)
| # PAGE 1: Upload Page | |
| if st.session_state.page == 'upload': | |
| st.title("π€ Invoice Data Viewer with OCR") | |
| st.markdown("### Upload your files to begin") | |
| st.markdown("**Step 1: Upload JSONL File**") | |
| uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json']) | |
| if uploaded_file is not None: | |
| try: | |
| data = load_jsonl(uploaded_file) | |
| st.session_state.data = data | |
| st.session_state.edited_data = data.copy() | |
| st.success(f"β Successfully loaded {len(data)} records!") | |
| except Exception as e: | |
| st.error(f"Error loading file: {str(e)}") | |
| st.markdown("**Step 2: Upload Images/PDFs Folder**") | |
| uploaded_files = st.file_uploader( | |
| "Choose image or PDF files", | |
| type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'], | |
| accept_multiple_files=True, | |
| help="Select all images and PDFs from your folder at once" | |
| ) | |
| if uploaded_files: | |
| images_dict = {} | |
| pdf_metadata = {} | |
| for file in uploaded_files: | |
| try: | |
| file_ext = file.name.lower().split('.')[-1] | |
| if file_ext == 'pdf': | |
| pdf_images = pdf_to_images(file) | |
| if pdf_images: | |
| images_dict[file.name] = pdf_images[0] | |
| pdf_metadata[file.name] = { | |
| 'pages': pdf_images, | |
| 'total_pages': len(pdf_images), | |
| 'current_page': 0 | |
| } | |
| else: | |
| image = Image.open(file) | |
| images_dict[file.name] = image | |
| except Exception as e: | |
| st.warning(f"Could not load file {file.name}: {str(e)}") | |
| st.session_state.images = images_dict | |
| st.session_state.pdf_metadata = pdf_metadata | |
| # Detect multi-page image groups (e.g., invoice01_page1.png, invoice01_page2.png) | |
| image_groups_metadata, grouped_files = detect_image_groups(images_dict) | |
| st.session_state.image_groups_metadata = image_groups_metadata | |
| # Initialize current page for PDFs and image groups | |
| for filename in pdf_metadata.keys(): | |
| if filename not in st.session_state.current_page_num: | |
| st.session_state.current_page_num[filename] = 0 | |
| for base_name in image_groups_metadata.keys(): | |
| if base_name not in st.session_state.current_page_num: | |
| st.session_state.current_page_num[base_name] = 0 | |
| if st.session_state.data is not None: | |
| gt_file_names = [] | |
| for rec in st.session_state.data: | |
| base_fname = get_base_filename(rec) | |
| if base_fname: | |
| gt_file_names.append(base_fname) | |
| matched_images = set() | |
| unmatched_gt_files = [] | |
| for fname in gt_file_names: | |
| if not fname: | |
| continue | |
| # Create a base name by stripping common extensions | |
| fname_base = fname | |
| for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: | |
| if fname.lower().endswith(ext.lower()): | |
| fname_base = fname[:-len(ext)] | |
| break | |
| # Check direct match | |
| if fname in images_dict: | |
| matched_images.add(fname) | |
| # Check base name in image groups (handles PDF converted to multi-page PNGs) | |
| elif fname_base in image_groups_metadata: | |
| matched_images.add(fname) | |
| # Check full name in image groups | |
| elif fname in image_groups_metadata: | |
| matched_images.add(fname) | |
| else: | |
| found = False | |
| # Try with extensions | |
| for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: | |
| if fname + ext in images_dict: | |
| matched_images.add(fname) | |
| found = True | |
| break | |
| if not found: | |
| # Try matching base name in uploaded files | |
| for uploaded_name in images_dict.keys(): | |
| uploaded_base = uploaded_name.rsplit('.', 1)[0] | |
| if uploaded_base == fname or uploaded_base == fname_base: | |
| matched_images.add(fname) | |
| found = True | |
| break | |
| for fname in gt_file_names: | |
| if fname and fname not in matched_images: | |
| unmatched_gt_files.append(fname) | |
| st.success(f"β Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!") | |
| st.info(f"π Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}") | |
| if unmatched_gt_files: | |
| st.warning(f"β οΈ {len(unmatched_gt_files)} file(s) from JSONL not matched:") | |
| with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"): | |
| for fname in unmatched_gt_files: | |
| st.text(f" β’ {fname}") | |
| else: | |
| st.success("β All JSONL file names matched to files!") | |
| else: | |
| st.success(f"β Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs, {len(image_groups_metadata)} multi-page image groups)!") | |
| st.info("βΉοΈ Upload a JSONL file to see how many files match the ground truth 'file_name' field.") | |
| if st.session_state.data is not None: | |
| col1, col2, col3 = st.columns([1, 1, 1]) | |
| with col2: | |
| if st.button("Continue to Viewer β", type="primary", use_container_width=True): | |
| st.session_state.page = 'viewer' | |
| st.session_state.modified_indices = set() | |
| st.session_state.navigating_page = False | |
| st.rerun() | |
| # PAGE 2: Viewer Page | |
| elif st.session_state.page == 'viewer': | |
| if st.session_state.save_message_time is not None: | |
| if time.time() - st.session_state.save_message_time > 3: | |
| st.session_state.save_message = None | |
| st.session_state.save_message_time = None | |
| today_date = datetime.now().strftime("%Y-%m-%d") | |
| col1, col2, col3, col4 = st.columns([1, 2, 2, 2]) | |
| with col1: | |
| if st.button("β Back to Upload"): | |
| st.session_state.page = 'upload' | |
| st.session_state.ocr_active_section = None | |
| st.session_state.ocr_active_field = None | |
| st.session_state.save_message = None | |
| st.session_state.save_message_time = None | |
| st.session_state.navigating_page = False | |
| st.rerun() | |
| with col2: | |
| if st.session_state.modified_indices: | |
| modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)] | |
| jsonl_modified = save_to_jsonl(modified_data) | |
| st.download_button( | |
| label=f"β¬οΈ Download Modified ({len(modified_data)})", | |
| data=jsonl_modified, | |
| file_name=f"modified_invoice_data_{today_date}.jsonl", | |
| mime="application/jsonl", | |
| type="primary", | |
| use_container_width=True | |
| ) | |
| else: | |
| st.button("β¬οΈ No Modified Records", disabled=True, use_container_width=True) | |
| with col3: | |
| if st.session_state.modified_indices: | |
| unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data)) | |
| if i not in st.session_state.modified_indices] | |
| jsonl_unmodified = save_to_jsonl(unmodified_data) | |
| st.download_button( | |
| label=f"β¬οΈ Download Unmodified ({len(unmodified_data)})", | |
| data=jsonl_unmodified, | |
| file_name=f"unmodified_invoice_data_{today_date}.jsonl", | |
| mime="application/jsonl", | |
| use_container_width=True | |
| ) | |
| else: | |
| st.button("β¬οΈ No Unmodified Records", disabled=True, use_container_width=True) | |
| with col4: | |
| jsonl_all = save_to_jsonl(st.session_state.edited_data) | |
| st.download_button( | |
| label=f"β¬οΈ Download All ({len(st.session_state.edited_data)})", | |
| data=jsonl_all, | |
| file_name=f"all_invoice_data_{today_date}.jsonl", | |
| mime="application/jsonl", | |
| use_container_width=True | |
| ) | |
| # Build file names list for dropdown using helper function | |
| file_names = [] | |
| for i, record in enumerate(st.session_state.data or []): | |
| base_name = get_base_filename(record) | |
| file_names.append(base_name if base_name else f'Record {i}') | |
| if not file_names: | |
| st.error("No records loaded. Please upload a JSONL file on the Upload page.") | |
| if st.button("β Back to Upload"): | |
| st.session_state.page = 'upload' | |
| st.rerun() | |
| else: | |
| options = list(range(len(file_names))) | |
| if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names): | |
| st.session_state.edited_data = (st.session_state.data or []).copy() | |
| cur_idx = st.session_state.get('current_index', 0) | |
| try: | |
| cur_idx = int(cur_idx) | |
| except Exception: | |
| cur_idx = 0 | |
| if cur_idx < 0: | |
| cur_idx = 0 | |
| if cur_idx >= len(options): | |
| cur_idx = len(options) - 1 | |
| selected_file = st.selectbox( | |
| "Select a file to view:", | |
| options=options, | |
| format_func=lambda x: f"{'βοΈ ' if x in st.session_state.modified_indices else ''}{file_names[x]}", | |
| index=cur_idx | |
| ) | |
| st.session_state.current_index = selected_file | |
| current_record = st.session_state.edited_data[selected_file] | |
| left_col, right_col = st.columns([1.6, 1.0], gap="small") | |
| # LEFT SIDE: Image Display with OCR Canvas | |
| with left_col: | |
| # Use helper function to get base file name | |
| file_name = get_base_filename(current_record) | |
| if file_name: | |
| # Create base name by stripping extensions | |
| file_name_base = file_name | |
| for ext in ['.pdf', '.PDF', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: | |
| if file_name.lower().endswith(ext.lower()): | |
| file_name_base = file_name[:-len(ext)] | |
| break | |
| actual_file_name = None | |
| # First check for direct match | |
| if file_name in st.session_state.images: | |
| actual_file_name = file_name | |
| # Check if base name matches an image group (handles PDF converted to images) | |
| elif file_name_base in st.session_state.image_groups_metadata: | |
| actual_file_name = file_name_base # Use base name for image groups | |
| # Check if full name is an image group | |
| elif file_name in st.session_state.image_groups_metadata: | |
| actual_file_name = file_name # Use as-is for image groups | |
| else: | |
| # Try with extensions | |
| for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: | |
| if file_name + ext in st.session_state.images: | |
| actual_file_name = file_name + ext | |
| break | |
| if not actual_file_name: | |
| # Try matching base name | |
| for uploaded_name in st.session_state.images.keys(): | |
| uploaded_base = uploaded_name.rsplit('.', 1)[0] | |
| if uploaded_base == file_name or uploaded_base == file_name_base: | |
| actual_file_name = uploaded_name | |
| break | |
| if actual_file_name: | |
| is_pdf = actual_file_name in st.session_state.pdf_metadata | |
| is_image_group = actual_file_name in st.session_state.image_groups_metadata or file_name_base in st.session_state.image_groups_metadata | |
| # Determine which key to use for image group | |
| image_group_key = None | |
| if is_image_group: | |
| if actual_file_name in st.session_state.image_groups_metadata: | |
| image_group_key = actual_file_name | |
| else: | |
| image_group_key = file_name_base | |
| if is_pdf: | |
| pdf_meta = st.session_state.pdf_metadata[actual_file_name] | |
| total_pages = pdf_meta['total_pages'] | |
| current_page = st.session_state.current_page_num.get(actual_file_name, 0) | |
| col_prev, col_info, col_next = st.columns([1, 2, 1]) | |
| with col_prev: | |
| prev_clicked = st.button("β¬ οΈ Previous", key=f"prev_page_{selected_file}_{actual_file_name}", | |
| disabled=(current_page == 0), use_container_width=True) | |
| with col_info: | |
| st.markdown(f"<div style='text-align: center; padding: 5px;'><b>π Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True) | |
| with col_next: | |
| next_clicked = st.button("Next β‘οΈ", key=f"next_page_{selected_file}_{actual_file_name}", | |
| disabled=(current_page >= total_pages - 1), use_container_width=True) | |
| if not st.session_state.navigating_page: | |
| if prev_clicked: | |
| st.session_state.navigating_page = True | |
| st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1) | |
| st.session_state.canvas_key += 1 | |
| st.session_state.ocr_active_section = None | |
| st.session_state.ocr_active_field = None | |
| st.rerun() | |
| elif next_clicked: | |
| st.session_state.navigating_page = True | |
| st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1) | |
| st.session_state.canvas_key += 1 | |
| st.session_state.ocr_active_section = None | |
| st.session_state.ocr_active_field = None | |
| st.rerun() | |
| else: | |
| st.session_state.navigating_page = False | |
| elif is_image_group and image_group_key: | |
| img_group_meta = st.session_state.image_groups_metadata[image_group_key] | |
| total_pages = img_group_meta['total_pages'] | |
| current_page = st.session_state.current_page_num.get(image_group_key, 0) | |
| col_prev, col_info, col_next = st.columns([1, 2, 1]) | |
| with col_prev: | |
| prev_clicked = st.button("β¬ οΈ Previous", key=f"prev_page_{selected_file}_{image_group_key}", | |
| disabled=(current_page == 0), use_container_width=True) | |
| with col_info: | |
| st.markdown(f"<div style='text-align: center; padding: 5px;'><b>πΌοΈ Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True) | |
| with col_next: | |
| next_clicked = st.button("Next β‘οΈ", key=f"next_page_{selected_file}_{image_group_key}", | |
| disabled=(current_page >= total_pages - 1), use_container_width=True) | |
| if not st.session_state.navigating_page: | |
| if prev_clicked: | |
| st.session_state.navigating_page = True | |
| st.session_state.current_page_num[image_group_key] = max(0, current_page - 1) | |
| st.session_state.canvas_key += 1 | |
| st.session_state.ocr_active_section = None | |
| st.session_state.ocr_active_field = None | |
| st.rerun() | |
| elif next_clicked: | |
| st.session_state.navigating_page = True | |
| st.session_state.current_page_num[image_group_key] = min(total_pages - 1, current_page + 1) | |
| st.session_state.canvas_key += 1 | |
| st.session_state.ocr_active_section = None | |
| st.session_state.ocr_active_field = None | |
| st.rerun() | |
| else: | |
| st.session_state.navigating_page = False | |
# Resolve the image to display for this record (``current_image``), or report
# that the referenced file is missing from the uploads.
if not actual_file_name:
    st.error(f"β File '{file_name}' not found in uploaded files")
    st.info("π‘ Available files:")
    with st.expander("Show available files"):
        # Only the first 20 uploaded names are listed; the rest are summarised.
        for img_name in list(st.session_state.images.keys())[:20]:
            st.text(f" β’ {img_name}")
        if len(st.session_state.images) > 20:
            st.text(f" ... and {len(st.session_state.images) - 20} more")
    current_image = None
else:
    is_pdf = actual_file_name in st.session_state.pdf_metadata
    is_image_group = (
        actual_file_name in st.session_state.image_groups_metadata
        or file_name_base in st.session_state.image_groups_metadata
    )

    # An image group may be registered under the full file name or under its
    # base name; the full name takes precedence.
    if not is_image_group:
        image_group_key = None
    elif actual_file_name in st.session_state.image_groups_metadata:
        image_group_key = actual_file_name
    else:
        image_group_key = file_name_base

    if is_pdf:
        # PDF: show the page currently selected for this file (page 0 by default).
        current_page = st.session_state.current_page_num.get(actual_file_name, 0)
        pdf_meta = st.session_state.pdf_metadata[actual_file_name]
        current_image = pdf_meta['pages'][current_page]
    elif is_image_group and image_group_key:
        # Image group: same idea, but the page number is tracked per group key.
        current_page = st.session_state.current_page_num.get(image_group_key, 0)
        img_group_meta = st.session_state.image_groups_metadata[image_group_key]
        current_image = img_group_meta['pages'][current_page]
    else:
        # Plain single image.
        current_image = st.session_state.images[actual_file_name]
# Show the current page on a drawable canvas and, when an OCR target field is
# active, OCR the most recently drawn rectangle and write the text into that
# field of the edited record.
# ``is not None`` rather than truthiness: the value is an image object, not a flag.
if current_image is not None:
    scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(
        current_image, max_width=700, max_height=1000
    )

    # Wrap canvas in scrollable container
    # NOTE(review): Streamlit renders every st.markdown call as its own element,
    # so this opening <div> probably does not enclose the canvas - confirm in the
    # browser before relying on the scroll container.
    st.markdown(
        f'<div class="image-scroll-container" style="max-height: {scaled_image.height + 40}px;">',
        unsafe_allow_html=True,
    )
    canvas_result = st_canvas(
        fill_color="rgba(255, 165, 0, 0.3)",
        stroke_width=2,
        stroke_color="#FF0000",
        background_image=scaled_image,
        update_streamlit=True,
        height=scaled_image.height,
        width=scaled_image.width,
        drawing_mode="rect",
        # canvas_key is bumped elsewhere to force a fresh, empty canvas.
        key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
    )
    st.markdown('</div>', unsafe_allow_html=True)

    if canvas_result.json_data is not None and st.session_state.ocr_active_field:
        objects = canvas_result.json_data.get("objects", [])
        if objects:
            # Only the last drawn rectangle is used.
            rect = objects[-1]
            # Map canvas coordinates back to the original image: remove the
            # paste offset, then undo the scaling.
            bbox = [
                (rect["left"] - paste_x) / scale_ratio,
                (rect["top"] - paste_y) / scale_ratio,
                (rect["left"] + rect["width"] - paste_x) / scale_ratio,
                (rect["top"] + rect["height"] - paste_y) / scale_ratio,
            ]
            with st.spinner("Performing OCR..."):
                ocr_text = perform_ocr(current_image, bbox)

            if ocr_text and not ocr_text.startswith("OCR Error"):
                st.success(f"β OCR Result: {ocr_text}")
                gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
                if st.session_state.ocr_active_section == 'items':
                    # Line-item target: write into the row recorded in
                    # ocr_line_item_row, if that row still exists.
                    items = gt_parse.get('items', [])
                    row_idx = st.session_state.ocr_line_item_row
                    if row_idx is not None and row_idx < len(items):
                        items[row_idx][st.session_state.ocr_active_field] = ocr_text
                        gt_parse['items'] = items
                        # Keep that item's expander open after the rerun.
                        expander_key = f"line_item_expander_{selected_file}_{row_idx}"
                        st.session_state[expander_key] = True
                else:
                    section = st.session_state.ocr_active_section
                    field = st.session_state.ocr_active_field
                    if section not in gt_parse:
                        gt_parse[section] = {}
                    gt_parse[section][field] = ocr_text
                st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
                st.session_state.modified_indices.add(selected_file)
                # New canvas key => the drawn rectangle is discarded on rerun.
                st.session_state.canvas_key += 1
                st.rerun()
            elif ocr_text:
                # perform_ocr reported a failure ("OCR Error..." text).
                st.error(ocr_text)
            else:
                # Empty result: previously this rendered a blank error box.
                st.warning("No text detected in the selected area.")
| else: | |
| st.warning("No file name specified in record") | |
| # RIGHT SIDE: Editable Details | |
| with right_col: | |
| # Create scrollable container for form fields | |
# Opening tag of the form's scroll container (the matching closing tag is
# emitted after the save button), followed by the panel title.
st.markdown(
    '<div style="max-height: 85vh; overflow-y: auto; overflow-x: hidden; padding-right: 10px;">',
    unsafe_allow_html=True,
)
st.markdown("### π Invoice Details")

# Working copy of the parsed invoice data for the selected file.
gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})

# One tab per group of editable fields.
tab_titles = [
    "π Invoice Details",
    "π₯ Party Details",
    "π¦ Bank Details",
    "π Line Items",
]
tab1, tab2, tab3, tab4 = st.tabs(tab_titles)
| # TAB 1: Header (includes invoice details + summary fields) | |
with tab1:
    # Invoice header fields plus the summary (totals) fields. Every field is
    # rendered as a text input next to an OCR button; the layout, widget keys
    # and callbacks are identical per field, so they are generated from a table
    # instead of being written out once per field.
    header = gt_parse.get('header', {})
    summary = gt_parse.get('summary', {})
    section_values = {'header': header, 'summary': summary}

    # (label, gt_parse section, field name) in display order.
    tab1_fields = [
        ("Invoice No", 'header', 'invoice_no'),
        ("Invoice Date", 'header', 'invoice_date'),
        ("Payment Terms", 'header', 'payment_terms'),
        ("Due Date", 'header', 'due_date'),
        ("Subtotal", 'summary', 'subtotal'),
        ("Tax Rate", 'summary', 'tax_rate'),
        ("Tax Amount", 'summary', 'tax_amount'),
        ("Discount Rate", 'summary', 'discount_rate'),
        ("Total Discount Amount", 'summary', 'total_discount_amount'),
        ("Total Amount", 'summary', 'total_amount'),
        ("Currency", 'summary', 'currency'),
    ]

    for fld_label, fld_section, fld_name in tab1_fields:
        fld_key = f"{fld_name}_{selected_file}"
        col_input, col_btn = st.columns([5, 1])
        with col_input:
            st.text_input(
                fld_label,
                value=section_values[fld_section].get(fld_name, ''),
                key=fld_key,
                # Loop variables are bound as defaults: the callback runs later,
                # and a plain closure would only ever see the last field.
                on_change=lambda s=fld_section, f=fld_name, k=fld_key: sync_field_to_data(
                    selected_file, s, f, st.session_state[k]
                ),
            )
        with col_btn:
            # Spacer so the button lines up with the input box, not its label.
            st.markdown("<br>", unsafe_allow_html=True)
            # The button is drawn as "primary" while this field is the active
            # OCR target.
            if st.button(
                "π",
                key=f"ocr_{fld_name}_{selected_file}",
                type="primary" if is_ocr_active(fld_section, fld_name) else "secondary",
            ):
                activate_ocr_field(fld_section, fld_name)
| # TAB 2: Party Details (without bank details) | |
with tab2:
    # Sender / recipient details, with a button that swaps the two parties.

    # SWAP BUTTON
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        if st.button("π Swap Sender β Recipient", key=f"swap_btn_{selected_file}",
                     type="primary", use_container_width=True):
            # just_swapped is a one-shot latch: the swap runs only while it is
            # down, and it is raised before the rerun.
            if not st.session_state.just_swapped:
                st.session_state.just_swapped = True
                swap_sender_recipient_details(selected_file)
                st.rerun()
    if st.session_state.just_swapped:
        st.session_state.just_swapped = False

    header = gt_parse.get('header', {})

    # (section heading, [(label, header field, multiline?), ...]) in display
    # order. Addresses use a text area, names a single-line input.
    party_groups = [
        ("**Sender Details**", [
            ("Sender Name", 'sender_name', False),
            ("Sender Address", 'sender_addr', True),
        ]),
        ("**Recipient Details**", [
            ("Recipient Name", 'rcpt_name', False),
            ("Recipient Address", 'rcpt_addr', True),
        ]),
    ]

    for group_heading, party_fields in party_groups:
        st.markdown(group_heading)
        for fld_label, fld_name, fld_multiline in party_fields:
            fld_key = f"{fld_name}_{selected_file}"
            make_widget = st.text_area if fld_multiline else st.text_input
            extra_kwargs = {'height': 60} if fld_multiline else {}

            col_input, col_btn = st.columns([5, 1])
            with col_input:
                make_widget(
                    fld_label,
                    value=header.get(fld_name, ''),
                    key=fld_key,
                    # Bind loop variables as defaults; the callback fires later.
                    on_change=lambda f=fld_name, k=fld_key: sync_field_to_data(
                        selected_file, 'header', f, st.session_state[k]
                    ),
                    **extra_kwargs,
                )
            with col_btn:
                # Spacer so the button lines up with the input, not its label.
                st.markdown("<br>", unsafe_allow_html=True)
                if st.button(
                    "π",
                    key=f"ocr_{fld_name}_{selected_file}",
                    type="primary" if is_ocr_active('header', fld_name) else "secondary",
                ):
                    activate_ocr_field('header', fld_name)
| # TAB 3: Bank Details | |
with tab3:
    # Bank details. All of them live in the 'header' section of gt_parse and
    # share one layout (text input + OCR button), so they are generated from a
    # table rather than repeated per field.
    header = gt_parse.get('header', {})

    # (label, header field name) in display order.
    bank_fields = [
        ("Bank IBAN", 'bank_iban'),
        ("Bank Name", 'bank_name'),
        ("Bank Account No", 'bank_acc_no'),
        ("Bank Routing", 'bank_routing'),
        ("Bank SWIFT", 'bank_swift'),
        ("Bank Account Name", 'bank_acc_name'),
        ("Bank Branch", 'bank_branch'),
    ]

    for fld_label, fld_name in bank_fields:
        fld_key = f"{fld_name}_{selected_file}"
        col_input, col_btn = st.columns([5, 1])
        with col_input:
            st.text_input(
                fld_label,
                value=header.get(fld_name, ''),
                key=fld_key,
                # Bind loop variables as defaults: the callback runs later, and
                # a plain closure would only ever see the last field.
                on_change=lambda f=fld_name, k=fld_key: sync_field_to_data(
                    selected_file, 'header', f, st.session_state[k]
                ),
            )
        with col_btn:
            # Spacer so the button lines up with the input box, not its label.
            st.markdown("<br>", unsafe_allow_html=True)
            # Drawn as "primary" while this field is the active OCR target.
            if st.button(
                "π",
                key=f"ocr_{fld_name}_{selected_file}",
                type="primary" if is_ocr_active('header', fld_name) else "secondary",
            ):
                activate_ocr_field('header', fld_name)
| # TAB 4: Items | |
with tab4:
    # Line items: add/remove controls, one expander per item with an editable
    # widget + OCR button per field, and a read-only summary table.

    # (label, item dict key, widget-key prefix, multiline?) in display order.
    # The widget-key prefix differs from the dict key for some fields, so both
    # are listed explicitly.
    item_fields = [
        ("Descriptions", 'descriptions', 'desc', True),
        ("SKU", 'SKU', 'sku', False),
        ("Quantity", 'quantity', 'qty', False),
        ("Unit Price", 'unit_price', 'unit_price', False),
        ("Amount", 'amount', 'amount', False),
        ("Discount Rate Per Item", 'discount_rate_per_item', 'discount_rate_per_item', False),
        ("Discount Amount Per Item", 'discount_amount_per_item', 'discount_amount_per_item', False),
        ("Tax Rate Per Item", 'tax_rate_per_item', 'tax_rate_per_item', False),
        ("Tax Amount Per Item", 'tax_amount_per_item', 'tax_amount_per_item', False),
        ("Line Total", 'Line_total', 'line_total', False),
    ]

    current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
    items = current_gt_parse.get('items', [])

    # Add/Remove row buttons. button_clicked is a one-shot latch: an action
    # runs only while it is down, and it is raised before the rerun.
    col_add, col_remove = st.columns([1, 1])
    with col_add:
        if st.button("β Add New Item", key=f"add_item_{selected_file}", use_container_width=True):
            if not st.session_state.button_clicked:
                st.session_state.button_clicked = True
                # Blank item with every known field, in display order.
                new_item = {item_key: "" for _, item_key, _, _ in item_fields}
                current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
                current_items = current_gt_parse.get('items', [])
                current_items.append(new_item)
                current_gt_parse['items'] = current_items
                st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
                st.session_state.modified_indices.add(selected_file)
                # Open the new item's expander on the next run.
                new_idx = len(current_items) - 1
                expander_key_new = f"line_item_expander_{selected_file}_{new_idx}"
                st.session_state[expander_key_new] = True
                st.rerun()
    with col_remove:
        if st.button("β Remove Last Item", key=f"remove_item_{selected_file}",
                     disabled=(len(items) == 0), use_container_width=True):
            if not st.session_state.button_clicked and len(items) > 0:
                st.session_state.button_clicked = True
                current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
                current_items = current_gt_parse.get('items', [])
                popped_idx = len(current_items) - 1
                current_items.pop()
                current_gt_parse['items'] = current_items
                st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
                st.session_state.modified_indices.add(selected_file)
                # Forget the expander state of the removed row.
                expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
                if expander_key_popped in st.session_state:
                    del st.session_state[expander_key_popped]
                st.rerun()
    if st.session_state.button_clicked:
        st.session_state.button_clicked = False

    # Re-read the items so the widgets below reflect the stored state.
    current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
    items = current_gt_parse.get('items', [])

    if items:
        for idx, item in enumerate(items):
            expander_key = f"line_item_expander_{selected_file}_{idx}"
            expanded_default = st.session_state.get(expander_key, False)

            # Title preview: tolerate None / non-string descriptions instead of
            # failing on the slice; string values are shown exactly as before.
            raw_desc = item.get('descriptions', 'N/A')
            desc_preview = 'N/A' if raw_desc is None else str(raw_desc)[:30]

            with st.expander(f"**Item {idx + 1}** - {desc_preview}", expanded=expanded_default):
                for fld_label, item_key, key_prefix, fld_multiline in item_fields:
                    fld_key = f"{key_prefix}_{selected_file}_{idx}"
                    make_widget = st.text_area if fld_multiline else st.text_input
                    extra_kwargs = {'height': 60} if fld_multiline else {}

                    col_input, col_btn = st.columns([5, 1])
                    with col_input:
                        make_widget(
                            fld_label,
                            value=item.get(item_key, ''),
                            key=fld_key,
                            # Bind row index, field and widget key as defaults;
                            # the callback fires after the loops have finished.
                            on_change=lambda i=idx, f=item_key, k=fld_key: sync_field_to_data(
                                selected_file, 'items', f, st.session_state[k], i
                            ),
                            **extra_kwargs,
                        )
                    with col_btn:
                        # Spacer so the button lines up with the input box.
                        st.markdown("<br>", unsafe_allow_html=True)
                        if st.button(
                            "π",
                            key=f"ocr_{key_prefix}_{selected_file}_{idx}",
                            type="primary" if is_ocr_active('items', item_key, idx) else "secondary",
                        ):
                            # Keep this item's expander open while OCR is active.
                            st.session_state[expander_key] = True
                            activate_ocr_field('items', item_key, idx)

        st.markdown("**π Items Summary Table**")
        df = pd.DataFrame(items)
        # Show 1-based serial numbers instead of the 0-based index.
        df.index = df.index + 1
        df.index.name = 'SL No'
        st.dataframe(
            df,
            use_container_width=True,
            height=300
        )
    else:
        st.info("No items. Click 'β Add New Item' to add a new item.")
| # Save button | |
# Save button for the selected file, followed by the last save confirmation.
# NOTE(review): the original indentation was lost; the latch reset and the
# message are assumed to sit outside ``with col1`` - confirm against the app.
col1, col2 = st.columns([1, 1])
with col1:
    save_pressed = st.button(
        "πΎ Save Changes",
        type="primary",
        use_container_width=True,
        key=f"save_btn_{selected_file}",
    )
    # just_saved is a one-shot latch: the save runs only while it is down and
    # it is raised before the rerun.
    if save_pressed and not st.session_state.just_saved:
        st.session_state.just_saved = True
        auto_save(selected_file)
        st.session_state.save_message = "β Changes saved successfully!"
        st.session_state.save_message_time = time.time()
        st.rerun()

# Release the latch on the run that follows a save.
if st.session_state.just_saved:
    st.session_state.just_saved = False

pending_message = st.session_state.save_message
if pending_message:
    st.success(pending_message)

st.markdown('</div>', unsafe_allow_html=True)  # Close scrollable container