import os from pathlib import Path import streamlit as st import json import io from PIL import Image import time import pandas as pd from streamlit_drawable_canvas import st_canvas import pytesseract import numpy as np from datetime import datetime import fitz # PyMuPDF for PDF handling # ----------------------------- # Environment hardening (HF Spaces, /.cache issue) # ----------------------------- _home = os.environ.get("HOME", "") if _home in ("", "/", None): repo_dir = os.getcwd() safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp" os.environ["HOME"] = safe_home print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}") streamlit_dir = Path(os.environ["HOME"]) / ".streamlit" try: streamlit_dir.mkdir(parents=True, exist_ok=True) print(f"[startup] ensured {streamlit_dir}") except Exception as e: print(f"[startup] WARNING: could not create {streamlit_dir}: {e}") # Set Tesseract path - auto-detect based on OS if os.name == 'nt': # Windows pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" else: # Linux/Mac (HF Spaces uses Linux) pass # Page configuration st.set_page_config(page_title="Remittance Data Viewer", layout="wide") # Custom CSS st.markdown(""" """, unsafe_allow_html=True) def load_jsonl(file): """Load JSONL file and return list of records""" data = [] content = file.getvalue().decode('utf-8') for line in content.strip().split('\n'): if line.strip(): data.append(json.loads(line)) return data def get_file_names_from_record(record, actual_file_name): """Generate file_name or file_names based on the PDF metadata""" if not actual_file_name: return record.get('file_name', record.get('file_names', [])) # Check if it's a PDF with multiple pages is_pdf = actual_file_name in st.session_state.pdf_metadata if is_pdf: pdf_meta = st.session_state.pdf_metadata[actual_file_name] total_pages = pdf_meta['total_pages'] base_name = os.path.splitext(actual_file_name)[0] if total_pages > 1: # Return list of file names for multi-page PDF return [f"{base_name}_page{i+1}.png" for i in range(total_pages)] else: # Single page PDF return f"{base_name}.png" else: # Regular image file if not actual_file_name.lower().endswith('.png'): base_name = os.path.splitext(actual_file_name)[0] return f"{base_name}.png" return actual_file_name def save_to_jsonl(data, transform_filenames=False): """Convert data list to JSONL format""" if transform_filenames: transformed_data = [] for record in data: new_record = {} # First, determine file_name(s) and add to new_record original_file_name = record.get('file_name', '') if not original_file_name and 'file_names' in record: # Handle file_names field if it exists original_file_name = record.get('file_names', [])[0] if record.get('file_names', []) else '' # Find the actual uploaded file actual_file_name = None if original_file_name: if original_file_name in st.session_state.images: actual_file_name = original_file_name else: # Try with extensions for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if original_file_name + ext in st.session_state.images: actual_file_name = original_file_name + ext break # Try without extension if not actual_file_name: for uploaded_name in st.session_state.images.keys(): uploaded_base = os.path.splitext(uploaded_name)[0] if uploaded_base == original_file_name: actual_file_name = uploaded_name break # Get the transformed file name(s) new_file_names = get_file_names_from_record(record, actual_file_name) # Add file_name or file_names as the FIRST field if isinstance(new_file_names, list): new_record['file_names'] = new_file_names else: new_record['file_name'] = new_file_names # Now add all other fields (excluding the original file_name/file_names) for key, value in record.items(): if key not in ['file_name', 'file_names']: new_record[key] = value transformed_data.append(new_record) data = transformed_data jsonl_content = '\n'.join([json.dumps(record) for record in data]) return jsonl_content def pdf_to_images(pdf_file): """Convert PDF to list of PIL Images (one per page)""" try: pdf_bytes = pdf_file.read() pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf") images = [] for page_num in range(pdf_document.page_count): page = pdf_document[page_num] # Render page to an image (higher DPI for better quality) # Using 3x zoom (300 DPI equivalent) for much better clarity pix = page.get_pixmap(matrix=fitz.Matrix(3, 3), alpha=False) img_data = pix.tobytes("png") img = Image.open(io.BytesIO(img_data)) images.append(img) pdf_document.close() return images except Exception as e: st.error(f"Error converting PDF: {str(e)}") return [] def perform_ocr(image, bbox): """Perform OCR on the selected region of the image""" try: x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]) x1, y1 = max(0, x1), max(0, y1) x2, y2 = min(image.width, x2), min(image.height, y2) cropped = image.crop((x1, y1, x2, y2)) text = pytesseract.image_to_string(cropped, config='--psm 6').strip() return text except Exception as e: return f"OCR Error: {str(e)}" def scale_image_to_fixed_size(image, max_width=900, max_height=1100): """Scale image to fit within max dimensions while maintaining aspect ratio - NO PADDING""" # Convert to RGB with proper handling if image.mode not in ('RGB', 'RGBA'): image = image.convert('RGB') elif image.mode == 'RGBA': background = Image.new('RGB', image.size, (255, 255, 255)) background.paste(image, mask=image.split()[3]) image = background # Calculate scaling ratio width_ratio = max_width / image.width height_ratio = max_height / image.height ratio = min(width_ratio, height_ratio) new_width = int(image.width * ratio) new_height = int(image.height * ratio) # Always use LANCZOS for highest quality resampling resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) # Return without padding - image takes only the space it needs return resized_image, ratio, 0, 0 def swap_customer_supplier_details(index): """Swap customer and supplier details""" gt_parse = st.session_state.edited_data[index].get('gt_parse', {}) customer_supplier = gt_parse.get('Customer_supplier_details', {}) # Store customer values temp_customer_name = customer_supplier.get('Customer_name', '') temp_customer_address = customer_supplier.get('Customer_address', '') temp_customer_contact = customer_supplier.get('Customer_contact_info', '') # Swap: Customer ← Supplier customer_supplier['Customer_name'] = customer_supplier.get('Supplier_name', '') customer_supplier['Customer_address'] = customer_supplier.get('Supplier_address', '') customer_supplier['Customer_contact_info'] = customer_supplier.get('Supplier_contact_info', '') # Swap: Supplier ← Customer (from temp) customer_supplier['Supplier_name'] = temp_customer_name customer_supplier['Supplier_address'] = temp_customer_address customer_supplier['Supplier_contact_info'] = temp_customer_contact # Update session state gt_parse['Customer_supplier_details'] = customer_supplier st.session_state.edited_data[index]['gt_parse'] = gt_parse st.session_state.modified_indices.add(index) def get_display_name(record): """Get display name from record, handling both file_name and file_names""" if 'file_name' in record: return record['file_name'] elif 'file_names' in record and record['file_names']: if isinstance(record['file_names'], list): return record['file_names'][0] if record['file_names'] else 'Unnamed' return record['file_names'] return 'Unnamed' def find_actual_file(file_name): """Find the actual uploaded file matching the given file_name""" if not file_name: return None # FIRST: Check if this is a generated filename from a multi-page PDF (e.g., "12_page1.png") if file_name.endswith('.png') and '_page' in file_name: base_name = os.path.splitext(file_name)[0] # Remove .png # Extract the base name before _pageN if '_page' in base_name: pdf_base = base_name.split('_page')[0] # Try to find the PDF file if pdf_base + '.pdf' in st.session_state.images: return pdf_base + '.pdf' # Also try without any extension in case original didn't have it for uploaded_name in st.session_state.images.keys(): uploaded_base = os.path.splitext(uploaded_name)[0] if uploaded_base == pdf_base and uploaded_name.lower().endswith('.pdf'): return uploaded_name # Try exact match if file_name in st.session_state.images: return file_name # Try with extensions for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if file_name + ext in st.session_state.images: return file_name + ext # Try matching without extension for uploaded_name in st.session_state.images.keys(): uploaded_base = os.path.splitext(uploaded_name)[0] if uploaded_base == file_name: return uploaded_name return None # Initialize session state if 'data' not in st.session_state: st.session_state.data = None if 'current_index' not in st.session_state: st.session_state.current_index = 0 if 'edited_data' not in st.session_state: st.session_state.edited_data = None if 'page' not in st.session_state: st.session_state.page = 'upload' if 'images' not in st.session_state: st.session_state.images = {} if 'pdf_metadata' not in st.session_state: st.session_state.pdf_metadata = {} # Store {filename: {'pages': [images], 'current_page': 0}} if 'current_page_num' not in st.session_state: st.session_state.current_page_num = {} # Track current page for each file if 'modified_indices' not in st.session_state: st.session_state.modified_indices = set() if 'ocr_active_section' not in st.session_state: st.session_state.ocr_active_section = None if 'ocr_active_field' not in st.session_state: st.session_state.ocr_active_field = None if 'ocr_line_item_row' not in st.session_state: st.session_state.ocr_line_item_row = None if 'canvas_key' not in st.session_state: st.session_state.canvas_key = 0 if 'line_items_temp' not in st.session_state: st.session_state.line_items_temp = [] if 'button_clicked' not in st.session_state: st.session_state.button_clicked = False if 'save_message' not in st.session_state: st.session_state.save_message = None if 'save_message_time' not in st.session_state: st.session_state.save_message_time = None if 'just_saved' not in st.session_state: st.session_state.just_saved = False if 'just_swapped' not in st.session_state: st.session_state.just_swapped = False if 'navigating_page' not in st.session_state: st.session_state.navigating_page = False def auto_save(index): """Automatically save changes to session state and mark as modified""" if st.session_state.edited_data: # Get the current record current_record = st.session_state.edited_data[index] # Get original file name original_file_name = current_record.get('file_name', '') if not original_file_name and 'file_names' in current_record: original_file_name = current_record.get('file_names', [])[0] if current_record.get('file_names', []) else '' # Find actual file actual_file_name = find_actual_file(original_file_name) # Update file_name or file_names in the record new_file_names = get_file_names_from_record(current_record, actual_file_name) # Remove old keys if 'file_name' in current_record: del current_record['file_name'] if 'file_names' in current_record: del current_record['file_names'] # Create new ordered record with file_name/file_names first new_record = {} if isinstance(new_file_names, list): new_record['file_names'] = new_file_names else: new_record['file_name'] = new_file_names # Add all other fields for key, value in current_record.items(): new_record[key] = value # Update the record st.session_state.edited_data[index] = new_record st.session_state.data[index] = new_record.copy() st.session_state.modified_indices.add(index) def activate_ocr_field(section, field, row_idx=None): """Activate OCR for a specific field. Toggle behavior: if the same field is already active, deactivate it to avoid repeated activations/looping. Also ensures the line-item expander stays expanded when OCR is requested. """ # If the requested field is already active, deactivate it (toggle off) if (st.session_state.ocr_active_section == section and st.session_state.ocr_active_field == field and st.session_state.ocr_line_item_row == row_idx): st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.session_state.ocr_line_item_row = None else: # Activate new OCR target st.session_state.ocr_active_section = section st.session_state.ocr_active_field = field st.session_state.ocr_line_item_row = row_idx # If it's a line-item, mark that expander as expanded so it remains open after rerun if section == 'Line_items' and row_idx is not None: current_idx = st.session_state.get('current_index', 0) expander_key = f"line_item_expander_{current_idx}_{row_idx}" st.session_state[expander_key] = True # Bump canvas_key to ensure canvas is refreshed/cleared when toggling OCR st.session_state.canvas_key += 1 st.rerun() def is_ocr_active(section, field, row_idx=None): """Check if this OCR button is currently active""" return (st.session_state.ocr_active_section == section and st.session_state.ocr_active_field == field and st.session_state.ocr_line_item_row == row_idx) # PAGE 1: Upload Page if st.session_state.page == 'upload': st.title("📤 Remittance Data Viewer with OCR") st.markdown("### Upload your files to begin") st.markdown("**Step 1: Upload JSONL File**") uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json']) if uploaded_file is not None: try: data = load_jsonl(uploaded_file) st.session_state.data = data st.session_state.edited_data = data.copy() st.success(f"✅ Successfully loaded {len(data)} records!") except Exception as e: st.error(f"Error loading file: {str(e)}") st.markdown("**Step 2: Upload Images/PDFs Folder**") uploaded_files = st.file_uploader( "Choose image or PDF files", type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'], accept_multiple_files=True, help="Select all images and PDFs from your folder at once" ) if uploaded_files: images_dict = {} pdf_metadata = {} for file in uploaded_files: try: file_ext = file.name.lower().split('.')[-1] if file_ext == 'pdf': # Convert PDF to images pdf_images = pdf_to_images(file) if pdf_images: # Store first page as the main image images_dict[file.name] = pdf_images[0] # Store all pages in metadata pdf_metadata[file.name] = { 'pages': pdf_images, 'total_pages': len(pdf_images), 'current_page': 0 } else: # Handle regular images image = Image.open(file) images_dict[file.name] = image except Exception as e: st.warning(f"Could not load file {file.name}: {str(e)}") st.session_state.images = images_dict st.session_state.pdf_metadata = pdf_metadata # Initialize current page tracking for filename in pdf_metadata.keys(): if filename not in st.session_state.current_page_num: st.session_state.current_page_num[filename] = 0 if st.session_state.data is not None: # Get all file names from records (handle both file_name and file_names) gt_file_names = [] for rec in st.session_state.data: if 'file_name' in rec and rec['file_name']: gt_file_names.append(rec['file_name']) elif 'file_names' in rec and rec['file_names']: if isinstance(rec['file_names'], list): gt_file_names.extend(rec['file_names']) else: gt_file_names.append(rec['file_names']) matched_images = set() unmatched_gt_files = [] # Try to match with and without extensions for fname in gt_file_names: if not fname: continue # Try exact match first if fname in images_dict: matched_images.add(fname) else: # Try adding common extensions found = False for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: if fname + ext in images_dict: matched_images.add(fname) found = True break # Try matching filename without extension from uploaded files if not found: for uploaded_name in images_dict.keys(): uploaded_base = uploaded_name.rsplit('.', 1)[0] if uploaded_base == fname or uploaded_base == os.path.splitext(fname)[0]: matched_images.add(fname) found = True break for fname in gt_file_names: if fname and fname not in matched_images: unmatched_gt_files.append(fname) st.success(f"✅ Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs)!") st.info(f"🔎 Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}") if unmatched_gt_files: st.warning(f"âš ī¸ {len(unmatched_gt_files)} file(s) from JSONL not matched:") with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"): for fname in unmatched_gt_files: st.text(f" â€ĸ {fname}") else: st.success("✅ All JSONL file names matched to files!") else: st.success(f"✅ Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs)!") st.info("â„šī¸ Upload a JSONL file to see how many files match the ground truth 'file_name' field.") if st.session_state.data is not None: col1, col2, col3 = st.columns([1, 1, 1]) with col2: if st.button("Continue to Viewer →", type="primary", use_container_width=True): st.session_state.page = 'viewer' st.session_state.modified_indices = set() st.session_state.navigating_page = False st.rerun() # PAGE 2: Viewer Page elif st.session_state.page == 'viewer': if st.session_state.save_message_time is not None: if time.time() - st.session_state.save_message_time > 3: st.session_state.save_message = None st.session_state.save_message_time = None today_date = datetime.now().strftime("%Y-%m-%d") col1, col2, col3, col4 = st.columns([1, 2, 2, 2]) with col1: if st.button("← Back to Upload"): st.session_state.page = 'upload' st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.session_state.save_message = None st.session_state.save_message_time = None st.session_state.navigating_page = False st.rerun() with col2: if st.session_state.modified_indices: modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)] jsonl_modified = save_to_jsonl(modified_data, transform_filenames=True) st.download_button( label=f"âŦ‡ī¸ Download Modified ({len(modified_data)})", data=jsonl_modified, file_name=f"modified_remittance_data_{today_date}.jsonl", mime="application/jsonl", type="primary", use_container_width=True ) else: st.button("âŦ‡ī¸ No Modified Records", disabled=True, use_container_width=True) with col3: if st.session_state.modified_indices: unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data)) if i not in st.session_state.modified_indices] jsonl_unmodified = save_to_jsonl(unmodified_data, transform_filenames=True) st.download_button( label=f"âŦ‡ī¸ Download Unmodified ({len(unmodified_data)})", data=jsonl_unmodified, file_name=f"unmodified_remittance_data_{today_date}.jsonl", mime="application/jsonl", use_container_width=True ) else: st.button("âŦ‡ī¸ No Unmodified Records", disabled=True, use_container_width=True) with col4: jsonl_all = save_to_jsonl(st.session_state.edited_data, transform_filenames=True) st.download_button( label=f"âŦ‡ī¸ Download All ({len(st.session_state.edited_data)})", data=jsonl_all, file_name=f"all_remittance_data_{today_date}.jsonl", mime="application/jsonl", use_container_width=True ) # Get display names for all records file_names = [get_display_name(record) for record in st.session_state.data or []] # Guard: no records at all if not file_names: st.error("No records loaded. Please upload a JSONL file on the Upload page.") if st.button("← Back to Upload"): st.session_state.page = 'upload' st.rerun() else: # Build options (list is safer than range for length checks) options = list(range(len(file_names))) # Ensure edited_data exists and has consistent length if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names): # try to sync edited_data to data st.session_state.edited_data = (st.session_state.data or []).copy() # Clamp current_index into valid range cur_idx = st.session_state.get('current_index', 0) try: cur_idx = int(cur_idx) except Exception: cur_idx = 0 if cur_idx < 0: cur_idx = 0 if cur_idx >= len(options): cur_idx = len(options) - 1 # Show selectbox with a safe index selected_file = st.selectbox( "Select a file to view:", options=options, format_func=lambda x: f"{'âœī¸ ' if x in st.session_state.modified_indices else ''}{file_names[x]}", index=cur_idx ) # Persist chosen index st.session_state.current_index = selected_file # Safe access to the current record current_record = st.session_state.edited_data[selected_file] left_col, right_col = st.columns([1.6, 1.0], gap="small") # LEFT SIDE: Image Display with OCR Canvas with left_col: with st.container(height=700, border=False): # Get file name from record original_file_name = get_display_name(current_record) if original_file_name: # Find the actual file name actual_file_name = find_actual_file(original_file_name) if actual_file_name: # Check if this is a PDF with multiple pages is_pdf = actual_file_name in st.session_state.pdf_metadata if is_pdf: pdf_meta = st.session_state.pdf_metadata[actual_file_name] total_pages = pdf_meta['total_pages'] current_page = st.session_state.current_page_num.get(actual_file_name, 0) # PDF Navigation Header col_prev, col_info, col_next = st.columns([1, 2, 1]) with col_prev: prev_clicked = st.button("âŦ…ī¸ Previous", key=f"prev_page_{selected_file}_{actual_file_name}", disabled=(current_page == 0), use_container_width=True) with col_info: st.markdown(f"
📄 Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True) with col_next: next_clicked = st.button("Next âžĄī¸", key=f"next_page_{selected_file}_{actual_file_name}", disabled=(current_page >= total_pages - 1), use_container_width=True) # Handle navigation only if not already navigating if not st.session_state.navigating_page: if prev_clicked: st.session_state.navigating_page = True st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1) st.session_state.canvas_key += 1 st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.rerun() elif next_clicked: st.session_state.navigating_page = True st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1) st.session_state.canvas_key += 1 st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None st.rerun() else: # Reset the flag after rerun st.session_state.navigating_page = False # Determine if PDF and get the appropriate image is_pdf = actual_file_name in st.session_state.pdf_metadata if is_pdf: # Get the current page image current_page = st.session_state.current_page_num.get(actual_file_name, 0) pdf_meta = st.session_state.pdf_metadata[actual_file_name] current_image = pdf_meta['pages'][current_page] else: current_image = st.session_state.images[actual_file_name] if current_image: # Scale to a reasonable size so canvas doesn't become excessively large scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image) # Render the canvas. Its internal canvas will be constrained by the wrapper due to CSS above. canvas_result = st_canvas( fill_color="rgba(255, 165, 0, 0.3)", stroke_width=2, stroke_color="#FF0000", background_image=scaled_image, update_streamlit=True, height=scaled_image.height, width=scaled_image.width, drawing_mode="rect", key=f"canvas_{selected_file}_{st.session_state.canvas_key}", ) # Only attempt OCR if there's an active OCR target AND the user has drawn something (objects exist) if canvas_result.json_data is not None and st.session_state.ocr_active_field: objects = canvas_result.json_data.get("objects", []) if len(objects) > 0: rect = objects[-1] bbox = [ (rect["left"] - paste_x) / scale_ratio, (rect["top"] - paste_y) / scale_ratio, (rect["left"] + rect["width"] - paste_x) / scale_ratio, (rect["top"] + rect["height"] - paste_y) / scale_ratio ] with st.spinner("Performing OCR..."): ocr_text = perform_ocr(current_image, bbox) if ocr_text and not ocr_text.startswith("OCR Error"): st.success(f"✅ OCR Result: {ocr_text}") gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) if st.session_state.ocr_active_section == 'Line_items': line_items = gt_parse.get('Line_items', []) row_idx = st.session_state.ocr_line_item_row if row_idx is not None and row_idx < len(line_items): line_items[row_idx][st.session_state.ocr_active_field] = ocr_text gt_parse['Line_items'] = line_items # ensure expander stays open for this row after OCR expander_key = f"line_item_expander_{selected_file}_{row_idx}" st.session_state[expander_key] = True else: section = st.session_state.ocr_active_section field = st.session_state.ocr_active_field if section not in gt_parse: gt_parse[section] = {} gt_parse[section][field] = ocr_text st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse st.session_state.modified_indices.add(selected_file) # Keep the OCR field active so user can draw multiple rectangles for the same field # Field will only change when user clicks a different OCR button # Clear canvas for next OCR by bumping canvas_key then rerun st.session_state.canvas_key += 1 st.rerun() else: st.error(ocr_text) else: st.error(f"❌ File '{original_file_name}' not found in uploaded files") st.info("💡 Available files:") with st.expander("Show available files"): for img_name in list(st.session_state.images.keys())[:20]: st.text(f" â€ĸ {img_name}") if len(st.session_state.images) > 20: st.text(f" ... and {len(st.session_state.images) - 20} more") else: st.warning("No file name specified in record") # RIGHT SIDE: Editable Details with right_col: with st.container(height=700, border=False): st.markdown("### 📝 Document Details") gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) tab1, tab2, tab3, tab4 = st.tabs([ "📄 Remittance Details", "đŸ‘Ĩ Party Details", "đŸĻ Bank Details", "📋 Line Items" ]) # TAB 1: Remittance Details with tab1: remittance = gt_parse.get('Remittance_details', {}) # Each field with OCR button col_input, col_btn = st.columns([5, 1]) with col_input: remittance['Remittance_adv_no'] = st.text_input( "Remittance Advice No", value=remittance.get('Remittance_adv_no', ''), key=f"rem_adv_no_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_rem_adv_no_{selected_file}", type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_no') else "secondary"): activate_ocr_field('Remittance_details', 'Remittance_adv_no') col_input, col_btn = st.columns([5, 1]) with col_input: remittance['Remittance_adv_date'] = st.text_input( "Remittance Advice Date", value=remittance.get('Remittance_adv_date', ''), key=f"rem_adv_date_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_rem_adv_date_{selected_file}", type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_date') else "secondary"): activate_ocr_field('Remittance_details', 'Remittance_adv_date') col_input, col_btn = st.columns([5, 1]) with col_input: remittance['Payment_method'] = st.text_input( "Payment Method", value=remittance.get('Payment_method', ''), key=f"payment_method_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_payment_method_{selected_file}", type="primary" if is_ocr_active('Remittance_details', 'Payment_method') else "secondary"): activate_ocr_field('Remittance_details', 'Payment_method') col_input, col_btn = st.columns([5, 1]) with col_input: remittance['FCY'] = st.text_input( "FCY (Foreign Currency)", value=remittance.get('FCY', ''), key=f"fcy_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_fcy_{selected_file}", type="primary" if is_ocr_active('Remittance_details', 'FCY') else "secondary"): activate_ocr_field('Remittance_details', 'FCY') col_input, col_btn = st.columns([5, 1]) with col_input: remittance['Total_payment_amt_FCY'] = st.text_input( "Total Payment Amount (FCY)", value=remittance.get('Total_payment_amt_FCY', ''), key=f"total_payment_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_total_payment_{selected_file}", type="primary" if is_ocr_active('Remittance_details', 'Total_payment_amt_FCY') else "secondary"): activate_ocr_field('Remittance_details', 'Total_payment_amt_FCY') col_input, col_btn = st.columns([5, 1]) with col_input: remittance['Payment_date'] = st.text_input( "Payment Date", value=remittance.get('Payment_date', ''), key=f"payment_date_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_payment_date_{selected_file}", type="primary" if is_ocr_active('Remittance_details', 'Payment_date') else "secondary"): activate_ocr_field('Remittance_details', 'Payment_date') col_input, col_btn = st.columns([5, 1]) with col_input: remittance['Payment_ref_no'] = st.text_input( "Payment Reference No", value=remittance.get('Payment_ref_no', ''), key=f"payment_ref_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_payment_ref_{selected_file}", type="primary" if is_ocr_active('Remittance_details', 'Payment_ref_no') else "secondary"): activate_ocr_field('Remittance_details', 'Payment_ref_no') gt_parse['Remittance_details'] = remittance # TAB 2: Customer/Supplier Details with SWAP button with tab2: # SWAP BUTTON - Centered and prominent col1, col2, col3 = st.columns([1, 2, 1]) with col2: if st.button("🔄 Swap Customer ↔ Supplier", key=f"swap_btn_{selected_file}", type="primary", use_container_width=True): if not st.session_state.just_swapped: st.session_state.just_swapped = True swap_customer_supplier_details(selected_file) st.rerun() # Reset the flag after rerun if st.session_state.just_swapped: st.session_state.just_swapped = False st.markdown("**Customer Details**") customer_supplier = gt_parse.get('Customer_supplier_details', {}) col_input, col_btn = st.columns([5, 1]) with col_input: customer_supplier['Customer_name'] = st.text_input( "Customer Name", value=customer_supplier.get('Customer_name', ''), key=f"cust_name_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_cust_name_{selected_file}", type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_name') else "secondary"): activate_ocr_field('Customer_supplier_details', 'Customer_name') col_input, col_btn = st.columns([5, 1]) with col_input: customer_supplier['Customer_address'] = st.text_area( "Customer Address", value=customer_supplier.get('Customer_address', ''), key=f"cust_addr_{selected_file}", height=60 ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_cust_addr_{selected_file}", type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_address') else "secondary"): activate_ocr_field('Customer_supplier_details', 'Customer_address') col_input, col_btn = st.columns([5, 1]) with col_input: customer_supplier['Customer_contact_info'] = st.text_input( "Customer Contact Info", value=customer_supplier.get('Customer_contact_info', ''), key=f"cust_contact_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_cust_contact_{selected_file}", type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_contact_info') else "secondary"): activate_ocr_field('Customer_supplier_details', 'Customer_contact_info') st.markdown("**Supplier Details**") col_input, col_btn = st.columns([5, 1]) with col_input: customer_supplier['Supplier_name'] = st.text_input( "Supplier Name", value=customer_supplier.get('Supplier_name', ''), key=f"supp_name_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_supp_name_{selected_file}", type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_name') else "secondary"): activate_ocr_field('Customer_supplier_details', 'Supplier_name') col_input, col_btn = st.columns([5, 1]) with col_input: customer_supplier['Supplier_address'] = st.text_area( "Supplier Address", value=customer_supplier.get('Supplier_address', ''), key=f"supp_addr_{selected_file}", height=60 ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_supp_addr_{selected_file}", type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_address') else "secondary"): activate_ocr_field('Customer_supplier_details', 'Supplier_address') col_input, col_btn = st.columns([5, 1]) with col_input: customer_supplier['Supplier_contact_info'] = st.text_input( "Supplier Contact Info", value=customer_supplier.get('Supplier_contact_info', ''), key=f"supp_contact_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_supp_contact_{selected_file}", type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_contact_info') else "secondary"): activate_ocr_field('Customer_supplier_details', 'Supplier_contact_info') gt_parse['Customer_supplier_details'] = customer_supplier # TAB 3: Bank Details with tab3: bank = gt_parse.get('Bank_details', {}) col_input, col_btn = st.columns([5, 1]) with col_input: bank['Bank_name'] = st.text_input( "Bank Name", value=bank.get('Bank_name', ''), key=f"bank_name_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_name_{selected_file}", type="primary" if is_ocr_active('Bank_details', 'Bank_name') else "secondary"): activate_ocr_field('Bank_details', 'Bank_name') col_input, col_btn = st.columns([5, 1]) with col_input: bank['Bank_acc_no'] = st.text_input( "Bank Account No", value=bank.get('Bank_acc_no', ''), key=f"bank_acc_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_acc_{selected_file}", type="primary" if is_ocr_active('Bank_details', 'Bank_acc_no') else "secondary"): activate_ocr_field('Bank_details', 'Bank_acc_no') col_input, col_btn = st.columns([5, 1]) with col_input: bank['Bank_routing_no'] = st.text_input( "Bank Routing No", value=bank.get('Bank_routing_no', ''), key=f"bank_routing_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_bank_routing_{selected_file}", type="primary" if is_ocr_active('Bank_details', 'Bank_routing_no') else "secondary"): activate_ocr_field('Bank_details', 'Bank_routing_no') col_input, col_btn = st.columns([5, 1]) with col_input: bank['Swift_code'] = st.text_input( "SWIFT Code", value=bank.get('Swift_code', ''), key=f"swift_{selected_file}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_swift_{selected_file}", type="primary" if is_ocr_active('Bank_details', 'Swift_code') else "secondary"): activate_ocr_field('Bank_details', 'Swift_code') gt_parse['Bank_details'] = bank # TAB 4: Line Items with tab4: current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) line_items = current_gt_parse.get('Line_items', []) # Add/Remove row buttons col_add, col_remove = st.columns([1, 1]) with col_add: if st.button("➕ Add New Row", key=f"add_row_{selected_file}", use_container_width=True): if not st.session_state.button_clicked: st.session_state.button_clicked = True new_row = { "Po_number": "", "Invoice_no": "", "Other_doc_ref_no": "", "Invoice_date": "", "Invoice_amount_FCY": "", "Amount_paid_for_each_invoice": "", "Outstanding_balance_FCY": "", "Discounts_taken_FCY": "", "Adjustments(without_holding_tax)_FCY": "", "Descriptions": "" } current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) current_line_items = current_gt_parse.get('Line_items', []) current_line_items.append(new_row) current_gt_parse['Line_items'] = current_line_items st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse st.session_state.modified_indices.add(selected_file) # Ensure the newly added row's expander is open new_idx = len(current_line_items) - 1 expander_key_new = f"line_item_expander_{selected_file}_{new_idx}" st.session_state[expander_key_new] = True st.rerun() with col_remove: if st.button("➖ Remove Last Row", key=f"remove_row_{selected_file}", disabled=(len(line_items) == 0), use_container_width=True): if not st.session_state.button_clicked and len(line_items) > 0: st.session_state.button_clicked = True current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) current_line_items = current_gt_parse.get('Line_items', []) N = len(current_line_items) current_line_items.pop() current_gt_parse['Line_items'] = current_line_items st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse st.session_state.modified_indices.add(selected_file) # Remove the expander flag for the popped row (if present) popped_idx = N - 1 expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}" if expander_key_popped in st.session_state: del st.session_state[expander_key_popped] st.rerun() if st.session_state.button_clicked: st.session_state.button_clicked = False # Display each row as an expander with OCR buttons current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) line_items = current_gt_parse.get('Line_items', []) if line_items: for idx, item in enumerate(line_items): # Use a persistent session_state flag so expansion state is preserved across reruns. expander_key = f"line_item_expander_{selected_file}_{idx}" expanded_default = st.session_state.get(expander_key, False) # Note: do NOT pass a 'key' arg to st.expander to maintain compatibility; control expanded via session_state flag. with st.expander(f"**Row {idx + 1}** - Invoice: {item.get('Invoice_no', 'N/A')}", expanded=expanded_default): # PO Number col_input, col_btn = st.columns([5, 1]) with col_input: item['Po_number'] = st.text_input( "PO Number", value=item.get('Po_number', ''), key=f"po_num_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_po_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Po_number', idx) else "secondary"): # ensure expander stays open when user explicitly requests OCR st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Po_number', idx) # Invoice No col_input, col_btn = st.columns([5, 1]) with col_input: item['Invoice_no'] = st.text_input( "Invoice No", value=item.get('Invoice_no', ''), key=f"inv_no_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_inv_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Invoice_no', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Invoice_no', idx) # Other Doc Ref No col_input, col_btn = st.columns([5, 1]) with col_input: item['Other_doc_ref_no'] = st.text_input( "Other Doc Ref No", value=item.get('Other_doc_ref_no', ''), key=f"other_doc_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_other_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Other_doc_ref_no', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Other_doc_ref_no', idx) # Invoice Date col_input, col_btn = st.columns([5, 1]) with col_input: item['Invoice_date'] = st.text_input( "Invoice Date", value=item.get('Invoice_date', ''), key=f"inv_date_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_inv_date_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Invoice_date', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Invoice_date', idx) # Invoice Amount FCY col_input, col_btn = st.columns([5, 1]) with col_input: item['Invoice_amount_FCY'] = st.text_input( "Invoice Amount FCY", value=item.get('Invoice_amount_FCY', ''), key=f"inv_amt_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_inv_amt_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Invoice_amount_FCY', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Invoice_amount_FCY', idx) # Amount Paid col_input, col_btn = st.columns([5, 1]) with col_input: item['Amount_paid_for_each_invoice'] = st.text_input( "Amount Paid", value=item.get('Amount_paid_for_each_invoice', ''), key=f"amt_paid_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_amt_paid_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Amount_paid_for_each_invoice', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Amount_paid_for_each_invoice', idx) # Outstanding Balance col_input, col_btn = st.columns([5, 1]) with col_input: item['Outstanding_balance_FCY'] = st.text_input( "Outstanding Balance FCY", value=item.get('Outstanding_balance_FCY', ''), key=f"out_bal_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_out_bal_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Outstanding_balance_FCY', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Outstanding_balance_FCY', idx) # Discounts col_input, col_btn = st.columns([5, 1]) with col_input: item['Discounts_taken_FCY'] = st.text_input( "Discounts Taken FCY", value=item.get('Discounts_taken_FCY', ''), key=f"disc_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_disc_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Discounts_taken_FCY', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Discounts_taken_FCY', idx) # Adjustments col_input, col_btn = st.columns([5, 1]) with col_input: item['Adjustments(without_holding_tax)_FCY'] = st.text_input( "Adjustments FCY", value=item.get('Adjustments(without_holding_tax)_FCY', ''), key=f"adj_{selected_file}_{idx}" ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_adj_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) # Descriptions col_input, col_btn = st.columns([5, 1]) with col_input: item['Descriptions'] = st.text_area( "Descriptions", value=item.get('Descriptions', ''), key=f"desc_{selected_file}_{idx}", height=60 ) with col_btn: st.markdown("
", unsafe_allow_html=True) if st.button("🔍", key=f"ocr_desc_{selected_file}_{idx}", type="primary" if is_ocr_active('Line_items', 'Descriptions', idx) else "secondary"): st.session_state[expander_key] = True activate_ocr_field('Line_items', 'Descriptions', idx) # Update line items back to gt_parse current_gt_parse['Line_items'] = line_items st.markdown("**📊 Line Items Summary Table**") # Display summary table with index starting from 1 df = pd.DataFrame(line_items) df.index = df.index + 1 # Start index from 1 df.index.name = 'SL No' st.dataframe( df, use_container_width=True, height=300 ) else: st.info("No line items. Click '➕ Add New Row' to add a new row.") st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse # Save button col1, col2 = st.columns([1, 1]) with col1: if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"): if not st.session_state.just_saved: st.session_state.just_saved = True auto_save(selected_file) st.session_state.save_message = "✅ Changes saved successfully!" st.session_state.save_message_time = time.time() st.rerun() if st.session_state.just_saved: st.session_state.just_saved = False if st.session_state.save_message: st.success(st.session_state.save_message)