Spaces:
Sleeping
Sleeping
| import os | |
| from pathlib import Path | |
| import streamlit as st | |
| import json | |
| import io | |
| from PIL import Image | |
| import time | |
| import pandas as pd | |
| from streamlit_drawable_canvas import st_canvas | |
| import pytesseract | |
| import numpy as np | |
| from datetime import datetime | |
| import fitz # PyMuPDF for PDF handling | |
# -----------------------------
# Environment hardening (HF Spaces, /.cache issue)
# -----------------------------
# When HOME is empty or "/", redirect it to the working directory if that is
# writable (otherwise /tmp) so that ~/.streamlit can be created below.
_home = os.environ.get("HOME", "")
if not _home or _home == "/":
    repo_dir = os.getcwd()
    if os.access(repo_dir, os.W_OK):
        safe_home = repo_dir
    else:
        safe_home = "/tmp"
    os.environ["HOME"] = safe_home
    print(f"[startup] HOME not set or unwritable β setting HOME={safe_home}")

# Best effort: failing to create the directory is logged, never fatal.
streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
try:
    streamlit_dir.mkdir(parents=True, exist_ok=True)
    print(f"[startup] ensured {streamlit_dir}")
except Exception as e:
    print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")

# Set Tesseract path - auto-detect based on OS.
# Only Windows gets an explicit path; on Linux/Mac (HF Spaces uses Linux)
# pytesseract's own default command is left untouched.
if os.name == 'nt':
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Stylesheet injected once per run. It tightens spacing around form widgets,
# compacts buttons (primary buttons are red), and makes every Streamlit column
# independently scrollable with a slim custom scrollbar.
_CUSTOM_CSS = """
<style>
/* Reduce spacing between form fields */
.stTextInput > div > div > input,
.stTextArea > div > div > textarea,
.stSelectbox > div > div > div {
margin-bottom: 0px !important;
}
div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextInput"]),
div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextArea"]),
div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stSelectbox"]) {
margin-bottom: 4px !important;
}
.stSelectbox { margin-bottom: 4px !important; }
/* Button styling */
.stButton > button {
padding: 0.25rem 0.5rem !important;
font-size: 0.85rem !important;
line-height: 1 !important;
min-height: 1.8rem !important;
height: 1.8rem !important;
}
.stButton > button[kind="primary"] {
background-color: #FF0000 !important;
border-color: #FF0000 !important;
color: white !important;
}
.stButton > button[kind="primary"]:hover {
background-color: #CC0000 !important;
border-color: #CC0000 !important;
}
/* Small vertical gaps */
[data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] { gap: 0.25rem !important; }
[data-testid="column"] { padding-left: 0.5rem !important; padding-right: 0.5rem !important; }
[data-testid="stHorizontalBlock"] { gap: 0.5rem !important; }
/* FIXED: Remove problematic viewport-based heights */
/* Instead use fixed pixel heights that work in iframes */
section[data-testid="stAppViewContainer"] {
overflow: visible !important;
}
.main .block-container {
overflow: visible !important;
padding-bottom: 1rem !important;
}
/* Make columns scrollable with fixed height */
div[data-testid="column"] {
max-height: 85vh !important;
overflow-y: auto !important;
overflow-x: hidden !important;
position: relative !important;
}
/* Ensure images don't expand the column */
div[data-testid="column"] img,
div[data-testid="column"] canvas {
max-width: 100% !important;
height: auto !important;
display: block !important;
}
/* Custom scrollbar styling */
div[data-testid="column"]::-webkit-scrollbar {
width: 10px;
height: 10px;
}
div[data-testid="column"]::-webkit-scrollbar-thumb {
border-radius: 8px;
background-color: rgba(0,0,0,0.3);
border: 2px solid transparent;
background-clip: padding-box;
}
div[data-testid="column"]::-webkit-scrollbar-thumb:hover {
background-color: rgba(0,0,0,0.5);
}
div[data-testid="column"]::-webkit-scrollbar-track {
background: rgba(0,0,0,0.05);
border-radius: 8px;
}
/* Firefox scrollbar */
div[data-testid="column"] {
scrollbar-width: thin;
scrollbar-color: rgba(0,0,0,0.3) rgba(0,0,0,0.05);
}
</style>
"""

# Page configuration (issued before any other Streamlit call in this script)
st.set_page_config(page_title="Remittance Data Viewer", layout="wide")

# Custom CSS
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def load_jsonl(file):
    """Parse an uploaded JSONL file into a list of records.

    Args:
        file: Object exposing ``getvalue()`` that returns the file's raw
            bytes (for example ``io.BytesIO``).

    Returns:
        list: One decoded JSON value per non-blank line, in file order.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with the 1-based line number.
    """
    # "utf-8-sig" drops a leading byte-order mark if present; without this a
    # BOM-prefixed export fails to parse on its very first line.
    content = file.getvalue().decode('utf-8-sig')
    records = []
    for line_no, raw_line in enumerate(content.split('\n'), start=1):
        line = raw_line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as err:
            # Same exception type as before, but now says which line is bad.
            raise json.JSONDecodeError(
                f"line {line_no}: {err.msg}", err.doc, err.pos
            ) from err
    return records
def get_file_names_from_record(record, actual_file_name):
    """Derive the PNG-style file name(s) to store for a record.

    Args:
        record: The record dict; consulted only when no uploaded file matched.
        actual_file_name: Name of the matching uploaded file, or a falsy value.

    Returns:
        - With no uploaded file: the record's own ``file_name`` value, else
          its ``file_names`` value, else ``[]``.
        - For a PDF listed in ``st.session_state.pdf_metadata``: a list
          ``<stem>_page<N>.png`` (N starting at 1) when it has more than one
          page, otherwise the single string ``<stem>.png``.
        - For any other file: the name unchanged if it already ends in
          ``.png`` (case-insensitive), otherwise ``<stem>.png``.
    """
    if not actual_file_name:
        return record.get('file_name', record.get('file_names', []))

    stem = os.path.splitext(actual_file_name)[0]

    if actual_file_name in st.session_state.pdf_metadata:
        page_count = st.session_state.pdf_metadata[actual_file_name]['total_pages']
        if page_count > 1:
            return [f"{stem}_page{page_no}.png" for page_no in range(1, page_count + 1)]
        return f"{stem}.png"

    # Regular image file
    if actual_file_name.lower().endswith('.png'):
        return actual_file_name
    return f"{stem}.png"
def save_to_jsonl(data, transform_filenames=False):
    """Serialize records to JSONL text.

    Args:
        data: Iterable of JSON-serializable records.
        transform_filenames: When true, every record is first rewritten so
            that its ``file_name``/``file_names`` entry is regenerated from
            the matching uploaded file and placed first; the remaining keys
            keep their original order. Input records are not modified.

    Returns:
        str: One JSON document per line, with no trailing newline (an empty
        string for empty input).
    """
    if transform_filenames:
        data = [_with_transformed_file_names(record) for record in data]
    return '\n'.join(json.dumps(record) for record in data)


def _with_transformed_file_names(record):
    """Return a copy of ``record`` with its file name field regenerated and first."""
    original_file_name = record.get('file_name', '')
    if not original_file_name:
        listed = record.get('file_names')
        if isinstance(listed, str):
            # A bare string must be used whole; indexing it with [0] would
            # yield only its first character.
            original_file_name = listed
        elif listed:
            original_file_name = listed[0]

    # Use the same matcher as the rest of the app so exports resolve names
    # (including generated "<stem>_pageN.png" names) exactly like auto_save.
    actual_file_name = find_actual_file(original_file_name)
    new_file_names = get_file_names_from_record(record, actual_file_name)

    name_key = 'file_names' if isinstance(new_file_names, list) else 'file_name'
    new_record = {name_key: new_file_names}
    for key, value in record.items():
        if key not in ('file_name', 'file_names'):
            new_record[key] = value
    return new_record
def pdf_to_images(pdf_file):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_file: File-like object holding the PDF. ``getvalue()`` is used
            when available, otherwise ``read()``.

    Returns:
        list: One PIL image per page, in page order. On any failure the
        error is reported through ``st.error`` and an empty list is returned.
    """
    try:
        # getvalue() returns the whole buffer regardless of the current read
        # position, so a stream that was already consumed still converts.
        read_all = getattr(pdf_file, "getvalue", None)
        pdf_bytes = read_all() if callable(read_all) else pdf_file.read()

        pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            images = []
            for page_num in range(pdf_document.page_count):
                page = pdf_document[page_num]
                # 3x zoom on both axes: about 216 DPI, assuming PyMuPDF's
                # 72 DPI base resolution; alpha=False renders opaque pages.
                pix = page.get_pixmap(matrix=fitz.Matrix(3, 3), alpha=False)
                images.append(Image.open(io.BytesIO(pix.tobytes("png"))))
            return images
        finally:
            # Release the document even when a page fails to render.
            pdf_document.close()
    except Exception as e:
        st.error(f"Error converting PDF: {str(e)}")
        return []
def perform_ocr(image, bbox):
    """Run Tesseract on a rectangular region of an image.

    Args:
        image: Image object exposing ``width``, ``height`` and ``crop``.
        bbox: Sequence ``[x1, y1, x2, y2]`` in image pixel coordinates.
            Values are truncated to integers, the corners may be given in
            either order, and the box is clipped to the image bounds.

    Returns:
        str: The recognised text with surrounding whitespace removed, or a
        message starting with ``"OCR Error"`` when the region is empty or
        recognition fails. This function does not raise.
    """
    try:
        x1, y1, x2, y2 = (int(value) for value in bbox[:4])
        # Accept boxes whose corners arrive swapped.
        left, right = sorted((x1, x2))
        top, bottom = sorted((y1, y2))
        # Clip to the image.
        left, top = max(0, left), max(0, top)
        right, bottom = min(image.width, right), min(image.height, bottom)
        if right <= left or bottom <= top:
            # Nothing to recognise (e.g. a click without a drag, or a box
            # entirely outside the image).
            return "OCR Error: selected region is empty"
        cropped = image.crop((left, top, right, bottom))
        return pytesseract.image_to_string(cropped, config='--psm 6').strip()
    except Exception as e:
        return f"OCR Error: {str(e)}"
def scale_image_to_fixed_size(image, max_width=900, max_height=1100):
    """Resize an image to fit a bounding box, keeping its aspect ratio.

    The image is converted to RGB first; RGBA input is flattened onto a
    white background using its alpha channel as the mask. The same ratio is
    applied to both axes, so images smaller than the box are enlarged. No
    padding is added.

    Args:
        image: PIL image to scale.
        max_width: Width of the bounding box in pixels.
        max_height: Height of the bounding box in pixels.

    Returns:
        tuple: ``(resized_image, ratio, 0, 0)`` where ``ratio`` is the scale
        factor applied; the two trailing zeros are constant x/y offsets.

    Raises:
        ValueError: If the image has zero width or height.
    """
    if image.width <= 0 or image.height <= 0:
        raise ValueError("cannot scale an image with zero width or height")

    if image.mode == 'RGBA':
        background = Image.new('RGB', image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3])
        image = background
    elif image.mode != 'RGB':
        image = image.convert('RGB')

    ratio = min(max_width / image.width, max_height / image.height)
    # Clamp to one pixel: a very elongated image would otherwise truncate to
    # a zero-sized dimension, which resize() rejects.
    new_width = max(1, int(image.width * ratio))
    new_height = max(1, int(image.height * ratio))

    resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    return resized_image, ratio, 0, 0
def swap_customer_supplier_details(index):
    """Exchange the customer and supplier fields of one edited record.

    Name, address and contact info are swapped inside
    ``gt_parse['Customer_supplier_details']`` of
    ``st.session_state.edited_data[index]``; a missing value is treated as
    an empty string. The record index is added to ``modified_indices``.
    """
    field_pairs = (
        ('Customer_name', 'Supplier_name'),
        ('Customer_address', 'Supplier_address'),
        ('Customer_contact_info', 'Supplier_contact_info'),
    )
    record = st.session_state.edited_data[index]
    gt_parse = record.get('gt_parse', {})
    parties = gt_parse.get('Customer_supplier_details', {})

    # Remember the customer side before it is overwritten.
    previous_customer = [parties.get(customer_key, '') for customer_key, _ in field_pairs]

    # Customer <- Supplier
    for customer_key, supplier_key in field_pairs:
        parties[customer_key] = parties.get(supplier_key, '')

    # Supplier <- remembered Customer
    for (_, supplier_key), old_value in zip(field_pairs, previous_customer):
        parties[supplier_key] = old_value

    # Write back in case either dict was a freshly created default.
    gt_parse['Customer_supplier_details'] = parties
    record['gt_parse'] = gt_parse
    st.session_state.modified_indices.add(index)
def get_display_name(record):
    """Pick the name shown for a record.

    A ``file_name`` key wins whenever it is present, whatever its value.
    Otherwise a non-empty ``file_names`` value is used: its first element
    when it is a list, or the value itself when it is not. With neither,
    the result is ``'Unnamed'``.
    """
    if 'file_name' in record:
        return record['file_name']

    names = record['file_names'] if 'file_names' in record else None
    if not names:
        return 'Unnamed'
    return names[0] if isinstance(names, list) else names
def find_actual_file(file_name):
    """Map a record's file name to the key of the matching uploaded file.

    Lookup order:
      1. A generated page name ``<stem>_page<N>.png`` resolves to the
         uploaded PDF whose name without extension is ``<stem>``.
      2. An exact match on the uploaded name.
      3. ``file_name`` plus one of the known extensions.
      4. An uploaded name that equals ``file_name`` once its extension is
         removed.

    Args:
        file_name: Name taken from a record; may be empty or ``None``.

    Returns:
        The matching key of ``st.session_state.images``, or ``None`` when
        ``file_name`` is falsy or nothing matches.
    """
    if not file_name:
        return None

    uploaded = st.session_state.images

    # 1. Generated name from a multi-page PDF (e.g. "12_page1.png").
    if file_name.endswith('.png'):
        stem = os.path.splitext(file_name)[0]
        # Split on the LAST "_page" so stems that themselves contain "_page"
        # (e.g. "my_pages_page2") keep their full base name, and require a
        # numeric page suffix so unrelated names are not mistaken for pages.
        pdf_base, marker, page_no = stem.rpartition('_page')
        if marker and page_no.isdigit():
            if pdf_base + '.pdf' in uploaded:
                return pdf_base + '.pdf'
            # Also accept a differently-cased ".PDF" extension.
            for uploaded_name in uploaded:
                if (uploaded_name.lower().endswith('.pdf')
                        and os.path.splitext(uploaded_name)[0] == pdf_base):
                    return uploaded_name

    # 2. Exact match
    if file_name in uploaded:
        return file_name

    # 3. Record name stored without its extension
    for ext in ('.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp'):
        if file_name + ext in uploaded:
            return file_name + ext

    # 4. Any uploaded file whose name minus extension equals the record name
    for uploaded_name in uploaded:
        if os.path.splitext(uploaded_name)[0] == file_name:
            return uploaded_name

    return None
# Initialize session state
# Each key is created only when absent, so values survive Streamlit reruns.
_SESSION_DEFAULTS = {
    'data': None,
    'current_index': 0,
    'edited_data': None,
    'page': 'upload',
    'images': {},
    # {filename: {'pages': [images], 'total_pages': int, 'current_page': 0}}
    'pdf_metadata': {},
    # Track current page for each file: {filename: page index}
    'current_page_num': {},
    'modified_indices': set(),
    'ocr_active_section': None,
    'ocr_active_field': None,
    'ocr_line_item_row': None,
    'canvas_key': 0,
    'line_items_temp': [],
    'button_clicked': False,
    'save_message': None,
    'save_message_time': None,
    'just_saved': False,
    'just_swapped': False,
    'navigating_page': False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def auto_save(index):
    """Normalise a record's file name field and store it as modified.

    The record at ``index`` in ``st.session_state.edited_data`` is rebuilt
    with a regenerated ``file_name``/``file_names`` entry first, followed
    by its other keys in their original order. The rebuilt record replaces
    the entry in ``edited_data``, a shallow copy of it replaces the entry
    in ``data``, and ``index`` is added to ``modified_indices``. Nothing
    happens when ``edited_data`` is empty or ``None``.

    Args:
        index: Position of the record to save.
    """
    if not st.session_state.edited_data:
        return

    current_record = st.session_state.edited_data[index]

    # Name used to locate the uploaded file: file_name, else the first
    # file_names entry.
    original_file_name = current_record.get('file_name', '')
    if not original_file_name:
        listed = current_record.get('file_names')
        if isinstance(listed, str):
            # A bare string must be used whole; [0] would take one character.
            original_file_name = listed
        elif listed:
            original_file_name = listed[0]

    actual_file_name = find_actual_file(original_file_name)
    new_file_names = get_file_names_from_record(current_record, actual_file_name)

    # Build a fresh dict instead of deleting keys from the existing one, so
    # other references to the old record keep their file name.
    name_key = 'file_names' if isinstance(new_file_names, list) else 'file_name'
    new_record = {name_key: new_file_names}
    for key, value in current_record.items():
        if key not in ('file_name', 'file_names'):
            new_record[key] = value

    st.session_state.edited_data[index] = new_record
    st.session_state.data[index] = new_record.copy()
    st.session_state.modified_indices.add(index)
def activate_ocr_field(section, field, row_idx=None):
    """Toggle which form field receives the next OCR result.

    Requesting the field that is already active switches OCR off; any other
    request makes that field the active target. When a line-item field is
    activated, its expander's session key is set to True. In every case the
    canvas key is incremented and the script is rerun.

    Args:
        section: Section name of the target field.
        field: Field name within the section.
        row_idx: Line-item row index, or None for non-line-item fields.
    """
    state = st.session_state

    if is_ocr_active(section, field, row_idx):
        # Same button pressed again: toggle off.
        state.ocr_active_section = None
        state.ocr_active_field = None
        state.ocr_line_item_row = None
    else:
        state.ocr_active_section = section
        state.ocr_active_field = field
        state.ocr_line_item_row = row_idx
        # NOTE(review): source indentation was lost; this branch is assumed
        # to belong to the activation path - confirm against the original.
        if section == 'Line_items' and row_idx is not None:
            current_idx = state.get('current_index', 0)
            state[f"line_item_expander_{current_idx}_{row_idx}"] = True

    # A new canvas key makes the drawing canvas start empty after the rerun.
    state.canvas_key += 1
    st.rerun()
def is_ocr_active(section, field, row_idx=None):
    """Return True when (section, field, row_idx) is the current OCR target."""
    state = st.session_state
    if state.ocr_active_section != section:
        return False
    if state.ocr_active_field != field:
        return False
    return state.ocr_line_item_row == row_idx
# PAGE 1: Upload Page
# Collects the ground-truth JSONL and the image/PDF files, reports how many
# record file names can be matched to an upload, then offers the viewer.
if st.session_state.page == 'upload':
    st.title("π€ Remittance Data Viewer with OCR")
    st.markdown("### Upload your files to begin")

    # ---- Step 1: records ----
    st.markdown("**Step 1: Upload JSONL File**")
    uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])
    if uploaded_file is not None:
        try:
            data = load_jsonl(uploaded_file)
            st.session_state.data = data
            # Separate list, same record objects (shallow copy).
            st.session_state.edited_data = list(data)
            st.success(f"β Successfully loaded {len(data)} records!")
        except Exception as e:
            st.error(f"Error loading file: {str(e)}")

    # ---- Step 2: images and PDFs ----
    st.markdown("**Step 2: Upload Images/PDFs Folder**")
    uploaded_files = st.file_uploader(
        "Choose image or PDF files",
        type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'],
        accept_multiple_files=True,
        help="Select all images and PDFs from your folder at once"
    )
    if uploaded_files:
        images_dict = {}
        pdf_metadata = {}
        for file in uploaded_files:
            try:
                if file.name.lower().rpartition('.')[2] != 'pdf':
                    # Regular image
                    images_dict[file.name] = Image.open(file)
                    continue
                pdf_images = pdf_to_images(file)
                if pdf_images:
                    # First page doubles as the file's main image; all pages
                    # are kept in the metadata entry.
                    images_dict[file.name] = pdf_images[0]
                    pdf_metadata[file.name] = {
                        'pages': pdf_images,
                        'total_pages': len(pdf_images),
                        'current_page': 0
                    }
            except Exception as e:
                st.warning(f"Could not load file {file.name}: {str(e)}")

        st.session_state.images = images_dict
        st.session_state.pdf_metadata = pdf_metadata

        # Start every newly seen PDF on its first page.
        for filename in pdf_metadata:
            st.session_state.current_page_num.setdefault(filename, 0)

        if st.session_state.data is None:
            st.success(f"β Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs)!")
            st.info("βΉοΈ Upload a JSONL file to see how many files match the ground truth 'file_name' field.")
        else:
            # Every file name mentioned by the records (file_name or file_names).
            gt_file_names = []
            for rec in st.session_state.data:
                if 'file_name' in rec and rec['file_name']:
                    gt_file_names.append(rec['file_name'])
                    continue
                if 'file_names' in rec and rec['file_names']:
                    listed_names = rec['file_names']
                    if isinstance(listed_names, list):
                        gt_file_names.extend(listed_names)
                    else:
                        gt_file_names.append(listed_names)

            known_extensions = ('.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp')
            matched_images = set()
            for fname in gt_file_names:
                if not fname:
                    continue
                # Exact name, or the name plus a common extension.
                if fname in images_dict or any(fname + ext in images_dict for ext in known_extensions):
                    matched_images.add(fname)
                    continue
                # Otherwise compare with uploaded names minus their extension.
                fname_stem = os.path.splitext(fname)[0]
                if any(uploaded_name.rsplit('.', 1)[0] in (fname, fname_stem)
                       for uploaded_name in images_dict):
                    matched_images.add(fname)

            unmatched_gt_files = [
                fname for fname in gt_file_names
                if fname and fname not in matched_images
            ]
            named_total = sum(1 for fname in gt_file_names if fname)

            st.success(f"β Successfully loaded {len(images_dict)} files ({len(pdf_metadata)} PDFs)!")
            st.info(f"π Exact matches: {len(matched_images)}/{named_total}")
            if unmatched_gt_files:
                st.warning(f"β οΈ {len(unmatched_gt_files)} file(s) from JSONL not matched:")
                with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
                    for fname in unmatched_gt_files:
                        st.text(f" β’ {fname}")
            else:
                st.success("β All JSONL file names matched to files!")

    # The viewer only needs records; images are optional.
    if st.session_state.data is not None:
        col1, col2, col3 = st.columns([1, 1, 1])
        with col2:
            if st.button("Continue to Viewer β", type="primary", use_container_width=True):
                st.session_state.page = 'viewer'
                st.session_state.modified_indices = set()
                st.session_state.navigating_page = False
                st.rerun()
| # PAGE 2: Viewer Page | |
| elif st.session_state.page == 'viewer': | |
| if st.session_state.save_message_time is not None: | |
| if time.time() - st.session_state.save_message_time > 3: | |
| st.session_state.save_message = None | |
| st.session_state.save_message_time = None | |
| today_date = datetime.now().strftime("%Y-%m-%d") | |
| col1, col2, col3, col4 = st.columns([1, 2, 2, 2]) | |
| with col1: | |
| if st.button("β Back to Upload"): | |
| st.session_state.page = 'upload' | |
| st.session_state.ocr_active_section = None | |
| st.session_state.ocr_active_field = None | |
| st.session_state.save_message = None | |
| st.session_state.save_message_time = None | |
| st.session_state.navigating_page = False | |
| st.rerun() | |
| with col2: | |
| if st.session_state.modified_indices: | |
| modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)] | |
| jsonl_modified = save_to_jsonl(modified_data, transform_filenames=True) | |
| st.download_button( | |
| label=f"β¬οΈ Download Modified ({len(modified_data)})", | |
| data=jsonl_modified, | |
| file_name=f"modified_remittance_data_{today_date}.jsonl", | |
| mime="application/jsonl", | |
| type="primary", | |
| use_container_width=True | |
| ) | |
| else: | |
| st.button("β¬οΈ No Modified Records", disabled=True, use_container_width=True) | |
| with col3: | |
| if st.session_state.modified_indices: | |
| unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data)) | |
| if i not in st.session_state.modified_indices] | |
| jsonl_unmodified = save_to_jsonl(unmodified_data, transform_filenames=True) | |
| st.download_button( | |
| label=f"β¬οΈ Download Unmodified ({len(unmodified_data)})", | |
| data=jsonl_unmodified, | |
| file_name=f"unmodified_remittance_data_{today_date}.jsonl", | |
| mime="application/jsonl", | |
| use_container_width=True | |
| ) | |
| else: | |
| st.button("β¬οΈ No Unmodified Records", disabled=True, use_container_width=True) | |
| with col4: | |
| jsonl_all = save_to_jsonl(st.session_state.edited_data, transform_filenames=True) | |
| st.download_button( | |
| label=f"β¬οΈ Download All ({len(st.session_state.edited_data)})", | |
| data=jsonl_all, | |
| file_name=f"all_remittance_data_{today_date}.jsonl", | |
| mime="application/jsonl", | |
| use_container_width=True | |
| ) | |
| # Get display names for all records | |
| file_names = [get_display_name(record) for record in st.session_state.data or []] | |
| # Guard: no records at all | |
| if not file_names: | |
| st.error("No records loaded. Please upload a JSONL file on the Upload page.") | |
| if st.button("β Back to Upload"): | |
| st.session_state.page = 'upload' | |
| st.rerun() | |
| else: | |
| # Build options (list is safer than range for length checks) | |
| options = list(range(len(file_names))) | |
| # Ensure edited_data exists and has consistent length | |
| if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names): | |
| # try to sync edited_data to data | |
| st.session_state.edited_data = (st.session_state.data or []).copy() | |
| # Clamp current_index into valid range | |
| cur_idx = st.session_state.get('current_index', 0) | |
| try: | |
| cur_idx = int(cur_idx) | |
| except Exception: | |
| cur_idx = 0 | |
| if cur_idx < 0: | |
| cur_idx = 0 | |
| if cur_idx >= len(options): | |
| cur_idx = len(options) - 1 | |
| # Show selectbox with a safe index | |
| selected_file = st.selectbox( | |
| "Select a file to view:", | |
| options=options, | |
| format_func=lambda x: f"{'βοΈ ' if x in st.session_state.modified_indices else ''}{file_names[x]}", | |
| index=cur_idx | |
| ) | |
| # Persist chosen index | |
| st.session_state.current_index = selected_file | |
| # Safe access to the current record | |
| current_record = st.session_state.edited_data[selected_file] | |
| left_col, right_col = st.columns([1.6, 1.0], gap="small") | |
| # LEFT SIDE: Image Display with OCR Canvas | |
| with left_col: | |
| with st.container(height=700, border=False): | |
| # Get file name from record | |
| original_file_name = get_display_name(current_record) | |
| if original_file_name: | |
| # Find the actual file name | |
| actual_file_name = find_actual_file(original_file_name) | |
| if actual_file_name: | |
| # Check if this is a PDF with multiple pages | |
| is_pdf = actual_file_name in st.session_state.pdf_metadata | |
| if is_pdf: | |
| pdf_meta = st.session_state.pdf_metadata[actual_file_name] | |
| total_pages = pdf_meta['total_pages'] | |
| current_page = st.session_state.current_page_num.get(actual_file_name, 0) | |
| # PDF Navigation Header | |
| col_prev, col_info, col_next = st.columns([1, 2, 1]) | |
| with col_prev: | |
| prev_clicked = st.button("β¬ οΈ Previous", key=f"prev_page_{selected_file}_{actual_file_name}", | |
| disabled=(current_page == 0), use_container_width=True) | |
| with col_info: | |
| st.markdown(f"<div style='text-align: center; padding: 5px;'><b>π Page {current_page + 1} of {total_pages}</b></div>", unsafe_allow_html=True) | |
| with col_next: | |
| next_clicked = st.button("Next β‘οΈ", key=f"next_page_{selected_file}_{actual_file_name}", | |
| disabled=(current_page >= total_pages - 1), use_container_width=True) | |
| # Handle navigation only if not already navigating | |
| if not st.session_state.navigating_page: | |
| if prev_clicked: | |
| st.session_state.navigating_page = True | |
| st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1) | |
| st.session_state.canvas_key += 1 | |
| st.session_state.ocr_active_section = None | |
| st.session_state.ocr_active_field = None | |
| st.rerun() | |
| elif next_clicked: | |
| st.session_state.navigating_page = True | |
| st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1) | |
| st.session_state.canvas_key += 1 | |
| st.session_state.ocr_active_section = None | |
| st.session_state.ocr_active_field = None | |
| st.rerun() | |
| else: | |
| # Reset the flag after rerun | |
| st.session_state.navigating_page = False | |
| # Determine if PDF and get the appropriate image | |
| is_pdf = actual_file_name in st.session_state.pdf_metadata | |
| if is_pdf: | |
| # Get the current page image | |
| current_page = st.session_state.current_page_num.get(actual_file_name, 0) | |
| pdf_meta = st.session_state.pdf_metadata[actual_file_name] | |
| current_image = pdf_meta['pages'][current_page] | |
| else: | |
| current_image = st.session_state.images[actual_file_name] | |
| if current_image: | |
| # Scale to a reasonable size so canvas doesn't become excessively large | |
| scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image) | |
| # Render the canvas. Its internal canvas will be constrained by the wrapper due to CSS above. | |
| canvas_result = st_canvas( | |
| fill_color="rgba(255, 165, 0, 0.3)", | |
| stroke_width=2, | |
| stroke_color="#FF0000", | |
| background_image=scaled_image, | |
| update_streamlit=True, | |
| height=scaled_image.height, | |
| width=scaled_image.width, | |
| drawing_mode="rect", | |
| key=f"canvas_{selected_file}_{st.session_state.canvas_key}", | |
| ) | |
| # Only attempt OCR if there's an active OCR target AND the user has drawn something (objects exist) | |
| if canvas_result.json_data is not None and st.session_state.ocr_active_field: | |
| objects = canvas_result.json_data.get("objects", []) | |
| if len(objects) > 0: | |
| rect = objects[-1] | |
| bbox = [ | |
| (rect["left"] - paste_x) / scale_ratio, | |
| (rect["top"] - paste_y) / scale_ratio, | |
| (rect["left"] + rect["width"] - paste_x) / scale_ratio, | |
| (rect["top"] + rect["height"] - paste_y) / scale_ratio | |
| ] | |
| with st.spinner("Performing OCR..."): | |
| ocr_text = perform_ocr(current_image, bbox) | |
| if ocr_text and not ocr_text.startswith("OCR Error"): | |
| st.success(f"β OCR Result: {ocr_text}") | |
| gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) | |
| if st.session_state.ocr_active_section == 'Line_items': | |
| line_items = gt_parse.get('Line_items', []) | |
| row_idx = st.session_state.ocr_line_item_row | |
| if row_idx is not None and row_idx < len(line_items): | |
| line_items[row_idx][st.session_state.ocr_active_field] = ocr_text | |
| gt_parse['Line_items'] = line_items | |
| # ensure expander stays open for this row after OCR | |
| expander_key = f"line_item_expander_{selected_file}_{row_idx}" | |
| st.session_state[expander_key] = True | |
| else: | |
| section = st.session_state.ocr_active_section | |
| field = st.session_state.ocr_active_field | |
| if section not in gt_parse: | |
| gt_parse[section] = {} | |
| gt_parse[section][field] = ocr_text | |
| st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse | |
| st.session_state.modified_indices.add(selected_file) | |
| # Keep the OCR field active so user can draw multiple rectangles for the same field | |
| # Field will only change when user clicks a different OCR button | |
| # Clear canvas for next OCR by bumping canvas_key then rerun | |
| st.session_state.canvas_key += 1 | |
| st.rerun() | |
| else: | |
| st.error(ocr_text) | |
| else: | |
| st.error(f"β File '{original_file_name}' not found in uploaded files") | |
| st.info("π‘ Available files:") | |
| with st.expander("Show available files"): | |
| for img_name in list(st.session_state.images.keys())[:20]: | |
| st.text(f" β’ {img_name}") | |
| if len(st.session_state.images) > 20: | |
| st.text(f" ... and {len(st.session_state.images) - 20} more") | |
| else: | |
| st.warning("No file name specified in record") | |
# RIGHT SIDE: editable form for the selected record
with right_col, st.container(height=700, border=False):
    st.markdown("### π Document Details")
    # Parsed fields of the selected record (empty dict when 'gt_parse' is absent);
    # the tab sections below read from and write into this mapping.
    gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
    tab_labels = [
        "π Remittance Details",
        "π₯ Party Details",
        "π¦ Bank Details",
        "π Line Items",
    ]
    tab1, tab2, tab3, tab4 = st.tabs(tab_labels)
# TAB 1: Remittance Details
with tab1:
    # `or {}` also covers a record whose section is stored as null.
    remittance = gt_parse.get('Remittance_details') or {}

    # (label shown to the user, key inside Remittance_details, widget-key stem).
    # The text input is keyed "<stem>_<file>", its OCR button "ocr_<stem>_<file>".
    remittance_fields = [
        ("Remittance Advice No", 'Remittance_adv_no', "rem_adv_no"),
        ("Remittance Advice Date", 'Remittance_adv_date', "rem_adv_date"),
        ("Payment Method", 'Payment_method', "payment_method"),
        ("FCY (Foreign Currency)", 'FCY', "fcy"),
        ("Total Payment Amount (FCY)", 'Total_payment_amt_FCY', "total_payment"),
        ("Payment Date", 'Payment_date', "payment_date"),
        ("Payment Reference No", 'Payment_ref_no', "payment_ref"),
    ]

    # Each field is one row: a text input plus an OCR button beside it.
    for field_label, field_name, key_stem in remittance_fields:
        col_input, col_btn = st.columns([5, 1])
        with col_input:
            remittance[field_name] = st.text_input(
                field_label,
                value=remittance.get(field_name, ''),
                key=f"{key_stem}_{selected_file}",
            )
        with col_btn:
            # Spacer so the button lines up with the input rather than its label.
            st.markdown("<br>", unsafe_allow_html=True)
            # The button is drawn as "primary" while is_ocr_active reports this field.
            if st.button(
                "π",
                key=f"ocr_{key_stem}_{selected_file}",
                type="primary" if is_ocr_active('Remittance_details', field_name) else "secondary",
            ):
                activate_ocr_field('Remittance_details', field_name)

    gt_parse['Remittance_details'] = remittance
# TAB 2: Customer/Supplier Details with SWAP button
with tab2:
    # SWAP BUTTON - placed in the wider middle column so it appears centred
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        if st.button("π Swap Customer β Supplier", key=f"swap_btn_{selected_file}",
                     type="primary", use_container_width=True):
            # just_swapped is checked before acting - presumably to avoid handling
            # the same click twice across the rerun.
            if not st.session_state.just_swapped:
                st.session_state.just_swapped = True
                swap_customer_supplier_details(selected_file)
                st.rerun()
    # Reset the flag after rerun
    if st.session_state.just_swapped:
        st.session_state.just_swapped = False

    # `or {}` also covers a record whose section is stored as null.
    customer_supplier = gt_parse.get('Customer_supplier_details') or {}

    # Heading, then (label, key inside Customer_supplier_details, widget-key stem, multiline?).
    # Inputs are keyed "<stem>_<file>", OCR buttons "ocr_<stem>_<file>".
    party_groups = [
        ("**Customer Details**", [
            ("Customer Name", 'Customer_name', "cust_name", False),
            ("Customer Address", 'Customer_address', "cust_addr", True),
            ("Customer Contact Info", 'Customer_contact_info', "cust_contact", False),
        ]),
        ("**Supplier Details**", [
            ("Supplier Name", 'Supplier_name', "supp_name", False),
            ("Supplier Address", 'Supplier_address', "supp_addr", True),
            ("Supplier Contact Info", 'Supplier_contact_info', "supp_contact", False),
        ]),
    ]

    for heading, party_fields in party_groups:
        st.markdown(heading)
        for field_label, field_name, key_stem, multiline in party_fields:
            col_input, col_btn = st.columns([5, 1])
            with col_input:
                if multiline:
                    # NOTE(review): the Streamlit docs state a minimum text_area height
                    # of 68px; confirm height=60 is accepted by the pinned version.
                    customer_supplier[field_name] = st.text_area(
                        field_label,
                        value=customer_supplier.get(field_name, ''),
                        key=f"{key_stem}_{selected_file}",
                        height=60,
                    )
                else:
                    customer_supplier[field_name] = st.text_input(
                        field_label,
                        value=customer_supplier.get(field_name, ''),
                        key=f"{key_stem}_{selected_file}",
                    )
            with col_btn:
                # Spacer so the button lines up with the input rather than its label.
                st.markdown("<br>", unsafe_allow_html=True)
                if st.button(
                    "π",
                    key=f"ocr_{key_stem}_{selected_file}",
                    type="primary" if is_ocr_active('Customer_supplier_details', field_name) else "secondary",
                ):
                    activate_ocr_field('Customer_supplier_details', field_name)

    gt_parse['Customer_supplier_details'] = customer_supplier
# TAB 3: Bank Details
with tab3:
    # `or {}` also covers a record whose section is stored as null.
    bank = gt_parse.get('Bank_details') or {}

    # (label shown to the user, key inside Bank_details, widget-key stem).
    # The text input is keyed "<stem>_<file>", its OCR button "ocr_<stem>_<file>".
    bank_fields = [
        ("Bank Name", 'Bank_name', "bank_name"),
        ("Bank Account No", 'Bank_acc_no', "bank_acc"),
        ("Bank Routing No", 'Bank_routing_no', "bank_routing"),
        ("SWIFT Code", 'Swift_code', "swift"),
    ]

    # Each field is one row: a text input plus an OCR button beside it.
    for field_label, field_name, key_stem in bank_fields:
        col_input, col_btn = st.columns([5, 1])
        with col_input:
            bank[field_name] = st.text_input(
                field_label,
                value=bank.get(field_name, ''),
                key=f"{key_stem}_{selected_file}",
            )
        with col_btn:
            # Spacer so the button lines up with the input rather than its label.
            st.markdown("<br>", unsafe_allow_html=True)
            if st.button(
                "π",
                key=f"ocr_{key_stem}_{selected_file}",
                type="primary" if is_ocr_active('Bank_details', field_name) else "secondary",
            ):
                activate_ocr_field('Bank_details', field_name)

    gt_parse['Bank_details'] = bank
# TAB 4: Line Items
with tab4:
    # Single source of truth for a line item's fields, in display order:
    # (label, key inside the row dict, input-key stem, OCR-button-key stem, multiline?).
    # Widget keys are "<stem>_<file>_<row index>"; the stems are kept exactly as they
    # were so existing session-state entries keep matching.
    line_item_fields = [
        ("PO Number", 'Po_number', "po_num", "ocr_po", False),
        ("Invoice No", 'Invoice_no', "inv_no", "ocr_inv", False),
        ("Other Doc Ref No", 'Other_doc_ref_no', "other_doc", "ocr_other", False),
        ("Invoice Date", 'Invoice_date', "inv_date", "ocr_inv_date", False),
        ("Invoice Amount FCY", 'Invoice_amount_FCY', "inv_amt", "ocr_inv_amt", False),
        ("Amount Paid", 'Amount_paid_for_each_invoice', "amt_paid", "ocr_amt_paid", False),
        ("Outstanding Balance FCY", 'Outstanding_balance_FCY', "out_bal", "ocr_out_bal", False),
        ("Discounts Taken FCY", 'Discounts_taken_FCY', "disc", "ocr_disc", False),
        ("Adjustments FCY", 'Adjustments(without_holding_tax)_FCY', "adj", "ocr_adj", False),
        ("Descriptions", 'Descriptions', "desc", "ocr_desc", True),
    ]

    # `or []` / `or {}` also cover values stored as null.
    current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse') or {}
    line_items = current_gt_parse.get('Line_items') or []

    # Add/Remove row buttons
    col_add, col_remove = st.columns([1, 1])
    with col_add:
        if st.button("β Add New Row", key=f"add_row_{selected_file}", use_container_width=True):
            if not st.session_state.button_clicked:
                st.session_state.button_clicked = True
                # Blank row built from the field spec so it cannot drift from the form.
                new_row = {field_name: "" for _, field_name, _, _, _ in line_item_fields}
                current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse') or {}
                current_line_items = current_gt_parse.get('Line_items') or []
                current_line_items.append(new_row)
                current_gt_parse['Line_items'] = current_line_items
                st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
                st.session_state.modified_indices.add(selected_file)
                # Ensure the newly added row's expander is open
                new_idx = len(current_line_items) - 1
                st.session_state[f"line_item_expander_{selected_file}_{new_idx}"] = True
                st.rerun()
    with col_remove:
        if st.button("β Remove Last Row", key=f"remove_row_{selected_file}",
                     disabled=not line_items, use_container_width=True):
            if not st.session_state.button_clicked and line_items:
                st.session_state.button_clicked = True
                current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse') or {}
                current_line_items = current_gt_parse.get('Line_items') or []
                popped_idx = len(current_line_items) - 1
                current_line_items.pop()
                current_gt_parse['Line_items'] = current_line_items
                st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse
                st.session_state.modified_indices.add(selected_file)
                # Remove the expander flag for the popped row (if present)
                expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}"
                if expander_key_popped in st.session_state:
                    del st.session_state[expander_key_popped]
                st.rerun()
    if st.session_state.button_clicked:
        st.session_state.button_clicked = False

    # Display each row as an expander with OCR buttons
    current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse') or {}
    line_items = current_gt_parse.get('Line_items') or []
    if line_items:
        for idx, item in enumerate(line_items):
            # A session_state flag (not a widget key) carries the expansion state across reruns.
            expander_key = f"line_item_expander_{selected_file}_{idx}"
            expanded_default = st.session_state.get(expander_key, False)
            # Note: no 'key' arg is passed to st.expander, for compatibility.
            with st.expander(f"**Row {idx + 1}** - Invoice: {item.get('Invoice_no', 'N/A')}",
                             expanded=expanded_default):
                for field_label, field_name, input_stem, ocr_stem, multiline in line_item_fields:
                    col_input, col_btn = st.columns([5, 1])
                    with col_input:
                        if multiline:
                            # NOTE(review): the Streamlit docs state a minimum text_area
                            # height of 68px; confirm height=60 works on the pinned version.
                            item[field_name] = st.text_area(
                                field_label,
                                value=item.get(field_name, ''),
                                key=f"{input_stem}_{selected_file}_{idx}",
                                height=60,
                            )
                        else:
                            item[field_name] = st.text_input(
                                field_label,
                                value=item.get(field_name, ''),
                                key=f"{input_stem}_{selected_file}_{idx}",
                            )
                    with col_btn:
                        # Spacer so the button lines up with the input rather than its label.
                        st.markdown("<br>", unsafe_allow_html=True)
                        if st.button(
                            "π",
                            key=f"{ocr_stem}_{selected_file}_{idx}",
                            type="primary" if is_ocr_active('Line_items', field_name, idx) else "secondary",
                        ):
                            # Keep this row's expander open when OCR is explicitly requested.
                            st.session_state[expander_key] = True
                            activate_ocr_field('Line_items', field_name, idx)

        # Update line items back to gt_parse
        current_gt_parse['Line_items'] = line_items

        st.markdown("**π Line Items Summary Table**")
        # Summary table, numbered from 1 instead of pandas' default 0
        df = pd.DataFrame(line_items)
        df.index = df.index + 1
        df.index.name = 'SL No'
        st.dataframe(df, use_container_width=True, height=300)
    else:
        st.info("No line items. Click 'β Add New Row' to add a new row.")
# Store the values gathered from the tab sections back onto the selected record.
st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse

# Save button
col1, col2 = st.columns([1, 1])
with col1:
    save_clicked = st.button(
        "πΎ Save Changes",
        type="primary",
        use_container_width=True,
        key=f"save_btn_{selected_file}",
    )
    # Act on a click only when just_saved is not already set.
    if save_clicked and not st.session_state.just_saved:
        st.session_state.just_saved = True
        auto_save(selected_file)
        # Message and timestamp are stored in session_state so they are still
        # available after the rerun below.
        st.session_state.save_message = "β Changes saved successfully!"
        st.session_state.save_message_time = time.time()
        st.rerun()

# Clear the flag on the run that follows a save.
if st.session_state.just_saved:
    st.session_state.just_saved = False
| if st.session_state.save_message: | |
| st.success(st.session_state.save_message) | |