diff --git "a/src/streamlit_app.py" "b/src/streamlit_app.py" --- "a/src/streamlit_app.py" +++ "b/src/streamlit_app.py" @@ -1,5 +1,16 @@ import os from pathlib import Path +import streamlit as st +import json +import io +from PIL import Image +import time +import pandas as pd +from streamlit_drawable_canvas import st_canvas +import pytesseract +import numpy as np +from datetime import datetime +import fitz # PyMuPDF for PDF handling # ----------------------------- # Environment hardening (HF Spaces, /.cache issue) @@ -18,18 +29,6 @@ try: except Exception as e: print(f"[startup] WARNING: could not create {streamlit_dir}: {e}") -import streamlit as st -import json -import io -from PIL import Image -import time -import pandas as pd -from streamlit_drawable_canvas import st_canvas -import pytesseract -import numpy as np -from datetime import datetime -import fitz # PyMuPDF for PDF handling - # Set Tesseract path - auto-detect based on OS if os.name == 'nt': # Windows pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" @@ -42,93 +41,85 @@ st.set_page_config(page_title="Remittance Data Viewer", layout="wide") # Custom CSS st.markdown(""" """, unsafe_allow_html=True) @@ -141,8 +132,80 @@ def load_jsonl(file): data.append(json.loads(line)) return data -def save_to_jsonl(data): +def get_file_names_from_record(record, actual_file_name): + """Generate file_name or file_names based on the PDF metadata""" + if not actual_file_name: + return record.get('file_name', record.get('file_names', [])) + + # Check if it's a PDF with multiple pages + is_pdf = actual_file_name in st.session_state.pdf_metadata + if is_pdf: + pdf_meta = st.session_state.pdf_metadata[actual_file_name] + total_pages = pdf_meta['total_pages'] + base_name = os.path.splitext(actual_file_name)[0] + + if total_pages > 1: + # Return list of file names for multi-page PDF + return [f"{base_name}_page{i+1}.png" for i in range(total_pages)] + else: + # Single page PDF + return f"{base_name}.png" + else: + # Regular image file + if not actual_file_name.lower().endswith('.png'): + base_name = os.path.splitext(actual_file_name)[0] + return f"{base_name}.png" + return actual_file_name + +def save_to_jsonl(data, transform_filenames=False): """Convert data list to JSONL format""" + if transform_filenames: + transformed_data = [] + for record in data: + new_record = {} + + # First, determine file_name(s) and add to new_record + original_file_name = record.get('file_name', '') + if not original_file_name and 'file_names' in record: + # Handle file_names field if it exists + original_file_name = record.get('file_names', [])[0] if record.get('file_names', []) else '' + + # Find the actual uploaded file + actual_file_name = None + if original_file_name: + if original_file_name in st.session_state.images: + actual_file_name = original_file_name + else: + # Try with extensions + for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: + if original_file_name + ext in st.session_state.images: + actual_file_name = original_file_name + ext + break + # Try without extension + if not actual_file_name: + for uploaded_name in st.session_state.images.keys(): + uploaded_base = os.path.splitext(uploaded_name)[0] + if uploaded_base == original_file_name: + actual_file_name = uploaded_name + break + + # Get the transformed file name(s) + new_file_names = get_file_names_from_record(record, actual_file_name) + + # Add file_name or file_names as the FIRST field + if isinstance(new_file_names, list): + new_record['file_names'] = new_file_names + else: + new_record['file_name'] = new_file_names + + # Now add all other fields (excluding the original file_name/file_names) + for key, value in record.items(): + if key not in ['file_name', 'file_names']: + new_record[key] = value + + transformed_data.append(new_record) + data = transformed_data + jsonl_content = '\n'.join([json.dumps(record) for record in data]) return jsonl_content @@ -152,7 +215,6 @@ def pdf_to_images(pdf_file): pdf_bytes = pdf_file.read() pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf") images = [] - for page_num in range(pdf_document.page_count): page = pdf_document[page_num] # Render page to an image (higher DPI for better quality) @@ -161,7 +223,6 @@ def pdf_to_images(pdf_file): img_data = pix.tobytes("png") img = Image.open(io.BytesIO(img_data)) images.append(img) - pdf_document.close() return images except Exception as e: @@ -194,7 +255,6 @@ def scale_image_to_fixed_size(image, max_width=900, max_height=1100): width_ratio = max_width / image.width height_ratio = max_height / image.height ratio = min(width_ratio, height_ratio) - new_width = int(image.width * ratio) new_height = int(image.height * ratio) @@ -229,6 +289,47 @@ def swap_customer_supplier_details(index): st.session_state.edited_data[index]['gt_parse'] = gt_parse st.session_state.modified_indices.add(index) +def get_display_name(record): + """Get display name from record, handling both file_name and file_names""" + if 'file_name' in record: + return record['file_name'] + elif 'file_names' in record and record['file_names']: + if isinstance(record['file_names'], list): + return record['file_names'][0] if record['file_names'] else 'Unnamed' + return record['file_names'] + return 'Unnamed' + +def find_actual_file(file_name): + """Find the actual uploaded file matching the given file_name""" + if not file_name: + return None + + # Try exact match + if file_name in st.session_state.images: + return file_name + + # Try with extensions + for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: + if file_name + ext in st.session_state.images: + return file_name + ext + + # Try matching without extension + for uploaded_name in st.session_state.images.keys(): + uploaded_base = os.path.splitext(uploaded_name)[0] + if uploaded_base == file_name: + return uploaded_name + + # Try matching any filename in file_names list + if file_name.endswith('.png'): + base_name = os.path.splitext(file_name)[0] + # Check if this is a page from a multi-page PDF + if '_page' in base_name: + pdf_base = base_name.split('_page')[0] + if pdf_base + '.pdf' in st.session_state.images: + return pdf_base + '.pdf' + + return None + # Initialize session state if 'data' not in st.session_state: st.session_state.data = None @@ -272,17 +373,50 @@ if 'navigating_page' not in st.session_state: def auto_save(index): """Automatically save changes to session state and mark as modified""" if st.session_state.edited_data: - st.session_state.data = st.session_state.edited_data.copy() + # Get the current record + current_record = st.session_state.edited_data[index] + + # Get original file name + original_file_name = current_record.get('file_name', '') + if not original_file_name and 'file_names' in current_record: + original_file_name = current_record.get('file_names', [])[0] if current_record.get('file_names', []) else '' + + # Find actual file + actual_file_name = find_actual_file(original_file_name) + + # Update file_name or file_names in the record + new_file_names = get_file_names_from_record(current_record, actual_file_name) + + # Remove old keys + if 'file_name' in current_record: + del current_record['file_name'] + if 'file_names' in current_record: + del current_record['file_names'] + + # Create new ordered record with file_name/file_names first + new_record = {} + if isinstance(new_file_names, list): + new_record['file_names'] = new_file_names + else: + new_record['file_name'] = new_file_names + + # Add all other fields + for key, value in current_record.items(): + new_record[key] = value + + # Update the record + st.session_state.edited_data[index] = new_record + st.session_state.data[index] = new_record.copy() st.session_state.modified_indices.add(index) def activate_ocr_field(section, field, row_idx=None): """Activate OCR for a specific field. - Toggle behavior: if the same field is already active, deactivate it to avoid repeated activations/looping. - Also ensures the line-item expander stays expanded when OCR is requested. + Toggle behavior: if the same field is already active, deactivate it to avoid repeated activations/looping. + Also ensures the line-item expander stays expanded when OCR is requested. """ # If the requested field is already active, deactivate it (toggle off) - if (st.session_state.ocr_active_section == section and - st.session_state.ocr_active_field == field and + if (st.session_state.ocr_active_section == section and + st.session_state.ocr_active_field == field and st.session_state.ocr_line_item_row == row_idx): st.session_state.ocr_active_section = None st.session_state.ocr_active_field = None @@ -292,21 +426,21 @@ def activate_ocr_field(section, field, row_idx=None): st.session_state.ocr_active_section = section st.session_state.ocr_active_field = field st.session_state.ocr_line_item_row = row_idx - + # If it's a line-item, mark that expander as expanded so it remains open after rerun if section == 'Line_items' and row_idx is not None: current_idx = st.session_state.get('current_index', 0) expander_key = f"line_item_expander_{current_idx}_{row_idx}" st.session_state[expander_key] = True - + # Bump canvas_key to ensure canvas is refreshed/cleared when toggling OCR st.session_state.canvas_key += 1 st.rerun() def is_ocr_active(section, field, row_idx=None): """Check if this OCR button is currently active""" - return (st.session_state.ocr_active_section == section and - st.session_state.ocr_active_field == field and + return (st.session_state.ocr_active_section == section and + st.session_state.ocr_active_field == field and st.session_state.ocr_line_item_row == row_idx) # PAGE 1: Upload Page @@ -327,9 +461,8 @@ if st.session_state.page == 'upload': st.error(f"Error loading file: {str(e)}") st.markdown("**Step 2: Upload Images/PDFs Folder**") - uploaded_files = st.file_uploader( - "Choose image or PDF files", + "Choose image or PDF files", type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp', 'pdf'], accept_multiple_files=True, help="Select all images and PDFs from your folder at once" @@ -338,11 +471,9 @@ if st.session_state.page == 'upload': if uploaded_files: images_dict = {} pdf_metadata = {} - for file in uploaded_files: try: file_ext = file.name.lower().split('.')[-1] - if file_ext == 'pdf': # Convert PDF to images pdf_images = pdf_to_images(file) @@ -355,12 +486,10 @@ if st.session_state.page == 'upload': 'total_pages': len(pdf_images), 'current_page': 0 } - #st.info(f"📄 Converted PDF '{file.name}' ({len(pdf_images)} pages)") else: # Handle regular images image = Image.open(file) images_dict[file.name] = image - except Exception as e: st.warning(f"Could not load file {file.name}: {str(e)}") @@ -373,7 +502,17 @@ if st.session_state.page == 'upload': st.session_state.current_page_num[filename] = 0 if st.session_state.data is not None: - gt_file_names = [rec.get('file_name', '') for rec in st.session_state.data] + # Get all file names from records (handle both file_name and file_names) + gt_file_names = [] + for rec in st.session_state.data: + if 'file_name' in rec and rec['file_name']: + gt_file_names.append(rec['file_name']) + elif 'file_names' in rec and rec['file_names']: + if isinstance(rec['file_names'], list): + gt_file_names.extend(rec['file_names']) + else: + gt_file_names.append(rec['file_names']) + matched_images = set() unmatched_gt_files = [] @@ -381,7 +520,6 @@ if st.session_state.page == 'upload': for fname in gt_file_names: if not fname: continue - # Try exact match first if fname in images_dict: matched_images.add(fname) @@ -393,12 +531,11 @@ if st.session_state.page == 'upload': matched_images.add(fname) found = True break - # Try matching filename without extension from uploaded files if not found: for uploaded_name in images_dict.keys(): uploaded_base = uploaded_name.rsplit('.', 1)[0] - if uploaded_base == fname: + if uploaded_base == fname or uploaded_base == os.path.splitext(fname)[0]: matched_images.add(fname) found = True break @@ -454,7 +591,7 @@ elif st.session_state.page == 'viewer': with col2: if st.session_state.modified_indices: modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)] - jsonl_modified = save_to_jsonl(modified_data) + jsonl_modified = save_to_jsonl(modified_data, transform_filenames=True) st.download_button( label=f"⬇️ Download Modified ({len(modified_data)})", data=jsonl_modified, @@ -468,9 +605,9 @@ elif st.session_state.page == 'viewer': with col3: if st.session_state.modified_indices: - unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data)) - if i not in st.session_state.modified_indices] - jsonl_unmodified = save_to_jsonl(unmodified_data) + unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data)) + if i not in st.session_state.modified_indices] + jsonl_unmodified = save_to_jsonl(unmodified_data, transform_filenames=True) st.download_button( label=f"⬇️ Download Unmodified ({len(unmodified_data)})", data=jsonl_unmodified, @@ -482,7 +619,7 @@ elif st.session_state.page == 'viewer': st.button("⬇️ No Unmodified Records", disabled=True, use_container_width=True) with col4: - jsonl_all = save_to_jsonl(st.session_state.edited_data) + jsonl_all = save_to_jsonl(st.session_state.edited_data, transform_filenames=True) st.download_button( label=f"⬇️ Download All ({len(st.session_state.edited_data)})", data=jsonl_all, @@ -491,8 +628,9 @@ elif st.session_state.page == 'viewer': use_container_width=True ) - file_names = [record.get('file_name', f'Record {i}') for i, record in enumerate(st.session_state.data or [])] - + # Get display names for all records + file_names = [get_display_name(record) for record in st.session_state.data or []] + # Guard: no records at all if not file_names: st.error("No records loaded. Please upload a JSONL file on the Upload page.") @@ -502,12 +640,12 @@ elif st.session_state.page == 'viewer': else: # Build options (list is safer than range for length checks) options = list(range(len(file_names))) - + # Ensure edited_data exists and has consistent length if not st.session_state.edited_data or len(st.session_state.edited_data) != len(file_names): # try to sync edited_data to data st.session_state.edited_data = (st.session_state.data or []).copy() - + # Clamp current_index into valid range cur_idx = st.session_state.get('current_index', 0) try: @@ -518,7 +656,7 @@ elif st.session_state.page == 'viewer': cur_idx = 0 if cur_idx >= len(options): cur_idx = len(options) - 1 - + # Show selectbox with a safe index selected_file = st.selectbox( "Select a file to view:", @@ -526,698 +664,661 @@ elif st.session_state.page == 'viewer': format_func=lambda x: f"{'✏️ ' if x in st.session_state.modified_indices else ''}{file_names[x]}", index=cur_idx ) - + # Persist chosen index st.session_state.current_index = selected_file - + # Safe access to the current record current_record = st.session_state.edited_data[selected_file] - - left_col, right_col = st.columns([1.6, 1.0], gap="small") - - # LEFT SIDE: Image Display with OCR Canvas - with left_col: - with st.container(height=700, border=False): - file_name = current_record.get('file_name', '') - - if file_name: - # Find the actual file name (handle cases where extension is missing) - actual_file_name = None - if file_name in st.session_state.images: - actual_file_name = file_name - else: - # Try adding common extensions - for ext in ['.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.tif', '.bmp']: - if file_name + ext in st.session_state.images: - actual_file_name = file_name + ext - break - - # Try matching without extension - if not actual_file_name: - for uploaded_name in st.session_state.images.keys(): - uploaded_base = uploaded_name.rsplit('.', 1)[0] - if uploaded_base == file_name: - actual_file_name = uploaded_name - break + left_col, right_col = st.columns([1.6, 1.0], gap="small") + + # LEFT SIDE: Image Display with OCR Canvas + with left_col: + with st.container(height=700, border=False): + # Get file name from record + original_file_name = get_display_name(current_record) - if actual_file_name: - # Check if this is a PDF with multiple pages - is_pdf = actual_file_name in st.session_state.pdf_metadata + if original_file_name: + # Find the actual file name + actual_file_name = find_actual_file(original_file_name) - if is_pdf: - pdf_meta = st.session_state.pdf_metadata[actual_file_name] - total_pages = pdf_meta['total_pages'] - current_page = st.session_state.current_page_num.get(actual_file_name, 0) - - # PDF Navigation Header - col_prev, col_info, col_next = st.columns([1, 2, 1]) - - with col_prev: - prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{actual_file_name}", - disabled=(current_page == 0), use_container_width=True) - - with col_info: - st.markdown(f"
📄 Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True) + if actual_file_name: + # Check if this is a PDF with multiple pages + is_pdf = actual_file_name in st.session_state.pdf_metadata + if is_pdf: + pdf_meta = st.session_state.pdf_metadata[actual_file_name] + total_pages = pdf_meta['total_pages'] + current_page = st.session_state.current_page_num.get(actual_file_name, 0) + + # PDF Navigation Header + col_prev, col_info, col_next = st.columns([1, 2, 1]) + with col_prev: + prev_clicked = st.button("⬅️ Previous", key=f"prev_page_{selected_file}_{actual_file_name}", + disabled=(current_page == 0), use_container_width=True) + with col_info: + st.markdown(f"
📄 Page {current_page + 1} of {total_pages}
", unsafe_allow_html=True) + with col_next: + next_clicked = st.button("Next ➡️", key=f"next_page_{selected_file}_{actual_file_name}", + disabled=(current_page >= total_pages - 1), use_container_width=True) + + # Handle navigation only if not already navigating + if not st.session_state.navigating_page: + if prev_clicked: + st.session_state.navigating_page = True + st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1) + st.session_state.canvas_key += 1 + st.session_state.ocr_active_section = None + st.session_state.ocr_active_field = None + st.rerun() + elif next_clicked: + st.session_state.navigating_page = True + st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1) + st.session_state.canvas_key += 1 + st.session_state.ocr_active_section = None + st.session_state.ocr_active_field = None + st.rerun() + else: + # Reset the flag after rerun + st.session_state.navigating_page = False - with col_next: - next_clicked = st.button("Next ➡️", key=f"next_page_{selected_file}_{actual_file_name}", - disabled=(current_page >= total_pages - 1), use_container_width=True) + # Determine if PDF and get the appropriate image + is_pdf = actual_file_name in st.session_state.pdf_metadata + if is_pdf: + # Get the current page image + current_page = st.session_state.current_page_num.get(actual_file_name, 0) + pdf_meta = st.session_state.pdf_metadata[actual_file_name] + current_image = pdf_meta['pages'][current_page] + else: + current_image = st.session_state.images[actual_file_name] - # Handle navigation only if not already navigating - if not st.session_state.navigating_page: - if prev_clicked: - st.session_state.navigating_page = True - st.session_state.current_page_num[actual_file_name] = max(0, current_page - 1) - st.session_state.canvas_key += 1 - st.session_state.ocr_active_section = None - st.session_state.ocr_active_field = None - st.rerun() - elif next_clicked: - st.session_state.navigating_page = True - st.session_state.current_page_num[actual_file_name] = min(total_pages - 1, current_page + 1) - st.session_state.canvas_key += 1 - st.session_state.ocr_active_section = None - st.session_state.ocr_active_field = None + if current_image: + # Scale to a reasonable size so canvas doesn't become excessively large + scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image) + + # Render the canvas. Its internal canvas will be constrained by the wrapper due to CSS above. + canvas_result = st_canvas( + fill_color="rgba(255, 165, 0, 0.3)", + stroke_width=2, + stroke_color="#FF0000", + background_image=scaled_image, + update_streamlit=True, + height=scaled_image.height, + width=scaled_image.width, + drawing_mode="rect", + key=f"canvas_{selected_file}_{st.session_state.canvas_key}", + ) + + # Only attempt OCR if there's an active OCR target AND the user has drawn something (objects exist) + if canvas_result.json_data is not None and st.session_state.ocr_active_field: + objects = canvas_result.json_data.get("objects", []) + if len(objects) > 0: + rect = objects[-1] + bbox = [ + (rect["left"] - paste_x) / scale_ratio, + (rect["top"] - paste_y) / scale_ratio, + (rect["left"] + rect["width"] - paste_x) / scale_ratio, + (rect["top"] + rect["height"] - paste_y) / scale_ratio + ] + + with st.spinner("Performing OCR..."): + ocr_text = perform_ocr(current_image, bbox) + if ocr_text and not ocr_text.startswith("OCR Error"): + st.success(f"✅ OCR Result: {ocr_text}") + gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) + if st.session_state.ocr_active_section == 'Line_items': + line_items = gt_parse.get('Line_items', []) + row_idx = st.session_state.ocr_line_item_row + if row_idx is not None and row_idx < len(line_items): + line_items[row_idx][st.session_state.ocr_active_field] = ocr_text + gt_parse['Line_items'] = line_items + # ensure expander stays open for this row after OCR + expander_key = f"line_item_expander_{selected_file}_{row_idx}" + st.session_state[expander_key] = True + else: + section = st.session_state.ocr_active_section + field = st.session_state.ocr_active_field + if section not in gt_parse: + gt_parse[section] = {} + gt_parse[section][field] = ocr_text + + st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse + st.session_state.modified_indices.add(selected_file) + + # Keep the OCR field active so user can draw multiple rectangles for the same field + # Field will only change when user clicks a different OCR button + # Clear canvas for next OCR by bumping canvas_key then rerun + st.session_state.canvas_key += 1 + st.rerun() + else: + st.error(ocr_text) + else: + st.error(f"❌ File '{original_file_name}' not found in uploaded files") + st.info("💡 Available files:") + with st.expander("Show available files"): + for img_name in list(st.session_state.images.keys())[:20]: + st.text(f" • {img_name}") + if len(st.session_state.images) > 20: + st.text(f" ... and {len(st.session_state.images) - 20} more") + else: + st.warning("No file name specified in record") + + # RIGHT SIDE: Editable Details + with right_col: + with st.container(height=700, border=False): + st.markdown("### 📝 Document Details") + gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) + + tab1, tab2, tab3, tab4 = st.tabs([ + "📄 Remittance Details", + "👥 Party Details", + "🏦 Bank Details", + "📋 Line Items" + ]) + + # TAB 1: Remittance Details + with tab1: + remittance = gt_parse.get('Remittance_details', {}) + + # Each field with OCR button + col_input, col_btn = st.columns([5, 1]) + with col_input: + remittance['Remittance_adv_no'] = st.text_input( + "Remittance Advice No", + value=remittance.get('Remittance_adv_no', ''), + key=f"rem_adv_no_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_rem_adv_no_{selected_file}", + type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_no') else "secondary"): + activate_ocr_field('Remittance_details', 'Remittance_adv_no') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + remittance['Remittance_adv_date'] = st.text_input( + "Remittance Advice Date", + value=remittance.get('Remittance_adv_date', ''), + key=f"rem_adv_date_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_rem_adv_date_{selected_file}", + type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_date') else "secondary"): + activate_ocr_field('Remittance_details', 'Remittance_adv_date') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + remittance['Payment_method'] = st.text_input( + "Payment Method", + value=remittance.get('Payment_method', ''), + key=f"payment_method_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_payment_method_{selected_file}", + type="primary" if is_ocr_active('Remittance_details', 'Payment_method') else "secondary"): + activate_ocr_field('Remittance_details', 'Payment_method') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + remittance['FCY'] = st.text_input( + "FCY (Foreign Currency)", + value=remittance.get('FCY', ''), + key=f"fcy_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_fcy_{selected_file}", + type="primary" if is_ocr_active('Remittance_details', 'FCY') else "secondary"): + activate_ocr_field('Remittance_details', 'FCY') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + remittance['Total_payment_amt_FCY'] = st.text_input( + "Total Payment Amount (FCY)", + value=remittance.get('Total_payment_amt_FCY', ''), + key=f"total_payment_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_total_payment_{selected_file}", + type="primary" if is_ocr_active('Remittance_details', 'Total_payment_amt_FCY') else "secondary"): + activate_ocr_field('Remittance_details', 'Total_payment_amt_FCY') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + remittance['Payment_date'] = st.text_input( + "Payment Date", + value=remittance.get('Payment_date', ''), + key=f"payment_date_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_payment_date_{selected_file}", + type="primary" if is_ocr_active('Remittance_details', 'Payment_date') else "secondary"): + activate_ocr_field('Remittance_details', 'Payment_date') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + remittance['Payment_ref_no'] = st.text_input( + "Payment Reference No", + value=remittance.get('Payment_ref_no', ''), + key=f"payment_ref_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_payment_ref_{selected_file}", + type="primary" if is_ocr_active('Remittance_details', 'Payment_ref_no') else "secondary"): + activate_ocr_field('Remittance_details', 'Payment_ref_no') + + gt_parse['Remittance_details'] = remittance + + # TAB 2: Customer/Supplier Details with SWAP button + with tab2: + # SWAP BUTTON - Centered and prominent + col1, col2, col3 = st.columns([1, 2, 1]) + with col2: + if st.button("🔄 Swap Customer ↔ Supplier", key=f"swap_btn_{selected_file}", + type="primary", use_container_width=True): + if not st.session_state.just_swapped: + st.session_state.just_swapped = True + swap_customer_supplier_details(selected_file) st.rerun() - else: - # Reset the flag after rerun - st.session_state.navigating_page = False + + # Reset the flag after rerun + if st.session_state.just_swapped: + st.session_state.just_swapped = False + + st.markdown("**Customer Details**") + customer_supplier = gt_parse.get('Customer_supplier_details', {}) + + col_input, col_btn = st.columns([5, 1]) + with col_input: + customer_supplier['Customer_name'] = st.text_input( + "Customer Name", + value=customer_supplier.get('Customer_name', ''), + key=f"cust_name_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_cust_name_{selected_file}", + type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_name') else "secondary"): + activate_ocr_field('Customer_supplier_details', 'Customer_name') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + customer_supplier['Customer_address'] = st.text_area( + "Customer Address", + value=customer_supplier.get('Customer_address', ''), + key=f"cust_addr_{selected_file}", + height=60 + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_cust_addr_{selected_file}", + type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_address') else "secondary"): + activate_ocr_field('Customer_supplier_details', 'Customer_address') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + customer_supplier['Customer_contact_info'] = st.text_input( + "Customer Contact Info", + value=customer_supplier.get('Customer_contact_info', ''), + key=f"cust_contact_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_cust_contact_{selected_file}", + type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_contact_info') else "secondary"): + activate_ocr_field('Customer_supplier_details', 'Customer_contact_info') + + st.markdown("**Supplier Details**") + + col_input, col_btn = st.columns([5, 1]) + with col_input: + customer_supplier['Supplier_name'] = st.text_input( + "Supplier Name", + value=customer_supplier.get('Supplier_name', ''), + key=f"supp_name_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_supp_name_{selected_file}", + type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_name') else "secondary"): + activate_ocr_field('Customer_supplier_details', 'Supplier_name') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + customer_supplier['Supplier_address'] = st.text_area( + "Supplier Address", + value=customer_supplier.get('Supplier_address', ''), + key=f"supp_addr_{selected_file}", + height=60 + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_supp_addr_{selected_file}", + type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_address') else "secondary"): + activate_ocr_field('Customer_supplier_details', 'Supplier_address') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + customer_supplier['Supplier_contact_info'] = st.text_input( + "Supplier Contact Info", + value=customer_supplier.get('Supplier_contact_info', ''), + key=f"supp_contact_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_supp_contact_{selected_file}", + type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_contact_info') else "secondary"): + activate_ocr_field('Customer_supplier_details', 'Supplier_contact_info') + + gt_parse['Customer_supplier_details'] = customer_supplier - if actual_file_name: - # Determine if PDF and get the appropriate image - is_pdf = actual_file_name in st.session_state.pdf_metadata + # TAB 3: Bank Details + with tab3: + bank = gt_parse.get('Bank_details', {}) - if is_pdf: - # Get the current page image - current_page = st.session_state.current_page_num.get(actual_file_name, 0) - pdf_meta = st.session_state.pdf_metadata[actual_file_name] - current_image = pdf_meta['pages'][current_page] - else: - current_image = st.session_state.images[actual_file_name] - else: - st.error(f"❌ File '{file_name}' not found in uploaded files") - st.info("💡 Available files:") - with st.expander("Show available files"): - for img_name in list(st.session_state.images.keys())[:20]: - st.text(f" • {img_name}") - if len(st.session_state.images) > 20: - st.text(f" ... and {len(st.session_state.images) - 20} more") + col_input, col_btn = st.columns([5, 1]) + with col_input: + bank['Bank_name'] = st.text_input( + "Bank Name", + value=bank.get('Bank_name', ''), + key=f"bank_name_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_bank_name_{selected_file}", + type="primary" if is_ocr_active('Bank_details', 'Bank_name') else "secondary"): + activate_ocr_field('Bank_details', 'Bank_name') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + bank['Bank_acc_no'] = st.text_input( + "Bank Account No", + value=bank.get('Bank_acc_no', ''), + key=f"bank_acc_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_bank_acc_{selected_file}", + type="primary" if is_ocr_active('Bank_details', 'Bank_acc_no') else "secondary"): + activate_ocr_field('Bank_details', 'Bank_acc_no') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + bank['Bank_routing_no'] = st.text_input( + "Bank Routing No", + value=bank.get('Bank_routing_no', ''), + key=f"bank_routing_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_bank_routing_{selected_file}", + type="primary" if is_ocr_active('Bank_details', 'Bank_routing_no') else "secondary"): + activate_ocr_field('Bank_details', 'Bank_routing_no') + + col_input, col_btn = st.columns([5, 1]) + with col_input: + bank['Swift_code'] = st.text_input( + "SWIFT Code", + value=bank.get('Swift_code', ''), + key=f"swift_{selected_file}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_swift_{selected_file}", + type="primary" if is_ocr_active('Bank_details', 'Swift_code') else "secondary"): + activate_ocr_field('Bank_details', 'Swift_code') + + gt_parse['Bank_details'] = bank - if current_image: - # Scale to a reasonable size so canvas doesn't become excessively large - scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image) + # TAB 4: Line Items + with tab4: + current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) + line_items = current_gt_parse.get('Line_items', []) - # Render the canvas. Its internal canvas will be constrained by the wrapper due to CSS above. - canvas_result = st_canvas( - fill_color="rgba(255, 165, 0, 0.3)", - stroke_width=2, - stroke_color="#FF0000", - background_image=scaled_image, - update_streamlit=True, - height=scaled_image.height, - width=scaled_image.width, - drawing_mode="rect", - key=f"canvas_{selected_file}_{st.session_state.canvas_key}", - ) + # Add/Remove row buttons + col_add, col_remove = st.columns([1, 1]) + with col_add: + if st.button("➕ Add New Row", key=f"add_row_{selected_file}", use_container_width=True): + if not st.session_state.button_clicked: + st.session_state.button_clicked = True + new_row = { + "Po_number": "", "Invoice_no": "", "Other_doc_ref_no": "", + "Invoice_date": "", "Invoice_amount_FCY": "", + "Amount_paid_for_each_invoice": "", "Outstanding_balance_FCY": "", + "Discounts_taken_FCY": "", "Adjustments(without_holding_tax)_FCY": "", + "Descriptions": "" + } + current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) + current_line_items = current_gt_parse.get('Line_items', []) + current_line_items.append(new_row) + current_gt_parse['Line_items'] = current_line_items + st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse + st.session_state.modified_indices.add(selected_file) + # Ensure the newly added row's expander is open + new_idx = len(current_line_items) - 1 + expander_key_new = f"line_item_expander_{selected_file}_{new_idx}" + st.session_state[expander_key_new] = True + st.rerun() + with col_remove: + if st.button("➖ Remove Last Row", key=f"remove_row_{selected_file}", + disabled=(len(line_items) == 0), use_container_width=True): + if not st.session_state.button_clicked and len(line_items) > 0: + st.session_state.button_clicked = True + current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) + current_line_items = current_gt_parse.get('Line_items', []) + N = len(current_line_items) + current_line_items.pop() + current_gt_parse['Line_items'] = current_line_items + st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse + st.session_state.modified_indices.add(selected_file) + # Remove the expander flag for the popped row (if present) + popped_idx = N - 1 + expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}" + if expander_key_popped in st.session_state: + del st.session_state[expander_key_popped] + st.rerun() - # Only attempt OCR if there's an active OCR target AND the user has drawn something (objects exist) - if canvas_result.json_data is not None and st.session_state.ocr_active_field: - objects = canvas_result.json_data.get("objects", []) - if len(objects) > 0: - rect = objects[-1] - - bbox = [ - (rect["left"] - paste_x) / scale_ratio, - (rect["top"] - paste_y) / scale_ratio, - (rect["left"] + rect["width"] - paste_x) / scale_ratio, - (rect["top"] + rect["height"] - paste_y) / scale_ratio - ] - - with st.spinner("Performing OCR..."): - ocr_text = perform_ocr(current_image, bbox) + if st.session_state.button_clicked: + st.session_state.button_clicked = False + + # Display each row as an expander with OCR buttons + current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) + line_items = current_gt_parse.get('Line_items', []) + if line_items: + for idx, item in enumerate(line_items): + # Use a persistent session_state flag so expansion state is preserved across reruns. + expander_key = f"line_item_expander_{selected_file}_{idx}" + expanded_default = st.session_state.get(expander_key, False) - if ocr_text and not ocr_text.startswith("OCR Error"): - st.success(f"✅ OCR Result: {ocr_text}") + # Note: do NOT pass a 'key' arg to st.expander to maintain compatibility; control expanded via session_state flag. + with st.expander(f"**Row {idx + 1}** - Invoice: {item.get('Invoice_no', 'N/A')}", expanded=expanded_default): + # PO Number + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Po_number'] = st.text_input( + "PO Number", + value=item.get('Po_number', ''), + key=f"po_num_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_po_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Po_number', idx) else "secondary"): + # ensure expander stays open when user explicitly requests OCR + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Po_number', idx) - gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) + # Invoice No + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Invoice_no'] = st.text_input( + "Invoice No", + value=item.get('Invoice_no', ''), + key=f"inv_no_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_inv_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Invoice_no', idx) else "secondary"): + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Invoice_no', idx) - if st.session_state.ocr_active_section == 'Line_items': - line_items = gt_parse.get('Line_items', []) - row_idx = st.session_state.ocr_line_item_row - if row_idx is not None and row_idx < len(line_items): - line_items[row_idx][st.session_state.ocr_active_field] = ocr_text - gt_parse['Line_items'] = line_items - - # ensure expander stays open for this row after OCR - expander_key = f"line_item_expander_{selected_file}_{row_idx}" + # Other Doc Ref No + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Other_doc_ref_no'] = st.text_input( + "Other Doc Ref No", + value=item.get('Other_doc_ref_no', ''), + key=f"other_doc_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_other_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Other_doc_ref_no', idx) else "secondary"): st.session_state[expander_key] = True - else: - section = st.session_state.ocr_active_section - field = st.session_state.ocr_active_field - if section not in gt_parse: - gt_parse[section] = {} - gt_parse[section][field] = ocr_text + activate_ocr_field('Line_items', 'Other_doc_ref_no', idx) - st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse - st.session_state.modified_indices.add(selected_file) + # Invoice Date + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Invoice_date'] = st.text_input( + "Invoice Date", + value=item.get('Invoice_date', ''), + key=f"inv_date_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_inv_date_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Invoice_date', idx) else "secondary"): + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Invoice_date', idx) - # Keep the OCR field active so user can draw multiple rectangles for the same field - # Field will only change when user clicks a different OCR button + # Invoice Amount FCY + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Invoice_amount_FCY'] = st.text_input( + "Invoice Amount FCY", + value=item.get('Invoice_amount_FCY', ''), + key=f"inv_amt_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_inv_amt_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Invoice_amount_FCY', idx) else "secondary"): + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Invoice_amount_FCY', idx) - # Clear canvas for next OCR by bumping canvas_key then rerun - st.session_state.canvas_key += 1 - st.rerun() - else: - st.error(ocr_text) - else: - st.warning("No file name specified in record") - - # RIGHT SIDE: Editable Details - with right_col: - with st.container(height=700, border=False): - st.markdown("### 📝 Document Details") - - gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) - - tab1, tab2, tab3, tab4 = st.tabs([ - "📄 Remittance Details", - "👥 Party Details", - "🏦 Bank Details", - "📋 Line Items" - ]) - - # TAB 1: Remittance Details - with tab1: - remittance = gt_parse.get('Remittance_details', {}) - - # Each field with OCR button - col_input, col_btn = st.columns([5, 1]) - with col_input: - remittance['Remittance_adv_no'] = st.text_input( - "Remittance Advice No", - value=remittance.get('Remittance_adv_no', ''), - key=f"rem_adv_no_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_rem_adv_no_{selected_file}", - type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_no') else "secondary"): - activate_ocr_field('Remittance_details', 'Remittance_adv_no') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - remittance['Remittance_adv_date'] = st.text_input( - "Remittance Advice Date", - value=remittance.get('Remittance_adv_date', ''), - key=f"rem_adv_date_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_rem_adv_date_{selected_file}", - type="primary" if is_ocr_active('Remittance_details', 'Remittance_adv_date') else "secondary"): - activate_ocr_field('Remittance_details', 'Remittance_adv_date') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - remittance['Payment_method'] = st.text_input( - "Payment Method", - value=remittance.get('Payment_method', ''), - key=f"payment_method_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_payment_method_{selected_file}", - type="primary" if is_ocr_active('Remittance_details', 'Payment_method') else "secondary"): - activate_ocr_field('Remittance_details', 'Payment_method') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - remittance['FCY'] = st.text_input( - "FCY (Foreign Currency)", - value=remittance.get('FCY', ''), - key=f"fcy_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_fcy_{selected_file}", - type="primary" if is_ocr_active('Remittance_details', 'FCY') else "secondary"): - activate_ocr_field('Remittance_details', 'FCY') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - remittance['Total_payment_amt_FCY'] = st.text_input( - "Total Payment Amount (FCY)", - value=remittance.get('Total_payment_amt_FCY', ''), - key=f"total_payment_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_total_payment_{selected_file}", - type="primary" if is_ocr_active('Remittance_details', 'Total_payment_amt_FCY') else "secondary"): - activate_ocr_field('Remittance_details', 'Total_payment_amt_FCY') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - remittance['Payment_date'] = st.text_input( - "Payment Date", - value=remittance.get('Payment_date', ''), - key=f"payment_date_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_payment_date_{selected_file}", - type="primary" if is_ocr_active('Remittance_details', 'Payment_date') else "secondary"): - activate_ocr_field('Remittance_details', 'Payment_date') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - remittance['Payment_ref_no'] = st.text_input( - "Payment Reference No", - value=remittance.get('Payment_ref_no', ''), - key=f"payment_ref_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_payment_ref_{selected_file}", - type="primary" if is_ocr_active('Remittance_details', 'Payment_ref_no') else "secondary"): - activate_ocr_field('Remittance_details', 'Payment_ref_no') - - gt_parse['Remittance_details'] = remittance - - # TAB 2: Customer/Supplier Details with SWAP button - with tab2: - # SWAP BUTTON - Centered and prominent - col1, col2, col3 = st.columns([1, 2, 1]) - with col2: - if st.button("🔄 Swap Customer ↔ Supplier", key=f"swap_btn_{selected_file}", - type="primary", use_container_width=True): - if not st.session_state.just_swapped: - st.session_state.just_swapped = True - swap_customer_supplier_details(selected_file) - st.rerun() - - # Reset the flag after rerun - if st.session_state.just_swapped: - st.session_state.just_swapped = False - - st.markdown("**Customer Details**") - customer_supplier = gt_parse.get('Customer_supplier_details', {}) - - col_input, col_btn = st.columns([5, 1]) - with col_input: - customer_supplier['Customer_name'] = st.text_input( - "Customer Name", - value=customer_supplier.get('Customer_name', ''), - key=f"cust_name_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_cust_name_{selected_file}", - type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_name') else "secondary"): - activate_ocr_field('Customer_supplier_details', 'Customer_name') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - customer_supplier['Customer_address'] = st.text_area( - "Customer Address", - value=customer_supplier.get('Customer_address', ''), - key=f"cust_addr_{selected_file}", - height=60 - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_cust_addr_{selected_file}", - type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_address') else "secondary"): - activate_ocr_field('Customer_supplier_details', 'Customer_address') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - customer_supplier['Customer_contact_info'] = st.text_input( - "Customer Contact Info", - value=customer_supplier.get('Customer_contact_info', ''), - key=f"cust_contact_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_cust_contact_{selected_file}", - type="primary" if is_ocr_active('Customer_supplier_details', 'Customer_contact_info') else "secondary"): - activate_ocr_field('Customer_supplier_details', 'Customer_contact_info') - - st.markdown("**Supplier Details**") - - col_input, col_btn = st.columns([5, 1]) - with col_input: - customer_supplier['Supplier_name'] = st.text_input( - "Supplier Name", - value=customer_supplier.get('Supplier_name', ''), - key=f"supp_name_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_supp_name_{selected_file}", - type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_name') else "secondary"): - activate_ocr_field('Customer_supplier_details', 'Supplier_name') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - customer_supplier['Supplier_address'] = st.text_area( - "Supplier Address", - value=customer_supplier.get('Supplier_address', ''), - key=f"supp_addr_{selected_file}", - height=60 - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_supp_addr_{selected_file}", - type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_address') else "secondary"): - activate_ocr_field('Customer_supplier_details', 'Supplier_address') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - customer_supplier['Supplier_contact_info'] = st.text_input( - "Supplier Contact Info", - value=customer_supplier.get('Supplier_contact_info', ''), - key=f"supp_contact_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_supp_contact_{selected_file}", - type="primary" if is_ocr_active('Customer_supplier_details', 'Supplier_contact_info') else "secondary"): - activate_ocr_field('Customer_supplier_details', 'Supplier_contact_info') - - gt_parse['Customer_supplier_details'] = customer_supplier - - # TAB 3: Bank Details - with tab3: - bank = gt_parse.get('Bank_details', {}) - - col_input, col_btn = st.columns([5, 1]) - with col_input: - bank['Bank_name'] = st.text_input( - "Bank Name", - value=bank.get('Bank_name', ''), - key=f"bank_name_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_bank_name_{selected_file}", - type="primary" if is_ocr_active('Bank_details', 'Bank_name') else "secondary"): - activate_ocr_field('Bank_details', 'Bank_name') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - bank['Bank_acc_no'] = st.text_input( - "Bank Account No", - value=bank.get('Bank_acc_no', ''), - key=f"bank_acc_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_bank_acc_{selected_file}", - type="primary" if is_ocr_active('Bank_details', 'Bank_acc_no') else "secondary"): - activate_ocr_field('Bank_details', 'Bank_acc_no') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - bank['Bank_routing_no'] = st.text_input( - "Bank Routing No", - value=bank.get('Bank_routing_no', ''), - key=f"bank_routing_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_bank_routing_{selected_file}", - type="primary" if is_ocr_active('Bank_details', 'Bank_routing_no') else "secondary"): - activate_ocr_field('Bank_details', 'Bank_routing_no') - - col_input, col_btn = st.columns([5, 1]) - with col_input: - bank['Swift_code'] = st.text_input( - "SWIFT Code", - value=bank.get('Swift_code', ''), - key=f"swift_{selected_file}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_swift_{selected_file}", - type="primary" if is_ocr_active('Bank_details', 'Swift_code') else "secondary"): - activate_ocr_field('Bank_details', 'Swift_code') - - gt_parse['Bank_details'] = bank - - # TAB 4: Line Items - with tab4: - current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) - line_items = current_gt_parse.get('Line_items', []) + # Amount Paid + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Amount_paid_for_each_invoice'] = st.text_input( + "Amount Paid", + value=item.get('Amount_paid_for_each_invoice', ''), + key=f"amt_paid_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_amt_paid_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Amount_paid_for_each_invoice', idx) else "secondary"): + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Amount_paid_for_each_invoice', idx) + + # Outstanding Balance + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Outstanding_balance_FCY'] = st.text_input( + "Outstanding Balance FCY", + value=item.get('Outstanding_balance_FCY', ''), + key=f"out_bal_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_out_bal_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Outstanding_balance_FCY', idx) else "secondary"): + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Outstanding_balance_FCY', idx) + + # Discounts + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Discounts_taken_FCY'] = st.text_input( + "Discounts Taken FCY", + value=item.get('Discounts_taken_FCY', ''), + key=f"disc_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_disc_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Discounts_taken_FCY', idx) else "secondary"): + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Discounts_taken_FCY', idx) + + # Adjustments + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Adjustments(without_holding_tax)_FCY'] = st.text_input( + "Adjustments FCY", + value=item.get('Adjustments(without_holding_tax)_FCY', ''), + key=f"adj_{selected_file}_{idx}" + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_adj_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) else "secondary"): + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) + + # Descriptions + col_input, col_btn = st.columns([5, 1]) + with col_input: + item['Descriptions'] = st.text_area( + "Descriptions", + value=item.get('Descriptions', ''), + key=f"desc_{selected_file}_{idx}", + height=60 + ) + with col_btn: + st.markdown("
", unsafe_allow_html=True) + if st.button("🔍", key=f"ocr_desc_{selected_file}_{idx}", + type="primary" if is_ocr_active('Line_items', 'Descriptions', idx) else "secondary"): + st.session_state[expander_key] = True + activate_ocr_field('Line_items', 'Descriptions', idx) + + # Update line items back to gt_parse + current_gt_parse['Line_items'] = line_items + st.markdown("**📊 Line Items Summary Table**") + + # Display summary table with index starting from 1 + df = pd.DataFrame(line_items) + df.index = df.index + 1 # Start index from 1 + df.index.name = 'SL No' + st.dataframe( + df, + use_container_width=True, + height=300 + ) + else: + st.info("No line items. Click '➕ Add New Row' to add a new row.") - # Add/Remove row buttons - col_add, col_remove = st.columns([1, 1]) - with col_add: - if st.button("➕ Add New Row", key=f"add_row_{selected_file}", use_container_width=True): - if not st.session_state.button_clicked: - st.session_state.button_clicked = True - new_row = { - "Po_number": "", "Invoice_no": "", "Other_doc_ref_no": "", - "Invoice_date": "", "Invoice_amount_FCY": "", - "Amount_paid_for_each_invoice": "", "Outstanding_balance_FCY": "", - "Discounts_taken_FCY": "", "Adjustments(without_holding_tax)_FCY": "", - "Descriptions": "" - } - current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) - current_line_items = current_gt_parse.get('Line_items', []) - current_line_items.append(new_row) - current_gt_parse['Line_items'] = current_line_items - st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse - st.session_state.modified_indices.add(selected_file) - - # Ensure the newly added row's expander is open - new_idx = len(current_line_items) - 1 - expander_key_new = f"line_item_expander_{selected_file}_{new_idx}" - st.session_state[expander_key_new] = True - - st.rerun() + st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse - with col_remove: - if st.button("➖ Remove Last Row", key=f"remove_row_{selected_file}", - disabled=(len(line_items) == 0), use_container_width=True): - if not st.session_state.button_clicked and len(line_items) > 0: - st.session_state.button_clicked = True - current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) - current_line_items = current_gt_parse.get('Line_items', []) - N = len(current_line_items) - current_line_items.pop() - current_gt_parse['Line_items'] = current_line_items - st.session_state.edited_data[selected_file]['gt_parse'] = current_gt_parse - st.session_state.modified_indices.add(selected_file) - - # Remove the expander flag for the popped row (if present) - popped_idx = N - 1 - expander_key_popped = f"line_item_expander_{selected_file}_{popped_idx}" - if expander_key_popped in st.session_state: - del st.session_state[expander_key_popped] - + # Save button + col1, col2 = st.columns([1, 1]) + with col1: + if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"): + if not st.session_state.just_saved: + st.session_state.just_saved = True + auto_save(selected_file) + st.session_state.save_message = "✅ Changes saved successfully!" + st.session_state.save_message_time = time.time() st.rerun() - if st.session_state.button_clicked: - st.session_state.button_clicked = False - - # Display each row as an expander with OCR buttons - current_gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {}) - line_items = current_gt_parse.get('Line_items', []) + if st.session_state.just_saved: + st.session_state.just_saved = False - if line_items: - for idx, item in enumerate(line_items): - # Use a persistent session_state flag so expansion state is preserved across reruns. - expander_key = f"line_item_expander_{selected_file}_{idx}" - expanded_default = st.session_state.get(expander_key, False) - - # Note: do NOT pass a 'key' arg to st.expander to maintain compatibility; control expanded via session_state flag. - with st.expander(f"**Row {idx + 1}** - Invoice: {item.get('Invoice_no', 'N/A')}", expanded=expanded_default): - # PO Number - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Po_number'] = st.text_input( - "PO Number", - value=item.get('Po_number', ''), - key=f"po_num_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_po_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Po_number', idx) else "secondary"): - # ensure expander stays open when user explicitly requests OCR - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Po_number', idx) - - # Invoice No - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Invoice_no'] = st.text_input( - "Invoice No", - value=item.get('Invoice_no', ''), - key=f"inv_no_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_inv_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Invoice_no', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Invoice_no', idx) - - # Other Doc Ref No - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Other_doc_ref_no'] = st.text_input( - "Other Doc Ref No", - value=item.get('Other_doc_ref_no', ''), - key=f"other_doc_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_other_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Other_doc_ref_no', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Other_doc_ref_no', idx) - - # Invoice Date - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Invoice_date'] = st.text_input( - "Invoice Date", - value=item.get('Invoice_date', ''), - key=f"inv_date_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_inv_date_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Invoice_date', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Invoice_date', idx) - - # Invoice Amount FCY - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Invoice_amount_FCY'] = st.text_input( - "Invoice Amount FCY", - value=item.get('Invoice_amount_FCY', ''), - key=f"inv_amt_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_inv_amt_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Invoice_amount_FCY', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Invoice_amount_FCY', idx) - - # Amount Paid - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Amount_paid_for_each_invoice'] = st.text_input( - "Amount Paid", - value=item.get('Amount_paid_for_each_invoice', ''), - key=f"amt_paid_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_amt_paid_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Amount_paid_for_each_invoice', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Amount_paid_for_each_invoice', idx) - - # Outstanding Balance - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Outstanding_balance_FCY'] = st.text_input( - "Outstanding Balance FCY", - value=item.get('Outstanding_balance_FCY', ''), - key=f"out_bal_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_out_bal_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Outstanding_balance_FCY', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Outstanding_balance_FCY', idx) - - # Discounts - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Discounts_taken_FCY'] = st.text_input( - "Discounts Taken FCY", - value=item.get('Discounts_taken_FCY', ''), - key=f"disc_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_disc_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Discounts_taken_FCY', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Discounts_taken_FCY', idx) - - # Adjustments - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Adjustments(without_holding_tax)_FCY'] = st.text_input( - "Adjustments FCY", - value=item.get('Adjustments(without_holding_tax)_FCY', ''), - key=f"adj_{selected_file}_{idx}" - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_adj_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Adjustments(without_holding_tax)_FCY', idx) - - # Descriptions - col_input, col_btn = st.columns([5, 1]) - with col_input: - item['Descriptions'] = st.text_area( - "Descriptions", - value=item.get('Descriptions', ''), - key=f"desc_{selected_file}_{idx}", - height=60 - ) - with col_btn: - st.markdown("
", unsafe_allow_html=True) - if st.button("🔍", key=f"ocr_desc_{selected_file}_{idx}", - type="primary" if is_ocr_active('Line_items', 'Descriptions', idx) else "secondary"): - st.session_state[expander_key] = True - activate_ocr_field('Line_items', 'Descriptions', idx) - - # Update line items back to gt_parse - current_gt_parse['Line_items'] = line_items - - st.markdown("**📊 Line Items Summary Table**") - - # Display summary table with index starting from 1 - df = pd.DataFrame(line_items) - df.index = df.index + 1 # Start index from 1 - df.index.name = 'SL No' - - st.dataframe( - df, - use_container_width=True, - height=300 - ) - else: - st.info("No line items. Click '➕ Add New Row' to add a new row.") - - st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse - - # Save button - col1, col2 = st.columns([1, 1]) - with col1: - if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"): - if not st.session_state.just_saved: - st.session_state.just_saved = True - auto_save(selected_file) - st.session_state.save_message = "✅ Changes saved successfully!" - st.session_state.save_message_time = time.time() - st.rerun() - - if st.session_state.just_saved: - st.session_state.just_saved = False - - if st.session_state.save_message: - st.success(st.session_state.save_message) + if st.session_state.save_message: + st.success(st.session_state.save_message)