Remittance-Annotation

Sleeping

App Files Files Community

Ankushbl6 committed on Dec 19, 2025

Commit

fcbb889

verified ·

1 Parent(s): 99902ac

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +833 -567

src/streamlit_app.py CHANGED Viewed

@@ -1,580 +1,846 @@
 import os
-from io import BytesIO
-import json
 import streamlit as st
-from PIL import Image, ImageEnhance
 from streamlit_drawable_canvas import st_canvas
 import pytesseract
-# ---------------------------------
-# Page config
-# ---------------------------------
-st.set_page_config(
-    page_title="Remittance GT Annotator - Interactive OCR",
-    layout="wide"
-)
-st.title("Remittance GT Annotator - Interactive OCR")
-# ---------------------------------
-# Field definitions
-# ---------------------------------
-SINGLE_FIELDS = [
-    "Remittance Advice Number",
-    "Remittance Advice Date",
-    "Payment Method",
-    "FCY",
-    "Total Payment Amount in FCY",
-    "Payment Date",
-    "Payment Reference Number/Check Number",
-    "Customer Name",
-    "Customer Address",
-    "Customer Contact Information",
-    "Supplier Name",
-    "Supplier Address",
-    "Supplier Contact Information",
-    "Bank Name",
-    "Bank Account Number",
-    "Bank Routing Number",
-    "SWIFT/BIC Code",
-]
-LINE_ITEM_FIELDS = [
-    "PO number",
-    "Invoice number",
-    "Other document reference number",
-    "Invoice Date",
-    "Invoice Amount in FCY",
-    "Amount Paid for Each Invoice in FCY",
-    "Outstanding Balance in FCY",
-    "Discounts Taken in FCY",
-    "Adjustments(Withholding Tax) in FCY",
-    "Description",
-]
-COLOR_PALETTE = [
-    "#e6194b", "#3cb44b", "#ffe119", "#4363d8", "#f58231",
-    "#911eb4", "#46f0f0", "#f032e6", "#bcf60c", "#fabebe",
-    "#008080", "#e6beff", "#9a6324", "#fffac8", "#800000",
-    "#aaffc3", "#808000", "#ffd8b1", "#000075", "#808080",
-    "#ffe4e1", "#40e0d0", "#ff1493", "#7fffd4", "#b0e0e6",
-    "#ffb6c1", "#add8e6",
-]
-ALL_BASE_FIELDS = SINGLE_FIELDS + LINE_ITEM_FIELDS
-FIELD_COLORS = {field: COLOR_PALETTE[i % len(COLOR_PALETTE)] for i, field in enumerate(ALL_BASE_FIELDS)}
-# ---------------------------------
-# JSONL schema mappings
-# ---------------------------------
-HEADER_GROUPS = {
-    "remittance_advice_details": {
-        "Remittance Advice Number": "remittance_advice_number",
-        "Remittance Advice Date": "remittance_advice_date",
-        "Payment Method": "payment_method",
-        "FCY": "fcy",
-        "Total Payment Amount in FCY": "total_payment_amount_in_fcy",
-        "Payment Date": "payment_date",
-        "Payment Reference Number/Check Number": "payment_reference_number_check_number",
-    },
-    "customer_supplier_details": {
-        "Customer Name": "customer_name",
-        "Customer Address": "customer_address",
-        "Customer Contact Information": "customer_contact_information",
-        "Supplier Name": "supplier_name",
-        "Supplier Address": "supplier_address",
-        "Supplier Contact Information": "supplier_contact_information",
-    },
-    "bank_details": {
-        "Bank Name": "bank_name",
-        "Bank Account Number": "bank_account_number",
-        "Bank Routing Number": "bank_routing_number",
-        "SWIFT/BIC Code": "swift_bic_code",
-    },
-}
-LINE_ITEM_FIELD_KEY_MAP = {
-    "PO number": "po_number",
-    "Invoice number": "invoice_number",
-    "Other document reference number": "other_document_reference_number",
-    "Invoice Date": "invoice_date",
-    "Invoice Amount in FCY": "invoice_amount_in_fcy",
-    "Amount Paid for Each Invoice in FCY": "amount_paid_for_each_invoice_in_fcy",
-    "Outstanding Balance in FCY": "outstanding_balance_in_fcy",
-    "Discounts Taken in FCY": "discounts_taken_in_fcy",
-    "Adjustments(Withholding Tax) in FCY": "adjustments_withholding_tax_in_fcy",
-    "Description": "description",
-}
-# Fixed zoom options
-ZOOM_OPTIONS = [25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 110, 120, 130, 140, 150]
-# ---------------------------------
-# Session state init
-# ---------------------------------
-if "field_values" not in st.session_state:
-    st.session_state.field_values = {}  # {image_name: {field_name: value}}
-if "field_rects_orig" not in st.session_state:
-    st.session_state.field_rects_orig = {}  # {image_name: {field_name: rect_in_orig_coords}}
-if "num_line_items" not in st.session_state:
-    st.session_state.num_line_items = {}  # {image_name: int}
-if "selected_image" not in st.session_state:
-    st.session_state.selected_image = None
-if "zoom_values" not in st.session_state:
-    st.session_state.zoom_values = {}  # {image_name: zoom_int}
-if "rect_version" not in st.session_state:
-    st.session_state.rect_version = {}  # {image_name: int}
-if "image_data" not in st.session_state:
-    st.session_state.image_data = {}  # {image_name: bytes}
-if "pending_delete" not in st.session_state:
-    st.session_state.pending_delete = None
-# Process pending delete early
-if st.session_state.pending_delete is not None:
-    img_name, field_key = st.session_state.pending_delete
-    if img_name in st.session_state.field_rects_orig:
-        st.session_state.field_rects_orig[img_name].pop(field_key, None)
-    if img_name in st.session_state.field_values:
-        st.session_state.field_values[img_name].pop(field_key, None)
-    if img_name in st.session_state.rect_version:
-        st.session_state.rect_version[img_name] += 1
-    st.session_state.pending_delete = None
-    # Force a quick rerun so canvas reflects deletion
-    st.experimental_rerun()
-# ---------------------------------
-# Helper functions
-# ---------------------------------
-@st.cache_data
-def load_image(file_content: bytes):
-    return Image.open(BytesIO(file_content)).convert("RGB")
-@st.cache_data
-def get_display_image(image_bytes: bytes, width: int, height: int):
-    """Cached resize + enhancement to minimize flicker on reruns."""
-    pil_image = Image.open(BytesIO(image_bytes)).convert("RGB")
-    resized = pil_image.resize((width, height), Image.LANCZOS)
-    resized = ImageEnhance.Sharpness(resized).enhance(1.2)
-    resized = ImageEnhance.Contrast(resized).enhance(1.1)
-    return resized
-def get_default_zoom(pil_image: Image.Image) -> int:
-    MAX_WIDTH = 850
-    MAX_HEIGHT = 900
-    default_scale = min(MAX_WIDTH / pil_image.width, MAX_HEIGHT / pil_image.height, 1.0)
-    default_zoom = int(default_scale * 100)
-    closest = min(ZOOM_OPTIONS, key=lambda x: abs(x - default_zoom))
-    return closest
-def build_gt_record_for_file(file_name: str) -> dict:
-    values = st.session_state.field_values.get(file_name, {})
-    num_items = st.session_state.num_line_items.get(file_name, 1)
-    def v(label: str) -> str:
-        return str(values.get(label, "")).strip()
-    gt_parse: dict = {}
-    # Header sections
-    for section_name, mapping in HEADER_GROUPS.items():
-        section_dict = {}
-        for ui_label, json_key in mapping.items():
-            section_dict[json_key] = v(ui_label)
-        gt_parse[section_name] = section_dict
-    # Line items
-    line_items = []
-    for idx in range(1, num_items + 1):
-        row = {}
-        any_non_empty = False
-        for ui_label, json_key in LINE_ITEM_FIELD_KEY_MAP.items():
-            key = f"Line {idx}: {ui_label}"
-            val = str(values.get(key, "")).strip()
-            row[json_key] = val
-            if val:
-                any_non_empty = True
-        if any_non_empty:
-            line_items.append(row)
-    gt_parse["line_items"] = line_items
-    return {
-        "file_name": file_name,
-        "gt_parse": gt_parse,
     }
-def has_any_label(fname: str) -> bool:
-    vals = st.session_state.field_values.get(fname, {})
-    return any(str(v).strip() for v in vals.values())
-# ---------------------------------
-# Upload
-# ---------------------------------
-uploaded_files = st.file_uploader(
-    "Upload remittance images",
-    type=["png", "jpg", "jpeg"],
-    accept_multiple_files=True,
-    label_visibility="collapsed",
-)
-if not uploaded_files:
-    st.info("Upload at least one image to begin.")
-    st.stop()
-images = []
-for f in uploaded_files:
-    f.seek(0)
-    content = f.read()
-    if f.name not in st.session_state.image_data:
-        st.session_state.image_data[f.name] = content
-    img = load_image(st.session_state.image_data[f.name])
-    images.append({"name": f.name, "image": img, "bytes": st.session_state.image_data[f.name]})
-file_names = [img["name"] for img in images]
-selected_name = st.selectbox("Select image", file_names, label_visibility="collapsed")
-st.session_state.selected_image = selected_name
-selected_img_data = next(img for img in images if img["name"] == selected_name)
-pil_image = selected_img_data["image"]
-image_bytes = selected_img_data["bytes"]
-# Init per-image state
-if selected_name not in st.session_state.field_values:
-    st.session_state.field_values[selected_name] = {}
-if selected_name not in st.session_state.field_rects_orig:
-    st.session_state.field_rects_orig[selected_name] = {}
-if selected_name not in st.session_state.num_line_items:
-    st.session_state.num_line_items[selected_name] = 1
-if selected_name not in st.session_state.zoom_values:
-    st.session_state.zoom_values[selected_name] = get_default_zoom(pil_image)
-if selected_name not in st.session_state.rect_version:
-    st.session_state.rect_version[selected_name] = 0
-# ---------------------------------
-# Layout columns
-# ---------------------------------
-col1, col2 = st.columns([3, 2])
-# Defaults for current field
-display_field_name = SINGLE_FIELDS[0]
-storage_field_name = SINGLE_FIELDS[0]
-base_field_for_color = SINGLE_FIELDS[0]
-field_color = FIELD_COLORS[base_field_for_color]
-# ---------------------------------
-# RHS TOP: Field selection + zoom
-# ---------------------------------
-with col2:
-    st.markdown("#### 🎯 Field Selection")
-    def add_line_item():
-        img = st.session_state.selected_image
-        if img:
-            st.session_state.num_line_items[img] += 1
-    def remove_line_item():
-        img = st.session_state.selected_image
-        if img and st.session_state.num_line_items[img] > 1:
-            last_num = st.session_state.num_line_items[img]
-            for lif in LINE_ITEM_FIELDS:
-                key = f"Line {last_num}: {lif}"
-                st.session_state.field_values[img].pop(key, None)
-                st.session_state.field_rects_orig[img].pop(key, None)
-            st.session_state.num_line_items[img] -= 1
-            st.session_state.rect_version[img] += 1
-            st.experimental_rerun()
-    field_type = st.radio("Type", ["Single", "Line Item"], horizontal=True, label_visibility="collapsed")
-    if field_type == "Single":
-        field_name = st.selectbox("Field", SINGLE_FIELDS, label_visibility="collapsed")
-        display_field_name = field_name
-        storage_field_name = field_name
-        base_field_for_color = field_name
     else:
-        num_items = st.session_state.num_line_items[selected_name]
-        line_col1, add_col, rem_col = st.columns([2, 1, 1])
-        with line_col1:
-            line_item_options = [f"Line {i+1}" for i in range(num_items)]
-            selected_line_item = st.selectbox("Line", line_item_options, label_visibility="collapsed")
-            line_item_num = int(selected_line_item.split()[1])
-        with add_col:
-            st.button("➕", key=f"addli_{selected_name}", on_click=add_line_item, help="Add line item")
-        with rem_col:
-            if st.session_state.num_line_items[selected_name] > 1:
-                st.button("➖", key=f"remli_{selected_name}", on_click=remove_line_item, help="Remove line item")
-        base_field = st.selectbox("Field", LINE_ITEM_FIELDS, label_visibility="collapsed")
-        display_field_name = f"{selected_line_item}: {base_field}"
-        storage_field_name = f"Line {line_item_num}: {base_field}"
-        base_field_for_color = base_field
-    if not storage_field_name:
-        storage_field_name = display_field_name
-    field_color = FIELD_COLORS.get(base_field_for_color or display_field_name, "#FF0000")
-    st.markdown(
-        f"**Current:** <span style='color:{field_color}'>●</span> {display_field_name}",
-        unsafe_allow_html=True,
     )
-    st.markdown("#### 🔍 Zoom")
-    current_zoom = st.session_state.zoom_values[selected_name]
-    zoom_index = ZOOM_OPTIONS.index(current_zoom) if current_zoom in ZOOM_OPTIONS else 0
-    def do_zoom_out():
-        img = st.session_state.selected_image
-        curr = st.session_state.zoom_values[img]
-        idx = ZOOM_OPTIONS.index(curr) if curr in ZOOM_OPTIONS else 0
-        if idx > 0:
-            st.session_state.zoom_values[img] = ZOOM_OPTIONS[idx - 1]
-    def do_zoom_in():
-        img = st.session_state.selected_image
-        curr = st.session_state.zoom_values[img]
-        idx = ZOOM_OPTIONS.index(curr) if curr in ZOOM_OPTIONS else 0
-        if idx < len(ZOOM_OPTIONS) - 1:
-            st.session_state.zoom_values[img] = ZOOM_OPTIONS[idx + 1]
-    def do_zoom_fit():
-        img = st.session_state.selected_image
-        img_bytes = st.session_state.image_data.get(img)
-        if img_bytes:
-            pil_img = load_image(img_bytes)
-            st.session_state.zoom_values[img] = get_default_zoom(pil_img)
-    zoom_col1, zoom_col2, zoom_col3, zoom_col4 = st.columns([2, 1, 1, 1])
-    with zoom_col1:
-        zoom = st.selectbox(
-            "Zoom",
-            options=ZOOM_OPTIONS,
-            index=zoom_index,
-            format_func=lambda x: f"{x}%",
-            key=f"zoom_select_{selected_name}",
-            label_visibility="collapsed",
-        )
-        st.session_state.zoom_values[selected_name] = zoom
-    with zoom_col2:
-        st.button("➖", key=f"zoom_out_{selected_name}", help="Zoom out", on_click=do_zoom_out)
-    with zoom_col3:
-        st.button("➕", key=f"zoom_in_{selected_name}", help="Zoom in", on_click=do_zoom_in)
-    with zoom_col4:
-        st.button("Fit", key=f"zoom_fit_{selected_name}", help="Fit to screen", on_click=do_zoom_fit)
-    st.caption(f"Original: {pil_image.width}×{pil_image.height}")
-# ---------------------------------
-# LHS: Canvas / Image
-# ---------------------------------
-with col1:
-    zoom = st.session_state.zoom_values[selected_name]
-    scale = zoom / 100.0
-    disp_w = int(pil_image.width * scale)
-    disp_h = int(pil_image.height * scale)
-    display_image = get_display_image(image_bytes, disp_w, disp_h)
-    def orig_to_display(rect_orig, s):
-        return {
-            "type": "rect",
-            "left": rect_orig["left"] * s,
-            "top": rect_orig["top"] * s,
-            "width": rect_orig["width"] * s,
-            "height": rect_orig["height"] * s,
-            "fill": "rgba(0,0,0,0)",
-            "stroke": rect_orig.get("stroke", "#FF0000"),
-            "strokeWidth": rect_orig.get("strokeWidth", 2),
-            "scaleX": 1,
-            "scaleY": 1,
-        }
-    def display_to_orig(rect_display, s):
-        w = rect_display.get("width", 0) * rect_display.get("scaleX", 1)
-        h = rect_display.get("height", 0) * rect_display.get("scaleY", 1)
-        return {
-            "left": rect_display.get("left", 0) / s,
-            "top": rect_display.get("top", 0) / s,
-            "width": w / s,
-            "height": h / s,
-            "stroke": rect_display.get("stroke", "#FF0000"),
-            "strokeWidth": rect_display.get("strokeWidth", 2),
-        }
-    # Build rectangles from state (one per field)
-    all_display_objects = []
-    rects_for_image = st.session_state.field_rects_orig[selected_name]
-    for fld, rect_orig in rects_for_image.items():
-        disp_rect = orig_to_display(rect_orig, scale)
-        base = fld.split(": ", 1)[1] if ": " in fld else fld
-        disp_rect["stroke"] = FIELD_COLORS.get(base, "#FF0000")
-        disp_rect["strokeWidth"] = 3 if fld == storage_field_name else 2
-        all_display_objects.append(disp_rect)
-    initial_drawing = {"version": "4.4.0", "objects": all_display_objects}
-    expected_count = len(all_display_objects)
-    rect_ver = st.session_state.rect_version[selected_name]
-    num_rects = len(rects_for_image)
-    canvas_key = f"canvas_{selected_name}_z{zoom}_rv{rect_ver}_n{num_rects}"
-    canvas_result = st_canvas(
-        background_image=display_image,
-        height=disp_h,
-        width=disp_w,
-        drawing_mode="rect",
-        stroke_width=3,
-        stroke_color=field_color,
-        fill_color="rgba(255,0,0,0.1)",
-        update_streamlit=True,
-        initial_drawing=initial_drawing,
-        key=canvas_key,
-    )
-    # Detect new rectangle
-    if canvas_result.json_data is not None:
-        objs = canvas_result.json_data.get("objects", []) or []
-        if len(objs) > expected_count:
-            new_rect_display = objs[-1]
-            new_rect_orig = display_to_orig(new_rect_display, scale)
-            new_rect_orig["stroke"] = field_color
-            # Overwrite previous rect for this field (so old one disappears)
-            st.session_state.field_rects_orig[selected_name][storage_field_name] = new_rect_orig
-            st.session_state.rect_version[selected_name] += 1
-            # Auto OCR
-            x1 = max(0, int(new_rect_orig["left"]))
-            y1 = max(0, int(new_rect_orig["top"]))
-            x2 = min(pil_image.width, int(new_rect_orig["left"] + new_rect_orig["width"]))
-            y2 = min(pil_image.height, int(new_rect_orig["top"] + new_rect_orig["height"]))
-            if x2 > x1 and y2 > y1:
-                crop = pil_image.crop((x1, y1, x2, y2))
-                try:
-                    text = pytesseract.image_to_string(crop, config="--psm 6").strip()
-                    if text:
-                        st.session_state.field_values[selected_name][storage_field_name] = text
-                        value_state_key = f"value_{selected_name}_{storage_field_name}"
-                        st.session_state[value_state_key] = text
-                        st.toast(f"✅ OCR: {text[:50]}{'...' if len(text) > 50 else ''}")
-                    else:
-                        st.toast("✅ Rectangle saved (no text detected)")
-                except Exception:
-                    st.toast("✅ Rectangle saved")
             else:
-                st.toast("✅ Rectangle saved")
-            # Rerun once so canvas remounts with cleaned rectangles (no old ones)
-            st.experimental_rerun()
-# ---------------------------------
-# RHS BOTTOM: OCR value, all values, export
-# ---------------------------------
-with col2:
-    st.markdown("#### ✏️ OCR & Value")
-    current_rect_orig = st.session_state.field_rects_orig[selected_name].get(storage_field_name)
-    value_state_key = f"value_{selected_name}_{storage_field_name}"
-    if value_state_key not in st.session_state:
-        st.session_state[value_state_key] = st.session_state.field_values[selected_name].get(
-            storage_field_name, ""
-        )
-    col_btn1, col_btn2, col_btn3 = st.columns(3)
-    with col_btn1:
-        if st.button("💾 Save"):
-            st.session_state.field_values[selected_name][storage_field_name] = st.session_state[value_state_key]
-            st.success("Saved!")
-    with col_btn2:
-        if current_rect_orig and st.button("🔄 Re-OCR"):
-            x1 = max(0, int(current_rect_orig["left"]))
-            y1 = max(0, int(current_rect_orig["top"]))
-            x2 = min(pil_image.width, int(current_rect_orig["left"] + current_rect_orig["width"]))
-            y2 = min(pil_image.height, int(current_rect_orig["top"] + current_rect_orig["height"]))
-            if x2 > x1 and y2 > y1:
-                crop = pil_image.crop((x1, y1, x2, y2))
-                try:
-                    text = pytesseract.image_to_string(crop, config="--psm 6").strip()
-                    if text:
-                        st.session_state.field_values[selected_name][storage_field_name] = text
-                        st.session_state[value_state_key] = text
-                        st.success(f"OCR: {text}")
-                    else:
-                        st.warning("Empty result")
-                except Exception as e:
-                    st.error(f"OCR failed: {e}")
-    with col_btn3:
-        def delete_rect():
-            st.session_state.pending_delete = (selected_name, storage_field_name)
-        if current_rect_orig:
-            st.button("🗑️ Delete", on_click=delete_rect)
-    st.text_area(
-        "Value (auto-filled by OCR)",
-        key=value_state_key,
-        height=80,
-        label_visibility="collapsed",
-        placeholder="Value (auto-filled by OCR)",
-    )
-    # All values
-    with st.expander("📋 All Values"):
-        for f in SINGLE_FIELDS:
-            v = st.session_state.field_values[selected_name].get(f, "")
-            if v.strip():
-                st.write(f"**{f}:** {v}")
-        num_items = st.session_state.num_line_items[selected_name]
-        for i in range(1, num_items + 1):
-            vals = [
-                (lif, st.session_state.field_values[selected_name].get(f"Line {i}: {lif}", ""))
-                for lif in LINE_ITEM_FIELDS
-            ]
-            vals = [(lif, v) for lif, v in vals if v.strip()]
-            if vals:
-                st.write(f"**Line {i}:**")
-                for lif, v in vals:
-                    st.write(f"  {lif}: {v}")
-    # Export
-    st.markdown("#### 📤 JSONL Export")
-    records_all = [
-        build_gt_record_for_file(img["name"])
-        for img in images
-        if has_any_label(img["name"])
-    ]
-    if records_all:
-        all_jsonl_str = "\n".join(json.dumps(rec, ensure_ascii=False) for rec in records_all)
         st.download_button(
-            "⬇️ Export ALL labeled (JSONL)",
-            data=all_jsonl_str.encode("utf-8"),
-            file_name="remittances_ground_truth.jsonl",
-            mime="application/json",
         )
-    else:
-        st.caption("No labeled remittances yet.")
-    current_record = build_gt_record_for_file(selected_name)
-    with st.expander("Preview CURRENT JSON"):
-        st.json(current_record)
-    current_jsonl_str = json.dumps(current_record, ensure_ascii=False) + "\n"
-    st.download_button(
-        "⬇️ Export CURRENT (JSONL)",
-        data=current_jsonl_str.encode("utf-8"),
-        file_name=f"{os.path.splitext(selected_name)[0]}_remittance.jsonl",
-        mime="application/json",
     )

 import os
+from pathlib import Path
+# -----------------------------
+# Environment hardening (HF Spaces, /.cache issue)
+# -----------------------------
+# This runs before `import streamlit` below. When HOME is missing, empty, or
+# "/", it is redirected to the current working directory if that directory is
+# writable, otherwise to /tmp.
+_home = os.environ.get("HOME", "")
+# NOTE(review): the None entry can never match because .get() defaults to "".
+# Only the *value* of HOME is inspected; whether an already-set HOME is
+# writable is not checked, despite the wording of the log message below.
+if _home in ("", "/", None):
+    repo_dir = os.getcwd()
+    safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp"
+    os.environ["HOME"] = safe_home
+    print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}")
+# Create <HOME>/.streamlit up front. A failure is logged and startup continues.
+streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
+try:
+    streamlit_dir.mkdir(parents=True, exist_ok=True)
+    print(f"[startup] ensured {streamlit_dir}")
+except Exception as e:
+    print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")
 import streamlit as st
+import json
+import io
+from PIL import Image
+import time
+import pandas as pd
 from streamlit_drawable_canvas import st_canvas
 import pytesseract
+import numpy as np
+# Tesseract location.
+# On Windows the default installer path is used, but only when the executable
+# is actually there; otherwise pytesseract keeps its default command and
+# resolves "tesseract" through PATH, so non-default installs keep working.
+# On Linux/macOS (HF Spaces installs tesseract via packages.txt) the binary is
+# expected on PATH and nothing needs to be configured.
+_WINDOWS_TESSERACT = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+if os.name == 'nt' and os.path.isfile(_WINDOWS_TESSERACT):
+    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT
+# Page configuration: browser-tab title and wide layout.
+st.set_page_config(page_title="Remittance Data Viewer", layout="wide")
+# Custom CSS to reduce gaps between form fields and style buttons.
+# The stylesheet is injected as raw HTML through st.markdown().
+# NOTE(review): the selectors target Streamlit's generated class names and
+# data-testid attributes, which are presumably internal to Streamlit and may
+# change between versions — re-check the layout after upgrading.
+st.markdown("""
+<style>
+    /* Reduce spacing between form fields */
+    .stTextInput > div > div > input,
+    .stTextArea > div > div > textarea,
+    .stSelectbox > div > div > div {
+        margin-bottom: 0px !important;
     }
+    div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextInput"]),
+    div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stTextArea"]),
+    div[data-testid="stVerticalBlock"] > div:has(div[data-testid="stSelectbox"]) {
+        margin-bottom: 4px !important;
+    }
+    /* Reduce gap between selectbox and following elements */
+    .stSelectbox {
+        margin-bottom: 4px !important;
+    }
+    /* Style for small buttons */
+    .stButton > button {
+        padding: 0.25rem 0.5rem !important;
+        font-size: 1.2rem !important;
+        line-height: 1 !important;
+        min-height: 2rem !important;
+        height: 2rem !important;
+    }
+    /* Reduce padding in form containers */
+    [data-testid="stVerticalBlock"] > [data-testid="stVerticalBlock"] {
+        gap: 0.25rem !important;
+    }
+    /* REDUCE GAP BETWEEN COLUMNS */
+    [data-testid="column"] {
+        padding-left: 0.5rem !important;
+        padding-right: 0.5rem !important;
+    }
+    [data-testid="stHorizontalBlock"] {
+        gap: 0.5rem !important;
+    }
+</style>
+""", unsafe_allow_html=True)
+def load_jsonl(file):
+    """Parse an uploaded JSONL file into a list of records.
+
+    Args:
+        file: Object exposing ``getvalue()`` that returns the raw file bytes
+            (the value returned by ``st.file_uploader`` in this app).
+
+    Returns:
+        list: One decoded JSON value per non-blank line, in file order.
+
+    Raises:
+        UnicodeDecodeError: If the bytes are not valid UTF-8.
+        ValueError: If a non-blank line is not valid JSON; the message names
+            the offending 1-based line number.
+    """
+    # "utf-8-sig" drops a leading byte-order mark (common in files saved on
+    # Windows) that would otherwise make json.loads() reject the first record.
+    content = file.getvalue().decode('utf-8-sig')
+    records = []
+    # Split on "\n" only: str.splitlines() would also break on characters such
+    # as U+2028 that may legally appear unescaped inside JSON strings. A
+    # trailing "\r" from CRLF files is plain whitespace to json.loads().
+    for line_number, line in enumerate(content.split('\n'), start=1):
+        if not line.strip():
+            continue
+        try:
+            records.append(json.loads(line))
+        except json.JSONDecodeError as err:
+            raise ValueError(f"Invalid JSON on line {line_number}: {err}") from err
+    return records
+def save_to_jsonl(data):
+    """Serialise records as JSONL text: one JSON document per line, no trailing newline."""
+    serialised_lines = (json.dumps(entry) for entry in data)
+    return '\n'.join(serialised_lines)
+def perform_ocr(image, bbox):
+    """Run Tesseract OCR on a rectangular region of an image.
+
+    Args:
+        image: Image object exposing ``width``, ``height`` and ``crop()``
+            (presumably a PIL image, as loaded on the upload page).
+        bbox: Sequence ``[x1, y1, x2, y2]`` in image pixel coordinates.
+
+    Returns:
+        str: The recognised text with surrounding whitespace stripped; an
+        empty string when the region, once clamped to the image, has no
+        area; or a message starting with ``"OCR Error: "`` if cropping or
+        OCR raises.
+    """
+    try:
+        # bbox is [x1, y1, x2, y2]
+        x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
+        # Clamp the box to the image bounds.
+        left, top = max(0, x1), max(0, y1)
+        right, bottom = min(image.width, x2), min(image.height, y2)
+        # A zero-area or inverted box (a stray click, or a box entirely
+        # outside the image) has nothing to read. Skip OCR so no
+        # "OCR Error: ..." text is returned for what is simply an empty
+        # selection.
+        if right <= left or bottom <= top:
+            return ""
+        cropped = image.crop((left, top, right, bottom))
+        # --psm 6: treat the crop as a single uniform block of text.
+        text = pytesseract.image_to_string(cropped, config='--psm 6').strip()
+        return text
+    except Exception as e:
+        return f"OCR Error: {str(e)}"
+def scale_image_to_fixed_size(image, target_width=700, target_height=900):
+    """Fit an image inside a fixed-size white canvas, preserving aspect ratio.
+
+    Args:
+        image: PIL image in any mode.
+        target_width: Canvas width in pixels.
+        target_height: Canvas height in pixels.
+
+    Returns:
+        tuple: ``(final_image, ratio, paste_x, paste_y)`` — the RGB canvas of
+        exactly ``target_width`` x ``target_height``, the scale factor
+        applied to the source image, and the top-left offset at which the
+        scaled image was pasted (it is centred on the canvas).
+    """
+    # Normalise to RGB; transparent RGBA pixels are flattened onto white.
+    if image.mode not in ('RGB', 'RGBA'):
+        image = image.convert('RGB')
+    elif image.mode == 'RGBA':
+        background = Image.new('RGB', image.size, (255, 255, 255))
+        background.paste(image, mask=image.split()[3])  # Use alpha channel as mask
+        image = background
+    # The smaller of the two ratios guarantees the result fits both limits.
+    ratio = min(target_width / image.width, target_height / image.height)
+    # int() truncation can produce 0 for extreme aspect ratios (e.g. a strip
+    # one pixel wide and thousands tall), which resize() rejects; keep at
+    # least one pixel per side.
+    new_width = max(1, int(image.width * ratio))
+    new_height = max(1, int(image.height * ratio))
+    # The image is always resampled: LANCZOS when shrinking or enlarging by
+    # less than 1.5x, BICUBIC otherwise (including a ratio of exactly 1.0).
+    if ratio < 1.0 or (ratio > 1.0 and ratio < 1.5):
+        resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+    else:
+        resized_image = image.resize((new_width, new_height), Image.Resampling.BICUBIC)
+    # Centre the scaled image on a white canvas of the target size.
+    final_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))
+    paste_x = (target_width - new_width) // 2
+    paste_y = (target_height - new_height) // 2
+    final_image.paste(resized_image, (paste_x, paste_y))
+    return final_image, ratio, paste_x, paste_y
+# Initialize session state
+# Each key is created only when absent. The mapping literal is re-evaluated on
+# every script run, so the mutable defaults ({}, set(), []) are fresh objects.
+_SESSION_DEFAULTS = {
+    'data': None,
+    'current_index': 0,
+    'edited_data': None,
+    'page': 'upload',
+    'images': {},
+    'modified_indices': set(),
+    'ocr_active_section': None,
+    'ocr_active_field': None,
+    'ocr_line_item_row': None,
+    'canvas_key': 0,
+    'line_items_temp': [],
+    'button_clicked': False,
+    'save_message': None,
+    'save_message_time': None,
+    'just_saved': False,
+}
+for _state_key, _state_default in _SESSION_DEFAULTS.items():
+    if _state_key not in st.session_state:
+        st.session_state[_state_key] = _state_default
+# Auto-save function
+def auto_save(index):
+    """Publish the edited records as the current data and flag ``index`` as modified.
+
+    Does nothing when ``edited_data`` is empty or None.
+    NOTE(review): the copy is shallow, so both lists still reference the same
+    record objects — confirm that is intended.
+    """
+    edited_records = st.session_state.edited_data
+    if not edited_records:
+        return
+    st.session_state.data = edited_records.copy()
+    st.session_state.modified_indices.add(index)
+# Save button callback
+def save_changes_callback():
+    """Save-button handler: persist the current record and arm the success banner."""
+    state = st.session_state
+    auto_save(state.current_index)
+    # The timestamp lets the viewer page expire the message later.
+    state.save_message = "✅ Changes saved successfully!"
+    state.save_message_time = time.time()
+# PAGE 1: Upload Page
+# Collects the ground-truth JSONL and the document images, reports how the two
+# line up by file name, and offers a button to switch to the viewer page.
+if st.session_state.page == 'upload':
+    st.title("📤 Remittance Data Viewer with OCR")
+    st.markdown("### Upload your files to begin")
+    # Step 1: Upload JSONL
+    st.markdown("**Step 1: Upload JSONL File**")
+    uploaded_file = st.file_uploader("Choose a JSONL file", type=['jsonl', 'json'])
+    if uploaded_file is not None:
+        try:
+            data = load_jsonl(uploaded_file)
+            st.session_state.data = data
+            # NOTE(review): list.copy() is shallow — data and edited_data end up
+            # sharing the same record objects; confirm this is intended.
+            st.session_state.edited_data = data.copy()
+            st.success(f"✅ Successfully loaded {len(data)} records!")
+        except Exception as e:
+            # Any decode/parse failure is shown to the user instead of crashing.
+            st.error(f"Error loading file: {str(e)}")
+    # Step 2: Upload Images
+    st.markdown("**Step 2: Upload Images Folder**")
+    uploaded_images = st.file_uploader(
+        "Choose image files",
+        type=['png', 'jpg', 'jpeg', 'tiff', 'tif', 'bmp'],
+        accept_multiple_files=True,
+        help="Select all images from your folder at once"
     )
+    if uploaded_images:
+        # Load images into session state
+        # Keyed by uploaded file name; an unreadable file is skipped with a warning.
+        images_dict = {}
+        for img_file in uploaded_images:
+            try:
+                # NOTE(review): Image.open() presumably defers reading pixel data
+                # until first use — confirm the upload buffer outlives this run.
+                image = Image.open(img_file)
+                images_dict[img_file.name] = image
+            except Exception as e:
+                st.warning(f"Could not load image {img_file.name}: {str(e)}")
+        st.session_state.images = images_dict
+        # Show summary of loaded images and matches with ground truth
+        if st.session_state.data is not None:
+            # gather ground truth file names
+            # (records without a 'file_name' key contribute an empty string)
+            gt_file_names = [rec.get('file_name', '') for rec in st.session_state.data]
+            matched_images = set()
+            unmatched_gt_files = []
+            # Find matched images - CASE SENSITIVE EXACT MATCH ONLY
+            for fname in gt_file_names:
+                if not fname:
+                    continue
+                # Check for exact match in uploaded images
+                if fname in images_dict:
+                    matched_images.add(fname)
+            # Find unmatched ground truth file names
+            # (kept as a list, so a name repeated in the JSONL is listed each time)
+            for fname in gt_file_names:
+                if fname and fname not in matched_images:
+                    unmatched_gt_files.append(fname)
+            st.success(f"✅ Successfully loaded {len(images_dict)} images!")
+            # Numerator counts distinct matched names; denominator counts every
+            # non-empty file_name, duplicates included.
+            st.info(f"🔎 Exact matches: {len(matched_images)}/{len([f for f in gt_file_names if f])}")
+            # Show unmatched files
+            if unmatched_gt_files:
+                st.warning(f"⚠️ {len(unmatched_gt_files)} file(s) from JSONL not matched to images:")
+                with st.expander(f"Show {len(unmatched_gt_files)} unmatched file names"):
+                    for fname in unmatched_gt_files:
+                        st.text(f"  • {fname}")
             else:
+                st.success("✅ All JSONL file names matched to images!")
+        else:
+            st.success(f"✅ Successfully loaded {len(images_dict)} images!")
+            st.info("ℹ️ Upload a JSONL file to see how many images match the ground truth 'file_name' field.")
+    # Continue Button
+    # Only offered once a JSONL has been loaded; images are optional at this point.
+    if st.session_state.data is not None:
+        col1, col2, col3 = st.columns([1, 1, 1])
+        with col2:
+            if st.button("Continue to Viewer →", type="primary", use_container_width=True):
+                # Switch pages, clear the modified-record tracking, and rerun.
+                st.session_state.page = 'viewer'
+                st.session_state.modified_indices = set()
+                st.rerun()
+# PAGE 2: Viewer Page
+elif st.session_state.page == 'viewer':
+    # Clear old save messages (after 3 seconds)
+    if st.session_state.save_message_time is not None:
+        if time.time() - st.session_state.save_message_time > 3:
+            st.session_state.save_message = None
+            st.session_state.save_message_time = None
+    # Header with back button and download options
+    col1, col2, col3, col4 = st.columns([1, 2, 2, 2])
+    with col1:
+        if st.button("← Back to Upload"):
+            st.session_state.page = 'upload'
+            st.session_state.ocr_active_section = None
+            st.session_state.ocr_active_field = None
+            st.session_state.save_message = None
+            st.session_state.save_message_time = None
+            st.rerun()
+    # Download modified records and unmodified records separately
+    with col2:
+        if st.session_state.modified_indices:
+            modified_data = [st.session_state.edited_data[i] for i in sorted(st.session_state.modified_indices)]
+            jsonl_modified = save_to_jsonl(modified_data)
+            st.download_button(
+                label=f"⬇️ Download Modified ({len(modified_data)})",
+                data=jsonl_modified,
+                file_name="modified_remittance_data.jsonl",
+                mime="application/jsonl",
+                type="primary",
+                use_container_width=True
+            )
+        else:
+            st.button(
+                "⬇️ No Modified Records",
+                disabled=True,
+                use_container_width=True
+            )
+    # Download unmodified records (original data excluding modified)
+    with col3:
+        if st.session_state.modified_indices:
+            # Get original unmodified data
+            unmodified_data = [st.session_state.data[i] for i in range(len(st.session_state.data))
+                              if i not in st.session_state.modified_indices]
+            jsonl_unmodified = save_to_jsonl(unmodified_data)
+            st.download_button(
+                label=f"⬇️ Download Unmodified ({len(unmodified_data)})",
+                data=jsonl_unmodified,
+                file_name="unmodified_remittance_data.jsonl",
+                mime="application/jsonl",
+                use_container_width=True
+            )
+        else:
+            st.button(
+                "⬇️ No Unmodified Records",
+                disabled=True,
+                use_container_width=True
+            )
+    # Download all edited data
+    with col4:
+        jsonl_all = save_to_jsonl(st.session_state.edited_data)
         st.download_button(
+            label=f"⬇️ Download All ({len(st.session_state.edited_data)})",
+            data=jsonl_all,
+            file_name="all_remittance_data.jsonl",
+            mime="application/jsonl",
+            use_container_width=True
         )
+    # File selector dropdown
+    file_names = [record.get('file_name', f'Record {i}') for i, record in enumerate(st.session_state.data)]
+    selected_file = st.selectbox(
+        "Select a file to view:",
+        options=range(len(file_names)),
+        format_func=lambda x: f"{'✏️ ' if x in st.session_state.modified_indices else ''}{file_names[x]}",
+        index=st.session_state.current_index
     )
+    st.session_state.current_index = selected_file
+    current_record = st.session_state.edited_data[selected_file]
+    # Main layout: LHS (Image) and RHS (Details) - REDUCED GAP
+    left_col, right_col = st.columns([1.3, 1], gap="small")
+    # LEFT SIDE: Image Display with OCR Canvas
+    with left_col:
+        st.markdown("### 🖼️ Document Image")
+        file_name = current_record.get('file_name', '')
+        if file_name:
+            st.caption(f"**File:** {file_name}")
+            # Try to find matching image - CASE SENSITIVE EXACT MATCH ONLY
+            current_image = None
+            if file_name in st.session_state.images:
+                current_image = st.session_state.images[file_name]
+            else:
+                st.error(f"❌ Image '{file_name}' not found in uploaded images")
+                st.info("💡 Available images:")
+                with st.expander("Show available images"):
+                    for img_name in list(st.session_state.images.keys())[:20]:
+                        st.text(f"  • {img_name}")
+                    if len(st.session_state.images) > 20:
+                        st.text(f"  ... and {len(st.session_state.images) - 20} more")
+            if current_image:
+                # Scale image to fixed size
+                scaled_image, scale_ratio, paste_x, paste_y = scale_image_to_fixed_size(current_image)
+                # Always show canvas for drawing rectangles
+                canvas_result = st_canvas(
+                    fill_color="rgba(255, 165, 0, 0.3)",
+                    stroke_width=2,
+                    stroke_color="#FF0000",
+                    background_image=scaled_image,
+                    update_streamlit=True,
+                    height=scaled_image.height,
+                    width=scaled_image.width,
+                    drawing_mode="rect",
+                    key=f"canvas_{selected_file}_{st.session_state.canvas_key}",
+                )
+                # Process OCR when rectangle is drawn and field is selected
+                if canvas_result.json_data is not None and st.session_state.ocr_active_field:
+                    objects = canvas_result.json_data["objects"]
+                    if len(objects) > 0:
+                        # Get the last drawn rectangle
+                        rect = objects[-1]
+                        # Adjust coordinates for padding and scale back to original image coordinates
+                        bbox = [
+                            (rect["left"] - paste_x) / scale_ratio,
+                            (rect["top"] - paste_y) / scale_ratio,
+                            (rect["left"] + rect["width"] - paste_x) / scale_ratio,
+                            (rect["top"] + rect["height"] - paste_y) / scale_ratio
+                        ]
+                        # Perform OCR on original image
+                        with st.spinner("Performing OCR..."):
+                            ocr_text = perform_ocr(current_image, bbox)
+                        if ocr_text and not ocr_text.startswith("OCR Error"):
+                            st.success(f"✅ OCR Result: {ocr_text}")
+                            # Update the field value
+                            gt_parse = st.session_state.edited_data[selected_file].get('gt_parse', {})
+                            if st.session_state.ocr_active_section == 'Line_items':
+                                # Handle line items
+                                line_items = gt_parse.get('Line_items', [])
+                                row_idx = st.session_state.ocr_line_item_row
+                                if row_idx is not None and row_idx < len(line_items):
+                                    line_items[row_idx][st.session_state.ocr_active_field] = ocr_text
+                                    gt_parse['Line_items'] = line_items
+                            else:
+                                # Handle other sections
+                                section = st.session_state.ocr_active_section
+                                field = st.session_state.ocr_active_field
+                                if section not in gt_parse:
+                                    gt_parse[section] = {}
+                                gt_parse[section][field] = ocr_text
+                            st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
+                            # Clear canvas and reset
+                            st.session_state.canvas_key += 1
+                            time.sleep(0.3)
+                            st.rerun()
+                        else:
+                            st.error(ocr_text)
+        else:
+            st.warning("No file name specified in record")
+    # RIGHT SIDE: Editable Details
+    with right_col:
+        st.markdown("### 📝 Document Details")
+        gt_parse = current_record.get('gt_parse', {})
+        # Create tabs for each section
+        tab1, tab2, tab3, tab4 = st.tabs([
+            "📄 Remittance Details",
+            "👥 Party Details",
+            "🏦 Bank Details",
+            "📋 Line Items"
+        ])
+        # TAB 1: Remittance Details
+        with tab1:
+            # OCR Field Selector
+            remittance_fields = [
+                'Select fields',
+                'Remittance_adv_no',
+                'Remittance_adv_date',
+                'Payment_method',
+                'FCY',
+                'Total_payment_amt_FCY',
+                'Payment_date',
+                'Payment_ref_no'
+            ]
+            selected_rem_field = st.selectbox(
+                "🔍 Select field to populate via OCR:",
+                options=remittance_fields,
+                key=f"rem_ocr_select_{selected_file}"
+            )
+            if selected_rem_field != 'Select fields':
+                st.session_state.ocr_active_section = 'Remittance_details'
+                st.session_state.ocr_active_field = selected_rem_field
+                st.session_state.ocr_line_item_row = None
+            else:
+                if st.session_state.ocr_active_section == 'Remittance_details':
+                    st.session_state.ocr_active_section = None
+                    st.session_state.ocr_active_field = None
+            remittance = gt_parse.get('Remittance_details', {})
+            remittance['Remittance_adv_no'] = st.text_input(
+                "Remittance Advice No",
+                value=remittance.get('Remittance_adv_no', ''),
+                key=f"rem_adv_no_{selected_file}"
+            )
+            remittance['Remittance_adv_date'] = st.text_input(
+                "Remittance Advice Date",
+                value=remittance.get('Remittance_adv_date', ''),
+                key=f"rem_adv_date_{selected_file}"
+            )
+            remittance['Payment_method'] = st.text_input(
+                "Payment Method",
+                value=remittance.get('Payment_method', ''),
+                key=f"payment_method_{selected_file}"
+            )
+            remittance['FCY'] = st.text_input(
+                "FCY (Foreign Currency)",
+                value=remittance.get('FCY', ''),
+                key=f"fcy_{selected_file}"
+            )
+            remittance['Total_payment_amt_FCY'] = st.text_input(
+                "Total Payment Amount (FCY)",
+                value=remittance.get('Total_payment_amt_FCY', ''),
+                key=f"total_payment_{selected_file}"
+            )
+            remittance['Payment_date'] = st.text_input(
+                "Payment Date",
+                value=remittance.get('Payment_date', ''),
+                key=f"payment_date_{selected_file}"
+            )
+            remittance['Payment_ref_no'] = st.text_input(
+                "Payment Reference No",
+                value=remittance.get('Payment_ref_no', ''),
+                key=f"payment_ref_{selected_file}"
+            )
+            gt_parse['Remittance_details'] = remittance
+        # TAB 2: Customer/Supplier Details
+        with tab2:
+            # OCR Field Selector
+            customer_fields = [
+                'Select fields',
+                'Customer_name',
+                'Customer_address',
+                'Customer_contact_info',
+                'Supplier_name',
+                'Supplier_address',
+                'Supplier_contact_info'
+            ]
+            selected_cust_field = st.selectbox(
+                "🔍 Select field to populate via OCR:",
+                options=customer_fields,
+                key=f"cust_ocr_select_{selected_file}"
+            )
+            if selected_cust_field != 'Select fields':
+                st.session_state.ocr_active_section = 'Customer_supplier_details'
+                st.session_state.ocr_active_field = selected_cust_field
+                st.session_state.ocr_line_item_row = None
+            else:
+                if st.session_state.ocr_active_section == 'Customer_supplier_details':
+                    st.session_state.ocr_active_section = None
+                    st.session_state.ocr_active_field = None
+            st.markdown("**Customer Details**")
+            customer_supplier = gt_parse.get('Customer_supplier_details', {})
+            customer_supplier['Customer_name'] = st.text_input(
+                "Customer Name",
+                value=customer_supplier.get('Customer_name', ''),
+                key=f"cust_name_{selected_file}"
+            )
+            customer_supplier['Customer_address'] = st.text_area(
+                "Customer Address",
+                value=customer_supplier.get('Customer_address', ''),
+                key=f"cust_addr_{selected_file}",
+                height=60
+            )
+            customer_supplier['Customer_contact_info'] = st.text_input(
+                "Customer Contact Info",
+                value=customer_supplier.get('Customer_contact_info', ''),
+                key=f"cust_contact_{selected_file}"
+            )
+            st.markdown("**Supplier Details**")
+            customer_supplier['Supplier_name'] = st.text_input(
+                "Supplier Name",
+                value=customer_supplier.get('Supplier_name', ''),
+                key=f"supp_name_{selected_file}"
+            )
+            customer_supplier['Supplier_address'] = st.text_area(
+                "Supplier Address",
+                value=customer_supplier.get('Supplier_address', ''),
+                key=f"supp_addr_{selected_file}",
+                height=60
+            )
+            customer_supplier['Supplier_contact_info'] = st.text_input(
+                "Supplier Contact Info",
+                value=customer_supplier.get('Supplier_contact_info', ''),
+                key=f"supp_contact_{selected_file}"
+            )
+            gt_parse['Customer_supplier_details'] = customer_supplier
+        # TAB 3: Bank Details
+        with tab3:
+            # OCR Field Selector
+            bank_fields = [
+                'Select fields',
+                'Bank_name',
+                'Bank_acc_no',
+                'Bank_routing_no',
+                'Swift_code'
+            ]
+            selected_bank_field = st.selectbox(
+                "🔍 Select field to populate via OCR:",
+                options=bank_fields,
+                key=f"bank_ocr_select_{selected_file}"
+            )
+            if selected_bank_field != 'Select fields':
+                st.session_state.ocr_active_section = 'Bank_details'
+                st.session_state.ocr_active_field = selected_bank_field
+                st.session_state.ocr_line_item_row = None
+            else:
+                if st.session_state.ocr_active_section == 'Bank_details':
+                    st.session_state.ocr_active_section = None
+                    st.session_state.ocr_active_field = None
+            bank = gt_parse.get('Bank_details', {})
+            bank['Bank_name'] = st.text_input(
+                "Bank Name",
+                value=bank.get('Bank_name', ''),
+                key=f"bank_name_{selected_file}"
+            )
+            bank['Bank_acc_no'] = st.text_input(
+                "Bank Account No",
+                value=bank.get('Bank_acc_no', ''),
+                key=f"bank_acc_{selected_file}"
+            )
+            bank['Bank_routing_no'] = st.text_input(
+                "Bank Routing No",
+                value=bank.get('Bank_routing_no', ''),
+                key=f"bank_routing_{selected_file}"
+            )
+            bank['Swift_code'] = st.text_input(
+                "SWIFT Code",
+                value=bank.get('Swift_code', ''),
+                key=f"swift_{selected_file}"
+            )
+            gt_parse['Bank_details'] = bank
+        # TAB 4: Line Items
+        with tab4:
+            # OCR Controls for Line Items - Fixed layout
+            line_items = gt_parse.get('Line_items', [])
+            # Adjusted column widths - all controls in single compact line
+            col_field, col_row, col_add, col_remove = st.columns([1.5, 0.7, 0.30, 0.30])
+            line_item_fields = [
+                'Select fields',
+                'Po_number',
+                'Invoice_no',
+                'Other_doc_ref_no',
+                'Invoice_date',
+                'Invoice_amount_FCY',
+                'Amount_paid_for_each_invoice',
+                'Outstanding_balance_FCY',
+                'Discounts_taken_FCY',
+                'Adjustments(without_holding_tax)_FCY',
+                'Descriptions'
+            ]
+            with col_field:
+                selected_line_field = st.selectbox(
+                    "🔍 Field:",
+                    options=line_item_fields,
+                    key=f"line_ocr_field_{selected_file}"
+                )
+            with col_row:
+                if len(line_items) > 0:
+                    selected_row = st.selectbox(
+                        "Row:",
+                        options=list(range(len(line_items))),
+                        format_func=lambda x: f"Row {x + 1}",
+                        key=f"line_ocr_row_{selected_file}"
+                    )
+                else:
+                    st.selectbox("Row:", options=[], disabled=True, key=f"line_ocr_row_empty_{selected_file}")
+                    selected_row = None
+            with col_add:
+                # Use button with on_click callback to prevent loop
+                if st.button("➕", key=f"add_row_{selected_file}", help="Add new row"):
+                    if not st.session_state.button_clicked:
+                        st.session_state.button_clicked = True
+                        new_row = {
+                            "Po_number": "",
+                            "Invoice_no": "",
+                            "Other_doc_ref_no": "",
+                            "Invoice_date": "",
+                            "Invoice_amount_FCY": "",
+                            "Amount_paid_for_each_invoice": "",
+                            "Outstanding_balance_FCY": "",
+                            "Discounts_taken_FCY": "",
+                            "Adjustments(without_holding_tax)_FCY": "",
+                            "Descriptions": ""
+                        }
+                        line_items.append(new_row)
+                        gt_parse['Line_items'] = line_items
+                        st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
+                        st.session_state.modified_indices.add(selected_file)
+                        st.rerun()
+            with col_remove:
+                if st.button("➖", key=f"remove_row_{selected_file}", help="Remove selected row", disabled=(len(line_items) == 0)):
+                    if not st.session_state.button_clicked and len(line_items) > 0 and selected_row is not None:
+                        st.session_state.button_clicked = True
+                        line_items.pop(selected_row)
+                        gt_parse['Line_items'] = line_items
+                        st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
+                        st.session_state.modified_indices.add(selected_file)
+                        st.rerun()
+            # Reset button clicked flag after processing
+            if st.session_state.button_clicked:
+                st.session_state.button_clicked = False
+            # Set OCR state for line items
+            if selected_line_field != 'Select fields' and selected_row is not None:
+                st.session_state.ocr_active_section = 'Line_items'
+                st.session_state.ocr_active_field = selected_line_field
+                st.session_state.ocr_line_item_row = selected_row
+            else:
+                if st.session_state.ocr_active_section == 'Line_items':
+                    st.session_state.ocr_active_section = None
+                    st.session_state.ocr_active_field = None
+                    st.session_state.ocr_line_item_row = None
+            # Display line items table
+            if line_items:
+                df = pd.DataFrame(line_items)
+                # Convert amount fields to numeric
+                amount_fields = ['Invoice_amount_FCY', 'Amount_paid_for_each_invoice',
+                               'Outstanding_balance_FCY', 'Discounts_taken_FCY',
+                               'Adjustments(without_holding_tax)_FCY']
+                for field in amount_fields:
+                    if field in df.columns:
+                        df[field] = pd.to_numeric(df[field].replace('', None), errors='coerce')
+                column_config = {
+                    "Po_number": st.column_config.TextColumn("PO Number", width="small"),
+                    "Invoice_no": st.column_config.TextColumn("Invoice No", width="small"),
+                    "Other_doc_ref_no": st.column_config.TextColumn("Other Doc Ref No", width="small"),
+                    "Invoice_date": st.column_config.TextColumn("Invoice Date", width="small"),
+                    "Invoice_amount_FCY": st.column_config.NumberColumn("Invoice Amt FCY", width="small", format="%.2f"),
+                    "Amount_paid_for_each_invoice": st.column_config.NumberColumn("Amount Paid", width="small", format="%.2f"),
+                    "Outstanding_balance_FCY": st.column_config.NumberColumn("Outstanding FCY", width="small", format="%.2f"),
+                    "Discounts_taken_FCY": st.column_config.NumberColumn("Discounts FCY", width="small", format="%.2f"),
+                    "Adjustments(without_holding_tax)_FCY": st.column_config.NumberColumn("Adjustments FCY", width="small", format="%.2f"),
+                    "Descriptions": st.column_config.TextColumn("Descriptions", width="medium"),
+                }
+                edited_df = st.data_editor(
+                    df,
+                    column_config=column_config,
+                    num_rows="fixed",
+                    use_container_width=True,
+                    key=f"line_items_table_{selected_file}",
+                    hide_index=False
+                )
+                # Convert back to string
+                for field in amount_fields:
+                    if field in edited_df.columns:
+                        edited_df[field] = edited_df[field].apply(lambda x: str(x) if pd.notna(x) else '')
+                gt_parse['Line_items'] = edited_df.to_dict('records')
+            else:
+                st.info("No line items. Click ➕ to add a new row.")
+        # Update the edited data
+        st.session_state.edited_data[selected_file]['gt_parse'] = gt_parse
+        # Save button
+        st.markdown("---")
+        col1, col2 = st.columns([1, 1])
+        with col1:
+            if st.button("💾 Save Changes", type="primary", use_container_width=True, key=f"save_btn_{selected_file}"):
+                if not st.session_state.just_saved:
+                    st.session_state.just_saved = True
+                    auto_save(selected_file)
+                    st.session_state.save_message = "✅ Changes saved successfully!"
+                    st.session_state.save_message_time = time.time()
+                    st.rerun()
+        # Reset the just_saved flag after rerun
+        if st.session_state.just_saved:
+            st.session_state.just_saved = False
+        # Display save message under the button (appears after rerun)
+        if st.session_state.save_message:
+            st.success(st.session_state.save_message)