Spaces:

Bhuvi13
/

donut_UI

Sleeping

App Files Files Community

Bhuvi13 committed on Sep 10, 2025

Commit

098f047

verified ·

1 Parent(s): 55e21c4

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +780 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,782 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+import os
+# --- Fix: ensure HOME is writable before Streamlit initializes ---
+from pathlib import Path
+# Streamlit writes under $HOME/.streamlit at start-up. Replace HOME when it is
+# missing, is the filesystem root, or points at a location this process cannot
+# write to, so that the write does not fail.
+_home = os.environ.get("HOME", "")
+if _home in ("", "/") or not os.access(_home, os.W_OK):
+    # Prefer the repo working directory if writable, otherwise use /tmp
+    repo_dir = os.getcwd()
+    safe_home = repo_dir if os.access(repo_dir, os.W_OK) else "/tmp"
+    os.environ["HOME"] = safe_home
+    print(f"[startup] HOME not set or unwritable — setting HOME={safe_home}")
+# Ensure the .streamlit folder exists under HOME so Streamlit won't try to write to '/'
+streamlit_dir = Path(os.environ["HOME"]) / ".streamlit"
+try:
+    streamlit_dir.mkdir(parents=True, exist_ok=True)
+    print(f"[startup] ensured {streamlit_dir}")
+except OSError as e:
+    # Best effort only: report the problem and let start-up continue.
+    print(f"[startup] WARNING: could not create {streamlit_dir}: {e}")
+import json
+from io import BytesIO
+from datetime import datetime
+from pathlib import Path
+import hashlib
 import streamlit as st
+import pandas as pd
+from PIL import Image
+from huggingface_hub import login
+# ---------------------------
+# UI: main
+# ---------------------------
+# Configure the page (title shown in the browser tab, wide layout) and render the heading.
+st.set_page_config(page_title="Invoice Extractor (Donut)", layout="wide")
+st.title("Invoice Extraction")
+# Inject a <style> block that sets a grey (#E8E8E8) background on the app, the title and
+# the columns, and tightens paddings/margins. The CSS is passed as raw HTML, hence
+# unsafe_allow_html=True.
+st.markdown(
+    """
+    <style>
+        /* Reduce top padding of main block */
+        .stApp {
+            background-color: #E8E8E8 !important;
+        }
+        div.block-container {
+            padding-top: 2rem;
+            padding-bottom: 1rem;
+        }
+        /* Tighten title spacing */
+        h1 {
+            margin-top: 0.4rem !important;
+            margin-bottom: 0.4rem !important;
+            background-color: #E8E8E8 !important;
+        }
+        /* Reduce gap between columns */
+        [data-testid="column"] {
+            padding-top: 0.5rem;
+            background-color: #E8E8E8 !important;
+        }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+# --- Secure token handling: prefer the in-session token, then the HF_TOKEN env var, then Streamlit secrets; never hardcode or commit the token ---
+from pathlib import Path
+# --- Robust token retrieval (session -> env -> secrets-if-file-exists) ---
+def _get_hf_token():
+    """Find a Hugging Face token and report which source supplied it.
+
+    Sources are consulted in this order:
+      1. ``st.session_state["_hf_token"]`` (set by the interactive login form),
+      2. the ``HF_TOKEN`` environment variable,
+      3. ``st.secrets["HF_TOKEN"]`` — only when a ``secrets.toml`` exists in the
+         project or user ``.streamlit`` directory.
+
+    Returns:
+        A ``(token, source)`` tuple where ``source`` is ``"session"``, ``"env"``
+        or ``"secrets"``; ``(None, None)`` when no token is found.
+    """
+    session_tok = st.session_state.get("_hf_token")
+    if session_tok:
+        return session_tok, "session"
+
+    env_tok = os.getenv("HF_TOKEN")
+    if env_tok:
+        return env_tok, "env"
+
+    try:
+        # Only consult st.secrets when a secrets file is actually present.
+        secret_files = (
+            Path(".streamlit/secrets.toml"),
+            Path.home() / ".streamlit" / "secrets.toml",
+        )
+        if any(candidate.exists() for candidate in secret_files):
+            sec = st.secrets.get("HF_TOKEN")
+            if sec:
+                return sec, "secrets"
+    except Exception:
+        # Deliberately best effort: any failure here is treated as "no secret".
+        pass
+
+    return None, None
+# get token and its source
+hf_token, hf_token_source = _get_hf_token()
+# --- Interactive login fallback (development) ---
+if hf_token is None:
+    st.subheader("Login Token🔑")
+    token_input = st.text_input("Enter your Login token (starts with 'hf_'):", type="password")
+    if token_input:
+        if not token_input.startswith("hf_"):
+            st.error("Invalid token format. Token must start with 'hf_'.")
+            st.stop()
+        try:
+            login(token_input)
+            # store only in-memory for this session (not on disk)
+            st.session_state["_hf_token"] = token_input
+            st.session_state.logged_in = True
+            st.success("Logged in successfully. Loading model...")
+            st.rerun()
+        except Exception as e:
+            st.error(f"Failed to log in: {e}")
+            st.stop()
+    else:
+        st.warning("Provide a token via the UI or set HF_TOKEN as an environment variable.")
+        st.stop()
+else:
+    # ensure HF client is logged-in for env/secrets/session tokens
+    try:
+        login(hf_token)
+        st.session_state.logged_in = True
+        # OPTIONAL debug: show token source (no token value)
+        _ = st.query_params  # touch the query params (no-op) to keep UI in sync without using deprecated API
+        # noop, but keeps UI in sync
+        # st.info(f"Token source: {hf_token_source}")   # un-comment for debugging
+    except Exception as e:
+        st.error(f"Failed to log in with {hf_token_source or 'unknown'} token: {e}")
+        st.stop()
+# ---------------------------
+# Configuration (edit these)
+# ---------------------------
+HF_MODEL_ID = "Bhuvi13/model-V7"  # Hugging Face model id handed to load_model_and_processor()
+TASK_PROMPT = "<s_cord-v2>"       # prompt tokenized into the initial decoder_input_ids
+# ---------------------------
+# Helper: load model & processor (cached)
+# ---------------------------
+@st.cache_resource(show_spinner=False)
+def load_model_and_processor(hf_model_id: str, task_prompt: str):
+    """Load the Donut processor and model and prepare the decoder prompt ids.
+
+    torch and transformers are imported inside the function, not at module import
+    time. The result is cached by ``st.cache_resource``.
+
+    Args:
+        hf_model_id: Hugging Face model id passed to ``from_pretrained``.
+        task_prompt: Prompt string tokenized (without special tokens) into the
+            initial ``decoder_input_ids``.
+
+    Returns:
+        ``(processor, model, device, decoder_input_ids)``; the model is in eval
+        mode and both the model and the prompt ids are on ``device`` (CUDA when
+        available, otherwise CPU).
+
+    Raises:
+        RuntimeError: if the ML libraries cannot be imported or the
+            model/processor cannot be loaded. The original exception is
+            attached as ``__cause__``.
+    """
+    try:
+        # lazy imports
+        import torch
+        from transformers import VisionEncoderDecoderModel, DonutProcessor
+    except Exception as e:
+        raise RuntimeError(f"Failed to import ML libraries: {e}") from e
+    try:
+        processor = DonutProcessor.from_pretrained(hf_model_id)
+        model = VisionEncoderDecoderModel.from_pretrained(hf_model_id)
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to load model/processor from Hugging Face ({hf_model_id}). "
+            "Make sure your HF token is available and model id is correct.\n"
+            f"Original error: {e}"
+        ) from e
+    model.eval()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    # Tokenizing the prompt involves no autograd, so no no_grad() guard is needed.
+    decoder_input_ids = processor.tokenizer(
+        task_prompt,
+        add_special_tokens=False,
+        return_tensors="pt",
+    ).input_ids.to(device)
+    return processor, model, device, decoder_input_ids
+def run_inference_on_image(image: Image.Image, processor, model, device, decoder_input_ids):
+    """Run the model on one PIL image and return the parsed prediction.
+
+    The image is converted to pixel values by ``processor``, decoded with
+    ``model.generate`` (single beam, ``max_length=1536``), stripped of the
+    tokenizer's EOS/PAD strings and passed through ``processor.token2json``.
+
+    Returns:
+        Whatever ``token2json`` yields; when that is a string, the result of
+        ``json.loads`` on it, or the string itself if it is not valid JSON.
+    """
+    import torch  # imported here so torch is not touched at module-import time
+
+    encoded = processor(images=image, return_tensors="pt")
+    pixel_values = encoded.pixel_values.to(device)
+
+    with torch.no_grad():
+        generated_ids = model.generate(
+            pixel_values=pixel_values,
+            decoder_input_ids=decoder_input_ids,
+            max_length=1536,
+            num_beams=1,
+            early_stopping=False,
+        )
+
+    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+    tokenizer = processor.tokenizer
+    # Remove any literal EOS / PAD strings left in the decoded text.
+    for marker in (tokenizer.eos_token, tokenizer.pad_token):
+        text = text.replace(marker or "", "")
+    text = text.strip()
+
+    parsed = processor.token2json(text)
+    if not isinstance(parsed, str):
+        return parsed
+    try:
+        return json.loads(parsed)
+    except Exception:
+        # Not JSON: hand back the raw string unchanged.
+        return parsed
+# ---------------------------
+# Helper: map donut output to our UI schema
+# (kept unchanged from your original)
+# ---------------------------
+def map_prediction_to_ui(pred):
+    """Map a raw model prediction onto the flat dictionary the form edits.
+
+    Args:
+        pred: ``None``, a JSON string, or a dict. A dict may carry its payload
+            under ``"gt_parse"``; otherwise the dict itself is the payload. The
+            payload is read through its ``header``, ``items`` and ``summary``
+            entries.
+
+    Returns:
+        A dict with the keys ``Invoice Number``, ``Invoice Date``, ``Due Date``,
+        ``Currency``, ``Subtotal``, ``Tax Percentage``, ``Total Tax``,
+        ``Total Amount``, ``Sender``, ``Recipient``, ``Sender Name``,
+        ``Sender Address``, ``Recipient Name``, ``Recipient Address``,
+        ``Bank Details`` and ``Itemized Data``. Unusable input yields the
+        default (empty) structure instead of raising.
+    """
+    import json
+    import re
+
+    def safe_json_load(s):
+        """Return ``s`` parsed as JSON (dict/list pass through); ``None`` on failure."""
+        if s is None:
+            return None
+        if isinstance(s, (dict, list)):
+            return s
+        if isinstance(s, str):
+            try:
+                return json.loads(s)
+            except Exception:
+                try:
+                    # Second attempt after undoing common escaping artefacts.
+                    t = s.strip()
+                    t = t.replace("\\'", "'").replace('\"{', '{').replace('}\"', '}')
+                    return json.loads(t)
+                except Exception:
+                    return None
+        return None
+
+    def clean_number(x):
+        """Coerce ``x`` to float, dropping separators/symbols; 0.0 when unparsable."""
+        if x is None:
+            return 0.0
+        if isinstance(x, (int, float)):
+            return float(x)
+        s = str(x).strip()
+        if s == "":
+            return 0.0
+        s = re.sub(r"[,\s]", "", s)
+        s = re.sub(r"[^\d\.\-]", "", s)
+        if s in ("", ".", "-", "-."):
+            return 0.0
+        try:
+            return float(s)
+        except Exception:
+            return 0.0
+
+    def as_dict(value):
+        """Return ``value`` as a dict (parsing JSON strings); ``{}`` for anything else."""
+        if isinstance(value, str):
+            value = safe_json_load(value)
+        return value if isinstance(value, dict) else {}
+
+    ui = {
+        "Invoice Number": "",
+        "Invoice Date": "",
+        "Due Date": "",
+        "Currency": "",
+        "Subtotal": 0.0,
+        "Tax Percentage": 0.0,
+        "Total Tax": 0.0,
+        "Total Amount": 0.0,
+        "Sender": {"Name": "", "Address": ""},
+        "Recipient": {"Name": "", "Address": ""},
+        "Sender Name": "",
+        "Sender Address": "",
+        "Recipient Name": "",
+        "Recipient Address": "",
+        "Bank Details": {},
+        "Itemized Data": []
+    }
+    if pred is None:
+        return ui
+    if isinstance(pred, str):
+        parsed = safe_json_load(pred)
+        if parsed is not None:
+            pred = parsed
+    if not isinstance(pred, dict):
+        return ui
+    if "gt_parse" in pred:
+        gt = safe_json_load(pred["gt_parse"])
+        if gt is None:
+            gt = {}
+    else:
+        gt = pred
+    if not isinstance(gt, dict):
+        # e.g. gt_parse decoded to a list: nothing can be mapped.
+        return ui
+
+    # A section that is not a dict would break the .get() lookups below, so
+    # treat it as empty.
+    header = as_dict(gt.get("header"))
+    summary = as_dict(gt.get("summary"))
+    items = gt.get("items") or []
+
+    ui["Invoice Number"] = header.get("invoice_no") or header.get("invoice_number") or ui["Invoice Number"]
+    ui["Invoice Date"] = str(header.get("invoice_date") or header.get("inv_date") or "")
+    ui["Due Date"] = str(header.get("due_date") or header.get("due") or "")
+    ui["Sender Name"] = header.get("sender_name") or header.get("seller_name") or header.get("from_name") or ui["Sender Name"]
+    ui["Sender Address"] = header.get("sender_addr") or header.get("sender_address") or header.get("seller_addr") or ui["Sender Address"]
+    ui["Recipient Name"] = header.get("rcpt_name") or header.get("recipient_name") or header.get("to_name") or ui["Recipient Name"]
+    ui["Recipient Address"] = header.get("rcpt_addr") or header.get("rcpt_address") or header.get("recipient_address") or ui["Recipient Address"]
+    ui["Sender"] = {"Name": ui["Sender Name"], "Address": ui["Sender Address"]}
+    ui["Recipient"] = {"Name": ui["Recipient Name"], "Address": ui["Recipient Address"]}
+
+    # Flat bank_* header keys; the first non-empty value for a target key wins.
+    bank = {}
+    flat_bank_keys = (
+        ("bank_name", "bank_name"),
+        ("bank_acc_no", "bank_account_number"),
+        ("bank_account_number", "bank_account_number"),
+        ("bank_iban", "bank_iban"),
+        ("bank_routing", "bank_routing"),
+        ("bank_swift", "bank_swift"),
+        ("bank_branch", "bank_branch"),
+        ("bank_acc_name", "bank_acc_name"),
+    )
+    for src_key, dst_key in flat_bank_keys:
+        val = header.get(src_key)
+        if val and not bank.get(dst_key):
+            bank[dst_key] = str(val).strip()
+
+    # Nested header["bank"] dict: classify each key by substring. The
+    # account-name test must precede the generic "acc"/"account" test, because
+    # every account-name key also contains "acc".
+    hb = header.get("bank")
+    if isinstance(hb, dict):
+        for k, v in hb.items():
+            if not v:
+                continue
+            lk = str(k).lower()
+            if "iban" in lk:
+                dst_key = "bank_iban"
+            elif "swift" in lk:
+                dst_key = "bank_swift"
+            elif "acc_name" in lk or "account_name" in lk:
+                dst_key = "bank_acc_name"
+            elif "acc" in lk or "account" in lk:
+                dst_key = "bank_account_number"
+            elif "name" in lk and "bank" in lk:
+                dst_key = "bank_name"
+            elif "branch" in lk:
+                dst_key = "bank_branch"
+            else:
+                continue
+            bank[dst_key] = bank.get(dst_key) or str(v).strip()
+    ui["Bank Details"] = bank
+
+    ui["Subtotal"] = clean_number(summary.get("subtotal") or summary.get("sub_total") or summary.get("subTotal"))
+    ui["Tax Percentage"] = clean_number(summary.get("tax_rate") or summary.get("taxRate") or summary.get("tax_percentage"))
+    ui["Total Tax"] = clean_number(summary.get("tax_amount") or summary.get("tax") or summary.get("taxAmount"))
+    ui["Total Amount"] = clean_number(summary.get("total_amount") or summary.get("grand_total") or summary.get("total") or summary.get("amount_total"))
+    ui["Currency"] = summary.get("currency") or header.get("currency") or ui["Currency"] or ""
+
+    # Normalise `items` into a list of row dicts.
+    if isinstance(items, str):
+        parsed_items = safe_json_load(items)
+        if parsed_items is not None:
+            items = parsed_items
+    normalized_items = []
+    if isinstance(items, dict):
+        list_lengths = [len(v) for v in items.values() if isinstance(v, list)]
+        if list_lengths:
+            # Column-oriented dict: expand into rows, padding short columns with "".
+            for i in range(max(list_lengths)):
+                row = {}
+                for k, v in items.items():
+                    if isinstance(v, list):
+                        row[k] = v[i] if i < len(v) else ""
+                    else:
+                        row[k] = v
+                normalized_items.append(row)
+        else:
+            normalized_items.append(items)
+    elif isinstance(items, list):
+        normalized_items = items
+
+    item_rows = []
+    for it in normalized_items:
+        if not isinstance(it, dict):
+            item_rows.append({"Description": str(it), "Quantity": 1, "Unit Price": 0.0, "Amount": 0.0})
+            continue
+        desc = it.get("descriptions") or it.get("description") or it.get("desc") or it.get("item") or it.get("name") or ""
+        qty = it.get("quantity") or it.get("qty") or it.get("Quantity") or ""
+        unit = it.get("unit_price") or it.get("unitPrice") or it.get("price") or ""
+        amt = it.get("amount") or it.get("Line_total") or it.get("line_total") or it.get("total") or ""
+        item_rows.append({
+            "Description": str(desc).strip(),
+            "Quantity": clean_number(qty),
+            "Unit Price": clean_number(unit),
+            "Amount": clean_number(amt),
+        })
+    ui["Itemized Data"] = item_rows
+    return ui
+# Load the (cached) model and processor behind a spinner; on any failure show the
+# error with its traceback and stop this script run.
+try:
+    with st.spinner("Loading model & processor (cached) ..."):
+        processor, model, device, decoder_input_ids = load_model_and_processor(HF_MODEL_ID, TASK_PROMPT)
+    #st.success("Model loaded (cached).")
+except Exception as e:
+    st.error("Could not load model automatically. See details below.")
+    st.exception(e)
+    st.stop()
+# Initialize the session-state keys the UI below reads, without overwriting existing values.
+# raw_prediction is seeded together with extracted_data (same guard).
+if "extracted_data" not in st.session_state:
+    st.session_state.extracted_data = None
+    st.session_state.raw_prediction = None
+# SHA-256 hex digest of the uploaded file; None means no file has been processed yet.
+if "uploaded_file_hash" not in st.session_state:
+    st.session_state.uploaded_file_hash = None
+# True once extraction finished and the editable form should be shown.
+if "show_results" not in st.session_state:
+    st.session_state.show_results = False
+# PIL image of the upload (first page for PDFs), kept for the preview column.
+if "last_image" not in st.session_state:
+    st.session_state.last_image = None
+# Set just before inference starts and cleared right after it.
+if "is_running_inference" not in st.session_state:
+    st.session_state.is_running_inference = False
+# ---------------------------
+# SHOW UPLOAD UI ONLY IF NOT RUNNING INFERENCE AND NOT IN RESULTS
+# ---------------------------
+# Upload screen: shown only while there are no results, no inference in progress and
+# no processed file hash. A successful upload is decoded, run through the model, mapped
+# to the UI schema, stored in session state, and followed by a rerun.
+if (not st.session_state.show_results and
+    not st.session_state.is_running_inference and
+    st.session_state.uploaded_file_hash is None):
+    st.markdown(
+        """
+        Upload an invoice image (png/jpg/jpeg). The app will run your Donut model and map detected fields into
+        an editable UI. After editing you can download the extracted JSONL / CSV.
+        """
+    )
+    st.header("📤 Upload Invoice")
+    uploaded_file = st.file_uploader("Upload invoice image (png/jpg/jpeg/pdf)", type=["png", "jpg", "jpeg", "pdf"], accept_multiple_files=False)
+    # Two-column spacer row; the example-image button in the first column is disabled.
+    col_top_1, col_top_2 = st.columns([1, 3])
+    #with col_top_1:
+        #if st.button("Use example image (if available)"):
+            #st.info("No example included. Please upload an image.")
+    with col_top_2:
+        st.write(" ")
+    if uploaded_file is not None:
+        # Read bytes and compute hash
+        uploaded_bytes = uploaded_file.read()
+        file_hash = hashlib.sha256(uploaded_bytes).hexdigest()
+        # Render image or first PDF page
+        image = None
+        # A file counts as PDF by its ".pdf" extension or an "application/pdf" MIME type.
+        is_pdf = uploaded_file.name.lower().endswith('.pdf') or (hasattr(uploaded_file, 'type') and uploaded_file.type == 'application/pdf')
+        if is_pdf:
+            try:
+                # Imported lazily: pdf2image is only needed for PDF uploads.
+                from pdf2image import convert_from_bytes
+                pages = convert_from_bytes(uploaded_bytes, dpi=200)
+                if len(pages) > 0:
+                    # Only the first page is used.
+                    image = pages[0].convert("RGB")
+                    st.session_state.last_image = image
+                else:
+                    st.error("PDF has no pages.")
+                    image = None
+            except Exception as e:
+                # The exception detail is not shown to the user.
+                st.error("Could not render PDF. Ensure 'pdf2image' and poppler are installed.")
+                image = None
+        else:
+            try:
+                image = Image.open(BytesIO(uploaded_bytes)).convert("RGB")
+                st.session_state.last_image = image
+            except Exception as e:
+                # The exception detail is not shown to the user.
+                st.error("Failed to open uploaded image.")
+                image = None
+        if image is not None:
+            # ✅ SET FLAG TO HIDE UPLOAD UI
+            st.session_state.is_running_inference = True
+            # ✅ RENDER THE SAME LAYOUT AS RESULTS PAGE — RIGHT COLUMN = LOADING TABS
+            left_col, right_col = st.columns([1, 1])
+            with left_col:
+                 st.image(image, caption="Uploaded Invoice", use_container_width=True)
+                 st.write(f"**File Hash:** {file_hash[:8]}...")
+            with right_col:
+                 #st.subheader("📄 Extracted Invoice Details")
+                 #st.caption(f"File Hash: {file_hash[:8]}... | Model: {HF_MODEL_ID}")
+                # Show identical tab structure during loading
+                placeholder_tabs = st.tabs([
+                    "Invoice Details",
+                    "Sender/Recipient info",
+                    "Bank Details",
+                    "Line Items"
+                ])
+                #for tab in placeholder_tabs:
+                    #with tab:
+                        #st.info("⏳ Extracting Invoice Details... Please wait.")
+                        # Optional: show spinner inside each tab
+                        #st.spinner("Processing...")
+                # ACTUALLY RUN INFERENCE
+                with st.spinner("⏳ Extracting Invoice Details... Please wait."):
+                    try:
+                        pred = run_inference_on_image(image, processor, model, device, decoder_input_ids)
+                    except Exception as e:
+                        # NOTE(review): inference_error is stored here but not read anywhere in
+                        # the visible source, so a failed run proceeds with pred=None.
+                        st.session_state.inference_error = str(e)
+                        pred = None
+                # Store results
+                st.session_state.uploaded_file_hash = file_hash
+                st.session_state.raw_prediction = pred
+                try:
+                    mapped = map_prediction_to_ui(pred)
+                except Exception as e:
+                    # NOTE(review): mapping_error is likewise stored but never displayed in the
+                    # visible source; the form then starts from an empty dict.
+                    st.session_state.mapping_error = str(e)
+                    mapped = {}
+                st.session_state.extracted_data = mapped
+                st.session_state.show_results = True
+                st.session_state.is_running_inference = False  # 👈 RESET FLAG
+                st.success("✅ Extraction complete!")
+            # Rerun to show real editable form
+            st.rerun()
+        else:
+            st.error("Could not process uploaded file into an image.")
+# If inference is running (e.g., after rerun or error), show only the layout
+# ---------------------------
+# INFERENCE IN PROGRESS — Show only left/right columns with loading UI
+# ---------------------------
+# Three-way view switch on session state:
+#   * inference flagged as running and no results -> read-only loading layout,
+#   * results available                            -> image preview plus editable form,
+#   * otherwise                                    -> nothing extra (upload UI is rendered above).
+if not st.session_state.show_results and st.session_state.is_running_inference:
+    if st.session_state.last_image is not None:
+        left_col, right_col = st.columns([1, 1])
+        with left_col:
+            st.image(st.session_state.last_image, caption="Uploaded Invoice", use_container_width=True)
+            if st.session_state.uploaded_file_hash:
+                st.write(f"**File Hash:** {st.session_state.uploaded_file_hash[:8]}...")
+        with right_col:
+            #st.subheader("📄 Extracted Invoice Details")
+            #st.caption(f"File Hash: {st.session_state.uploaded_file_hash[:8]}... | Model: {HF_MODEL_ID}")
+            # Same four tabs as the results form, each holding only a wait message.
+            placeholder_tabs = st.tabs([
+                "Invoice Details",
+                "Sender/Recipient info",
+                "Bank Details",
+                "Line Items"
+            ])
+            for tab in placeholder_tabs:
+                with tab:
+                    st.info("⏳ Still processing... Please wait.")
+    else:
+        st.warning("Inference in progress, but no image available. Please re-upload.")
+# ---------------------------
+# RESULTS READY — Show editable form + back button
+# ---------------------------
+elif st.session_state.show_results:
+    # ✅ Back Button — ONLY shown when results are ready
+    # Clears every per-upload session key and reruns, which brings back the upload screen.
+    if st.button("⬅️ Back to Upload"):
+        st.session_state.show_results = False
+        st.session_state.extracted_data = None
+        st.session_state.raw_prediction = None
+        st.session_state.uploaded_file_hash = None
+        st.session_state.last_image = None
+        st.session_state.is_running_inference = False  # 👈 Also reset this
+        st.rerun()
+    # Layout: two columns, image on left, form on right
+    left_col, right_col = st.columns([1, 1])
+    # LEFT: Show image
+    with left_col:
+        if st.session_state.last_image is not None:
+            st.image(st.session_state.last_image, caption="Uploaded Invoice", use_container_width=True)
+            st.write(f"**File Hash:** {st.session_state.uploaded_file_hash[:8]}...")
+            # 👇 RAW MODEL OUTPUT NOW APPEARS HERE, BELOW IMAGE
+            if st.session_state.get('raw_prediction') is not None:
+                with st.expander("🔍 Show raw model output"):
+                    st.json(st.session_state.raw_prediction)
+        else:
+            st.warning("Image preview not available. Please re-upload.")
+        # provide a way to force re-run if needed
+        #if st.button("Re-run extraction for this file"):
+            #st.session_state.raw_prediction = None
+            #st.session_state.extracted_data = None
+            #st.rerun()
+    # RIGHT: Editable form
+    with right_col:
+        # `data` is the object stored in session state, so the assignments below
+        # write widget values straight into st.session_state.extracted_data.
+        data = st.session_state.extracted_data
+        if data is None:
+            st.error("No data extracted. Something went wrong.")
+        else:
+            st.subheader("Editable Invoice Form")
+            tabs = st.tabs(["Invoice Details", "Sender/Recipient info", "Bank Details", "Line Items"])
+            # Form-specific CSS: grey tab background and tighter spacing around inputs.
+            st.markdown(
+                """
+                <style>
+                    div[data-testid="stTabs"] > div > div {
+                        padding-bottom: 5px !important;
+                        margin-top: -5px !important;
+                        background-color: #E8E8E8 !important;
+                    }
+                    .stTextInput, .stNumberInput, .stSelectbox, .stTextArea, .stDateInput {
+                        margin-bottom: -10px !important;
+                        padding-bottom: 5px !important;
+                    }
+                    div[data-testid="stTabs"] {
+                        background-color: #E8E8E8 !important;
+                    }
+                    h3:first-of-type {
+                        margin-top: -50px !important;
+                    }
+                </style>
+                """,
+                unsafe_allow_html=True,
+            )
+            # ---------- Invoice Details ----------
+            with tabs[0]:
+                with st.container():
+                    data['Invoice Number'] = st.text_input("Invoice Number", value=data.get('Invoice Number', ''), key="invoice_number")
+                    # Invoice Date is edited as free text (no date parsing is applied)
+                    data['Invoice Date'] = st.text_input(
+                        "Invoice Date",
+                        value=str(data.get('Invoice Date', '')).strip(),
+                        key="invoice_date_text"
+                    )
+                    # Due Date — preserve original format
+                    data['Due Date'] = st.text_input(
+                        "Due Date",
+                        value=str(data.get('Due Date', '')).strip(),
+                        key="due_date_text"
+                    )
+                    # Any currency outside the fixed list selects "Other", which reveals a
+                    # free-text field pre-filled with the extracted value.
+                    curr_options = ['USD', 'EUR', 'GBP', 'INR', 'Other']
+                    curr_value = data.get('Currency', 'USD')
+                    curr_index = curr_options.index(curr_value) if curr_value in curr_options else (len(curr_options) - 1)
+                    new_curr = st.selectbox("Currency", options=curr_options, index=curr_index, key="currency_select")
+                    if new_curr == 'Other':
+                        new_curr = st.text_input("Specify Currency", value=data.get('Currency', ''), key="custom_currency")
+                    data['Currency'] = new_curr
+                    # numeric fields - safe conversion
+                    def safe_number_input(label, value, key):
+                        # Falls back to 0.0 when `value` cannot be converted to float.
+                        try:
+                            v = float(value)
+                        except Exception:
+                            v = 0.0
+                        return st.number_input(label, value=v, key=key)
+                    data['Subtotal'] = safe_number_input("Subtotal", data.get('Subtotal', 0.0), "subtotal")
+                    data['Tax Percentage'] = safe_number_input("Tax Percentage", data.get('Tax Percentage', 0.0), "tax_pct")
+                    data['Total Tax'] = safe_number_input("Total Tax", data.get('Total Tax', 0.0), "total_tax")
+                    data['Total Amount'] = safe_number_input("Total Amount", data.get('Total Amount', 0.0), "total_amount")
+            # ---------- Sender / Recipient ----------
+            with tabs[1]:
+                # Guarantee both party dicts exist (e.g. when mapping failed and data is {}).
+                if 'Sender' not in data:
+                    data['Sender'] = {'Name': '', 'Address': ''}
+                if 'Recipient' not in data:
+                    data['Recipient'] = {'Name': '', 'Address': ''}
+                sender_info = data['Sender']
+                recipient_info = data['Recipient']
+                with st.container():
+                    sender_info['Name'] = st.text_input("Sender Name*", value=sender_info.get('Name', ''), key="sender_name")
+                    sender_info['Address'] = st.text_area("Sender Address*", value=sender_info.get('Address', ''), key="sender_address")
+                    recipient_info['Name'] = st.text_input("Recipient Name*", value=recipient_info.get('Name', ''), key="recipient_name")
+                    recipient_info['Address'] = st.text_area("Recipient Address*", value=recipient_info.get('Address', ''), key="recipient_address")
+                    # NOTE(review): the swap exchanges the two dicts, but the four inputs above
+                    # are keyed widgets; depending on the Streamlit version they may keep their
+                    # previous values on rerun and overwrite the swap — confirm in the running app.
+                    if st.button("⇄ Swap", help="Swap sender and recipient information"):
+                        data['Sender'], data['Recipient'] = data['Recipient'], data['Sender']
+                        st.session_state.extracted_data['Sender'] = data['Sender']
+                        st.session_state.extracted_data['Recipient'] = data['Recipient']
+                        st.rerun()
+            # ---------- Bank Details ----------
+            with tabs[2]:
+                bank_info = data.get('Bank Details', {}) or {}
+                with st.container():
+                    bank_info['bank_name'] = st.text_input("Bank Name", value=bank_info.get('bank_name', ''), key="bank_name")
+                    bank_info['bank_account_number'] = st.text_input("Account Number", value=bank_info.get('bank_account_number', '') or bank_info.get('bank_acc_no',''), key="bank_account")
+                    # Both sides of the `or` below read the same key, so the fallback is redundant.
+                    bank_info['bank_acc_name'] = st.text_input("Bank Account Name", value=bank_info.get('bank_acc_name', '') or bank_info.get('bank_acc_name', ''), key="bank_acc_name")
+                    bank_info['bank_iban'] = st.text_input("IBAN", value=bank_info.get('bank_iban', ''), key="iban")
+                    bank_info['bank_swift'] = st.text_input("SWIFT Code", value=bank_info.get('bank_swift', ''), key="swift_code")
+                    bank_info['bank_routing'] = st.text_input("Routing Number", value=bank_info.get('bank_routing', ''), key="routing")
+                    bank_info['bank_branch'] = st.text_input("Branch", value=bank_info.get('bank_branch', ''), key="branch")
+                data['Bank Details'] = bank_info
+            # ---------- Line Items ----------
+            with tabs[3]:
+                # The editor key includes the file hash, so each upload gets its own editor state.
+                file_hash = st.session_state.get("uploaded_file_hash", "")
+                editor_key = f"item_editor_{file_hash}"
+                if "extracted_data" in st.session_state and "Itemized Data" in st.session_state.extracted_data:
+                    item_rows = st.session_state.extracted_data["Itemized Data"]
+                else:
+                    item_rows = data.get('Itemized Data') or []
+                df = pd.DataFrame(item_rows)
+                # Make sure the four expected columns exist even when there are no rows.
+                for col in ["Description", "Quantity", "Unit Price", "Amount"]:
+                    if col not in df.columns:
+                        df[col] = ""
+                # NOTE(review): edited_df is only used for the emptiness check below; it is
+                # never written back to data['Itemized Data'], so line-item edits are not
+                # part of what "Save to session" stores.
+                edited_df = st.data_editor(df, num_rows="dynamic", key=editor_key, use_container_width=True)
+                if len(edited_df) == 0:
+                    st.info("No line items found in the invoice.")
+            # ---------- Save / Export ----------
+            st.markdown("---")
+            col_a, col_b, col_c = st.columns([1, 1, 1])
+            with col_a:
+                # `data` was read from st.session_state.extracted_data above and is
+                # assigned back to the same key here.
+                if st.button("Save to session"):
+                    st.session_state.extracted_data = data
+                    st.success("Saved to session_state.extracted_data")
+# ---------------------------
+# DEFAULT STATE — Show upload UI
+# ---------------------------
+else:
+    # This is the initial state: nothing running, no results
+    ## Upload UI is already rendered above — nothing more needed here
+    pass
+            #with col_b:
+                #jsonl_str = json.dumps(data, ensure_ascii=False)
+                #st.download_button("Download JSONL", jsonl_str.encode("utf-8"), file_name="extracted_invoice.jsonl", mime="application/json")
+            #with col_c:
+                #items_df = pd.DataFrame(data.get("Itemized Data", []))
+                #csv_bytes = items_df.to_csv(index=False).encode("utf-8")
+                #st.download_button("Download line-items CSV", csv_bytes, file_name="invoice_items.csv", mime="text/csv")
+            #with st.expander("Preview mapped data (for quick check)"):
+                #st.json(data)