Spaces:

Bhuvi13
/

donut_UI

Sleeping

App Files Files Community

Bhuvi13 committed on Sep 22, 2025

Commit

5b3d6b6

verified ·

1 Parent(s): e77e890

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +118 -55

src/streamlit_app.py CHANGED Viewed

@@ -162,6 +162,7 @@ def load_model_and_processor(hf_model_id: str, task_prompt: str):
     return processor, model, device, decoder_input_ids
 def run_inference_on_image(image: Image.Image, processor, model, device, decoder_input_ids):
     import torch
@@ -385,18 +386,26 @@ def map_prediction_to_ui(pred):
     item_rows = []
     for it in normalized_items:
         if not isinstance(it, dict):
-            item_rows.append({"Description": str(it), "Quantity": 1, "Unit Price": 0.0, "Amount": 0.0})
             continue
         desc = it.get("descriptions") or it.get("description") or it.get("desc") or it.get("item") or it.get("name") or ""
         qty = it.get("quantity") or it.get("qty") or it.get("Quantity") or ""
         unit = it.get("unit_price") or it.get("unitPrice") or it.get("price") or ""
         amt = it.get("amount") or it.get("Line_total") or it.get("line_total") or it.get("total") or ""
         item_rows.append({
             "Description": str(desc).strip(),
             "Quantity": float(clean_number(qty)),
             "Unit Price": float(clean_number(unit)),
-            "Amount": float(clean_number(amt))
         })
     ui["Itemized Data"] = item_rows
@@ -413,6 +422,7 @@ def flatten_invoice_to_rows(invoice_data) -> list:
     """
     rows = []
     line_items = invoice_data.get("Itemized Data", [])
     if not line_items:
         # If no line items, create one row with invoice info only
         row = {
@@ -429,10 +439,13 @@ def flatten_invoice_to_rows(invoice_data) -> list:
             "Recipient Name": invoice_data.get("Recipient", {}).get("Name", ""),
             "Recipient Address": invoice_data.get("Recipient", {}).get("Address", ""),
         }
         # Flatten bank details
         bank = invoice_data.get("Bank Details", {})
         for k, v in bank.items():
-            row[f"bank_{k}"] = v
         # Add empty line item fields
         row.update({
@@ -440,6 +453,8 @@ def flatten_invoice_to_rows(invoice_data) -> list:
             "Item Quantity": 0,
             "Item Unit Price": 0.0,
             "Item Amount": 0.0,
         })
         rows.append(row)
         return rows
@@ -464,7 +479,9 @@ def flatten_invoice_to_rows(invoice_data) -> list:
         # Flatten bank details
         bank = invoice_data.get("Bank Details", {})
         for k, v in bank.items():
-            row[f"bank_{k}"] = v
         # Add line item fields
         row.update({
@@ -472,12 +489,15 @@ def flatten_invoice_to_rows(invoice_data) -> list:
             "Item Quantity": item.get("Quantity", 0),
             "Item Unit Price": item.get("Unit Price", 0.0),
             "Item Amount": item.get("Amount", 0.0),
         })
         rows.append(row)
     return rows
 # Load model once
 try:
     with st.spinner("Loading model & processor (cached) ..."):
@@ -501,7 +521,7 @@ if "is_processing_batch" not in st.session_state:
 if not st.session_state.is_processing_batch and len(st.session_state.batch_results) == 0:
     st.markdown("Upload one or more invoice images (png/jpg/jpeg/pdf). The app will process them one by one.")
-    st.header("📤 Upload Invoices (Batch)")
     uploaded_files = st.file_uploader(
         "Upload invoice images (png/jpg/jpeg/pdf)",
@@ -624,6 +644,26 @@ elif len(st.session_state.batch_results) > 0:
     # RIGHT: Editable Form
     with right_col:
         st.subheader(f"Editable Invoice: {current['file_name']}")
         tabs = st.tabs(["Invoice Details", "Sender/Recipient info", "Bank Details", "Line Items"])
         st.markdown(
@@ -728,7 +768,7 @@ elif len(st.session_state.batch_results) > 0:
             item_rows = data.get('Itemized Data', [])
             df = pd.DataFrame(item_rows)
-            for col in ["Description", "Quantity", "Unit Price", "Amount"]:
                 if col not in df.columns:
                     df[col] = ""
@@ -759,15 +799,15 @@ elif len(st.session_state.batch_results) > 0:
         # Download buttons (per file)
         st.markdown("---")
         col_a, col_b, col_c = st.columns([1, 1, 1])
-        with col_a:
-            jsonl_str = json.dumps(data, ensure_ascii=False, indent=2)
-            st.download_button(
-                "📥 Download JSON",
-                jsonl_str.encode("utf-8"),
-                file_name=f"{Path(current['file_name']).stem}_extracted.json",
-                mime="application/json",
-                key=f"dl_json_{selected_hash}"
-            )
         with col_b:
             # ✅ Flatten entire invoice into rows (one per line item)
             rows = flatten_invoice_to_rows(data)
@@ -779,7 +819,7 @@ elif len(st.session_state.batch_results) > 0:
                 "Sender Name", "Sender Address", "Recipient Name", "Recipient Address",
                 "Subtotal", "Tax Percentage", "Total Tax", "Total Amount",
                 "bank_name", "bank_account_number", "bank_iban", "bank_swift", "bank_routing", "bank_branch", "bank_acc_name",
-                "Item Description", "Item Quantity", "Item Unit Price", "Item Amount"
             ]
             # Keep only columns that exist
             existing_cols = [col for col in desired_col_order if col in full_df.columns]
@@ -797,46 +837,69 @@ elif len(st.session_state.batch_results) > 0:
                 mime="text/csv",
                 key=f"dl_csv_{selected_hash}"
             )
-    # Global Download All
-    if st.button("📦 Download All Results (ZIP)", key="download_all"):
-        zip_buffer = BytesIO()
-        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
-            for file_hash, result in st.session_state.batch_results.items():
-                # Save JSON
-                json_data = json.dumps(result["edited_data"], ensure_ascii=False, indent=2)
-                json_name = f"{Path(result['file_name']).stem}_extracted.json"
-                zf.writestr(json_name, json_data)
-                # Save FULL CSV (all data)
-                rows = flatten_invoice_to_rows(result["edited_data"])
-                full_df = pd.DataFrame(rows)
-                # Optional: reorder columns (same as above)
-                desired_col_order = [
-                    "Invoice Number", "Invoice Date", "Due Date", "Currency",
-                    "Sender Name", "Sender Address", "Recipient Name", "Recipient Address",
-                    "Subtotal", "Tax Percentage", "Total Tax", "Total Amount",
-                    "bank_name", "bank_account_number", "bank_iban", "bank_swift", "bank_routing", "bank_branch", "bank_acc_name",
-                    "Item Description", "Item Quantity", "Item Unit Price", "Item Amount"
-                ]
-                existing_cols = [col for col in desired_col_order if col in full_df.columns]
-                remaining_cols = [col for col in full_df.columns if col not in existing_cols]
-                final_col_order = existing_cols + remaining_cols
-                full_df = full_df[final_col_order]
-                csv_data = full_df.to_csv(index=False)
-                csv_name = f"{Path(result['file_name']).stem}_full.csv"
-                zf.writestr(csv_name, csv_data)
-        zip_buffer.seek(0)
-        st.download_button(
-            label="⬇️ Download ZIP",
-            data=zip_buffer,
-            file_name="all_extracted_invoices.zip",
-            mime="application/zip",
-            key="final_download_button"
-        )
 # ---------------------------
 # PROCESSING STATE — Show progress

     return processor, model, device, decoder_input_ids
 def run_inference_on_image(image: Image.Image, processor, model, device, decoder_input_ids):
     import torch
     item_rows = []
     for it in normalized_items:
         if not isinstance(it, dict):
+            item_rows.append({"Description": str(it), "Quantity": 1, "Unit Price": 0.0, "Amount": 0.0, "Tax": 0.0, "Line Total": 0.0})
             continue
         desc = it.get("descriptions") or it.get("description") or it.get("desc") or it.get("item") or it.get("name") or ""
         qty = it.get("quantity") or it.get("qty") or it.get("Quantity") or ""
         unit = it.get("unit_price") or it.get("unitPrice") or it.get("price") or ""
         amt = it.get("amount") or it.get("Line_total") or it.get("line_total") or it.get("total") or ""
+        # Extract item-level tax if available under common keys
+        tax_val = it.get("tax") or it.get("tax_amount") or it.get("line_tax") or it.get("item_tax") or it.get("taxAmount") or ""
+        # Extract explicit line total if present; otherwise fall back to amount
+        line_total_val = it.get("Line_total") or it.get("line_total") or it.get("lineTotal") or amt
         item_rows.append({
             "Description": str(desc).strip(),
             "Quantity": float(clean_number(qty)),
             "Unit Price": float(clean_number(unit)),
+            "Amount": float(clean_number(amt)),
+            "Tax": float(clean_number(tax_val)),
+            "Line Total": float(clean_number(line_total_val))
         })
     ui["Itemized Data"] = item_rows
     """
     rows = []
     line_items = invoice_data.get("Itemized Data", [])
     if not line_items:
         # If no line items, create one row with invoice info only
         row = {
             "Recipient Name": invoice_data.get("Recipient", {}).get("Name", ""),
             "Recipient Address": invoice_data.get("Recipient", {}).get("Address", ""),
         }
         # Flatten bank details
         bank = invoice_data.get("Bank Details", {})
         for k, v in bank.items():
+            # Avoid double-prefixing if key already contains 'bank_'
+            key_name = k if str(k).startswith("bank_") else f"bank_{k}"
+            row[key_name] = v
         # Add empty line item fields
         row.update({
             "Item Quantity": 0,
             "Item Unit Price": 0.0,
             "Item Amount": 0.0,
+            "Item Tax": 0.0,
+            "Item Line Total": 0.0,
         })
         rows.append(row)
         return rows
         # Flatten bank details
         bank = invoice_data.get("Bank Details", {})
         for k, v in bank.items():
+            # Avoid double-prefixing if key already contains 'bank_'
+            key_name = k if str(k).startswith("bank_") else f"bank_{k}"
+            row[key_name] = v
         # Add line item fields
         row.update({
             "Item Quantity": item.get("Quantity", 0),
             "Item Unit Price": item.get("Unit Price", 0.0),
             "Item Amount": item.get("Amount", 0.0),
+            "Item Tax": item.get("Tax", 0.0),
+            "Item Line Total": item.get("Line Total", item.get("Amount", 0.0)),
         })
         rows.append(row)
     return rows
 # Load model once
 try:
     with st.spinner("Loading model & processor (cached) ..."):
 if not st.session_state.is_processing_batch and len(st.session_state.batch_results) == 0:
     st.markdown("Upload one or more invoice images (png/jpg/jpeg/pdf). The app will process them one by one.")
+    st.header("📤 Upload Invoices")
     uploaded_files = st.file_uploader(
         "Upload invoice images (png/jpg/jpeg/pdf)",
     # RIGHT: Editable Form
     with right_col:
         st.subheader(f"Editable Invoice: {current['file_name']}")
+        # ---------- Re-run (per-file) ----------
+        if st.button("🔁 Re-Run", key=f"rerun_{selected_hash}"):
+            # Re-run inference only for the selected file's image, update stored predictions and editable copy
+            with st.spinner("Re-running inference for selected file..."):
+                try:
+                    pred = run_inference_on_image(image, processor, model, device, decoder_input_ids)
+                    mapped = map_prediction_to_ui(pred)
+                    safe_mapped = mapped if isinstance(mapped, dict) else {}
+                    # Save updated results for this single file
+                    st.session_state.batch_results[selected_hash]["raw_pred"] = pred
+                    st.session_state.batch_results[selected_hash]["mapped_data"] = mapped
+                    st.session_state.batch_results[selected_hash]["edited_data"] = safe_mapped.copy()
+                    st.success("✅ Re-run complete — predictions updated for this file.")
+                    # Refresh the UI so the new values appear in the form
+                    st.rerun()
+                except Exception as e:
+                    st.error(f"Re-run failed: {e}")
         tabs = st.tabs(["Invoice Details", "Sender/Recipient info", "Bank Details", "Line Items"])
         st.markdown(
             item_rows = data.get('Itemized Data', [])
             df = pd.DataFrame(item_rows)
+            for col in ["Description", "Quantity", "Unit Price", "Amount", "Tax", "Line Total"]:
                 if col not in df.columns:
                     df[col] = ""
         # Download buttons (per file)
         st.markdown("---")
         col_a, col_b, col_c = st.columns([1, 1, 1])
+        #with col_a:
+            #jsonl_str = json.dumps(data, ensure_ascii=False, indent=2)
+            #st.download_button(
+               # "📥 Download JSON",
+                #jsonl_str.encode("utf-8"),
+                #file_name=f"{Path(current['file_name']).stem}_extracted.json",
+                #mime="application/json",
+                #key=f"dl_json_{selected_hash}"
+            #)
         with col_b:
             # ✅ Flatten entire invoice into rows (one per line item)
             rows = flatten_invoice_to_rows(data)
                 "Sender Name", "Sender Address", "Recipient Name", "Recipient Address",
                 "Subtotal", "Tax Percentage", "Total Tax", "Total Amount",
                 "bank_name", "bank_account_number", "bank_iban", "bank_swift", "bank_routing", "bank_branch", "bank_acc_name",
+                "Item Description", "Item Quantity", "Item Unit Price", "Item Amount", "Item Tax", "Item Line Total"
             ]
             # Keep only columns that exist
             existing_cols = [col for col in desired_col_order if col in full_df.columns]
                 mime="text/csv",
                 key=f"dl_csv_{selected_hash}"
             )
+# Global Download All — produce a single Excel file (concatenated rows) and trigger direct download
+if st.button("📦 Download All Results (Excel)", key="download_all"):
+        # Collect rows from all invoices and concatenate into one DataFrame
+        all_rows = []
+        for file_hash, result in st.session_state.batch_results.items():
+            rows = flatten_invoice_to_rows(result["edited_data"])
+            # Annotate rows with source file name so user can identify which invoice each row came from
+            for r in rows:
+                r["Source File"] = result.get("file_name", file_hash)
+            all_rows.extend(rows)
+        if len(all_rows) == 0:
+            st.warning("No invoice data available to download.")
+        else:
+            full_df = pd.DataFrame(all_rows)
+            # Reorder columns to put Source File first
+            cols = list(full_df.columns)
+            if "Source File" in cols:
+                cols = ["Source File"] + [c for c in cols if c != "Source File"]
+            full_df = full_df[cols]
+            # Try to write XLSX (preferred). If engine not available, fall back to CSV.
+            buffer = BytesIO()
+            dl_filename = "all_extracted_invoices.xlsx"
+            tried_xlsx = False
+            try:
+                with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
+                    full_df.to_excel(writer, index=False, sheet_name="Invoices")
+                tried_xlsx = True
+                buffer.seek(0)
+                file_bytes = buffer.read()
+                mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+            except Exception:
+                # Fallback to CSV
+                buffer = BytesIO()
+                csv_data = full_df.to_csv(index=False).encode("utf-8")
+                buffer.write(csv_data)
+                buffer.seek(0)
+                file_bytes = buffer.read()
+                dl_filename = "all_extracted_invoices.csv"
+                mime = "text/csv"
+            # Trigger immediate download via a data URI and small HTML snippet
+            import base64
+            import streamlit.components.v1 as components
+            b64 = base64.b64encode(file_bytes).decode()
+            data_uri = f"data:{mime};base64,{b64}"
+            auto_dl_html = f'''<html>
+                <body>
+                    <a id="dlLink" href="{data_uri}" download="{dl_filename}"></a>
+                    <script>
+                        const a = document.getElementById('dlLink');
+                        a.click();
+                    </script>
+                </body>
+            </html>'''
+            components.html(auto_dl_html, height=0)
+# ---------------------------
+# PROCESSING STATE
 # ---------------------------
 # PROCESSING STATE — Show progress