Update src/streamlit_app.py
Browse files- src/streamlit_app.py +64 -59
src/streamlit_app.py
CHANGED
|
@@ -41,7 +41,7 @@ def trigger_rerun():
|
|
| 41 |
# UI: main
|
| 42 |
# ---------------------------
|
| 43 |
st.set_page_config(page_title="Invoice Extractor (Donut) - Batch Mode", layout="wide")
|
| 44 |
-
st.title("
|
| 45 |
|
| 46 |
# Reduce top margin and tighten layout
|
| 47 |
st.markdown(
|
|
@@ -603,6 +603,69 @@ if not st.session_state.is_processing_batch and len(st.session_state.batch_resul
|
|
| 603 |
# RESULTS VIEW — Show selector + editable form
|
| 604 |
# ---------------------------
|
| 605 |
elif len(st.session_state.batch_results) > 0:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 606 |
# File selector
|
| 607 |
file_options = {
|
| 608 |
f"{v['file_name']} ({k[:6]})": k
|
|
@@ -838,65 +901,7 @@ elif len(st.session_state.batch_results) > 0:
|
|
| 838 |
key=f"dl_csv_{selected_hash}"
|
| 839 |
)
|
| 840 |
# Global Download All — produce a single Excel file (concatenated rows) and trigger direct download
|
| 841 |
-
if st.button("📦 Download All Results (Excel)", key="download_all"):
|
| 842 |
-
# Collect rows from all invoices and concatenate into one DataFrame
|
| 843 |
-
all_rows = []
|
| 844 |
-
for file_hash, result in st.session_state.batch_results.items():
|
| 845 |
-
rows = flatten_invoice_to_rows(result["edited_data"])
|
| 846 |
-
# Annotate rows with source file name so user can identify which invoice each row came from
|
| 847 |
-
for r in rows:
|
| 848 |
-
r["Source File"] = result.get("file_name", file_hash)
|
| 849 |
-
all_rows.extend(rows)
|
| 850 |
|
| 851 |
-
if len(all_rows) == 0:
|
| 852 |
-
st.warning("No invoice data available to download.")
|
| 853 |
-
else:
|
| 854 |
-
full_df = pd.DataFrame(all_rows)
|
| 855 |
-
|
| 856 |
-
# Reorder columns to put Source File first
|
| 857 |
-
cols = list(full_df.columns)
|
| 858 |
-
if "Source File" in cols:
|
| 859 |
-
cols = ["Source File"] + [c for c in cols if c != "Source File"]
|
| 860 |
-
full_df = full_df[cols]
|
| 861 |
-
|
| 862 |
-
# Try to write XLSX (preferred). If engine not available, fall back to CSV.
|
| 863 |
-
buffer = BytesIO()
|
| 864 |
-
dl_filename = "all_extracted_invoices.xlsx"
|
| 865 |
-
tried_xlsx = False
|
| 866 |
-
try:
|
| 867 |
-
with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
|
| 868 |
-
full_df.to_excel(writer, index=False, sheet_name="Invoices")
|
| 869 |
-
tried_xlsx = True
|
| 870 |
-
buffer.seek(0)
|
| 871 |
-
file_bytes = buffer.read()
|
| 872 |
-
mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
| 873 |
-
except Exception:
|
| 874 |
-
# Fallback to CSV
|
| 875 |
-
buffer = BytesIO()
|
| 876 |
-
csv_data = full_df.to_csv(index=False).encode("utf-8")
|
| 877 |
-
buffer.write(csv_data)
|
| 878 |
-
buffer.seek(0)
|
| 879 |
-
file_bytes = buffer.read()
|
| 880 |
-
dl_filename = "all_extracted_invoices.csv"
|
| 881 |
-
mime = "text/csv"
|
| 882 |
-
|
| 883 |
-
# Trigger immediate download via a data URI and small HTML snippet
|
| 884 |
-
import base64
|
| 885 |
-
import streamlit.components.v1 as components
|
| 886 |
-
b64 = base64.b64encode(file_bytes).decode()
|
| 887 |
-
data_uri = f"data:{mime};base64,{b64}"
|
| 888 |
-
|
| 889 |
-
auto_dl_html = f'''<html>
|
| 890 |
-
<body>
|
| 891 |
-
<a id="dlLink" href="{data_uri}" download="{dl_filename}"></a>
|
| 892 |
-
<script>
|
| 893 |
-
const a = document.getElementById('dlLink');
|
| 894 |
-
a.click();
|
| 895 |
-
</script>
|
| 896 |
-
</body>
|
| 897 |
-
</html>'''
|
| 898 |
-
|
| 899 |
-
components.html(auto_dl_html, height=0)
|
| 900 |
|
| 901 |
# ---------------------------
|
| 902 |
# PROCESSING STATE
|
|
|
|
| 41 |
# UI: main
|
| 42 |
# ---------------------------
|
| 43 |
st.set_page_config(page_title="Invoice Extractor (Donut) - Batch Mode", layout="wide")
|
| 44 |
+
st.title("Invoice Extraction")
|
| 45 |
|
| 46 |
# Reduce top margin and tighten layout
|
| 47 |
st.markdown(
|
|
|
|
| 603 |
# RESULTS VIEW — Show selector + editable form
|
| 604 |
# ---------------------------
|
| 605 |
elif len(st.session_state.batch_results) > 0:
|
| 606 |
+
# ---------------------------
|
| 607 |
+
# Global Download All — produce a single Excel file (concatenated rows) and trigger direct download
|
| 608 |
+
# ---------------------------
|
| 609 |
+
if st.button("📦 Download All Results (Excel)", key="download_all"):
|
| 610 |
+
# Collect rows from all invoices and concatenate into one DataFrame
|
| 611 |
+
all_rows = []
|
| 612 |
+
for file_hash, result in st.session_state.batch_results.items():
|
| 613 |
+
rows = flatten_invoice_to_rows(result["edited_data"])
|
| 614 |
+
# Annotate rows with source file name so user can identify which invoice each row came from
|
| 615 |
+
for r in rows:
|
| 616 |
+
r["Source File"] = result.get("file_name", file_hash)
|
| 617 |
+
all_rows.extend(rows)
|
| 618 |
+
|
| 619 |
+
if len(all_rows) == 0:
|
| 620 |
+
st.warning("No invoice data available to download.")
|
| 621 |
+
else:
|
| 622 |
+
full_df = pd.DataFrame(all_rows)
|
| 623 |
+
|
| 624 |
+
# Reorder columns to put Source File first
|
| 625 |
+
cols = list(full_df.columns)
|
| 626 |
+
if "Source File" in cols:
|
| 627 |
+
cols = ["Source File"] + [c for c in cols if c != "Source File"]
|
| 628 |
+
full_df = full_df[cols]
|
| 629 |
+
|
| 630 |
+
# Try to write XLSX (preferred). If engine not available, fall back to CSV.
|
| 631 |
+
buffer = BytesIO()
|
| 632 |
+
dl_filename = "all_extracted_invoices.xlsx"
|
| 633 |
+
tried_xlsx = False
|
| 634 |
+
try:
|
| 635 |
+
with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
|
| 636 |
+
full_df.to_excel(writer, index=False, sheet_name="Invoices")
|
| 637 |
+
tried_xlsx = True
|
| 638 |
+
buffer.seek(0)
|
| 639 |
+
file_bytes = buffer.read()
|
| 640 |
+
mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
| 641 |
+
except Exception:
|
| 642 |
+
# Fallback to CSV
|
| 643 |
+
buffer = BytesIO()
|
| 644 |
+
csv_data = full_df.to_csv(index=False).encode("utf-8")
|
| 645 |
+
buffer.write(csv_data)
|
| 646 |
+
buffer.seek(0)
|
| 647 |
+
file_bytes = buffer.read()
|
| 648 |
+
dl_filename = "all_extracted_invoices.csv"
|
| 649 |
+
mime = "text/csv"
|
| 650 |
+
|
| 651 |
+
# Trigger immediate download via a data URI and small HTML snippet
|
| 652 |
+
import base64
|
| 653 |
+
import streamlit.components.v1 as components
|
| 654 |
+
b64 = base64.b64encode(file_bytes).decode()
|
| 655 |
+
data_uri = f"data:{mime};base64,{b64}"
|
| 656 |
+
|
| 657 |
+
auto_dl_html = f'''<html>
|
| 658 |
+
<body>
|
| 659 |
+
<a id="dlLink" href="{data_uri}" download="{dl_filename}"></a>
|
| 660 |
+
<script>
|
| 661 |
+
const a = document.getElementById('dlLink');
|
| 662 |
+
a.click();
|
| 663 |
+
</script>
|
| 664 |
+
</body>
|
| 665 |
+
</html>'''
|
| 666 |
+
|
| 667 |
+
components.html(auto_dl_html, height=0)
|
| 668 |
+
|
| 669 |
# File selector
|
| 670 |
file_options = {
|
| 671 |
f"{v['file_name']} ({k[:6]})": k
|
|
|
|
| 901 |
key=f"dl_csv_{selected_hash}"
|
| 902 |
)
|
| 903 |
# Global Download All — produce a single Excel file (concatenated rows) and trigger direct download
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 904 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 905 |
|
| 906 |
# ---------------------------
|
| 907 |
# PROCESSING STATE
|