Spaces:

yashm
/

Yantra

Sleeping

App Files Files Community

yashm committed on Sep 11, 2024

Commit

da4ce7a

verified ·

1 Parent(s): 48902e5

Create app.py

Browse files

Files changed (1) hide show

app.py +66 -0

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import streamlit as st
+import camelot
+import fitz  # PyMuPDF
+import pandas as pd
+# Set the title of the Streamlit app
+st.title("PDF Table Extractor")
+# Instructions
+st.write("Upload a PDF file containing tables, and this app will extract the tables for you.")
+# File uploader widget
+uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
+if uploaded_file is not None:
+    # Load the uploaded PDF using PyMuPDF
+    pdf_document = fitz.open(stream=uploaded_file.read(), filetype="pdf")
+    # Show the number of pages in the PDF
+    num_pages = pdf_document.page_count
+    st.write(f"The uploaded PDF has {num_pages} pages.")
+    # Let the user select a page to extract tables from
+    page_num = st.number_input("Select the page number to extract tables from", min_value=1, max_value=num_pages, value=1)
+    # Extract tables using Camelot
+    if st.button("Extract Tables"):
+        # Convert the uploaded file to a local file for Camelot to process
+        with open("temp.pdf", "wb") as f:
+            f.write(uploaded_file.getvalue())
+        # Extract tables from the selected page
+        tables = camelot.read_pdf("temp.pdf", pages=str(page_num))
+        if len(tables) > 0:
+            st.write(f"Found {len(tables)} table(s) on page {page_num}.")
+            # Loop through all the extracted tables
+            for i, table in enumerate(tables):
+                st.write(f"Table {i+1}:")
+                # Convert the table to a Pandas DataFrame
+                df = table.df
+                # Display the extracted table
+                st.dataframe(df)
+                # Download button for CSV
+                csv = df.to_csv(index=False).encode('utf-8')
+                st.download_button(
+                    label=f"Download Table {i+1} as CSV",
+                    data=csv,
+                    file_name=f"table_{i+1}.csv",
+                    mime="text/csv"
+                )
+                # Download button for Excel
+                excel_file = df.to_excel(index=False, engine="xlsxwriter")
+                st.download_button(
+                    label=f"Download Table {i+1} as Excel",
+                    data=excel_file,
+                    file_name=f"table_{i+1}.xlsx",
+                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                )
+        else:
+            st.write("No tables found on the selected page.")