Spaces:
Sleeping
Sleeping
File size: 1,139 Bytes
2a62784 b57f5b7 2a62784 0b2878a 2a62784 b57f5b7 2a62784 b57f5b7 2a62784 b57f5b7 2a62784 b57f5b7 0b2878a b57f5b7 0b2878a b57f5b7 0b2878a b57f5b7 0b2878a b57f5b7 0b2878a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import streamlit as st
import os
import base64
import pdfplumber
# Streamlit app: the user uploads a PDF and chooses a page; every table that
# pdfplumber finds on that page is counted and one of them is displayed.
st.title("PDF Table Extractor")
input_pdf = st.file_uploader(label="Upload PDF here", type='pdf')
st.markdown("### Page Number")
page_number = st.text_input("Enter the page # from where you want the table", value='1')

if input_pdf is not None:
    # Rewind in case an earlier script run left the read position mid-file.
    input_pdf.seek(0)
    # Hand the uploaded file-like object straight to pdfplumber instead of
    # copying it to a fixed "input.pdf" on disk: a shared file name lets
    # concurrent sessions overwrite each other's upload. The `with` block
    # guarantees the PDF is closed even if extraction or rendering raises.
    with pdfplumber.open(input_pdf) as pdf:
        num_pages = len(pdf.pages)
        # Tolerate accidental surrounding whitespace in the text box.
        page_text = page_number.strip()
        # isdecimal() (not isdigit()) so that every accepted string is also
        # parseable by int(); isdigit() lets through characters such as
        # superscript digits, which int() rejects with ValueError.
        if not page_text.isdecimal() or not 1 <= int(page_text) <= num_pages:
            st.error(f"Invalid page number. Please enter a number between 1 and {num_pages}.")
        else:
            # Page numbers shown to the user are 1-based; pdf.pages is 0-based.
            page = pdf.pages[int(page_text) - 1]
            tables = page.extract_tables()
            st.markdown("## Number of Tables")
            st.write(len(tables))
            if tables:
                option = st.selectbox(
                    label="Select the table to be displayed",
                    options=list(range(1, len(tables) + 1)),
                )
                st.markdown("### Output Table")
                # The select box is 1-based as well.
                st.dataframe(tables[option - 1])
|