File size: 1,139 Bytes
2a62784
 
 
b57f5b7
2a62784
0b2878a
2a62784
b57f5b7
2a62784
 
 
b57f5b7
2a62784
 
b57f5b7
2a62784
 
b57f5b7
 
0b2878a
b57f5b7
0b2878a
 
 
 
 
b57f5b7
0b2878a
 
b57f5b7
0b2878a
 
b57f5b7
0b2878a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import base64
import io
import os

import pdfplumber
import streamlit as st

# Page heading for the app.
st.title("PDF Table Extractor")

# Upload widget restricted to PDF files; the code below treats a value of
# None as "nothing uploaded yet".
input_pdf = st.file_uploader("Upload PDF here", type='pdf')

st.markdown("### Page Number")

# The page number is collected as free text (defaulting to "1"); it is
# validated later, once the page count of the uploaded document is known.
page_number = st.text_input(
    label="Enter the page # from where you want the table",
    value='1',
)

# Main flow: once a PDF has been uploaded, validate the requested page
# number against the document's page count, extract the tables found on that
# page, and let the user pick which one to display.
if input_pdf is not None:
    # Parse the upload from an in-memory copy of its bytes rather than
    # writing it to a fixed "input.pdf" on disk: a shared on-disk path would
    # be overwritten by concurrent sessions and is never cleaned up.
    # getvalue() returns the full content regardless of the upload's current
    # read position, so script reruns always see the whole document.
    pdf_bytes = io.BytesIO(input_pdf.getvalue())

    # The context manager guarantees the PDF is closed even if extraction or
    # rendering raises part-way through.
    with pdfplumber.open(pdf_bytes) as pdf:
        num_pages = len(pdf.pages)

        # isdecimal() (not isdigit()) is the check that matches what int()
        # accepts: isdigit() is also true for characters such as "²", for
        # which int() raises ValueError. Surrounding whitespace is ignored.
        page_text = page_number.strip()
        requested_page = int(page_text) if page_text.isdecimal() else 0

        if not 1 <= requested_page <= num_pages:
            st.error(f"Invalid page number. Please enter a number between 1 and {num_pages}.")
        else:
            # The UI is 1-based; pdf.pages is a 0-based list.
            page = pdf.pages[requested_page - 1]
            tables = page.extract_tables()

            st.markdown("## Number of Tables")
            st.write(len(tables))

            if tables:
                # Offer 1-based table numbers to the user.
                option = st.selectbox(
                    label="Select the table to be displayed",
                    options=list(range(1, len(tables) + 1)),
                )

                st.markdown("### Output Table")
                st.dataframe(tables[option - 1])