pratham0011 committed on
Commit
b57f5b7
·
verified ·
1 Parent(s): 56fc8e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -1,29 +1,32 @@
1
  import streamlit as st
2
  import os
3
  import base64
4
- import tabula
5
-
6
 
7
  st.title("Extract Tables from PDFs")
8
 
9
- input_pdf = st.file_uploader(label="Upload PDF here",type='pdf')
10
 
11
  st.markdown("### Page Number")
12
 
13
- page_number = st.text_input("Enter the page # from where you want the table", value=1)
14
 
15
  if input_pdf is not None:
16
- with open("input.pdf","wb") as f:
17
  base64_pdf = base64.b64encode(input_pdf.read()).decode('utf-8')
18
  f.write(base64.b64decode(base64_pdf))
19
-
20
- tables = tabula.read_pdf("input.pdf", pages=page_number, multiple_tables=True)
21
-
 
 
22
  st.markdown("## Number of Tables")
23
  st.write(len(tables))
24
-
25
  if tables:
26
  option = st.selectbox(label="Select the table to be displayed", options=list(range(1, len(tables) + 1)))
27
-
28
  st.markdown("### Output Table")
29
  st.dataframe(tables[option - 1])
 
 
 
1
  import streamlit as st
2
  import os
3
  import base64
4
+ import pdfplumber
 
5
 
6
  st.title("Extract Tables from PDFs")
7
 
8
+ input_pdf = st.file_uploader(label="Upload PDF here", type='pdf')
9
 
10
  st.markdown("### Page Number")
11
 
12
+ page_number = st.text_input("Enter the page # from where you want the table", value='1')
13
 
14
  if input_pdf is not None:
15
+ with open("input.pdf", "wb") as f:
16
  base64_pdf = base64.b64encode(input_pdf.read()).decode('utf-8')
17
  f.write(base64.b64decode(base64_pdf))
18
+
19
+ pdf = pdfplumber.open("input.pdf")
20
+ page = pdf.pages[int(page_number) - 1]
21
+ tables = page.extract_tables()
22
+
23
  st.markdown("## Number of Tables")
24
  st.write(len(tables))
25
+
26
  if tables:
27
  option = st.selectbox(label="Select the table to be displayed", options=list(range(1, len(tables) + 1)))
28
+
29
  st.markdown("### Output Table")
30
  st.dataframe(tables[option - 1])
31
+
32
+ pdf.close()