tjxj committed on
Commit
f49e74b
·
1 Parent(s): 630e542
Files changed (2) hide show
  1. .history/app_20220620163100.py +51 -0
  2. app.py +10 -10
.history/app_20220620163100.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import os,subprocess,base64
3
+ from subprocess import STDOUT #os process manipulation
4
+ os.system("apt-get update")
5
+ os.system("apt-get install sudo")
6
+ os.system("sudo apt update")
7
+ os.system("apt-get install -y libgl1-mesa-glx")
8
+ os.system("apt install ghostscript python3-tk")
9
+
10
+ import streamlit as st
11
+ # @st.cache
12
+ # def gh():
13
+ # """install ghostscript on the linux machine"""
14
+
15
+ # proc = subprocess.Popen('apt-get update', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
16
+ # proc = subprocess.Popen('apt-get install sudo', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
17
+ # proc = subprocess.Popen('sudo apt update', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
18
+ # proc = subprocess.Popen('apt install ghostscript python3-tk', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
19
+ # proc = subprocess.Popen('apt-get install -y libgl1-mesa-glx', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
20
+ # proc.wait()
21
+ # gh()
22
+ import pandas as pd
23
+ import camelot as cam # extracting tables from PDFs
24
+
25
+ st.title("PDF Table Extractor")
26
+
27
+ input_pdf = st.file_uploader(label = "", type = 'pdf')
28
+
29
+ page_number = st.text_input("请填写表格所在PDF页码,eg: 3", value = 1)
30
+
31
+ if input_pdf is not None:
32
+ # write the uploaded bytes to a PDF file on disk
33
+ with open("input.pdf", "wb") as f:
34
+ base64_pdf = base64.b64encode(input_pdf.read()).decode('utf-8')
35
+ f.write(base64.b64decode(base64_pdf))
36
+ f.close()
37
+
38
+ # read the PDF and parse the requested page(s); no flavor is passed, so camelot's default parser is used (not "stream")
39
+ tables = cam.read_pdf("input.pdf", pages=page_number)
40
+ result = pd.ExcelWriter('result.xlsx', engine='xlsxwriter')
41
+ tables[0].to_excel(result,index=False)
42
+ # for i in range(0,len(tables)):
43
+ # table = tables[i].df
44
+ # sheetname = str(i)
45
+ # table.to_excel(result, sheetname,index=False)
46
+
47
+ with open('result.xlsx','rb') as f:
48
+ st.download_button('提取完成,点击下载!', f,file_name='result.xlsx',mime="application/vnd.ms-excel")
49
+
50
+
51
+
app.py CHANGED
@@ -8,17 +8,17 @@ os.system("apt-get install -y libgl1-mesa-glx")
8
  os.system("apt install ghostscript python3-tk")
9
 
10
  import streamlit as st
11
- @st.cache
12
- def gh():
13
- """install ghostscript on the linux machine"""
14
 
15
- proc = subprocess.Popen('apt-get update', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
16
- proc = subprocess.Popen('apt-get install sudo', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
17
- proc = subprocess.Popen('sudo apt update', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
18
- proc = subprocess.Popen('apt install ghostscript python3-tk', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
19
- proc = subprocess.Popen('apt-get install -y libgl1-mesa-glx', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
20
- proc.wait()
21
- gh()
22
  import pandas as pd
23
  import camelot as cam # extracting tables from PDFs
24
 
 
8
  os.system("apt install ghostscript python3-tk")
9
 
10
  import streamlit as st
11
+ # @st.cache
12
+ # def gh():
13
+ # """install ghostscript on the linux machine"""
14
 
15
+ # proc = subprocess.Popen('apt-get update', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
16
+ # proc = subprocess.Popen('apt-get install sudo', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
17
+ # proc = subprocess.Popen('sudo apt update', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
18
+ # proc = subprocess.Popen('apt install ghostscript python3-tk', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
19
+ # proc = subprocess.Popen('apt-get install -y libgl1-mesa-glx', shell=True, stdin=None, stdout=open(os.devnull,"wb"), stderr=STDOUT, executable="/bin/bash")
20
+ # proc.wait()
21
+ # gh()
22
  import pandas as pd
23
  import camelot as cam # extracting tables from PDFs
24