# table_detector / app.py
# darthPanda: "Upload app.py" (commit 7ffe108), 2.41 kB
import streamlit as st
from pdf2jpg import pdf2jpg
import shutil
import os
from ultralytics import YOLO
import shutil
import os
from tabula import read_pdf
import pandas as pd
import gdown
# ---------------------------------------------------------------------------
# Module-level setup: runs every time this script is executed.
# ---------------------------------------------------------------------------

# Remove detection output left behind by an earlier run so the
# 'prediction/image_<n>' folders written by main() start out empty.
if os.path.isdir('prediction'):
    shutil.rmtree('prediction')

# main() lists this directory for the per-page images; make sure it exists so
# that listing cannot fail.
if not os.path.exists('temp.pdf_dir'):
    os.makedirs('temp.pdf_dir')
    print('not_found')
else:
    print('found')

# Location of the detector weights: remote source and local destination.
url = "https://drive.google.com/uc?id=1zv3VDW-LXuesKLrTm6xSdKGrycutFdHb"
output = "model//best.pt"

# Key the download on the weights FILE, not on the directory: if an earlier
# download was interrupted, the directory exists but the file does not, and
# checking only the directory would skip the download forever.
os.makedirs('model', exist_ok=True)
if not os.path.isfile(output):
    gdown.download(url, output, quiet=False)

# Where main() stores the uploaded PDF before processing it.
temp_file_path = 'temp//temp.pdf'

# Detector shared by every call to main().
model = YOLO(output)
def _natural_key(name):
    """Return a sort key that orders digit runs in *name* numerically.

    Plain string sorting would place "10_page.jpg" before "2_page.jpg";
    this key compares the numeric parts as integers instead.
    """
    import re  # local import: the file's top-level import block does not provide it

    # With a capturing group, re.split keeps the separators, so the result
    # alternates text, digits, text, ... and every odd index is a digit run.
    pieces = re.split(r"(\d+)", name)
    return [int(p) if i % 2 else p.lower() for i, p in enumerate(pieces)]


def main():
    """Render the Streamlit page: upload a PDF, show detections, list tables.

    Steps, all skipped until a file has been uploaded:
      1. Save the upload to ``temp_file_path``.
      2. Convert the PDF to images with ``pdf2jpg``.
      3. Run ``model.predict`` on every file in ``temp.pdf_dir`` and display
         the annotated image saved under ``prediction/image_<index>/``.
      4. Read tables from the PDF with ``read_pdf`` and show each one as a
         dataframe.
    """
    # Set the title of the app
    st.title("Table detection")

    # Create a file uploader to upload PDF files
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
    if uploaded_file is None:
        return

    # The upload directory is not created anywhere else, so create it here;
    # otherwise open() below raises FileNotFoundError on a fresh checkout.
    upload_dir = os.path.dirname(temp_file_path)
    if upload_dir:
        os.makedirs(upload_dir, exist_ok=True)
    with open(temp_file_path, 'wb') as f:
        f.write(uploaded_file.getbuffer())

    inputpath = temp_file_path
    outputpath = ""
    pages_dir = 'temp.pdf_dir'

    with st.spinner('Converting pdf to images...'):
        pdf2jpg.convert_pdf2jpg(inputpath, outputpath, pages="ALL")

    st.markdown('### Images of detected tables')
    with st.spinner('Detecting table in images...'):
        # os.listdir returns entries in arbitrary order; sort them so pages
        # are processed and displayed in a stable, numeric order.
        page_files = sorted(os.listdir(pages_dir), key=_natural_key)
        for index, entry in enumerate(page_files):
            # Construct the full file path
            full_path = os.path.join(pages_dir, entry)
            print(full_path)
            # save=True writes the annotated image under project/name; the
            # display path below relies on it keeping the input's file name.
            model.predict(full_path, save=True, project="prediction", name=f'image_{index}')
            st.image(os.path.join(f'prediction//image_{index}', entry))

    st.markdown('### Extracted data from tables')
    # NOTE(review): the spinner text mentions OCR and images, but the only
    # call made here is read_pdf on the PDF itself - confirm the wording.
    with st.spinner('Performing OCR on tables to extract images...'):
        tables = read_pdf(inputpath, pages='all', multiple_tables=True)
        for number, table in enumerate(tables, start=1):
            print(f"Table {number}")
            print(table)
            st.dataframe(table)

    st.success('Processing Completed!')
# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()