Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from pdf2jpg import pdf2jpg | |
| import shutil | |
| import os | |
| from ultralytics import YOLO | |
| import shutil | |
| import os | |
| from tabula import read_pdf | |
| import pandas as pd | |
| import gdown | |
# ---------------------------------------------------------------------------
# Module-level setup: prepares working directories, fetches the model weights
# if needed, and loads the detector used by main().
# ---------------------------------------------------------------------------

# Drop predictions left over from an earlier run so that outdated annotated
# images are never shown. isdir() already implies that the path exists.
if os.path.isdir('prediction'):
    shutil.rmtree('prediction')

# Directory that main() lists to find the page images
# (presumably where pdf2jpg writes its output for 'temp.pdf' -- TODO confirm).
if not os.path.exists('temp.pdf_dir'):
    os.makedirs('temp.pdf_dir')
    print('not_found')
else:
    print('found')

# Location the uploaded PDF is saved to. Its parent directory has to exist
# before main() opens the file for writing; otherwise open() raises
# FileNotFoundError.
temp_file_path = 'temp//temp.pdf'
os.makedirs(os.path.dirname(temp_file_path), exist_ok=True)

# Download the weights whenever the *file* is missing. Checking only for the
# 'model' directory would skip the download forever after a failed or
# interrupted first attempt that left an empty directory behind.
output = "model//best.pt"
if not os.path.exists(output):
    os.makedirs('model', exist_ok=True)
    url = "https://drive.google.com/uc?id=1zv3VDW-LXuesKLrTm6xSdKGrycutFdHb"
    gdown.download(url, output, quiet=False)

# YOLO detector shared by main().
model = YOLO(output)
def main():
    """Render the table-detection page.

    Shows a PDF uploader. Once a file has been uploaded, the function:

    1. saves the upload to ``temp_file_path``;
    2. converts the PDF to page images with ``pdf2jpg``;
    3. runs the module-level YOLO ``model`` on every image found in
       ``temp.pdf_dir`` and displays the saved prediction image;
    4. extracts tables from the PDF with tabula's ``read_pdf`` and displays
       each one as a dataframe.

    Nothing is processed until a file is uploaded.
    """
    st.title("Table detection")

    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
    if uploaded_file is None:
        return

    # The parent directory must exist before the file can be opened for
    # writing; create it here so this function does not depend on it having
    # been created elsewhere.
    pdf_dir = os.path.dirname(temp_file_path)
    if pdf_dir:
        os.makedirs(pdf_dir, exist_ok=True)
    with open(temp_file_path, 'wb') as f:
        f.write(uploaded_file.getbuffer())

    images_dir = 'temp.pdf_dir'

    # Remove page images left behind by a previous upload. Without this, a
    # shorter PDF uploaded after a longer one would still show the extra
    # pages of the old document.
    if os.path.isdir(images_dir):
        for stale in os.listdir(images_dir):
            stale_path = os.path.join(images_dir, stale)
            if os.path.isfile(stale_path):
                os.remove(stale_path)

    with st.spinner('Converting pdf to images...'):
        # An empty output path makes the images land relative to the current
        # working directory (read back from images_dir below).
        pdf2jpg.convert_pdf2jpg(temp_file_path, "", pages="ALL")

    st.markdown('### Images of detected tables')
    with st.spinner('Detecting table in images...'):
        # os.listdir() returns entries in arbitrary order; sort them so the
        # display order is deterministic between runs.
        for index, entry in enumerate(sorted(os.listdir(images_dir))):
            full_path = os.path.join(images_dir, entry)
            print(full_path)
            # save=True writes the annotated image under
            # prediction/image_<index>/, which is then displayed.
            model.predict(full_path, save=True, project="prediction", name=f'image_{index}')
            st.image(os.path.join(f'prediction//image_{index}', entry))

    st.markdown('### Extracted data from tables')
    with st.spinner('Performing OCR on tables to extract images...'):
        tables = read_pdf(temp_file_path, pages='all', multiple_tables=True)
        for number, table in enumerate(tables, start=1):
            print(f"Table {number}")
            print(table)
            st.dataframe(table)

    st.success('Processing Completed!')
# Entry point: start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()