Spaces:

darthPanda
/

table_detector

Runtime error

App Files Files Community

table_detector / app.py

darthPanda

Upload app.py

7ffe108 over 2 years ago

raw

history blame

2.41 kB

	import streamlit as st
	from pdf2jpg import pdf2jpg
	import shutil
	import os
	from ultralytics import YOLO
	import shutil
	import os
	from tabula import read_pdf
	import pandas as pd
	import gdown


	# ---------------------------------------------------------------------------
	# Module-level setup. These statements run each time the script is executed.
	# ---------------------------------------------------------------------------

	# Remove any previous detection output; main() saves and then reads results
	# under 'prediction/image_<index>'.
	if os.path.isdir('prediction'):
	    shutil.rmtree('prediction')

	# main() lists this directory for page images, so make sure it exists.
	if not os.path.exists('temp.pdf_dir'):
	    os.makedirs('temp.pdf_dir')
	    print('not_found')
	else:
	    print('found')

	# Download the detector weights when the weights file itself is missing.
	# Testing for the file (rather than only the 'model' directory) means that a
	# run which created the directory but did not finish the download is retried
	# instead of handing YOLO() a path to a file that is not there.
	if not os.path.exists('model//best.pt'):
	    os.makedirs('model', exist_ok=True)
	    url = "https://drive.google.com/uc?id=1zv3VDW-LXuesKLrTm6xSdKGrycutFdHb"
	    output = "model//best.pt"
	    gdown.download(url, output, quiet=False)

	# Location the uploaded PDF is written to by main().
	temp_file_path = 'temp//temp.pdf'

	# Detection model shared by every call to main().
	model = YOLO('model//best.pt')

	def main():
	    """Render the table-detection page.

	    Shows a PDF uploader. Once a file is uploaded it is written to the
	    module-level ``temp_file_path``, converted to page images with
	    ``pdf2jpg``, each image found in ``temp.pdf_dir`` is passed to the
	    module-level ``model`` and the saved prediction image is displayed,
	    and finally the tables returned by ``read_pdf`` are displayed as
	    dataframes. Returns ``None``; when no file has been uploaded only the
	    title and the uploader are rendered.
	    """
	    st.title("Table detection")

	    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
	    if uploaded_file is None:
	        # Nothing to process until the user provides a file.
	        return

	    # Nothing visible in this file creates the directory that
	    # temp_file_path points into, so create it before writing; otherwise
	    # open() raises FileNotFoundError when the directory is absent.
	    upload_dir = os.path.dirname(temp_file_path)
	    if upload_dir:
	        os.makedirs(upload_dir, exist_ok=True)

	    with open(temp_file_path, 'wb') as f:
	        f.write(uploaded_file.getbuffer())

	    with st.spinner('Converting pdf to images...'):
	        # An empty output path is passed through unchanged from the
	        # original call; the loop below reads the pages from 'temp.pdf_dir'.
	        pdf2jpg.convert_pdf2jpg(temp_file_path, "", pages="ALL")

	    st.markdown('### Images of detected tables')
	    with st.spinner('Detecting table in images...'):
	        # sorted() makes the processing order (and therefore the
	        # image_<index> numbering) deterministic; os.listdir() alone
	        # returns entries in arbitrary order.
	        for index, entry in enumerate(sorted(os.listdir('temp.pdf_dir'))):
	            full_path = os.path.join('temp.pdf_dir', entry)
	            print(full_path)
	            model.predict(full_path, save=True, project="prediction", name=f'image_{index}')
	            # The saved prediction is read back from the project/name
	            # directory requested above, under the same file name.
	            st.image(os.path.join(f'prediction//image_{index}', entry))

	    st.markdown('### Extracted data from tables')

	    # NOTE(review): the spinner text mentions OCR, but this step calls
	    # tabula's read_pdf on the PDF itself - confirm the wording is intended.
	    with st.spinner('Performing OCR on tables to extract images...'):
	        tables = read_pdf(temp_file_path, pages='all', multiple_tables=True)
	        for i, table in enumerate(tables):
	            print(f"Table {i+1}")
	            print(table)
	            st.dataframe(table)

	    st.success('Processing Completed!')


	# Run the app when this file is executed as a script.
	if __name__ == "__main__":
	    main()