Spaces:

SmokeyBandit
/

ocrmultipages

Sleeping

ocrmultipages / app.py

Update app.py

cfbb0f6 verified 9 months ago

1.24 kB

	import gradio as gr
	import pytesseract
	import cv2
	import os

	def process(files, lang: str = 'eng') -> str:
	    """Run OCR on each uploaded image and return the combined text.

	    Args:
	        files: Iterable of image file paths. ``None`` or an empty
	            collection yields an empty string.
	        lang: Tesseract language code passed to pytesseract. A falsy
	            value (``None`` or ``""``) falls back to ``'eng'``.

	    Returns:
	        The recognised text of every file, in input order, separated by
	        blank lines. A file that fails contributes an
	        ``"Error processing <path>: <reason>"`` entry instead of aborting
	        the batch.

	    Side effects:
	        Each input file is deleted after it has been handled, whether or
	        not OCR succeeded.
	    """
	    if not files:
	        return ""

	    # Honour the declared default when the caller passes no language.
	    lang = lang or 'eng'

	    results = []
	    for file in files:
	        try:
	            img = cv2.imread(file)
	            if img is None:
	                # imread signals an unreadable/unsupported file by
	                # returning None rather than raising.
	                raise ValueError("file could not be read as an image")
	            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	            _, threshold_img = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
	            results.append(pytesseract.image_to_string(threshold_img, lang=lang))
	        except Exception as e:
	            # Per-file boundary: report the failure and keep going.
	            results.append(f"Error processing {file}: {e}")
	        finally:
	            # Best-effort cleanup: a file that is already gone or cannot
	            # be removed must not discard the results gathered so far.
	            try:
	                os.remove(file)
	            except OSError:
	                pass
	    return "\n\n".join(results)

	# Language codes reported by pytesseract; used as the dropdown choices.
	langs = pytesseract.get_languages()

	# Define the Gradio interface using gr.Files to allow multiple file uploads.
	interface = gr.Interface(
	    fn=process,
	    inputs=[
	        gr.Files(label="Upload Images", file_count="multiple", type="filepath"),
	        gr.Dropdown(
	            label="Select Language",
	            choices=langs,
	            type="value",
	            # Preselect English when it is available so the callback
	            # receives a language even if the user never opens the dropdown.
	            value="eng" if "eng" in langs else None,
	        ),
	    ],
	    outputs="text",
	    css="footer {visibility: hidden}",
	    # Plain "|" here: "\|" is an invalid escape sequence and would put a
	    # literal backslash in the page title.
	    title="Optical Character Recognition | Batch Image To Text",
	    article="""
	<p style='text-align: center;'>

	</p>
	""",
	)

	# Launch the interface with the API docs page disabled.
	interface.launch(show_api=False)