Spaces:

JEET1812
/

abc

Sleeping

App Files Files Community

abc / app.py

JEET1812

Create app.py

f543a92 over 2 years ago

raw

history blame contribute delete

2.15 kB

	import gradio as gr
	import easyocr
	import numpy as np
	import cv2
	from PIL import Image

	# Build the EasyOCR reader once at module load so that every call to
	# perform_ocr_and_segment reuses it instead of constructing a new one.
	# Configured for English ('en') with GPU use disabled.
	reader = easyocr.Reader(['en'], gpu=False)

	def perform_ocr_and_segment(image):
	    """Binarize an image with Otsu's threshold and run OCR on the result.

	    Args:
	        image: The picture to process, in any form ``np.array`` can convert
	            (presumably the RGB array handed over by the Gradio image
	            input -- confirm against the input component). ``None`` means
	            nothing was uploaded.

	    Returns:
	        A ``(text, thresholded_image)`` tuple. ``text`` holds the recognised
	        strings, one detection per line, followed by a caption line.
	        ``thresholded_image`` is the inverted binary image as a
	        ``PIL.Image``. When ``image`` is ``None`` the tuple is a short
	        "no image" message and ``None``.
	    """
	    # Clicking the button before uploading passes None; answer with a
	    # readable message instead of letting OpenCV raise on a non-image.
	    if image is None:
	        return "No image provided. Please upload an image first.", None

	    # OpenCV operates on NumPy arrays.
	    image_np = np.array(image)

	    # COLOR_RGB2GRAY needs a colour image; a 2-D array is already
	    # single-channel, so it is used as the grayscale image directly.
	    if image_np.ndim == 2:
	        grayscale_image = image_np
	    else:
	        grayscale_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

	    # Otsu's method picks the threshold itself, so the 0 passed here is
	    # ignored; the chosen threshold value is not needed afterwards.
	    _, thresholded_image = cv2.threshold(
	        grayscale_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
	    )

	    # Invert the binary image (swap black and white).
	    thresholded_image = cv2.bitwise_not(thresholded_image)

	    # Gradio output is configured for PIL images.
	    thresholded_pil_image = Image.fromarray(thresholded_image)

	    # OCR runs on the thresholded array using the module-level reader.
	    ocr_result = reader.readtext(thresholded_image)

	    # Index 1 of each readtext entry is used as the recognised text.
	    ocr_text = '\n'.join(entry[1] for entry in ocr_result)
	    result_with_segmentation = f"{ocr_text}\n\nThresholded Image (Segments):"

	    return result_with_segmentation, thresholded_pil_image

	# Assemble the Gradio UI. Components come from the top-level `gr` namespace:
	# the legacy `gr.inputs` / `gr.outputs` modules were deprecated in Gradio 3
	# and removed in Gradio 4.
	with gr.Blocks() as demo:
	    gr.Markdown("OCR (Optical Character Recognition) methods use algorithms to analyze images, recognize characters, and convert them into editable text. Popular tools like Tesseract OCR, Google Cloud Vision API, and Microsoft Azure Cognitive Services OCR efficiently perform this task, enabling data extraction and document digitization from images.")

	    # Input row: the image to process and the button that triggers processing.
	    with gr.Row():
	        image_input = gr.Image(label="Upload an Image")
	        button = gr.Button("Perform OCR and Threshold-Based Segmentation")

	    # Output row: heading labels, the thresholded image and the OCR text.
	    with gr.Row():
	        gr.Label("OCR Result and Thresholded Image:")
	        gr.Label("")  # Add an empty string as a placeholder
	        image_output = gr.Image(type="pil")  # Specify the output type as 'pil'
	        text_output = gr.Textbox()

	    # Event listeners must be registered inside the Blocks context. The
	    # outputs order matches the (text, image) tuple returned by the handler.
	    button.click(perform_ocr_and_segment, inputs=image_input, outputs=[text_output, image_output])

	demo.launch()