# abc / app.py
# JEET1812's picture
# Create app.py
# f543a92
import gradio as gr
import easyocr
import numpy as np
import cv2
from PIL import Image
# Build the EasyOCR reader once, at import time, so that every OCR request
# reuses the same loaded model instead of constructing a new reader per call.
# Only English is requested and GPU use is switched off.
reader = easyocr.Reader(lang_list=['en'], gpu=False)
def perform_ocr_and_segment(image):
    """Binarize an image with Otsu thresholding and run OCR on the result.

    Args:
        image: The picture to process, as anything ``np.array`` can convert
            (for example a NumPy array or a PIL image). Colour input is
            converted with ``cv2.COLOR_RGB2GRAY``, so it is expected in RGB
            channel order; 2-D input is treated as already grayscale.
            ``None`` (nothing uploaded) is accepted.

    Returns:
        A ``(text, image)`` tuple. ``text`` is the recognized text, one
        detection per line, followed by a caption for the image; ``image``
        is the inverted binary image as a PIL image. When ``image`` is
        ``None`` the result is ``("No image provided.", None)``.
    """
    # Clicking the button before uploading anything delivers None; without
    # this guard np.array(None) reaches OpenCV and fails with an opaque error.
    if image is None:
        return "No image provided.", None

    # OpenCV operates on NumPy arrays.
    image_np = np.array(image)

    # A 2-D array has no channel axis, i.e. it is already grayscale, and
    # cvtColor would reject it; only convert when colour channels exist.
    if image_np.ndim == 2:
        grayscale_image = image_np
    else:
        grayscale_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

    # With THRESH_OTSU the threshold is chosen automatically, so the 0 passed
    # here is only a placeholder; the chosen threshold value is not needed.
    _, thresholded_image = cv2.threshold(
        grayscale_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Invert so that dark and light regions are swapped before OCR.
    thresholded_image = cv2.bitwise_not(thresholded_image)

    # Run the preloaded reader on the binarized image; element 1 of each
    # result entry is used as the recognized text.
    ocr_result = reader.readtext(thresholded_image)
    ocr_text = '\n'.join(entry[1] for entry in ocr_result)

    result_with_segmentation = f"{ocr_text}\n\nThresholded Image (Segments):"

    # Gradio's image output is fed a PIL image.
    thresholded_pil_image = Image.fromarray(thresholded_image)
    return result_with_segmentation, thresholded_pil_image
# Assemble the web UI: an upload widget and a trigger button in the first row,
# the result widgets in the second row.
with gr.Blocks() as demo:
    gr.Markdown("OCR (Optical Character Recognition) methods use algorithms to analyze images, recognize characters, and convert them into editable text. Popular tools like Tesseract OCR, Google Cloud Vision API, and Microsoft Azure Cognitive Services OCR efficiently perform this task, enabling data extraction and document digitization from images.")
    with gr.Row():
        # The gr.inputs / gr.outputs namespaces are deprecated in Gradio 3
        # and removed in Gradio 4; the top-level components exist in both.
        image_input = gr.Image(label="Upload an Image")
        button = gr.Button("Perform OCR and Threshold-Based Segmentation")
    with gr.Row():
        gr.Label("OCR Result and Thresholded Image:")
        gr.Label("")  # Add an empty string as a placeholder
        image_output = gr.Image(type="pil")  # Specify the output type as 'pil'
        text_output = gr.Textbox()
    # The callback returns (text, image), so the outputs are listed in that
    # same order. Event listeners must be registered inside the Blocks context.
    button.click(perform_ocr_and_segment, inputs=image_input, outputs=[text_output, image_output])
demo.launch()