|
|
import gradio as gr |
|
|
import easyocr |
|
|
import numpy as np |
|
|
import cv2 |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
# Single module-level EasyOCR reader (English, GPU disabled). It is built once
# at import time and reused by every call to perform_ocr_and_segment.
reader = easyocr.Reader(
    ["en"],
    gpu=False,
)
|
|
|
|
|
def perform_ocr_and_segment(image):
    """Run threshold-based segmentation and OCR on an uploaded image.

    The image is converted to grayscale, binarized with Otsu's method,
    inverted, and the inverted binary image is passed to the module-level
    EasyOCR ``reader``.

    Args:
        image: The input picture, as anything ``np.array`` can convert
            (a NumPy array or a PIL image). Colour input is assumed to be
            in RGB channel order. ``None`` means nothing was uploaded.

    Returns:
        A ``(text, thresholded_image)`` tuple. ``text`` is the recognized
        strings joined by newlines, followed by a caption line for the
        image. ``thresholded_image`` is the inverted binary image as a
        ``PIL.Image``. When ``image`` is ``None`` the tuple is
        ``(<explanatory message>, None)``.
    """
    # The click handler can fire before anything is uploaded; answer with a
    # readable message instead of letting OpenCV fail on a None input.
    if image is None:
        return "No image was provided. Please upload an image first.", None

    image_np = np.array(image)

    # COLOR_RGB2GRAY rejects single-channel input, so an image that is
    # already 2-D grayscale is used as-is.
    if image_np.ndim == 2:
        grayscale_image = image_np
    else:
        grayscale_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

    # With THRESH_OTSU the threshold is chosen automatically, so the 0 passed
    # as the threshold argument is ignored; the chosen value is not needed.
    _, thresholded_image = cv2.threshold(
        grayscale_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Swap black and white in the binary mask.
    thresholded_image = cv2.bitwise_not(thresholded_image)

    thresholded_pil_image = Image.fromarray(thresholded_image)

    # OCR runs on the inverted binary image, not on the original picture.
    ocr_result = reader.readtext(thresholded_image)

    # Each readtext entry carries the recognized string at index 1.
    ocr_text = '\n'.join(entry[1] for entry in ocr_result)
    result_with_segmentation = f"{ocr_text}\n\nThresholded Image (Segments):"

    return result_with_segmentation, thresholded_pil_image
|
|
|
|
|
# Gradio UI: an upload row with a trigger button, then a row showing the
# thresholded image and the OCR text returned by perform_ocr_and_segment.
with gr.Blocks() as demo:
    gr.Markdown("OCR (Optical Character Recognition) methods use algorithms to analyze images, recognize characters, and convert them into editable text. Popular tools like Tesseract OCR, Google Cloud Vision API, and Microsoft Azure Cognitive Services OCR efficiently perform this task, enabling data extraction and document digitization from images.")

    with gr.Row():
        # gr.Image / gr.Textbox replace the legacy gr.inputs.* / gr.outputs.*
        # namespaces, which are deprecated in Gradio 3 and absent in Gradio 4.
        image_input = gr.Image(label="Upload an Image")
        button = gr.Button("Perform OCR and Threshold-Based Segmentation")

    with gr.Row():
        gr.Label("OCR Result and Thresholded Image:")
        gr.Label("")
        image_output = gr.Image(type="pil")
        text_output = gr.Textbox()

    # The handler returns (text, image), so the outputs are listed in the
    # same order. Event listeners must be registered inside the Blocks
    # context.
    button.click(
        perform_ocr_and_segment,
        inputs=image_input,
        outputs=[text_output, image_output],
    )

demo.launch()
|
|
|