# OCR_tester / app.py
# Daniel Jarvis
# Add application file
# 807fdd0
# Method 1: EasyOCR (Recommended - Fast & Lightweight)
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
#os.environ["OMP_NUM_THREADS"] = "1" # Optional: limit threads
import gradio as gr
import datetime
import easyocr
import numpy as np
from PIL import Image
def ocr_easyocr(image):
    """Extract text from an image with EasyOCR.

    Args:
        image: The image to read (a PIL image when called from the Gradio
            UI; it is converted with ``np.array``), or ``None`` when no
            image is present.

    Returns:
        One line per detected text region in the form
        ``"<text> (confidence: <score>)"``; ``"No text detected"`` when
        EasyOCR finds nothing; ``"Please upload an image"`` when ``image``
        is ``None``; or ``"Error: <message>"`` if anything raises.
    """
    # This function is wired straight to the image component's ``change``
    # event, so it can be called with None (e.g. after the image is cleared).
    # Answer with a prompt instead of a cryptic conversion error, and without
    # loading the model for nothing.
    if image is None:
        return "Please upload an image"

    try:
        # Build the English, CPU-only reader once and cache it on the
        # function object so later calls reuse it.
        if not hasattr(ocr_easyocr, "reader"):
            ocr_easyocr.reader = easyocr.Reader(['en'], gpu=False)

        results = ocr_easyocr.reader.readtext(np.array(image))

        # Each result is (bounding box, text, confidence); the box is unused.
        lines = [
            f"{text} (confidence: {confidence:.2f})"
            for _bbox, text, confidence in results
        ]
        return "\n".join(lines) if lines else "No text detected"
    except Exception as e:
        # The return value is shown in a UI textbox, so report failures as
        # text rather than letting the exception propagate.
        return f"Error: {str(e)}"
# Create Gradio app for EasyOCR
def create_easyocr_app():
    """Build the single-engine Gradio UI that runs EasyOCR on an image.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    with gr.Blocks(title="EasyOCR Text Extractor") as app:
        gr.Markdown("# EasyOCR Text Extraction")
        gr.Markdown("Upload an image to extract text using EasyOCR")

        # Input image and result box side by side.
        with gr.Row():
            picture = gr.Image(type="pil", label="Upload Image")
            transcript = gr.Textbox(label="Extracted Text", lines=10)

        # NOTE(review): button assumed to sit below the row - confirm layout.
        run_button = gr.Button("Extract Text", variant="primary")

        # Run OCR both on an explicit button press and automatically whenever
        # the image value changes.
        for register in (run_button.click, picture.change):
            register(ocr_easyocr, inputs=picture, outputs=transcript)

    return app
# Method 2: Tesseract OCR (Classic & Reliable)
import pytesseract
from PIL import Image
def ocr_tesseract(image):
    """Extract text from an image with Tesseract (via pytesseract).

    Two passes are made over the image: ``image_to_string`` for the plain
    transcription and ``image_to_data`` for per-word confidence scores.

    Args:
        image: The image to read (a PIL image when called from the Gradio
            UI), or ``None`` when no image is present.

    Returns:
        A report with the full transcription followed by every non-empty
        word whose confidence is above 30; ``"Please upload an image"`` when
        ``image`` is ``None``; or ``"Error: <message>"`` if anything raises.
    """
    # Same prompt the multi-method app uses for a missing image.
    if image is None:
        return "Please upload an image"

    try:
        # Basic OCR
        text = pytesseract.image_to_string(image).strip()

        # Detailed per-word data, including confidence scores
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

        filtered_text = []
        for conf, raw_word in zip(data.get('conf', []), data.get('text', [])):
            # Depending on the pytesseract/Tesseract versions the confidence
            # can be an int or a string such as "-1" or "96.06".
            # int("96.06") raises ValueError, so parse through float() and
            # skip anything that is not numeric at all.
            try:
                score = float(conf)
            except (TypeError, ValueError):
                continue

            word = raw_word.strip()
            if score > 30 and word:  # confidence threshold
                filtered_text.append(f"{word} ({conf}% confidence)")

        result = text if text else "No text detected"
        detailed = "\n".join(filtered_text) if filtered_text else "No high-confidence text"
        return f"Text:\n{result}\n\nDetailed (>30% confidence):\n{detailed}"
    except Exception as e:
        # The return value is shown in a UI textbox, so report failures as
        # text rather than letting the exception propagate.
        return f"Error: {str(e)}"
# Method 3: TrOCR (Hugging Face Transformers)
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import torch
def ocr_trocr(image):
    """Extract text from an image with the TrOCR transformer model.

    Uses the ``microsoft/trocr-base-printed`` checkpoint, loaded on first
    use and cached on the function object.

    Args:
        image: The image to read (a PIL image when called from the Gradio
            UI), or ``None`` when no image is present.

    Returns:
        The decoded text of the first generated sequence;
        ``"No text detected"`` when that text is blank;
        ``"Please upload an image"`` when ``image`` is ``None``; or
        ``"Error: <message>"`` if anything raises.
    """
    # Same prompt the multi-method app uses for a missing image; also avoids
    # loading the model when there is nothing to read.
    if image is None:
        return "Please upload an image"

    try:
        # Load BOTH objects before caching either. If only the processor were
        # cached and the model load then failed, every later call would skip
        # initialisation and fail on the missing ``model`` attribute.
        if not (hasattr(ocr_trocr, "processor") and hasattr(ocr_trocr, "model")):
            checkpoint = "microsoft/trocr-base-printed"
            processor = TrOCRProcessor.from_pretrained(checkpoint)
            model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
            ocr_trocr.processor, ocr_trocr.model = processor, model

        # Image -> pixel tensor -> generated token ids -> text
        pixel_values = ocr_trocr.processor(image, return_tensors="pt").pixel_values
        generated_ids = ocr_trocr.model.generate(pixel_values)
        generated_text = ocr_trocr.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        return generated_text if generated_text.strip() else "No text detected"
    except Exception as e:
        # The return value is shown in a UI textbox, so report failures as
        # text rather than letting the exception propagate.
        return f"Error: {str(e)}"
# Method 4: PaddleOCR (Lightweight & Fast)
from paddleocr import PaddleOCR
import cv2
def ocr_paddle(image):
    """Extract text from an image with PaddleOCR.

    Args:
        image: The image to read; it is converted with ``np.array`` and its
            channels are swapped from RGB to BGR before recognition.

    Returns:
        One line per recognised entry in the form
        ``"<text> (confidence: <score>)"``; ``"No text detected"`` when the
        engine returns nothing for the image; or ``"Error: <message>"`` if
        anything raises.
    """
    try:
        # Create the English engine (with angle classification) on first use
        # and keep it on the function object for later calls.
        if not hasattr(ocr_paddle, "ocr"):
            ocr_paddle.ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)

        # Swap channel order RGB -> BGR for the OpenCV-style input.
        bgr_pixels = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        pages = ocr_paddle.ocr.ocr(bgr_pixels, cls=True)

        # Nothing came back, or the first (only) image has no entries.
        if not pages or not pages[0]:
            return "No text detected"

        # entry[1] holds the (text, confidence) pair for each detection.
        return "\n".join(
            f"{entry[1][0]} (confidence: {entry[1][1]:.2f})"
            for entry in pages[0]
        )
    except Exception as e:
        # The return value is shown in a UI textbox, so report failures as
        # text rather than letting the exception propagate.
        return f"Error: {str(e)}"
### Test gradio UI
# Complete Multi-Method Gradio App
def create_multi_ocr_app():
    """Build the Gradio UI that runs a selectable OCR engine on an image.

    The UI offers an image input, an engine dropdown and an "Extract Text"
    button, and shows the extracted text together with the elapsed time.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    # Single source of truth: the dropdown choices and the dispatch below are
    # both derived from this mapping, so they cannot drift apart.
    ocr_methods = {
        "EasyOCR": ocr_easyocr,
        "Tesseract": ocr_tesseract,
        "TrOCR": ocr_trocr,
        "PaddleOCR": ocr_paddle,
    }

    def process_with_method(image, method):
        """Run the chosen OCR engine on ``image`` and time the call.

        Args:
            image: The uploaded image, or ``None`` when there is none.
            method: The engine name selected in the dropdown.

        Returns:
            A ``(text, elapsed)`` pair, where ``elapsed`` is the string form
            of the ``timedelta`` spent in the engine call.
        """
        # Nothing to do (and nothing to time) without an image.
        if image is None:
            return "Please upload an image", "00:00:00"

        start_time = datetime.datetime.now()
        ocr_fn = ocr_methods.get(method)
        results = ocr_fn(image) if ocr_fn is not None else "Invalid method selected"
        # Subtracting two datetimes taken this way cannot raise, so no
        # exception handling is needed around it.
        elapsed_time = datetime.datetime.now() - start_time
        return results, str(elapsed_time)

    with gr.Blocks(title="Multi-OCR Comparator") as app:
        gr.Markdown("# Multi-Method OCR Comparison")
        gr.Markdown("Compare different OCR methods on your images")

        # NOTE(review): two-column layout (inputs left, outputs right) is
        # assumed - confirm against the intended UI.
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload Image")
                method_dropdown = gr.Dropdown(
                    choices=list(ocr_methods),
                    value="EasyOCR",
                    label="OCR Method",
                )
                extract_btn = gr.Button("Extract Text", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(label="Extracted Text", lines=15)
                elapsed_time_output = gr.Textbox(
                    label="Elapsed Time", lines=1, value="00:00:00"
                )

        # Process on button click
        extract_btn.click(
            process_with_method,
            inputs=[image_input, method_dropdown],
            outputs=[text_output, elapsed_time_output],
        )

        # Auto-process on image change
        image_input.change(
            process_with_method,
            inputs=[image_input, method_dropdown],
            outputs=[text_output, elapsed_time_output],
        )

    return app
# Launch instructions
if __name__ == "__main__":
    # Script entry point: build the multi-method comparison UI and serve it
    # with Gradio's default launch settings. The single-engine
    # create_easyocr_app() is not launched here.
    app = create_multi_ocr_app()
    app.launch()