# Source: Hugging Face Spaces - ARC-NECT/OCR_tester (Space status: Sleeping)
# File size: 7,080 bytes
# Revision: 807fdd0

# Method 1: EasyOCR (Recommended - Fast & Lightweight)
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
#os.environ["OMP_NUM_THREADS"] = "1"  # Optional: limit threads


import gradio as gr
import datetime 
import easyocr
import numpy as np
from PIL import Image


def ocr_easyocr(image):
    """Extract text from an image with EasyOCR.

    The reader (English, CPU only) is created on the first call and cached as
    an attribute on this function, so later calls reuse it.

    Args:
        image: A PIL image (anything ``np.array`` can convert to an image
            array), or ``None`` when no image has been supplied.

    Returns:
        str: One line per detected region formatted as
        ``"<text> (confidence: <score>)"``; ``"No text detected"`` when the
        reader finds nothing; ``"Please upload an image"`` when ``image`` is
        ``None``; or ``"Error: <message>"`` if anything raised during OCR.
    """
    # This function is wired straight to UI events, which can deliver None
    # (for example after the image is cleared). Answer with a prompt instead
    # of pushing None through the OCR engine and surfacing a cryptic error.
    if image is None:
        return "Please upload an image"

    try:
        # Building the reader is the expensive part, so do it only once.
        if not hasattr(ocr_easyocr, "reader"):
            ocr_easyocr.reader = easyocr.Reader(['en'], gpu=False)

        detections = ocr_easyocr.reader.readtext(np.array(image))

        # Each detection is (bounding box, text, confidence); the box is unused.
        lines = [
            f"{text} (confidence: {confidence:.2f})"
            for _bbox, text, confidence in detections
        ]
        return "\n".join(lines) if lines else "No text detected"

    except Exception as e:
        # Deliberate catch-all: the UI shows the message rather than crashing.
        return f"Error: {str(e)}"

# Create Gradio app for EasyOCR
def create_easyocr_app():
    """Build the single-engine Gradio UI that runs EasyOCR on an uploaded image.

    Returns:
        The ``gr.Blocks`` application; the caller is responsible for launching it.
    """
    with gr.Blocks(title="EasyOCR Text Extractor") as app:
        gr.Markdown("# EasyOCR Text Extraction")
        gr.Markdown("Upload an image to extract text using EasyOCR")

        with gr.Row():
            picture = gr.Image(type="pil", label="Upload Image")
            transcript = gr.Textbox(label="Extracted Text", lines=10)

        run_button = gr.Button("Extract Text", variant="primary")

        # The explicit button click and the automatic "image changed" trigger
        # share the same handler and the same input/output wiring.
        for register in (run_button.click, picture.change):
            register(ocr_easyocr, inputs=picture, outputs=transcript)

    return app

# Method 2: Tesseract OCR (Classic & Reliable)
import pytesseract
from PIL import Image

def ocr_tesseract(image):
    """Extract text from an image with Tesseract (via pytesseract).

    Tesseract is invoked twice: once for the plain recognised text and once
    for per-word data, from which words with confidence above 30 are listed.

    Args:
        image: An image accepted by pytesseract (e.g. a PIL image), or
            ``None`` when no image has been supplied.

    Returns:
        str: A report with a ``Text:`` section and a
        ``Detailed (>30% confidence):`` section; ``"Please upload an image"``
        when ``image`` is ``None``; or ``"Error: <message>"`` if OCR raised.
    """
    if image is None:
        return "Please upload an image"

    try:
        text = pytesseract.image_to_string(image)
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

        filtered_text = []
        for raw_word, conf in zip(data['text'], data['conf']):
            # Confidence can arrive as an int, a float, or a numeric string
            # such as "96.5"; int("96.5") raises, so parse through float first.
            # Truncating afterwards keeps the threshold behaviour of int(conf).
            try:
                score = int(float(conf))
            except (TypeError, ValueError, OverflowError):
                # An unparseable confidence should not sink the whole result.
                continue

            if score > 30:  # confidence threshold
                word = raw_word.strip()
                if word:
                    filtered_text.append(f"{word} ({conf}% confidence)")

        result = text.strip() or "No text detected"
        detailed = "\n".join(filtered_text) if filtered_text else "No high-confidence text"

        return f"Text:\n{result}\n\nDetailed (>30% confidence):\n{detailed}"

    except Exception as e:
        # Deliberate catch-all: the UI shows the message rather than crashing.
        return f"Error: {str(e)}"

# Method 3: TrOCR (Hugging Face Transformers)
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import torch

def ocr_trocr(image):
    """Extract text from an image with TrOCR (``microsoft/trocr-base-printed``).

    The processor and model are loaded on the first call and cached as
    attributes on this function.

    Args:
        image: A PIL image (or another input the TrOCR processor accepts), or
            ``None`` when no image has been supplied.

    Returns:
        str: The decoded text; ``"No text detected"`` when the decoded text is
        blank; ``"Please upload an image"`` when ``image`` is ``None``; or
        ``"Error: <message>"`` if loading or inference raised.
    """
    if image is None:
        return "Please upload an image"

    try:
        # Load both pieces into locals before caching either one. If the model
        # load fails, no half-initialised cache is left behind, and the next
        # call retries instead of failing on a missing ``model`` attribute.
        if not (hasattr(ocr_trocr, "processor") and hasattr(ocr_trocr, "model")):
            processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
            model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
            ocr_trocr.processor = processor
            ocr_trocr.model = model

        # Normalise PIL images in other modes (grayscale, RGBA, palette) to
        # RGB before preprocessing. Inputs without a ``mode`` attribute are
        # passed through untouched.
        if getattr(image, "mode", "RGB") != "RGB":
            image = image.convert("RGB")

        pixel_values = ocr_trocr.processor(image, return_tensors="pt").pixel_values
        generated_ids = ocr_trocr.model.generate(pixel_values)
        generated_text = ocr_trocr.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        return generated_text if generated_text.strip() else "No text detected"

    except Exception as e:
        # Deliberate catch-all: the UI shows the message rather than crashing.
        return f"Error: {str(e)}"

# Method 4: PaddleOCR (Lightweight & Fast)
from paddleocr import PaddleOCR
import cv2

def ocr_paddle(image):
    """Extract text from an image with PaddleOCR.

    The PaddleOCR instance (English, angle classification enabled) is created
    on the first call and cached as an attribute on this function.

    Args:
        image: A PIL image (or an RGB array-like), or ``None`` when no image
            has been supplied.

    Returns:
        str: One line per recognised text line formatted as
        ``"<text> (confidence: <score>)"``; ``"No text detected"`` when nothing
        is found; ``"Please upload an image"`` when ``image`` is ``None``; or
        ``"Error: <message>"`` if OCR raised.
    """
    if image is None:
        return "Please upload an image"

    try:
        # Building the engine is the expensive part, so do it only once.
        if not hasattr(ocr_paddle, "ocr"):
            ocr_paddle.ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)

        # Grayscale or palette PIL images become single-channel arrays, which
        # the RGB->BGR conversion below rejects; normalise them to RGB first.
        # Inputs without a ``mode`` attribute are passed through untouched.
        if getattr(image, "mode", "RGB") != "RGB":
            image = image.convert("RGB")

        # OpenCV-style BGR channel order for the OCR engine.
        img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        results = ocr_paddle.ocr.ocr(img_cv, cls=True)

        # Only the first result entry is read (a single image was passed);
        # it may be missing or empty when nothing was recognised.
        if not (results and results[0]):
            return "No text detected"

        # Each line's second element holds (text, confidence).
        return "\n".join(
            f"{line[1][0]} (confidence: {line[1][1]:.2f})"
            for line in results[0]
        )

    except Exception as e:
        # Deliberate catch-all: the UI shows the message rather than crashing.
        return f"Error: {str(e)}"
    

### Test gradio UI 

# Complete Multi-Method Gradio App
def create_multi_ocr_app():
    """Build the Gradio UI for comparing several OCR engines on one image.

    The user uploads an image and picks an engine from a dropdown. The
    extracted text and the time the engine took are shown side by side.
    Processing runs on button click and whenever the image changes.

    Returns:
        The ``gr.Blocks`` application; the caller is responsible for launching it.
    """
    import time  # local import: only the elapsed-time measurement needs it

    # Single source of truth for the available engines. The dropdown choices
    # and the dispatch below both derive from this mapping, so they cannot
    # drift apart.
    engines = {
        "EasyOCR": ocr_easyocr,
        "Tesseract": ocr_tesseract,
        "TrOCR": ocr_trocr,
        "PaddleOCR": ocr_paddle,
    }

    def process_with_method(image, method):
        """Run the selected engine on ``image``.

        Returns:
            tuple[str, str]: The engine's text output and the elapsed time
            rendered as ``str(timedelta)``.
        """
        if image is None:
            return "Please upload an image", "00:00:00"

        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way wall-clock timestamps can.
        started = time.perf_counter()
        engine = engines.get(method)
        results = engine(image) if engine is not None else "Invalid method selected"
        elapsed_time = datetime.timedelta(seconds=time.perf_counter() - started)

        return results, str(elapsed_time)

    with gr.Blocks(title="Multi-OCR Comparator") as app:
        gr.Markdown("# Multi-Method OCR Comparison")
        gr.Markdown("Compare different OCR methods on your images")

        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload Image")
                method_dropdown = gr.Dropdown(
                    choices=list(engines),
                    value="EasyOCR",
                    label="OCR Method"
                )
                extract_btn = gr.Button("Extract Text", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(label="Extracted Text", lines=15)
                elapsed_time_output = gr.Textbox(label="Elapsed Time", lines=1, value="00:00:00")

        # Process on button click
        extract_btn.click(
            process_with_method,
            inputs=[image_input, method_dropdown],
            outputs=[text_output, elapsed_time_output]
        )

        # Auto-process on image change
        image_input.change(
            process_with_method,
            inputs=[image_input, method_dropdown],
            outputs=[text_output, elapsed_time_output]
        )

    return app

# Script entry point: build the multi-method comparison UI and serve it.
if __name__ == "__main__":
    app = create_multi_ocr_app()
    app.launch()