File size: 7,080 Bytes
807fdd0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# Method 1: EasyOCR (Recommended - Fast & Lightweight)
import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that can occur when several native libraries each bundle their own
# OpenMP copy. It is set before the heavy imports below so that it is already
# in the environment when they load; confirm it is still required.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
#os.environ["OMP_NUM_THREADS"] = "1"  # Optional: limit threads


import gradio as gr
import datetime 
import easyocr
import numpy as np
from PIL import Image


def ocr_easyocr(image):
    """Extract text from an image with EasyOCR (English, CPU only).

    The ``easyocr.Reader`` is created on the first call and cached as a
    function attribute so that later calls reuse it.

    Args:
        image: Image to read. It is converted with ``np.array`` before being
            handed to the reader. ``None`` means no image was supplied.

    Returns:
        str: One line per detected text region, formatted as
        ``"<text> (confidence: <score>)"``; ``"No text detected"`` when the
        reader finds nothing; ``"Please upload an image"`` when ``image`` is
        ``None``; or ``"Error: <message>"`` if initialisation or OCR raised.
    """
    # Without this guard a missing image reaches np.array()/readtext() and
    # comes back as an obscure "Error: ..." string.
    if image is None:
        return "Please upload an image"

    try:
        # Build the reader lazily and keep it on the function object.
        if not hasattr(ocr_easyocr, "reader"):
            ocr_easyocr.reader = easyocr.Reader(['en'], gpu=False)

        results = ocr_easyocr.reader.readtext(np.array(image))

        # Each result is (bounding box, text, confidence); the box is unused.
        lines = [
            f"{text} (confidence: {confidence:.2f})"
            for _bbox, text, confidence in results
        ]
        return "\n".join(lines) if lines else "No text detected"

    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error: {str(e)}"

# Create Gradio app for EasyOCR
def create_easyocr_app():
    """Build the single-method Gradio UI that runs EasyOCR on an image.

    Returns:
        The ``gr.Blocks`` application. It is only constructed here; the
        caller decides when to launch it.
    """
    with gr.Blocks(title="EasyOCR Text Extractor") as demo:
        gr.Markdown("# EasyOCR Text Extraction")
        gr.Markdown("Upload an image to extract text using EasyOCR")

        with gr.Row():
            picture = gr.Image(type="pil", label="Upload Image")
            transcript = gr.Textbox(label="Extracted Text", lines=10)

        run_button = gr.Button("Extract Text", variant="primary")

        # The same handler serves both triggers: the explicit button click
        # and any change of the image component (auto-extract on upload).
        for register in (run_button.click, picture.change):
            register(ocr_easyocr, inputs=picture, outputs=transcript)

    return demo

# Method 2: Tesseract OCR (Classic & Reliable)
import pytesseract
from PIL import Image

def ocr_tesseract(image):
    """Extract text from an image with Tesseract via ``pytesseract``.

    Runs two passes: ``image_to_string`` for the plain text and
    ``image_to_data`` for per-word confidence scores.

    Args:
        image: Image accepted by ``pytesseract``. ``None`` means no image was
            supplied.

    Returns:
        str: A report with the full text followed by the words whose
        confidence is above 30, one ``"<word> (<conf>% confidence)"`` entry
        per line; ``"Please upload an image"`` when ``image`` is ``None``;
        or ``"Error: <message>"`` if Tesseract raised.
    """
    if image is None:
        return "Please upload an image"

    try:
        # Basic OCR
        text = pytesseract.image_to_string(image)

        # Get detailed data with confidence scores
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

        # Filter out low confidence text. ``.get`` tolerates a result dict
        # that lacks these columns instead of failing with KeyError.
        filtered_text = []
        for conf, raw_word in zip(data.get('conf', []), data.get('text', [])):
            # The confidence may arrive as an int, a float or a numeric
            # string such as '96.5'; int() alone rejects the latter, so go
            # through float() and skip values that are not numeric at all.
            try:
                score = int(float(conf))
            except (TypeError, ValueError):
                continue
            if score > 30:  # confidence threshold
                word = raw_word.strip()
                if word:
                    filtered_text.append(f"{word} ({conf}% confidence)")

        stripped = text.strip()
        result = stripped if stripped else "No text detected"
        detailed = "\n".join(filtered_text) if filtered_text else "No high-confidence text"

        return f"Text:\n{result}\n\nDetailed (>30% confidence):\n{detailed}"

    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error: {str(e)}"

# Method 3: TrOCR (Hugging Face Transformers)
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import torch

def ocr_trocr(image):
    """Extract text from an image with the TrOCR transformer model.

    The ``microsoft/trocr-base-printed`` processor and model are loaded on
    the first call and cached as function attributes.

    Args:
        image: Image to read. Objects exposing a PIL-style ``mode`` other
            than ``"RGB"`` are converted to RGB first. ``None`` means no
            image was supplied.

    Returns:
        str: The decoded text of the first generated sequence;
        ``"No text detected"`` if it is blank; ``"Please upload an image"``
        when ``image`` is ``None``; or ``"Error: <message>"`` if loading or
        inference raised.
    """
    if image is None:
        return "Please upload an image"

    try:
        # Initialize models (cache them). ``model`` is assigned last, so its
        # presence implies both attributes exist. Loading into locals first
        # means a failed model download cannot leave a half-initialised
        # cache that breaks every later call with AttributeError.
        if not hasattr(ocr_trocr, "model"):
            checkpoint = "microsoft/trocr-base-printed"
            processor = TrOCRProcessor.from_pretrained(checkpoint)
            model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
            ocr_trocr.processor = processor
            ocr_trocr.model = model

        # Normalise grayscale/RGBA/palette inputs to three-channel RGB.
        if getattr(image, "mode", "RGB") != "RGB":
            image = image.convert("RGB")

        # Process image
        pixel_values = ocr_trocr.processor(image, return_tensors="pt").pixel_values
        generated_ids = ocr_trocr.model.generate(pixel_values)
        generated_text = ocr_trocr.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        return generated_text if generated_text.strip() else "No text detected"

    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error: {str(e)}"

# Method 4: PaddleOCR (Lightweight & Fast)
from paddleocr import PaddleOCR
import cv2

def ocr_paddle(image):
    """Extract text from an image with PaddleOCR (English, angle classifier on).

    The ``PaddleOCR`` engine is created on the first call and cached as a
    function attribute.

    Args:
        image: Image to read. Objects exposing a PIL-style ``mode`` other
            than ``"RGB"`` are converted to RGB before the array is swapped
            to OpenCV's BGR channel order. ``None`` means no image was
            supplied.

    Returns:
        str: One ``"<text> (confidence: <score>)"`` line per recognised text
        line; ``"No text detected"`` when the first result page is empty;
        ``"Please upload an image"`` when ``image`` is ``None``; or
        ``"Error: <message>"`` if initialisation or OCR raised.
    """
    if image is None:
        return "Please upload an image"

    try:
        # Initialize PaddleOCR (cache it)
        if not hasattr(ocr_paddle, "ocr"):
            ocr_paddle.ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)

        # A grayscale or palette image becomes a 2-D array, which the
        # RGB->BGR conversion below rejects; normalise to RGB first.
        if getattr(image, "mode", "RGB") != "RGB":
            image = image.convert("RGB")

        # Convert PIL to OpenCV format
        img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # Extract text
        results = ocr_paddle.ocr.ocr(img_cv, cls=True)

        # Only the first entry of the result list is read; it may be
        # empty/None when nothing was recognised.
        if not (results and results[0]):
            return "No text detected"

        # Each line is indexed as line[1] == (text, confidence).
        return "\n".join(
            f"{line[1][0]} (confidence: {line[1][1]:.2f})"
            for line in results[0]
        )

    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error: {str(e)}"
    

### Test gradio UI 

# Complete Multi-Method Gradio App
def create_multi_ocr_app():
    """Build the Gradio UI that runs one of several OCR engines on an image.

    The user uploads an image and picks a method; the extracted text and the
    time the call took are shown side by side. Extraction runs on the button
    click and whenever the image component changes.

    Returns:
        The ``gr.Blocks`` application. It is only constructed here; the
        caller decides when to launch it.
    """
    # Single source of truth for the available engines: drives both the
    # dropdown choices and the dispatch below, so the two cannot drift apart.
    ocr_methods = {
        "EasyOCR": ocr_easyocr,
        "Tesseract": ocr_tesseract,
        "TrOCR": ocr_trocr,
        "PaddleOCR": ocr_paddle,
    }

    def process_with_method(image, method):
        """Run the selected OCR method and time it.

        Args:
            image: The uploaded image, or ``None`` when nothing is loaded.
            method: Name of the OCR method chosen in the dropdown.

        Returns:
            tuple[str, str]: The OCR output (or a user-facing message) and
            the elapsed time rendered with ``str(timedelta)``.
        """
        if image is None:
            return "Please upload an image", "00:00:00"

        start_time = datetime.datetime.now()
        run_ocr = ocr_methods.get(method)
        if run_ocr is None:
            results = "Invalid method selected"
        else:
            results = run_ocr(image)
        # Subtracting two datetimes cannot raise, so no try/except is needed.
        elapsed_time = datetime.datetime.now() - start_time

        return results, str(elapsed_time)

    with gr.Blocks(title="Multi-OCR Comparator") as app:
        gr.Markdown("# Multi-Method OCR Comparison")
        gr.Markdown("Compare different OCR methods on your images")

        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload Image")
                method_dropdown = gr.Dropdown(
                    choices=list(ocr_methods),
                    value="EasyOCR",
                    label="OCR Method",
                )
                extract_btn = gr.Button("Extract Text", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(label="Extracted Text", lines=15)
                elapsed_time_output = gr.Textbox(
                    label="Elapsed Time", lines=1, value="00:00:00"
                )

        # Process on button click
        extract_btn.click(
            process_with_method,
            inputs=[image_input, method_dropdown],
            outputs=[text_output, elapsed_time_output],
        )

        # Auto-process on image change
        image_input.change(
            process_with_method,
            inputs=[image_input, method_dropdown],
            outputs=[text_output, elapsed_time_output],
        )

    return app

# Launch the multi-method app when this file is run as a script
if __name__ == "__main__":
    # Build the multi-method comparison UI and serve it with Gradio's
    # default launch settings.
    create_multi_ocr_app().launch()