Daniel Jarvis committed on
Commit
807fdd0
·
1 Parent(s): bd837a7

Add application file

Browse files
Files changed (3) hide show
  1. app.py +207 -0
  2. packages.txt +1 -0
  3. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Method 1: EasyOCR (Recommended - Fast & Lightweight)
2
+ import os
3
+ os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
4
+ #os.environ["OMP_NUM_THREADS"] = "1" # Optional: limit threads
5
+
6
+
7
+ import gradio as gr
8
+ import datetime
9
+ import easyocr
10
+ import numpy as np
11
+ from PIL import Image
12
+
13
+
14
def ocr_easyocr(image):
    """Extract English text from an image with EasyOCR.

    Args:
        image: Image to read (the UI supplies a PIL image), or ``None``
            when no image is loaded.

    Returns:
        One line per detected region formatted as
        ``"<text> (confidence: <score>)"``; ``"No text detected"`` when the
        reader finds nothing; ``"Please upload an image"`` when ``image`` is
        ``None``; or ``"Error: <message>"`` if anything raises.
    """
    # This function is wired directly to the image component's change event,
    # so it can be called without an image; answer with a prompt instead of
    # letting the array conversion fail.
    if image is None:
        return "Please upload an image"

    try:
        # The reader is created once and cached on the function object so
        # later calls reuse it.
        if not hasattr(ocr_easyocr, "reader"):
            ocr_easyocr.reader = easyocr.Reader(['en'], gpu=False)

        # readtext is given a numpy array and yields (bbox, text, confidence).
        detections = ocr_easyocr.reader.readtext(np.array(image))
        lines = [
            f"{text} (confidence: {confidence:.2f})"
            for _bbox, text, confidence in detections
        ]
        return "\n".join(lines) if lines else "No text detected"

    except Exception as e:
        # The return value is displayed as-is in the output textbox, so
        # failures are reported as text rather than raised.
        return f"Error: {str(e)}"
36
+
37
+ # Create Gradio app for EasyOCR
38
def create_easyocr_app():
    """Build and return the single-method Gradio Blocks UI for EasyOCR."""
    with gr.Blocks(title="EasyOCR Text Extractor") as app:
        gr.Markdown("# EasyOCR Text Extraction")
        gr.Markdown("Upload an image to extract text using EasyOCR")

        with gr.Row():
            image_input = gr.Image(type="pil", label="Upload Image")
            text_output = gr.Textbox(label="Extracted Text", lines=10)

        extract_btn = gr.Button("Extract Text", variant="primary")

        # Same handler for both triggers: the explicit button click first,
        # then the automatic run whenever the image changes.
        for register in (extract_btn.click, image_input.change):
            register(ocr_easyocr, inputs=image_input, outputs=text_output)

    return app
54
+
55
+ # Method 2: Tesseract OCR (Classic & Reliable)
56
+ import pytesseract
57
+ from PIL import Image
58
+
59
def _tesseract_confidence(raw_conf):
    """Return a Tesseract ``conf`` cell as an int, or -1 if it is not numeric."""
    try:
        # Going through float() accepts ints, floats and strings such as
        # "96.5", which a bare int() would reject.
        return int(float(raw_conf))
    except (TypeError, ValueError):
        return -1


def ocr_tesseract(image):
    """Extract text from an image with Tesseract via pytesseract.

    Args:
        image: Image to read (the UI supplies a PIL image), or ``None``
            when no image is loaded.

    Returns:
        A report with two sections: the plain recognised text, followed by
        the individual words whose confidence is above 30, each formatted as
        ``"<word> (<conf>% confidence)"``. Returns
        ``"Please upload an image"`` when ``image`` is ``None`` and
        ``"Error: <message>"`` if anything raises.
    """
    if image is None:
        return "Please upload an image"

    try:
        # Plain text for the first section of the report.
        text = pytesseract.image_to_string(image)

        # Per-word records (parallel 'text' and 'conf' lists) for the second.
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

        filtered_text = []
        for raw_word, conf in zip(data['text'], data['conf']):
            if _tesseract_confidence(conf) > 30:  # confidence threshold
                word = raw_word.strip()
                if word:
                    filtered_text.append(f"{word} ({conf}% confidence)")

        result = text.strip() or "No text detected"
        detailed = "\n".join(filtered_text) if filtered_text else "No high-confidence text"

        return f"Text:\n{result}\n\nDetailed (>30% confidence):\n{detailed}"

    except Exception as e:
        # The return value is displayed as-is in the output textbox, so
        # failures are reported as text rather than raised.
        return f"Error: {str(e)}"
83
+
84
+ # Method 3: TrOCR (Hugging Face Transformers)
85
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
86
+ import torch
87
+
88
def ocr_trocr(image):
    """Extract text from an image with the TrOCR transformer model.

    Uses the ``microsoft/trocr-base-printed`` checkpoint for both the
    processor and the model.

    Args:
        image: Image to read (the UI supplies a PIL image), or ``None``
            when no image is loaded.

    Returns:
        The decoded text; ``"No text detected"`` when the decoded text is
        blank; ``"Please upload an image"`` when ``image`` is ``None``; or
        ``"Error: <message>"`` if anything raises.
    """
    if image is None:
        return "Please upload an image"

    try:
        # Cache processor and model on the function object. Both are loaded
        # into locals first and only then published, so a failure while
        # loading the model cannot leave a processor-only cache that every
        # later call would trip over.
        if not (hasattr(ocr_trocr, "processor") and hasattr(ocr_trocr, "model")):
            processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
            model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
            ocr_trocr.processor = processor
            ocr_trocr.model = model

        # Normalise grayscale/palette/alpha images to RGB when the input
        # supports it; other inputs (e.g. arrays) are passed through as before.
        # NOTE(review): assumes the processor expects 3-channel input - confirm.
        if hasattr(image, "convert"):
            image = image.convert("RGB")

        pixel_values = ocr_trocr.processor(image, return_tensors="pt").pixel_values
        generated_ids = ocr_trocr.model.generate(pixel_values)
        generated_text = ocr_trocr.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        return generated_text if generated_text.strip() else "No text detected"

    except Exception as e:
        # The return value is displayed as-is in the output textbox, so
        # failures are reported as text rather than raised.
        return f"Error: {str(e)}"
105
+
106
+ # Method 4: PaddleOCR (Lightweight & Fast)
107
+ from paddleocr import PaddleOCR
108
+ import cv2
109
+
110
def ocr_paddle(image):
    """Extract English text from an image with PaddleOCR.

    Args:
        image: Image to read (the UI supplies a PIL image), or ``None``
            when no image is loaded.

    Returns:
        One line per recognised text line formatted as
        ``"<text> (confidence: <score>)"``; ``"No text detected"`` when the
        engine returns nothing; ``"Please upload an image"`` when ``image``
        is ``None``; or ``"Error: <message>"`` if anything raises.
    """
    if image is None:
        return "Please upload an image"

    try:
        # The engine is created once and cached on the function object so
        # later calls reuse it.
        if not hasattr(ocr_paddle, "ocr"):
            ocr_paddle.ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)

        # Force three channels first: a grayscale or palette image becomes a
        # 2-D array, which the RGB->BGR conversion below cannot accept.
        if hasattr(image, "convert"):
            image = image.convert("RGB")

        # Convert to OpenCV's BGR channel order.
        img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        results = ocr_paddle.ocr.ocr(img_cv, cls=True)

        # results[0] holds the entries for the image; each entry's second
        # element is read as a (text, confidence) pair.
        if not (results and results[0]):
            return "No text detected"

        return "\n".join(
            f"{entry[1][0]} (confidence: {entry[1][1]:.2f})"
            for entry in results[0]
        )

    except Exception as e:
        # The return value is displayed as-is in the output textbox, so
        # failures are reported as text rather than raised.
        return f"Error: {str(e)}"
135
+
136
+
137
+ ### Test gradio UI
138
+
139
+ # Complete Multi-Method Gradio App
140
def create_multi_ocr_app():
    """Build and return the Gradio Blocks UI that compares OCR methods.

    The UI offers an image upload, a dropdown selecting one of the OCR
    back-ends, and two outputs: the extracted text and the time the selected
    back-end took. Extraction runs on button click and whenever the image
    changes.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    # Local import: only this function needs the monotonic timer.
    import time

    # Dropdown label -> OCR function. Insertion order is the dropdown order.
    ocr_methods = {
        "EasyOCR": ocr_easyocr,
        "Tesseract": ocr_tesseract,
        "TrOCR": ocr_trocr,
        "PaddleOCR": ocr_paddle,
    }

    def process_with_method(image, method):
        """Run the selected OCR method and return ``(text, elapsed_time)``."""
        if image is None:
            return "Please upload an image", "00:00:00"

        # perf_counter is monotonic, so the measurement is unaffected by
        # system clock adjustments while OCR is running.
        started = time.perf_counter()

        ocr_function = ocr_methods.get(method)
        if ocr_function is None:
            results = "Invalid method selected"
        else:
            results = ocr_function(image)

        elapsed_time = datetime.timedelta(seconds=time.perf_counter() - started)
        return results, str(elapsed_time)

    with gr.Blocks(title="Multi-OCR Comparator") as app:
        gr.Markdown("# Multi-Method OCR Comparison")
        gr.Markdown("Compare different OCR methods on your images")

        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload Image")
                method_dropdown = gr.Dropdown(
                    choices=list(ocr_methods),
                    value="EasyOCR",
                    label="OCR Method",
                )
                extract_btn = gr.Button("Extract Text", variant="primary")

            with gr.Column():
                text_output = gr.Textbox(label="Extracted Text", lines=15)
                elapsed_time_output = gr.Textbox(
                    label="Elapsed Time", lines=1, value="00:00:00"
                )

        # Process on button click
        extract_btn.click(
            process_with_method,
            inputs=[image_input, method_dropdown],
            outputs=[text_output, elapsed_time_output],
        )

        # Auto-process on image change
        image_input.change(
            process_with_method,
            inputs=[image_input, method_dropdown],
            outputs=[text_output, elapsed_time_output],
        )

    return app
200
+
201
+ # Launch instructions
202
if __name__ == "__main__":
    # Script entry point: build the multi-method comparison UI and serve it.
    app = create_multi_ocr_app()
    app.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ tesseract-ocr
+ tesseract-ocr-eng
+ libtesseract-dev
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gradio
+ easyocr
+ Pillow
+ numpy