Amandeep01 committed on
Commit
3dafe75
·
verified ·
1 Parent(s): 6133149

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +441 -0
app.py ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import easyocr
3
+ from deep_translator import GoogleTranslator
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import numpy as np
6
+ import cv2
7
+ import time
8
+ import re
9
+ from typing import Tuple, List, Optional
10
+ import io
11
+ import os
12
+
13
# Module-level state.
# `reader` holds the EasyOCR reader once initialize_reader() has built it.
reader = None
# Memo of finished translations, filled and read by smart_translate().
translation_cache = dict()

# Languages handled by the app: language code -> display name.
SUPPORTED_LANGUAGES = dict(en='English', hi='Hindi', mr='Marathi')

# Reverse lookup used by the UI: display name -> language code passed to
# the translator.
LANG_CODE_MAP = {name: code for code, name in SUPPORTED_LANGUAGES.items()}
30
+
31
def initialize_reader():
    """Return the shared EasyOCR reader, building it on the first call.

    The instance is kept in the module-level ``reader`` variable so later
    calls reuse it. If construction raises, the error is printed and ``None``
    is returned; ``reader`` stays ``None`` so a later call tries again.
    """
    global reader

    # Fast path: already built by an earlier call.
    if reader is not None:
        return reader

    try:
        # English, Hindi and Marathi models, CPU only, quiet start-up.
        reader = easyocr.Reader(['en', 'hi', 'mr'], gpu=False, verbose=False)
        print("EasyOCR initialized successfully")
    except Exception as e:
        print(f"Error initializing EasyOCR: {e}")
        return None

    return reader
43
+
44
def get_font_for_text(text: str, target_size: int = 20) -> Optional[ImageFont.FreeTypeFont]:
    """Pick a font able to render ``text`` at ``target_size``.

    Text containing Devanagari code points (U+0900-U+097F, used by Hindi and
    Marathi) is matched against a Devanagari-capable font list; anything else
    uses the Latin list. The first font file that exists and loads wins.

    Args:
        text: The string that will be drawn. ``None`` is treated as empty.
        target_size: Requested font size in pixels.

    Returns:
        A loaded font, or ``None`` if not even Pillow's built-in default font
        could be loaded. On Pillow versions whose ``load_default`` does not
        accept a size, the fallback is the fixed-size built-in font.
    """
    has_devanagari = re.search(r'[\u0900-\u097F]', text or "") is not None

    # Candidate font files, in order of preference.
    devanagari_fonts = (
        "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
        "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Bold.ttf",
        "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    )
    english_fonts = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    )
    font_paths = devanagari_fonts if has_devanagari else english_fonts

    for font_path in font_paths:
        if not os.path.exists(font_path):
            continue
        try:
            return ImageFont.truetype(font_path, size=target_size)
        except OSError:
            # Unreadable or corrupt font file: try the next candidate.
            continue

    # No system font available. Prefer a default font that honours the
    # requested size; older Pillow releases reject the ``size`` argument
    # (TypeError) and builds without FreeType cannot scale (ImportError/OSError).
    try:
        return ImageFont.load_default(size=target_size)
    except (TypeError, ImportError, OSError):
        pass

    try:
        return ImageFont.load_default()
    except Exception as e:
        print(f"Error loading default font: {e}")
        return None
78
+
79
def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Translate ``text`` into ``target_lang``, memoising successful results.

    Whitespace runs are collapsed before translating. Up to three attempts
    are made, pausing half a second after a failed attempt that is not the
    last one. Successful translations are stored in ``translation_cache``.

    Args:
        text: Text to translate.
        target_lang: Target language code.
        source_lang: Source language code, ``'auto'`` by default.

    Returns:
        ``""`` for empty or whitespace-only input, the stripped translation on
        success, or ``"[Translation failed: <text>]"`` when every attempt
        fails or yields nothing.
    """
    if not (text and text.strip()):
        return ""

    # Collapse all whitespace runs into single spaces.
    normalized = re.sub(r'\s+', ' ', text.strip())

    cache_key = f"{normalized}|{source_lang}|{target_lang}"
    try:
        return translation_cache[cache_key]
    except KeyError:
        pass

    max_retries = 3
    final_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            engine = GoogleTranslator(source=source_lang, target=target_lang)
            outcome = engine.translate(normalized)

            # An empty answer counts as a miss: move on to the next attempt.
            if not outcome or not outcome.strip():
                continue

            outcome = outcome.strip()
            translation_cache[cache_key] = outcome
            return outcome

        except Exception as e:
            print(f"Translation attempt {attempt + 1} failed: {e}")
            if attempt != final_attempt:
                time.sleep(0.5)

    return f"[Translation failed: {normalized}]"
113
+
114
def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 10, max_size: int = 50) -> int:
    """Estimate a font size that lets ``text`` fit inside a bounding box.

    Two candidate sizes are computed and the smaller one is used:
    a width-based size assuming each character is about 0.6 of the font
    size wide, and a height-based size of 70% of the box height. The result
    is then clamped to ``[min_size, max_size]``.

    Args:
        text: Text that will be drawn.
        bbox_width: Width of the target box in pixels.
        bbox_height: Height of the target box in pixels.
        min_size: Lower bound for the returned size.
        max_size: Upper bound for the returned size.

    Returns:
        The clamped font size; ``min_size`` when ``text`` is empty.
    """
    if not text:
        return min_size

    # Approximate ratio of average character width to font size.
    char_width_ratio = 0.6

    # Largest size at which len(text) characters still fit the box width.
    width_based_size = int(bbox_width / (len(text) * char_width_ratio))

    # Leave some vertical breathing room: use 70% of the box height.
    height_based_size = int(bbox_height * 0.7)

    optimal_size = min(width_based_size, height_based_size)
    return max(min_size, min(optimal_size, max_size))
132
+
133
def get_text_color_with_contrast(background_color: Tuple[int, int, int]) -> Tuple[int, int, int, int]:
    """Choose opaque black or white text for the given background colour.

    Only the first three components of ``background_color`` are read, so an
    RGBA tuple is accepted as well. Brightness is the weighted sum
    0.299 R + 0.587 G + 0.114 B scaled to the 0-1 range.

    Returns:
        ``(255, 255, 255, 255)`` when the brightness is below 0.5,
        otherwise ``(0, 0, 0, 255)``.
    """
    red, green, blue = background_color[:3]
    brightness = (0.299 * red + 0.587 * green + 0.114 * blue) / 255

    white = (255, 255, 255, 255)
    black = (0, 0, 0, 255)
    # Dark backgrounds get white text, light ones get black.
    return white if brightness < 0.5 else black
145
+
146
def extract_background_color(image: np.ndarray, bbox: List, expand_factor: float = 1.2) -> Tuple[int, int, int, int]:
    """Average the pixels of an enlarged box around ``bbox``.

    The box is centred on the midpoint of the top edge (x) and left edge (y)
    of ``bbox``, scaled by ``expand_factor`` and clipped to the image.

    Args:
        image: Pixel array indexed as ``[row, column, channel]``.
        bbox: Four corner points in the order top-left, top-right,
            bottom-right, bottom-left.
        expand_factor: Scale applied to the box width and height.

    Returns:
        The per-channel mean (truncated to ints) followed by an alpha of 220.
        If the clipped region is empty or anything raises, the error is
        printed (when applicable) and ``(240, 240, 240, 200)`` is returned.
    """
    fallback = (240, 240, 240, 200)

    try:
        # All four corners must be present even though only three are read.
        corner_tl, corner_tr, _corner_br, corner_bl = bbox

        mid_x = (corner_tl[0] + corner_tr[0]) / 2
        mid_y = (corner_tl[1] + corner_bl[1]) / 2
        half_w = (abs(corner_tr[0] - corner_tl[0]) * expand_factor) / 2
        half_h = (abs(corner_bl[1] - corner_tl[1]) * expand_factor) / 2

        # Clip the enlarged box to the image bounds.
        left = max(0, int(mid_x - half_w))
        top = max(0, int(mid_y - half_h))
        right = min(image.shape[1], int(mid_x + half_w))
        bottom = min(image.shape[0], int(mid_y + half_h))

        patch = image[top:bottom, left:right]
        if patch.size > 0:
            pixels = patch.reshape(-1, patch.shape[-1])
            channel_means = np.mean(pixels, axis=0)
            return tuple(int(value) for value in channel_means) + (220,)

    except Exception as e:
        print(f"Error extracting background color: {e}")

    return fallback
181
+
182
def create_smart_overlay(image: Image.Image, bbox: List, original_text: str, translated_text: str) -> None:
    """Paint ``translated_text`` over the region described by ``bbox``.

    The region is first covered with a rectangle in the colour sampled by
    ``extract_background_color``; the translation is then drawn centred in
    the region, in black or white depending on that colour.

    Args:
        image: Image to draw on; it is modified in place.
        bbox: Four corner points in the order top-left, top-right,
            bottom-right, bottom-left.
        original_text: The recognised source text. Not used by the drawing
            logic; kept so existing callers keep working.
        translated_text: Text to draw.
    """
    draw = ImageDraw.Draw(image, 'RGBA')

    top_left, top_right, bottom_right, bottom_left = bbox

    # Axis-aligned rectangle enclosing the (possibly skewed) quadrilateral.
    x = int(min(top_left[0], bottom_left[0]))
    y = int(min(top_left[1], top_right[1]))
    width = int(max(top_right[0], bottom_right[0]) - x)
    height = int(max(bottom_left[1], bottom_right[1]) - y)

    font_size = calculate_optimal_font_size(translated_text, width, height)

    font = get_font_for_text(translated_text, font_size)
    if font is None:
        font = get_font_for_text(translated_text, 14)  # Fallback size

    # Sample the colour from the current state of the image.
    img_array = np.array(image.convert('RGB'))
    bg_color = extract_background_color(img_array, bbox)

    # Cover the original text, with a small margin proportional to the font.
    padding = max(2, font_size // 8)
    bg_rect = [
        x - padding,
        y - padding,
        x + width + padding,
        y + height + padding,
    ]
    draw.rectangle(bg_rect, fill=bg_color)

    # Measure the rendered text. textbbox((0, 0), ...) reports where the ink
    # actually lands relative to the drawing origin, so its left/top values
    # are an offset that must be removed when centring.
    try:
        ink_left, ink_top, ink_right, ink_bottom = draw.textbbox(
            (0, 0), translated_text, font=font
        )
        text_width = ink_right - ink_left
        text_height = ink_bottom - ink_top
    except Exception:
        # Best-effort estimate when measuring is unavailable (e.g. an older
        # Pillow without textbbox).
        ink_left = ink_top = 0
        text_width = len(translated_text) * font_size * 0.6
        text_height = font_size

    # Place the drawing origin so that the ink box is centred in the region.
    text_x = x + (width - text_width) / 2 - ink_left
    text_y = y + (height - text_height) / 2 - ink_top

    text_color = get_text_color_with_contrast(bg_color[:3])
    draw.text((text_x, text_y), translated_text, fill=text_color, font=font)
238
+
239
def _normalize_ocr_results(results, min_confidence: float = 0.5):
    """Turn raw OCR entries into ``(bbox, text, confidence)`` triples.

    Entries may be ``(bbox, text, confidence)`` or, when the OCR call merges
    text into paragraphs, just ``(bbox, text)``. Entries without a confidence
    are kept with ``confidence=None``; entries with one are kept only when it
    exceeds ``min_confidence``.
    """
    normalized = []
    for entry in results:
        bbox, text = entry[0], entry[1]
        confidence = entry[2] if len(entry) > 2 else None
        if confidence is None or confidence > min_confidence:
            normalized.append((bbox, text, confidence))
    return normalized


def process_image(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
    """Run OCR on ``image``, translate each text region and draw the result.

    Args:
        image: Uploaded image, or ``None`` when nothing was uploaded.
        target_language: Display name of the target language; must be a key
            of ``LANG_CODE_MAP``.
        progress: Gradio progress tracker, called with a fraction and a
            description.

    Returns:
        ``(output_image, message)``. On success ``output_image`` is an RGB
        copy with the translations drawn on it and ``message`` is a
        per-region summary. On any failure the untouched input image (or
        ``None`` if there was none) is returned with an explanatory message.
    """
    if image is None:
        return None, "❌ Please upload an image first."

    if target_language not in LANG_CODE_MAP:
        return image, f"❌ Unsupported target language: {target_language}"

    target_lang_code = LANG_CODE_MAP[target_language]

    progress(0.1, "🔧 Initializing OCR engine...")

    ocr = initialize_reader()
    if ocr is None:
        return image, "❌ Failed to initialize OCR. Please try again."

    progress(0.3, "🔍 Extracting text from image...")

    try:
        # Normalise the mode first so palette or alpha images reach the OCR
        # engine as a plain three-channel array.
        img_array = np.array(image.convert('RGB'))

        results = ocr.readtext(img_array, paragraph=True, width_ths=0.7, height_ths=0.7)

        if not results:
            return image, "ℹ️ No readable text found in the image."

        # With paragraph=True the OCR entries carry no confidence value, so
        # unpacking three fields directly would fail; normalise both shapes.
        filtered_results = _normalize_ocr_results(results, min_confidence=0.5)

        if not filtered_results:
            return image, "ℹ️ No text detected with sufficient confidence."

        total_regions = len(filtered_results)
        progress(0.5, f"🌐 Translating {total_regions} text regions...")

        # Work on an RGBA copy so the caller's image is left untouched.
        result_image = image.copy().convert('RGBA')

        translations_info = []

        for i, (bbox, text, confidence) in enumerate(filtered_results):
            # Translation occupies the 50%-90% band of the progress bar.
            progress(0.5 + (0.4 * i / total_regions), f"Translating region {i+1}/{total_regions}")

            if not text or not text.strip():
                continue

            cleaned_text = re.sub(r'\s+', ' ', text.strip())
            translated = smart_translate(cleaned_text, target_lang_code)
            create_smart_overlay(result_image, bbox, cleaned_text, translated)

            translations_info.append({
                'original': cleaned_text,
                'translated': translated,
                'confidence': confidence
            })

        progress(1.0, "✅ Translation completed!")

        final_image = result_image.convert('RGB')

        summary_lines = [f"🎯 Successfully processed {len(translations_info)} text regions:\n"]

        for i, info in enumerate(translations_info, 1):
            # Paragraph-mode OCR provides no confidence; show a placeholder.
            if info['confidence'] is None:
                confidence_label = "n/a"
            else:
                confidence_label = f"{info['confidence']:.2f}"
            summary_lines.append(f"{i}. Original: {info['original']}")
            summary_lines.append(f" Translation: {info['translated']}")
            summary_lines.append(f" Confidence: {confidence_label}\n")

        summary_text = "\n".join(summary_lines)

        return final_image, summary_text

    except Exception as e:
        # Top-level boundary for the UI callback: report instead of crashing.
        error_msg = f"❌ Error processing image: {str(e)}"
        print(f"Processing error: {e}")
        return image, error_msg
326
+
327
# Stylesheet passed to gr.Blocks(css=...) when the interface is built.
# It caps and centres the page width and styles three classes used by the
# raw HTML snippets in the layout: .main-header (gradient title),
# .description (subtitle) and .feature-box (feature list panel).
custom_css = """
.gradio-container {
    max-width: 1200px;
    margin: auto;
}
.main-header {
    text-align: center;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 0.5em;
}
.description {
    text-align: center;
    font-size: 1.1em;
    color: #666;
    margin-bottom: 2em;
}
.feature-box {
    background: #f8f9fa;
    padding: 1em;
    border-radius: 8px;
    margin: 1em 0;
}
"""
355
+
356
# Build the Gradio interface: a header, a two-column row (inputs on the left,
# results on the right), the button wiring and a static feature list.
# NOTE(review): several emoji in the string literals below look mis-encoded
# in this copy of the file - confirm against the original before editing them.
with gr.Blocks(css=custom_css, title="Multilingual Signboard Translator") as demo:

    # Page title and subtitle, styled by .main-header / .description.
    gr.HTML("""
    <div class="main-header">πŸŒ Multilingual Signboard Translator</div>
    <div class="description">
    Extract and translate text from images with intelligent overlay technology
    </div>
    """)

    with gr.Row():
        # Left column: image upload, target language and the action button.
        with gr.Column(scale=1):
            gr.Markdown("### πŸ“€ Upload & Configure")

            # type="pil" matches the Image.Image parameter of process_image.
            input_image = gr.Image(
                label="πŸ“· Upload Image",
                type="pil",
                height=300
            )

            # Choices are the display names; process_image maps the selected
            # name to a language code through LANG_CODE_MAP.
            target_language = gr.Dropdown(
                choices=list(LANG_CODE_MAP.keys()),
                value="Hindi",
                label="🎯 Translate To",
                info="Select target language for translation"
            )

            translate_btn = gr.Button(
                "πŸš€ Translate Text",
                variant="primary",
                size="lg"
            )

        # Right column: annotated image and the textual summary.
        with gr.Column(scale=1):
            gr.Markdown("### πŸ“€ Results")

            output_image = gr.Image(
                label="πŸ–ΌοΈ Translated Image",
                type="pil",
                height=300
            )

            output_text = gr.Textbox(
                label="πŸ“ Translation Details",
                lines=8,
                max_lines=15,
                info="Detailed translation information"
            )

    # Event binding: the two outputs receive the (image, message) pair
    # returned by process_image.
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_language],
        outputs=[output_image, output_text],
        show_progress=True
    )

    # Static feature list, styled by .feature-box.
    gr.HTML("""
    <div class="feature-box">
    <h3>✨ Key Features:</h3>
    <ul>
    <li><strong>🎯 Smart OCR:</strong> Advanced text detection with confidence filtering</li>
    <li><strong>🌐 Multilingual Support:</strong> English ↔ Hindi ↔ Marathi translation</li>
    <li><strong>🎨 Intelligent Overlay:</strong> Context-aware text positioning and sizing</li>
    <li><strong>πŸ”§ Adaptive Fonts:</strong> Script-specific font selection for better readability</li>
    <li><strong>⚑ Optimized Performance:</strong> Efficient processing with caching</li>
    </ul>
    </div>
    """)
426
+
427
if __name__ == "__main__":
    # Build the OCR reader up front so the first request does not pay for it.
    print("🔧 Pre-initializing OCR engine...")
    try:
        # initialize_reader() signals failure by returning None (after
        # printing the error), so success must be checked explicitly.
        ocr_ready = initialize_reader() is not None
    except Exception as e:
        ocr_ready = False
        print(f"⚠️ OCR initialization warning: {e}")

    if ocr_ready:
        print("✅ OCR engine ready!")

    # Launch the application. `show_tips` is intentionally not passed: it is
    # not accepted by launch() in current Gradio releases and raises TypeError.
    demo.launch(
        share=False,
        show_error=True,
    )