Amandeep01 commited on
Commit
4281ed9
·
verified ·
1 Parent(s): eed6b37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +487 -216
app.py CHANGED
@@ -6,60 +6,203 @@ import numpy as np
6
  import cv2
7
  import time
8
  import re
9
- from typing import Tuple, List, Optional
10
  import io
11
  import os
 
 
12
 
13
  # Global variables
14
  reader = None
15
  translation_cache = {}
16
 
17
- # Define supported languages with better language detection
18
  SUPPORTED_LANGUAGES = {
19
  'en': 'English',
20
- 'hi': 'Hindi'
 
 
 
 
 
 
21
  }
22
 
23
  # Language code mapping for Google Translator
24
  LANG_CODE_MAP = {
25
  'English': 'en',
26
- 'Hindi': 'hi'
 
 
 
 
 
 
27
  }
28
 
29
  def initialize_reader():
30
- """Initialize EasyOCR reader with optimized language support"""
31
  global reader
32
  if reader is None:
33
- try:
34
- # Initialize with English and Hindi only for faster loading
35
- reader = easyocr.Reader(['en', 'hi'], gpu=False, verbose=False, download_enabled=True)
36
- print("EasyOCR initialized successfully")
37
- except Exception as e:
38
- print(f"Error initializing EasyOCR: {e}")
39
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  return reader
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFont:
43
- """Get appropriate font based on text content and size"""
44
- # Check if text contains Devanagari script (Hindi/Marathi)
45
  has_devanagari = bool(re.search(r'[\u0900-\u097F]', text))
 
 
 
 
46
 
47
  # Font paths for different scripts
48
- devanagari_fonts = [
49
- "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
50
- "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Bold.ttf",
51
- "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf",
52
- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
53
- ]
 
54
 
55
- english_fonts = [
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
57
  "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
58
- "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf",
59
- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
60
- ]
61
-
62
- font_paths = devanagari_fonts if has_devanagari else english_fonts
63
 
64
  for font_path in font_paths:
65
  try:
@@ -68,14 +211,14 @@ def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFon
68
  except (OSError, IOError):
69
  continue
70
 
71
- # Fallback to default font
72
  try:
73
  return ImageFont.load_default()
74
  except:
75
  return None
76
 
77
- def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> str:
78
- """Enhanced translation with context awareness and caching"""
79
  if not text or not text.strip():
80
  return ""
81
 
@@ -87,16 +230,42 @@ def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> s
87
  if cache_key in translation_cache:
88
  return translation_cache[cache_key]
89
 
90
- max_retries = 2 # Reduced retries for faster response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  for attempt in range(max_retries):
92
  try:
93
- # Use GoogleTranslator with better error handling
94
  translator = GoogleTranslator(source=source_lang, target=target_lang)
95
- translated = translator.translate(cleaned_text)
96
 
97
- if translated and translated.strip() and translated != cleaned_text:
98
- # Post-process translation for better readability
99
- translated = translated.strip()
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # Cache successful translation
102
  translation_cache[cache_key] = translated
@@ -105,137 +274,149 @@ def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> s
105
  except Exception as e:
106
  print(f"Translation attempt {attempt + 1} failed: {e}")
107
  if attempt < max_retries - 1:
108
- time.sleep(0.3) # Shorter wait time
109
-
110
- return cleaned_text # Return original text if translation fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 10, max_size: int = 50) -> int:
113
- """Calculate optimal font size based on bounding box dimensions and text length"""
114
  if not text:
115
  return min_size
116
 
117
- # Base calculation on text length and available space
118
- char_width_ratio = 0.6 # Approximate character width to height ratio
119
- estimated_char_width = bbox_height * char_width_ratio
120
- calculated_size = int(bbox_width / (len(text) * char_width_ratio))
 
 
121
 
122
- # Consider height constraint
123
- height_based_size = int(bbox_height * 0.7) # Use 70% of available height
124
 
125
- # Take the smaller of the two constraints
126
- optimal_size = min(calculated_size, height_based_size)
 
 
 
127
 
128
  # Apply bounds
129
  return max(min_size, min(optimal_size, max_size))
130
 
131
- def get_text_color_with_contrast(background_color: Tuple[int, int, int]) -> Tuple[int, int, int, int]:
132
- """Calculate optimal text color based on background for maximum contrast"""
133
- r, g, b = background_color[:3]
134
-
135
- # Calculate luminance using standard formula
136
  luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
137
 
138
- # Return white for dark backgrounds, black for light backgrounds
139
- if luminance < 0.5:
140
- return (255, 255, 255, 255) # White text
141
  else:
142
- return (0, 0, 0, 255) # Black text
143
 
144
- def extract_background_color(image: np.ndarray, bbox: List, expand_factor: float = 1.2) -> Tuple[int, int, int, int]:
145
- """Extract representative background color from around the text region"""
146
  try:
147
  # Get bounding box coordinates
148
- top_left, top_right, bottom_right, bottom_left = bbox
149
 
150
- # Calculate center and dimensions
151
- center_x = (top_left[0] + top_right[0]) / 2
152
- center_y = (top_left[1] + bottom_left[1]) / 2
153
- width = abs(top_right[0] - top_left[0])
154
- height = abs(bottom_left[1] - top_left[1])
155
 
156
- # Expand region for better color sampling
157
- expanded_width = width * expand_factor
158
- expanded_height = height * expand_factor
159
 
160
- # Calculate expanded coordinates
161
- x1 = max(0, int(center_x - expanded_width / 2))
162
- y1 = max(0, int(center_y - expanded_height / 2))
163
- x2 = min(image.shape[1], int(center_x + expanded_width / 2))
164
- y2 = min(image.shape[0], int(center_y + expanded_height / 2))
165
-
166
- # Extract region
167
- region = image[y1:y2, x1:x2]
168
-
169
- if region.size > 0:
170
  # Calculate mean color
171
- mean_color = np.mean(region.reshape(-1, region.shape[-1]), axis=0)
172
- return tuple(map(int, mean_color)) + (220,) # Add alpha for semi-transparency
173
-
174
  except Exception as e:
175
- print(f"Error extracting background color: {e}")
176
 
177
- # Default background color
178
- return (240, 240, 240, 200)
179
 
180
- def create_smart_overlay(image: Image.Image, bbox: List, original_text: str, translated_text: str) -> None:
181
- """Create intelligent overlay with proper sizing and positioning"""
182
  draw = ImageDraw.Draw(image, 'RGBA')
183
 
184
- # Extract bounding box coordinates
185
- top_left, top_right, bottom_right, bottom_left = bbox
 
 
 
 
 
 
 
186
 
187
- # Calculate dimensions
188
- x = int(min(top_left[0], bottom_left[0]))
189
- y = int(min(top_left[1], top_right[1]))
190
- width = int(max(top_right[0], bottom_right[0]) - x)
191
- height = int(max(bottom_left[1], bottom_right[1]) - y)
192
 
193
  # Calculate optimal font size
194
  font_size = calculate_optimal_font_size(translated_text, width, height)
195
-
196
- # Get appropriate font
197
  font = get_font_for_text(translated_text, font_size)
198
- if font is None:
199
- font = get_font_for_text(translated_text, 14) # Fallback size
200
 
201
- # Get background color from image
202
  img_array = np.array(image.convert('RGB'))
203
- bg_color = extract_background_color(img_array, bbox)
204
 
205
- # Create background rectangle with padding
206
- padding = max(2, font_size // 8)
207
  bg_rect = [
208
- x - padding,
209
- y - padding,
210
- x + width + padding,
211
- y + height + padding
212
  ]
213
 
214
- # Draw semi-transparent background
215
- draw.rectangle(bg_rect, fill=bg_color)
 
216
 
217
- # Calculate text position for centering
218
  try:
219
  bbox_text = draw.textbbox((0, 0), translated_text, font=font)
220
  text_width = bbox_text[2] - bbox_text[0]
221
  text_height = bbox_text[3] - bbox_text[1]
222
  except:
223
- # Fallback for older PIL versions
224
  text_width = len(translated_text) * font_size * 0.6
225
  text_height = font_size
226
 
227
- # Center the text
228
- text_x = x + (width - text_width) / 2
229
- text_y = y + (height - text_height) / 2
 
 
 
 
 
 
230
 
231
- # Get optimal text color
232
- text_color = get_text_color_with_contrast(bg_color[:3])
 
233
 
234
- # Draw the translated text
235
  draw.text((text_x, text_y), translated_text, fill=text_color, font=font)
236
 
237
- def process_image(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
238
- """Main image processing function with enhanced OCR and translation"""
239
 
240
  if image is None:
241
  return None, "❌ Please upload an image first."
@@ -247,73 +428,106 @@ def process_image(image: Image.Image, target_language: str, progress=gr.Progress
247
 
248
  progress(0.1, "🔧 Initializing OCR engine...")
249
 
250
- # Initialize OCR
251
- ocr = initialize_reader()
252
- if ocr is None:
253
- return image, "❌ Failed to initialize OCR. Please try again."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
- progress(0.3, "🔍 Extracting text from image...")
256
 
257
  try:
258
- # Convert PIL image to numpy array for OCR
259
- img_array = np.array(image)
260
 
261
- # Perform OCR with simplified parameters
262
- results = ocr.readtext(img_array)
263
 
264
- if not results:
265
- return image, "ℹ️ No readable text found in the image."
266
 
267
- print(f"OCR Results format: {results[0] if results else 'Empty'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
- # Handle different OCR result formats
270
- processed_results = []
271
- for result in results:
272
- if len(result) == 3:
273
- # Standard format: (bbox, text, confidence)
274
- bbox, text, confidence = result
275
- processed_results.append((bbox, text, confidence))
276
- elif len(result) == 2:
277
- # Alternative format: (bbox, text) - assume high confidence
278
- bbox, text = result
279
- processed_results.append((bbox, text, 0.8))
280
- else:
281
- print(f"Unexpected result format: {result}")
282
- continue
283
 
284
- # Filter results by confidence and text quality
285
- filtered_results = []
286
- for bbox, text, confidence in processed_results:
287
- if text and text.strip() and confidence > 0.3: # Lower threshold for better detection
288
- filtered_results.append((bbox, text, confidence))
289
 
290
  if not filtered_results:
291
  return image, "ℹ️ No text detected with sufficient confidence."
292
 
293
- progress(0.5, f"🌐 Translating {len(filtered_results)} text regions...")
 
 
 
294
 
295
- # Create a copy of the image for overlay
 
 
296
  result_image = image.copy().convert('RGBA')
297
 
298
- # Process each detected text region
299
- translations_info = []
300
 
301
- for i, (bbox, text, confidence) in enumerate(filtered_results):
302
- # Update progress
303
- progress(0.5 + (0.4 * i / len(filtered_results)), f"Translating region {i+1}/{len(filtered_results)}")
304
 
305
  if text and text.strip():
306
- # Clean the extracted text
307
  cleaned_text = re.sub(r'\s+', ' ', text.strip())
308
 
309
- # Translate the text
310
- translated = smart_translate(cleaned_text, target_lang_code)
311
 
312
- # Create overlay on image
313
- create_smart_overlay(result_image, bbox, cleaned_text, translated)
314
 
315
- # Store translation info
316
- translations_info.append({
317
  'original': cleaned_text,
318
  'translated': translated,
319
  'confidence': confidence
@@ -321,17 +535,18 @@ def process_image(image: Image.Image, target_language: str, progress=gr.Progress
321
 
322
  progress(1.0, "✅ Translation completed!")
323
 
324
- # Convert back to RGB for final output
325
  final_image = result_image.convert('RGB')
326
 
327
- # Create summary text
328
- summary_lines = []
329
- summary_lines.append(f"🎯 Successfully processed {len(translations_info)} text regions:\n")
330
 
331
- for i, info in enumerate(translations_info, 1):
332
- summary_lines.append(f"{i}. Original: {info['original']}")
333
- summary_lines.append(f" Translation: {info['translated']}")
334
- summary_lines.append(f" Confidence: {info['confidence']:.2f}\n")
 
 
335
 
336
  summary_text = "\n".join(summary_lines)
337
 
@@ -342,42 +557,66 @@ def process_image(image: Image.Image, target_language: str, progress=gr.Progress
342
  print(f"Processing error: {e}")
343
  return image, error_msg
344
 
345
- # Custom CSS for better UI
346
  custom_css = """
347
  .gradio-container {
348
- max-width: 1200px;
349
  margin: auto;
 
350
  }
 
351
  .main-header {
352
  text-align: center;
353
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
354
  -webkit-background-clip: text;
355
  -webkit-text-fill-color: transparent;
356
- font-size: 2.5em;
357
- font-weight: bold;
 
358
  margin-bottom: 0.5em;
 
359
  }
 
360
  .description {
361
  text-align: center;
362
- font-size: 1.1em;
363
- color: #666;
364
  margin-bottom: 2em;
 
365
  }
 
366
  .feature-box {
367
- background: #f8f9fa;
368
- padding: 1em;
369
- border-radius: 8px;
 
 
 
 
 
 
 
 
370
  margin: 1em 0;
 
 
 
 
 
 
 
 
 
371
  }
372
  """
373
 
374
- # Create the Gradio interface
375
- with gr.Blocks(css=custom_css, title="Multilingual Signboard Translator") as demo:
376
 
377
  gr.HTML("""
378
- <div class="main-header">🌐 Multilingual Signboard Translator</div>
379
  <div class="description">
380
- Extract and translate text from images with intelligent overlay technology
381
  </div>
382
  """)
383
 
@@ -386,73 +625,105 @@ with gr.Blocks(css=custom_css, title="Multilingual Signboard Translator") as dem
386
  gr.Markdown("### 📤 Upload & Configure")
387
 
388
  input_image = gr.Image(
389
- label="📷 Upload Image",
390
  type="pil",
391
- height=300
392
  )
393
 
394
  target_language = gr.Dropdown(
395
  choices=list(LANG_CODE_MAP.keys()),
396
  value="Hindi",
397
- label="🎯 Translate To",
398
- info="Select target language for translation"
399
  )
400
 
401
  translate_btn = gr.Button(
402
- "🚀 Translate Text",
403
  variant="primary",
404
- size="lg"
 
405
  )
406
 
407
  with gr.Column(scale=1):
408
- gr.Markdown("### 📤 Results")
409
 
410
  output_image = gr.Image(
411
- label="🖼️ Translated Image",
412
- type="pil",
413
- height=300
414
  )
415
 
416
  output_text = gr.Textbox(
417
- label="📝 Translation Details",
418
- lines=8,
419
- max_lines=15,
420
- info="Detailed translation information"
421
  )
422
 
423
  # Event binding
424
  translate_btn.click(
425
- fn=process_image,
426
  inputs=[input_image, target_language],
427
  outputs=[output_image, output_text],
428
  show_progress=True
429
  )
430
 
431
- # Feature information
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  gr.HTML("""
433
  <div class="feature-box">
434
- <h3>✨ Key Features:</h3>
435
  <ul>
436
- <li><strong>🎯 Smart OCR:</strong> Advanced text detection with confidence filtering</li>
437
- <li><strong>🌐 Bilingual Support:</strong> English Hindi translation</li>
438
- <li><strong>🎨 Intelligent Overlay:</strong> Context-aware text positioning and sizing</li>
439
- <li><strong>🔧 Adaptive Fonts:</strong> Script-specific font selection for better readability</li>
440
- <li><strong>⚡ Optimized Performance:</strong> Fast processing with caching</li>
 
441
  </ul>
442
  </div>
443
  """)
444
 
445
  if __name__ == "__main__":
446
- # Pre-initialize OCR for faster first-time usage
447
- print("🔧 Pre-initializing OCR engine...")
 
 
 
 
448
  try:
449
- initialize_reader()
450
- print("✅ OCR engine ready!")
 
 
 
 
451
  except Exception as e:
452
- print(f"⚠️ OCR initialization warning: {e}")
 
453
 
454
- # Launch the application
455
- demo.launch(
456
- share=False,
457
- show_error=True
458
- )
 
 
 
 
 
 
 
 
 
6
  import cv2
7
  import time
8
  import re
9
+ from typing import Tuple, List, Optional, Dict
10
  import io
11
  import os
12
+ from collections import defaultdict
13
+ import math
14
 
15
  # Global variables
16
  reader = None
17
  translation_cache = {}
18
 
19
+ # Define supported languages
20
  SUPPORTED_LANGUAGES = {
21
  'en': 'English',
22
+ 'hi': 'Hindi',
23
+ 'es': 'Spanish',
24
+ 'fr': 'French',
25
+ 'de': 'German',
26
+ 'ja': 'Japanese',
27
+ 'ko': 'Korean',
28
+ 'zh': 'Chinese'
29
  }
30
 
31
  # Language code mapping for Google Translator
32
  LANG_CODE_MAP = {
33
  'English': 'en',
34
+ 'Hindi': 'hi',
35
+ 'Spanish': 'es',
36
+ 'French': 'fr',
37
+ 'German': 'de',
38
+ 'Japanese': 'ja',
39
+ 'Korean': 'ko',
40
+ 'Chinese': 'zh'
41
  }
42
 
43
  def initialize_reader():
44
+ """Initialize EasyOCR reader with fallback options"""
45
  global reader
46
  if reader is None:
47
+ # Try different initialization strategies
48
+ init_strategies = [
49
+ (['en', 'hi'], "English and Hindi"),
50
+ (['en'], "English only"),
51
+ (['en', 'hi'], "English and Hindi with verbose"),
52
+ ]
53
+
54
+ for i, (languages, description) in enumerate(init_strategies):
55
+ try:
56
+ print(f"Attempting OCR initialization: {description}")
57
+ verbose_setting = True if i == 2 else False
58
+
59
+ reader = easyocr.Reader(
60
+ languages,
61
+ gpu=False,
62
+ verbose=verbose_setting,
63
+ download_enabled=True,
64
+ detector=True,
65
+ recognizer=True
66
+ )
67
+ print(f"✅ EasyOCR initialized successfully with {description}")
68
+ return reader
69
+
70
+ except ImportError as e:
71
+ print(f"❌ Import error: {e}")
72
+ continue
73
+ except Exception as e:
74
+ print(f"❌ Initialization attempt {i+1} failed: {e}")
75
+ if i < len(init_strategies) - 1:
76
+ print("Trying alternative approach...")
77
+ continue
78
+ else:
79
+ print("All initialization strategies failed")
80
+
81
+ # If all strategies fail, return None
82
+ reader = None
83
+ print("❌ Could not initialize EasyOCR with any strategy")
84
+
85
  return reader
86
 
87
+ def calculate_distance(box1, box2):
88
+ """Calculate distance between two bounding boxes"""
89
+ # Get center points
90
+ center1 = [(box1[0][0] + box1[2][0]) / 2, (box1[0][1] + box1[2][1]) / 2]
91
+ center2 = [(box2[0][0] + box2[2][0]) / 2, (box2[0][1] + box2[2][1]) / 2]
92
+
93
+ return math.sqrt((center1[0] - center2[0])**2 + (center1[1] - center2[1])**2)
94
+
95
+ def are_boxes_on_same_line(box1, box2, tolerance=20):
96
+ """Check if two bounding boxes are on the same horizontal line"""
97
+ # Get y-coordinates (vertical positions)
98
+ y1_avg = (box1[0][1] + box1[2][1]) / 2
99
+ y2_avg = (box2[0][1] + box2[2][1]) / 2
100
+
101
+ return abs(y1_avg - y2_avg) <= tolerance
102
+
103
+ def group_text_regions(ocr_results, line_tolerance=25, proximity_threshold=50):
104
+ """Group OCR results into meaningful text blocks"""
105
+ if not ocr_results:
106
+ return []
107
+
108
+ # Sort by vertical position first, then horizontal
109
+ sorted_results = sorted(ocr_results, key=lambda x: (x[0][0][1], x[0][0][0]))
110
+
111
+ grouped_lines = []
112
+ current_line = [sorted_results[0]]
113
+
114
+ for i in range(1, len(sorted_results)):
115
+ current_box = sorted_results[i][0]
116
+ prev_box = current_line[-1][0]
117
+
118
+ # Check if boxes are on the same line
119
+ if are_boxes_on_same_line(current_box, prev_box, line_tolerance):
120
+ # Check proximity (not too far apart horizontally)
121
+ if calculate_distance(current_box, prev_box) <= proximity_threshold:
122
+ current_line.append(sorted_results[i])
123
+ else:
124
+ # Start new line if too far apart
125
+ grouped_lines.append(current_line)
126
+ current_line = [sorted_results[i]]
127
+ else:
128
+ # Different line
129
+ grouped_lines.append(current_line)
130
+ current_line = [sorted_results[i]]
131
+
132
+ # Don't forget the last line
133
+ if current_line:
134
+ grouped_lines.append(current_line)
135
+
136
+ # Merge text within each line
137
+ merged_groups = []
138
+ for line in grouped_lines:
139
+ if len(line) == 1:
140
+ merged_groups.append(line[0])
141
+ else:
142
+ # Sort by horizontal position within the line
143
+ line.sort(key=lambda x: x[0][0][0])
144
+
145
+ # Merge text
146
+ merged_text = ' '.join([item[1] for item in line])
147
+
148
+ # Create combined bounding box
149
+ all_points = []
150
+ for item in line:
151
+ all_points.extend(item[0])
152
+
153
+ # Find min/max coordinates
154
+ x_coords = [point[0] for point in all_points]
155
+ y_coords = [point[1] for point in all_points]
156
+
157
+ min_x, max_x = min(x_coords), max(x_coords)
158
+ min_y, max_y = min(y_coords), max(y_coords)
159
+
160
+ # Create new bounding box
161
+ merged_bbox = [[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]
162
+
163
+ # Use average confidence
164
+ avg_confidence = sum([item[2] for item in line]) / len(line)
165
+
166
+ merged_groups.append((merged_bbox, merged_text, avg_confidence))
167
+
168
+ return merged_groups
169
+
170
  def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFont:
171
+ """Get appropriate font based on text content"""
172
+ # Check for different scripts
173
  has_devanagari = bool(re.search(r'[\u0900-\u097F]', text))
174
+ has_chinese = bool(re.search(r'[\u4e00-\u9fff]', text))
175
+ has_japanese = bool(re.search(r'[\u3040-\u309f\u30a0-\u30ff]', text))
176
+ has_korean = bool(re.search(r'[\uac00-\ud7af]', text))
177
+ has_arabic = bool(re.search(r'[\u0600-\u06ff]', text))
178
 
179
  # Font paths for different scripts
180
+ font_paths = []
181
+
182
+ if has_devanagari:
183
+ font_paths.extend([
184
+ "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
185
+ "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf"
186
+ ])
187
 
188
+ if has_chinese or has_japanese:
189
+ font_paths.extend([
190
+ "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
191
+ "/usr/share/fonts/truetype/arphic/uming.ttc"
192
+ ])
193
+
194
+ if has_korean:
195
+ font_paths.append("/usr/share/fonts/truetype/noto/NotoSansKR-Regular.otf")
196
+
197
+ if has_arabic:
198
+ font_paths.append("/usr/share/fonts/truetype/noto/NotoSansArabic-Regular.ttf")
199
+
200
+ # Default fonts
201
+ font_paths.extend([
202
  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
203
  "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
204
+ "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf"
205
+ ])
 
 
 
206
 
207
  for font_path in font_paths:
208
  try:
 
211
  except (OSError, IOError):
212
  continue
213
 
214
+ # Fallback
215
  try:
216
  return ImageFont.load_default()
217
  except:
218
  return None
219
 
220
+ def smart_translate_with_context(text: str, target_lang: str, source_lang: str = 'auto') -> str:
221
+ """Enhanced translation with better context handling"""
222
  if not text or not text.strip():
223
  return ""
224
 
 
230
  if cache_key in translation_cache:
231
  return translation_cache[cache_key]
232
 
233
+ # Pre-processing for better translation context
234
+ # Handle common signboard patterns
235
+ signboard_patterns = {
236
+ r'\b(no|not|don\'t|do not)\s+(use|mobile|phone|cell)\b': 'prohibition_mobile',
237
+ r'\b(please|kindly)\s+(do not|don\'t)\s+(use|mobile|phone)\b': 'polite_prohibition_mobile',
238
+ r'\b(exit|entrance|entry|way out|way in)\b': 'direction',
239
+ r'\b(toilet|restroom|bathroom|washroom)\b': 'facility',
240
+ r'\b(parking|park|no parking)\b': 'parking',
241
+ r'\b(emergency|fire|safety)\b': 'safety'
242
+ }
243
+
244
+ context_hint = ""
245
+ for pattern, context in signboard_patterns.items():
246
+ if re.search(pattern, cleaned_text.lower()):
247
+ context_hint = f"[Signboard context: {context}] "
248
+ break
249
+
250
+ max_retries = 3
251
  for attempt in range(max_retries):
252
  try:
 
253
  translator = GoogleTranslator(source=source_lang, target=target_lang)
 
254
 
255
+ # Add context hint for better translation
256
+ text_to_translate = context_hint + cleaned_text if context_hint else cleaned_text
257
+ translated = translator.translate(text_to_translate)
258
+
259
+ if translated and translated.strip():
260
+ # Remove context hint from result if it was added
261
+ if context_hint and translated.startswith('['):
262
+ # Try to remove the context hint from translation
263
+ bracket_end = translated.find('] ')
264
+ if bracket_end != -1:
265
+ translated = translated[bracket_end + 2:].strip()
266
+
267
+ # Post-process for common improvements
268
+ translated = post_process_translation(translated, target_lang)
269
 
270
  # Cache successful translation
271
  translation_cache[cache_key] = translated
 
274
  except Exception as e:
275
  print(f"Translation attempt {attempt + 1} failed: {e}")
276
  if attempt < max_retries - 1:
277
+ time.sleep(0.5)
278
+
279
+ return cleaned_text
280
+
281
+ def post_process_translation(translated_text: str, target_lang: str) -> str:
282
+ """Post-process translation for better quality"""
283
+ # Language-specific post-processing
284
+ if target_lang == 'hi': # Hindi
285
+ # Common corrections for Hindi translations
286
+ corrections = {
287
+ 'मत करो': 'न करें', # More polite form
288
+ 'का उपयोग मत करो': 'का उपयोग न करें',
289
+ 'फोन का उपयोग': 'मोबाइल का उपयोग'
290
+ }
291
+
292
+ for old, new in corrections.items():
293
+ translated_text = translated_text.replace(old, new)
294
+
295
+ return translated_text.strip()
296
 
297
+ def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 12, max_size: int = 48) -> int:
298
+ """Calculate optimal font size with better scaling"""
299
  if not text:
300
  return min_size
301
 
302
+ # Estimate character width (varies by language)
303
+ char_width_ratio = 0.7 # More conservative estimate
304
+
305
+ # For non-Latin scripts, adjust ratio
306
+ if re.search(r'[\u0900-\u097F\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]', text):
307
+ char_width_ratio = 0.9 # Wider characters
308
 
309
+ # Calculate based on width constraint
310
+ width_based_size = int(bbox_width / (len(text) * char_width_ratio))
311
 
312
+ # Calculate based on height constraint (use 80% of available height)
313
+ height_based_size = int(bbox_height * 0.8)
314
+
315
+ # Take the smaller constraint
316
+ optimal_size = min(width_based_size, height_based_size)
317
 
318
  # Apply bounds
319
  return max(min_size, min(optimal_size, max_size))
320
 
321
+ def get_contrasting_color(bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]:
322
+ """Get contrasting text color"""
323
+ r, g, b = bg_color[:3]
 
 
324
  luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
325
 
326
+ if luminance > 0.5:
327
+ return (0, 0, 0) # Black text for light background
 
328
  else:
329
+ return (255, 255, 255) # White text for dark background
330
 
331
+ def extract_dominant_color(image: np.ndarray, bbox: List) -> Tuple[int, int, int]:
332
+ """Extract dominant color from the bounding box region"""
333
  try:
334
  # Get bounding box coordinates
335
+ points = np.array(bbox, dtype=np.int32)
336
 
337
+ # Create mask for the region
338
+ mask = np.zeros(image.shape[:2], dtype=np.uint8)
339
+ cv2.fillPoly(mask, [points], 255)
 
 
340
 
341
+ # Extract pixels within the region
342
+ region_pixels = image[mask > 0]
 
343
 
344
+ if len(region_pixels) > 0:
 
 
 
 
 
 
 
 
 
345
  # Calculate mean color
346
+ mean_color = np.mean(region_pixels, axis=0)
347
+ return tuple(map(int, mean_color))
348
+
349
  except Exception as e:
350
+ print(f"Error extracting color: {e}")
351
 
352
+ return (240, 240, 240) # Default light gray
 
353
 
354
+ def create_enhanced_overlay(image: Image.Image, bbox: List, translated_text: str, bg_opacity: int = 180):
355
+ """Create enhanced overlay with better positioning"""
356
  draw = ImageDraw.Draw(image, 'RGBA')
357
 
358
+ # Convert bbox to integer coordinates
359
+ points = [[int(p[0]), int(p[1])] for p in bbox]
360
+
361
+ # Calculate bounding rectangle
362
+ x_coords = [p[0] for p in points]
363
+ y_coords = [p[1] for p in points]
364
+
365
+ x_min, x_max = min(x_coords), max(x_coords)
366
+ y_min, y_max = min(y_coords), max(y_coords)
367
 
368
+ width = x_max - x_min
369
+ height = y_max - y_min
 
 
 
370
 
371
  # Calculate optimal font size
372
  font_size = calculate_optimal_font_size(translated_text, width, height)
 
 
373
  font = get_font_for_text(translated_text, font_size)
 
 
374
 
375
+ # Extract background color
376
  img_array = np.array(image.convert('RGB'))
377
+ bg_color = extract_dominant_color(img_array, bbox)
378
 
379
+ # Create semi-transparent background
380
+ padding = max(4, font_size // 6)
381
  bg_rect = [
382
+ x_min - padding,
383
+ y_min - padding,
384
+ x_max + padding,
385
+ y_max + padding
386
  ]
387
 
388
+ # Draw background with original color but semi-transparent
389
+ bg_color_with_alpha = bg_color + (bg_opacity,)
390
+ draw.rectangle(bg_rect, fill=bg_color_with_alpha)
391
 
392
+ # Calculate text position (center alignment)
393
  try:
394
  bbox_text = draw.textbbox((0, 0), translated_text, font=font)
395
  text_width = bbox_text[2] - bbox_text[0]
396
  text_height = bbox_text[3] - bbox_text[1]
397
  except:
 
398
  text_width = len(translated_text) * font_size * 0.6
399
  text_height = font_size
400
 
401
+ text_x = x_min + (width - text_width) / 2
402
+ text_y = y_min + (height - text_height) / 2
403
+
404
+ # Get contrasting text color
405
+ text_color = get_contrasting_color(bg_color)
406
+
407
+ # Draw text with slight shadow for better readability
408
+ shadow_offset = max(1, font_size // 20)
409
+ shadow_color = (0, 0, 0) if text_color == (255, 255, 255) else (255, 255, 255)
410
 
411
+ # Draw shadow
412
+ draw.text((text_x + shadow_offset, text_y + shadow_offset), translated_text,
413
+ fill=shadow_color + (100,), font=font)
414
 
415
+ # Draw main text
416
  draw.text((text_x, text_y), translated_text, fill=text_color, font=font)
417
 
418
def process_image_enhanced(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
    """OCR a signboard image, group the detected text, translate each group,
    and overlay the translations onto a copy of the image.

    Args:
        image: Uploaded PIL image (any mode; converted to RGB internally).
        target_language: Display name of the target language; expected to be
            a key of LANG_CODE_MAP (e.g. "Hindi").
        progress: Gradio progress tracker injected by the UI.

    Returns:
        Tuple of (image, status_text). On any failure the ORIGINAL image is
        returned together with a human-readable error message, so the UI
        never ends up with an empty result slot.
    """
    if image is None:
        return None, "❌ Please upload an image first."

    # Resolve the display name to a translator language code up front so a
    # bad dropdown value fails fast instead of mid-pipeline.
    target_lang_code = LANG_CODE_MAP.get(target_language)
    if target_lang_code is None:
        return image, f"❌ Unsupported target language: {target_language}"

    progress(0.1, "🔧 Initializing OCR engine...")

    # Initialize OCR with explicit error reporting.
    try:
        ocr = initialize_reader()
        if ocr is None:
            return image, """❌ OCR initialization failed. This might be due to:
• Missing system dependencies
• Network issues downloading models
• Insufficient memory

Please try refreshing the page or contact support."""

        # Sanity-check that the image converts to a usable array.
        test_array = np.array(image.convert('RGB'))
        if test_array.size == 0:
            return image, "❌ Invalid image format. Please upload a valid image file."

    except Exception as e:
        error_details = str(e)
        return image, f"""❌ OCR Setup Error: {error_details}

Possible solutions:
• Refresh the browser and try again
• Upload a different image format (JPG/PNG)
• Check if the image is not corrupted

Technical details: {type(e).__name__}"""

    progress(0.3, "🔍 Extracting and grouping text regions...")

    try:
        # Convert PIL image to numpy array with error handling.
        img_array = np.array(image.convert('RGB'))

        if img_array is None or img_array.size == 0:
            return image, "❌ Error processing image. Please try a different image."

        print(f"Image shape: {img_array.shape}")

        # Primary OCR pass: width_ths/height_ths merge characters that sit
        # close together so words are not fragmented.  If the tuned call
        # fails, retry once with EasyOCR's default parameters.
        try:
            results = ocr.readtext(img_array, detail=1, paragraph=False, width_ths=0.7, height_ths=0.7)
        except Exception as ocr_error:
            print(f"Primary OCR failed: {ocr_error}")
            try:
                results = ocr.readtext(img_array, detail=1)
            except Exception as fallback_error:
                print(f"Fallback OCR failed: {fallback_error}")
                return image, f"""❌ OCR Processing Failed: {str(ocr_error)}

Troubleshooting:
• Image might be too complex or low quality
• Try uploading a clearer image
• Ensure text is clearly visible

Fallback error: {str(fallback_error)}"""

        if not results:
            return image, """ℹ️ No readable text found in the image.

Tips for better results:
Ensure text is clearly visible and well-lit
Upload higher resolution images
Make sure text is not too small or blurry"""

        # Keep only confident, non-empty detections.
        filtered_results = [(bbox, text, conf) for bbox, text, conf in results
                            if conf > 0.4 and text.strip()]

        if not filtered_results:
            return image, "ℹ️ No text detected with sufficient confidence."

        progress(0.5, "🔗 Grouping related text regions...")

        # Group nearby fragments so each group is translated with context.
        grouped_results = group_text_regions(filtered_results)

        progress(0.6, f"🌐 Translating {len(grouped_results)} text groups...")

        # Draw onto an RGBA copy so overlays can use transparency.
        result_image = image.copy().convert('RGBA')
        translation_info = []

        for i, (bbox, text, confidence) in enumerate(grouped_results):
            progress(0.6 + (0.3 * i / len(grouped_results)),
                     f"Translating group {i+1}/{len(grouped_results)}")

            if text and text.strip():
                # Collapse internal whitespace before translating.
                cleaned_text = re.sub(r'\s+', ' ', text.strip())

                # Translate with signboard context.
                translated = smart_translate_with_context(cleaned_text, target_lang_code)

                # Paint the translation over the original region.
                create_enhanced_overlay(result_image, bbox, translated)

                translation_info.append({
                    'original': cleaned_text,
                    'translated': translated,
                    'confidence': confidence
                })

        progress(1.0, "✅ Translation completed!")

        final_image = result_image.convert('RGB')

        # Build a markdown-style per-group summary for the results textbox.
        summary_lines = [f"🎯 Successfully processed {len(translation_info)} text groups:\n"]
        for i, info in enumerate(translation_info, 1):
            summary_lines.append(f"**Group {i}:**")
            summary_lines.append(f"📝 Original: _{info['original']}_")
            summary_lines.append(f"🌐 Translation: **{info['translated']}**")
            summary_lines.append(f"📊 Confidence: {info['confidence']:.2f}")
            summary_lines.append("")

        summary_text = "\n".join(summary_lines)
        return final_image, summary_text

    except Exception as e:
        # Never raise into Gradio: hand back the untouched image plus the
        # error text.  NOTE(review): the error_msg wording was lost in the
        # source diff; reconstructed — confirm against the original commit.
        error_msg = f"❌ Processing Error: {str(e)}"
        print(f"Processing error: {e}")
        return image, error_msg
# Enhanced CSS for the Gradio interface: gradient header, description text,
# info/feature cards, and a styled primary button.
custom_css = """
.gradio-container {
    max-width: 1400px;
    margin: auto;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

.main-header {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 2.8em;
    font-weight: 800;
    margin-bottom: 0.5em;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}

.description {
    text-align: center;
    font-size: 1.2em;
    color: #555;
    margin-bottom: 2em;
    line-height: 1.6;
}

.feature-box {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    padding: 1.5em;
    border-radius: 12px;
    margin: 1.5em 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

.improvement-box {
    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
    padding: 1.2em;
    border-radius: 10px;
    margin: 1em 0;
    border-left: 4px solid #667eea;
}

.btn-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border: none;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 1px;
}
"""
613
+ # Create Gradio interface
614
+ with gr.Blocks(css=custom_css, title="Enhanced Multilingual Signboard Translator") as demo:
615
 
616
  gr.HTML("""
617
+ <div class="main-header">🌐 Enhanced Multilingual Signboard Translator</div>
618
  <div class="description">
619
+ Advanced OCR with intelligent text grouping and contextual translation overlay
620
  </div>
621
  """)
622
 
 
625
  gr.Markdown("### 📤 Upload & Configure")
626
 
627
  input_image = gr.Image(
628
+ label="📷 Upload Signboard Image",
629
  type="pil",
630
+ height=350
631
  )
632
 
633
  target_language = gr.Dropdown(
634
  choices=list(LANG_CODE_MAP.keys()),
635
  value="Hindi",
636
+ label="🎯 Target Language",
637
+ info="Select language for translation"
638
  )
639
 
640
  translate_btn = gr.Button(
641
+ "🚀 Translate Signboard",
642
  variant="primary",
643
+ size="lg",
644
+ elem_classes=["btn-primary"]
645
  )
646
 
647
  with gr.Column(scale=1):
648
+ gr.Markdown("### 📋 Results")
649
 
650
  output_image = gr.Image(
651
+ label="🖼️ Translated Signboard",
652
+ type="pil",
653
+ height=350
654
  )
655
 
656
  output_text = gr.Textbox(
657
+ label="📝 Translation Analysis",
658
+ lines=10,
659
+ max_lines=20,
660
+ info="Detailed breakdown of detected and translated text"
661
  )
662
 
663
  # Event binding
664
  translate_btn.click(
665
+ fn=process_image_enhanced,
666
  inputs=[input_image, target_language],
667
  outputs=[output_image, output_text],
668
  show_progress=True
669
  )
670
 
671
+ # Enhanced information sections
672
+ gr.HTML("""
673
+ <div class="improvement-box">
674
+ <h3>🚀 Key Improvements in This Version:</h3>
675
+ <ul>
676
+ <li><strong>🧠 Intelligent Text Grouping:</strong> Combines fragmented words into meaningful phrases</li>
677
+ <li><strong>🎯 Contextual Translation:</strong> Uses signboard context for accurate translations</li>
678
+ <li><strong>🌈 Smart Color Preservation:</strong> Maintains original background colors with transparency</li>
679
+ <li><strong>📝 Multi-Script Support:</strong> Enhanced font handling for various languages</li>
680
+ <li><strong>⚡ Optimized Performance:</strong> Better caching and processing algorithms</li>
681
+ </ul>
682
+ </div>
683
+ """)
684
+
685
  gr.HTML("""
686
  <div class="feature-box">
687
+ <h3>✨ Advanced Features:</h3>
688
  <ul>
689
+ <li><strong>🔍 Smart OCR:</strong> Groups nearby text elements for better context</li>
690
+ <li><strong>🌐 Context-Aware Translation:</strong> Recognizes signboard patterns for accurate meaning</li>
691
+ <li><strong>🎨 Adaptive Overlays:</strong> Preserves original aesthetics while ensuring readability</li>
692
+ <li><strong>🔤 Multi-Language Support:</strong> Enhanced support for 8+ languages</li>
693
+ <li><strong>📊 Confidence Analysis:</strong> Shows detection confidence for quality assessment</li>
694
+ <li><strong>⚡ Performance Optimized:</strong> Faster processing with intelligent caching</li>
695
  </ul>
696
  </div>
697
  """)
698
 
if __name__ == "__main__":
    # Local import: only the script entry point needs sys, and the file's
    # top-level import block is outside this view.
    import sys

    print("🔧 Initializing Enhanced OCR Translator...")
    print("System Information:")
    # Fix: use the documented `sys` module instead of the undocumented
    # `os.sys` alias (same object, same output, public API).
    print(f"Python version: {sys.version}")
    print(f"NumPy version: {np.__version__}")

    # Pre-initialize the OCR reader so the first request is fast; failures
    # here are non-fatal because initialize_reader() is retried on use.
    try:
        print("Starting OCR initialization...")
        ocr_reader = initialize_reader()
        if ocr_reader:
            print("✅ OCR System ready!")
        else:
            print("⚠️ OCR initialization failed - will retry when needed")
    except Exception as e:
        print(f"⚠️ Pre-initialization error: {e}")
        print("OCR will be initialized on first use")

    # Launch with explicit host/port; fall back to defaults on failure.
    # Fix: dropped enable_queue=True — the argument was removed in Gradio
    # 4.x, so passing it raised TypeError and the app always fell through
    # to the bare fallback launch, losing server_name/server_port.
    # Queueing is enabled by default in Gradio 4; call demo.queue() first
    # if running on Gradio 3 and queueing is required.
    try:
        demo.launch(
            share=True,
            show_error=True,
            server_name="0.0.0.0",
            server_port=7860
        )
    except Exception as e:
        print(f"Launch error: {e}")
        # Fallback launch with library defaults.
        demo.launch()