Amandeep01 commited on
Commit
ac5643f
·
verified ·
1 Parent(s): d116025

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +377 -203
app.py CHANGED
@@ -6,60 +6,172 @@ import numpy as np
6
  import cv2
7
  import time
8
  import re
9
- from typing import Tuple, List, Optional
10
  import io
11
  import os
 
 
12
 
13
  # Global variables
14
  reader = None
15
  translation_cache = {}
16
 
17
- # Define supported languages with better language detection
18
  SUPPORTED_LANGUAGES = {
19
  'en': 'English',
20
- 'hi': 'Hindi'
 
 
 
 
 
 
21
  }
22
 
23
  # Language code mapping for Google Translator
24
  LANG_CODE_MAP = {
25
  'English': 'en',
26
- 'Hindi': 'hi'
 
 
 
 
 
 
27
  }
28
 
29
  def initialize_reader():
30
- """Initialize EasyOCR reader with optimized language support"""
31
  global reader
32
  if reader is None:
33
  try:
34
- # Initialize with English and Hindi only for faster loading
35
- reader = easyocr.Reader(['en', 'hi'], gpu=False, verbose=False, download_enabled=True)
36
  print("EasyOCR initialized successfully")
37
  except Exception as e:
38
  print(f"Error initializing EasyOCR: {e}")
39
  return None
40
  return reader
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFont:
43
- """Get appropriate font based on text content and size"""
44
- # Check if text contains Devanagari script (Hindi/Marathi)
45
  has_devanagari = bool(re.search(r'[\u0900-\u097F]', text))
 
 
 
 
46
 
47
  # Font paths for different scripts
48
- devanagari_fonts = [
49
- "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
50
- "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Bold.ttf",
51
- "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf",
52
- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
53
- ]
 
54
 
55
- english_fonts = [
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
57
  "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
58
- "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf",
59
- "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
60
- ]
61
-
62
- font_paths = devanagari_fonts if has_devanagari else english_fonts
63
 
64
  for font_path in font_paths:
65
  try:
@@ -68,14 +180,14 @@ def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFon
68
  except (OSError, IOError):
69
  continue
70
 
71
- # Fallback to default font
72
  try:
73
  return ImageFont.load_default()
74
  except:
75
  return None
76
 
77
- def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> str:
78
- """Enhanced translation with context awareness and caching"""
79
  if not text or not text.strip():
80
  return ""
81
 
@@ -87,16 +199,42 @@ def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> s
87
  if cache_key in translation_cache:
88
  return translation_cache[cache_key]
89
 
90
- max_retries = 2 # Reduced retries for faster response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  for attempt in range(max_retries):
92
  try:
93
- # Use GoogleTranslator with better error handling
94
  translator = GoogleTranslator(source=source_lang, target=target_lang)
95
- translated = translator.translate(cleaned_text)
96
 
97
- if translated and translated.strip() and translated != cleaned_text:
98
- # Post-process translation for better readability
99
- translated = translated.strip()
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # Cache successful translation
102
  translation_cache[cache_key] = translated
@@ -105,137 +243,149 @@ def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> s
105
  except Exception as e:
106
  print(f"Translation attempt {attempt + 1} failed: {e}")
107
  if attempt < max_retries - 1:
108
- time.sleep(0.3) # Shorter wait time
109
-
110
- return cleaned_text # Return original text if translation fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 10, max_size: int = 50) -> int:
113
- """Calculate optimal font size based on bounding box dimensions and text length"""
114
  if not text:
115
  return min_size
116
 
117
- # Base calculation on text length and available space
118
- char_width_ratio = 0.6 # Approximate character width to height ratio
119
- estimated_char_width = bbox_height * char_width_ratio
120
- calculated_size = int(bbox_width / (len(text) * char_width_ratio))
121
 
122
- # Consider height constraint
123
- height_based_size = int(bbox_height * 0.7) # Use 70% of available height
 
124
 
125
- # Take the smaller of the two constraints
126
- optimal_size = min(calculated_size, height_based_size)
 
 
 
 
 
 
127
 
128
  # Apply bounds
129
  return max(min_size, min(optimal_size, max_size))
130
 
131
- def get_text_color_with_contrast(background_color: Tuple[int, int, int]) -> Tuple[int, int, int, int]:
132
- """Calculate optimal text color based on background for maximum contrast"""
133
- r, g, b = background_color[:3]
134
-
135
- # Calculate luminance using standard formula
136
  luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
137
 
138
- # Return white for dark backgrounds, black for light backgrounds
139
- if luminance < 0.5:
140
- return (255, 255, 255, 255) # White text
141
  else:
142
- return (0, 0, 0, 255) # Black text
143
 
144
- def extract_background_color(image: np.ndarray, bbox: List, expand_factor: float = 1.2) -> Tuple[int, int, int, int]:
145
- """Extract representative background color from around the text region"""
146
  try:
147
  # Get bounding box coordinates
148
- top_left, top_right, bottom_right, bottom_left = bbox
149
-
150
- # Calculate center and dimensions
151
- center_x = (top_left[0] + top_right[0]) / 2
152
- center_y = (top_left[1] + bottom_left[1]) / 2
153
- width = abs(top_right[0] - top_left[0])
154
- height = abs(bottom_left[1] - top_left[1])
155
 
156
- # Expand region for better color sampling
157
- expanded_width = width * expand_factor
158
- expanded_height = height * expand_factor
159
 
160
- # Calculate expanded coordinates
161
- x1 = max(0, int(center_x - expanded_width / 2))
162
- y1 = max(0, int(center_y - expanded_height / 2))
163
- x2 = min(image.shape[1], int(center_x + expanded_width / 2))
164
- y2 = min(image.shape[0], int(center_y + expanded_height / 2))
165
 
166
- # Extract region
167
- region = image[y1:y2, x1:x2]
168
-
169
- if region.size > 0:
170
  # Calculate mean color
171
- mean_color = np.mean(region.reshape(-1, region.shape[-1]), axis=0)
172
- return tuple(map(int, mean_color)) + (220,) # Add alpha for semi-transparency
173
-
174
  except Exception as e:
175
- print(f"Error extracting background color: {e}")
176
 
177
- # Default background color
178
- return (240, 240, 240, 200)
179
 
180
- def create_smart_overlay(image: Image.Image, bbox: List, original_text: str, translated_text: str) -> None:
181
- """Create intelligent overlay with proper sizing and positioning"""
182
  draw = ImageDraw.Draw(image, 'RGBA')
183
 
184
- # Extract bounding box coordinates
185
- top_left, top_right, bottom_right, bottom_left = bbox
 
 
 
 
186
 
187
- # Calculate dimensions
188
- x = int(min(top_left[0], bottom_left[0]))
189
- y = int(min(top_left[1], top_right[1]))
190
- width = int(max(top_right[0], bottom_right[0]) - x)
191
- height = int(max(bottom_left[1], bottom_right[1]) - y)
192
 
193
  # Calculate optimal font size
194
  font_size = calculate_optimal_font_size(translated_text, width, height)
195
-
196
- # Get appropriate font
197
  font = get_font_for_text(translated_text, font_size)
198
- if font is None:
199
- font = get_font_for_text(translated_text, 14) # Fallback size
200
 
201
- # Get background color from image
202
  img_array = np.array(image.convert('RGB'))
203
- bg_color = extract_background_color(img_array, bbox)
204
 
205
- # Create background rectangle with padding
206
- padding = max(2, font_size // 8)
207
  bg_rect = [
208
- x - padding,
209
- y - padding,
210
- x + width + padding,
211
- y + height + padding
212
  ]
213
 
214
- # Draw semi-transparent background
215
- draw.rectangle(bg_rect, fill=bg_color)
 
216
 
217
- # Calculate text position for centering
218
  try:
219
  bbox_text = draw.textbbox((0, 0), translated_text, font=font)
220
  text_width = bbox_text[2] - bbox_text[0]
221
  text_height = bbox_text[3] - bbox_text[1]
222
  except:
223
- # Fallback for older PIL versions
224
  text_width = len(translated_text) * font_size * 0.6
225
  text_height = font_size
226
 
227
- # Center the text
228
- text_x = x + (width - text_width) / 2
229
- text_y = y + (height - text_height) / 2
 
 
 
 
 
 
230
 
231
- # Get optimal text color
232
- text_color = get_text_color_with_contrast(bg_color[:3])
 
233
 
234
- # Draw the translated text
235
  draw.text((text_x, text_y), translated_text, fill=text_color, font=font)
236
 
237
- def process_image(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
238
- """Main image processing function with enhanced OCR and translation"""
239
 
240
  if image is None:
241
  return None, "❌ Please upload an image first."
@@ -252,68 +402,53 @@ def process_image(image: Image.Image, target_language: str, progress=gr.Progress
252
  if ocr is None:
253
  return image, "❌ Failed to initialize OCR. Please try again."
254
 
255
- progress(0.3, "🔍 Extracting text from image...")
256
 
257
  try:
258
- # Convert PIL image to numpy array for OCR
259
  img_array = np.array(image)
260
 
261
- # Perform OCR with simplified parameters
262
- results = ocr.readtext(img_array)
263
 
264
  if not results:
265
  return image, "ℹ️ No readable text found in the image."
266
 
267
- print(f"OCR Results format: {results[0] if results else 'Empty'}")
268
-
269
- # Handle different OCR result formats
270
- processed_results = []
271
- for result in results:
272
- if len(result) == 3:
273
- # Standard format: (bbox, text, confidence)
274
- bbox, text, confidence = result
275
- processed_results.append((bbox, text, confidence))
276
- elif len(result) == 2:
277
- # Alternative format: (bbox, text) - assume high confidence
278
- bbox, text = result
279
- processed_results.append((bbox, text, 0.8))
280
- else:
281
- print(f"Unexpected result format: {result}")
282
- continue
283
-
284
- # Filter results by confidence and text quality
285
- filtered_results = []
286
- for bbox, text, confidence in processed_results:
287
- if text and text.strip() and confidence > 0.3: # Lower threshold for better detection
288
- filtered_results.append((bbox, text, confidence))
289
 
290
  if not filtered_results:
291
  return image, "ℹ️ No text detected with sufficient confidence."
292
 
293
- progress(0.5, f"🌐 Translating {len(filtered_results)} text regions...")
 
 
 
294
 
295
- # Create a copy of the image for overlay
 
 
296
  result_image = image.copy().convert('RGBA')
297
 
298
- # Process each detected text region
299
- translations_info = []
300
 
301
- for i, (bbox, text, confidence) in enumerate(filtered_results):
302
- # Update progress
303
- progress(0.5 + (0.4 * i / len(filtered_results)), f"Translating region {i+1}/{len(filtered_results)}")
304
 
305
  if text and text.strip():
306
- # Clean the extracted text
307
  cleaned_text = re.sub(r'\s+', ' ', text.strip())
308
 
309
- # Translate the text
310
- translated = smart_translate(cleaned_text, target_lang_code)
311
 
312
- # Create overlay on image
313
- create_smart_overlay(result_image, bbox, cleaned_text, translated)
314
 
315
- # Store translation info
316
- translations_info.append({
317
  'original': cleaned_text,
318
  'translated': translated,
319
  'confidence': confidence
@@ -321,17 +456,18 @@ def process_image(image: Image.Image, target_language: str, progress=gr.Progress
321
 
322
  progress(1.0, "✅ Translation completed!")
323
 
324
- # Convert back to RGB for final output
325
  final_image = result_image.convert('RGB')
326
 
327
- # Create summary text
328
- summary_lines = []
329
- summary_lines.append(f"🎯 Successfully processed {len(translations_info)} text regions:\n")
330
 
331
- for i, info in enumerate(translations_info, 1):
332
- summary_lines.append(f"{i}. Original: {info['original']}")
333
- summary_lines.append(f" Translation: {info['translated']}")
334
- summary_lines.append(f" Confidence: {info['confidence']:.2f}\n")
 
 
335
 
336
  summary_text = "\n".join(summary_lines)
337
 
@@ -342,42 +478,66 @@ def process_image(image: Image.Image, target_language: str, progress=gr.Progress
342
  print(f"Processing error: {e}")
343
  return image, error_msg
344
 
345
- # Custom CSS for better UI
346
  custom_css = """
347
  .gradio-container {
348
- max-width: 1200px;
349
  margin: auto;
 
350
  }
 
351
  .main-header {
352
  text-align: center;
353
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
354
  -webkit-background-clip: text;
355
  -webkit-text-fill-color: transparent;
356
- font-size: 2.5em;
357
- font-weight: bold;
 
358
  margin-bottom: 0.5em;
 
359
  }
 
360
  .description {
361
  text-align: center;
362
- font-size: 1.1em;
363
- color: #666;
364
  margin-bottom: 2em;
 
365
  }
 
366
  .feature-box {
367
- background: #f8f9fa;
368
- padding: 1em;
369
- border-radius: 8px;
 
 
 
 
 
 
 
 
370
  margin: 1em 0;
 
 
 
 
 
 
 
 
 
371
  }
372
  """
373
 
374
- # Create the Gradio interface
375
- with gr.Blocks(css=custom_css, title="Multilingual Signboard Translator") as demo:
376
 
377
  gr.HTML("""
378
- <div class="main-header">🌐 Multilingual Signboard Translator</div>
379
  <div class="description">
380
- Extract and translate text from images with intelligent overlay technology
381
  </div>
382
  """)
383
 
@@ -386,73 +546,87 @@ with gr.Blocks(css=custom_css, title="Multilingual Signboard Translator") as dem
386
  gr.Markdown("### 📤 Upload & Configure")
387
 
388
  input_image = gr.Image(
389
- label="📷 Upload Image",
390
  type="pil",
391
- height=300
392
  )
393
 
394
  target_language = gr.Dropdown(
395
  choices=list(LANG_CODE_MAP.keys()),
396
  value="Hindi",
397
- label="🎯 Translate To",
398
- info="Select target language for translation"
399
  )
400
 
401
  translate_btn = gr.Button(
402
- "🚀 Translate Text",
403
  variant="primary",
404
- size="lg"
 
405
  )
406
 
407
  with gr.Column(scale=1):
408
- gr.Markdown("### 📤 Results")
409
 
410
  output_image = gr.Image(
411
- label="🖼️ Translated Image",
412
- type="pil",
413
- height=300
414
  )
415
 
416
  output_text = gr.Textbox(
417
- label="📝 Translation Details",
418
- lines=8,
419
- max_lines=15,
420
- info="Detailed translation information"
421
  )
422
 
423
  # Event binding
424
  translate_btn.click(
425
- fn=process_image,
426
  inputs=[input_image, target_language],
427
  outputs=[output_image, output_text],
428
  show_progress=True
429
  )
430
 
431
- # Feature information
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  gr.HTML("""
433
  <div class="feature-box">
434
- <h3>✨ Key Features:</h3>
435
  <ul>
436
- <li><strong>🎯 Smart OCR:</strong> Advanced text detection with confidence filtering</li>
437
- <li><strong>🌐 Bilingual Support:</strong> English Hindi translation</li>
438
- <li><strong>🎨 Intelligent Overlay:</strong> Context-aware text positioning and sizing</li>
439
- <li><strong>🔧 Adaptive Fonts:</strong> Script-specific font selection for better readability</li>
440
- <li><strong>⚡ Optimized Performance:</strong> Fast processing with caching</li>
 
441
  </ul>
442
  </div>
443
  """)
444
 
445
  if __name__ == "__main__":
446
- # Pre-initialize OCR for faster first-time usage
447
- print("🔧 Pre-initializing OCR engine...")
448
  try:
449
  initialize_reader()
450
- print("✅ OCR engine ready!")
451
  except Exception as e:
452
- print(f"⚠️ OCR initialization warning: {e}")
453
 
454
- # Launch the application
455
  demo.launch(
456
- share=False,
457
- show_error=True
 
458
  )
 
6
  import cv2
7
  import time
8
  import re
9
+ from typing import Tuple, List, Optional, Dict
10
  import io
11
  import os
12
+ from collections import defaultdict
13
+ import math
14
 
15
  # Global variables
16
  reader = None
17
  translation_cache = {}
18
 
19
+ # Define supported languages
20
  SUPPORTED_LANGUAGES = {
21
  'en': 'English',
22
+ 'hi': 'Hindi',
23
+ 'es': 'Spanish',
24
+ 'fr': 'French',
25
+ 'de': 'German',
26
+ 'ja': 'Japanese',
27
+ 'ko': 'Korean',
28
+ 'zh': 'Chinese'
29
  }
30
 
31
  # Language code mapping for Google Translator
32
  LANG_CODE_MAP = {
33
  'English': 'en',
34
+ 'Hindi': 'hi',
35
+ 'Spanish': 'es',
36
+ 'French': 'fr',
37
+ 'German': 'de',
38
+ 'Japanese': 'ja',
39
+ 'Korean': 'ko',
40
+ 'Chinese': 'zh'
41
  }
42
 
43
  def initialize_reader():
44
+ """Initialize EasyOCR reader with optimized settings"""
45
  global reader
46
  if reader is None:
47
  try:
48
+ # Initialize with multiple languages for better detection
49
+ reader = easyocr.Reader(['en', 'hi', 'es', 'fr', 'de'], gpu=False, verbose=False)
50
  print("EasyOCR initialized successfully")
51
  except Exception as e:
52
  print(f"Error initializing EasyOCR: {e}")
53
  return None
54
  return reader
55
 
56
+ def calculate_distance(box1, box2):
57
+ """Calculate distance between two bounding boxes"""
58
+ # Get center points
59
+ center1 = [(box1[0][0] + box1[2][0]) / 2, (box1[0][1] + box1[2][1]) / 2]
60
+ center2 = [(box2[0][0] + box2[2][0]) / 2, (box2[0][1] + box2[2][1]) / 2]
61
+
62
+ return math.sqrt((center1[0] - center2[0])**2 + (center1[1] - center2[1])**2)
63
+
64
+ def are_boxes_on_same_line(box1, box2, tolerance=20):
65
+ """Check if two bounding boxes are on the same horizontal line"""
66
+ # Get y-coordinates (vertical positions)
67
+ y1_avg = (box1[0][1] + box1[2][1]) / 2
68
+ y2_avg = (box2[0][1] + box2[2][1]) / 2
69
+
70
+ return abs(y1_avg - y2_avg) <= tolerance
71
+
72
+ def group_text_regions(ocr_results, line_tolerance=25, proximity_threshold=50):
73
+ """Group OCR results into meaningful text blocks"""
74
+ if not ocr_results:
75
+ return []
76
+
77
+ # Sort by vertical position first, then horizontal
78
+ sorted_results = sorted(ocr_results, key=lambda x: (x[0][0][1], x[0][0][0]))
79
+
80
+ grouped_lines = []
81
+ current_line = [sorted_results[0]]
82
+
83
+ for i in range(1, len(sorted_results)):
84
+ current_box = sorted_results[i][0]
85
+ prev_box = current_line[-1][0]
86
+
87
+ # Check if boxes are on the same line
88
+ if are_boxes_on_same_line(current_box, prev_box, line_tolerance):
89
+ # Check proximity (not too far apart horizontally)
90
+ if calculate_distance(current_box, prev_box) <= proximity_threshold:
91
+ current_line.append(sorted_results[i])
92
+ else:
93
+ # Start new line if too far apart
94
+ grouped_lines.append(current_line)
95
+ current_line = [sorted_results[i]]
96
+ else:
97
+ # Different line
98
+ grouped_lines.append(current_line)
99
+ current_line = [sorted_results[i]]
100
+
101
+ # Don't forget the last line
102
+ if current_line:
103
+ grouped_lines.append(current_line)
104
+
105
+ # Merge text within each line
106
+ merged_groups = []
107
+ for line in grouped_lines:
108
+ if len(line) == 1:
109
+ merged_groups.append(line[0])
110
+ else:
111
+ # Sort by horizontal position within the line
112
+ line.sort(key=lambda x: x[0][0][0])
113
+
114
+ # Merge text
115
+ merged_text = ' '.join([item[1] for item in line])
116
+
117
+ # Create combined bounding box
118
+ all_points = []
119
+ for item in line:
120
+ all_points.extend(item[0])
121
+
122
+ # Find min/max coordinates
123
+ x_coords = [point[0] for point in all_points]
124
+ y_coords = [point[1] for point in all_points]
125
+
126
+ min_x, max_x = min(x_coords), max(x_coords)
127
+ min_y, max_y = min(y_coords), max(y_coords)
128
+
129
+ # Create new bounding box
130
+ merged_bbox = [[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]
131
+
132
+ # Use average confidence
133
+ avg_confidence = sum([item[2] for item in line]) / len(line)
134
+
135
+ merged_groups.append((merged_bbox, merged_text, avg_confidence))
136
+
137
+ return merged_groups
138
+
139
  def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFont:
140
+ """Get appropriate font based on text content"""
141
+ # Check for different scripts
142
  has_devanagari = bool(re.search(r'[\u0900-\u097F]', text))
143
+ has_chinese = bool(re.search(r'[\u4e00-\u9fff]', text))
144
+ has_japanese = bool(re.search(r'[\u3040-\u309f\u30a0-\u30ff]', text))
145
+ has_korean = bool(re.search(r'[\uac00-\ud7af]', text))
146
+ has_arabic = bool(re.search(r'[\u0600-\u06ff]', text))
147
 
148
  # Font paths for different scripts
149
+ font_paths = []
150
+
151
+ if has_devanagari:
152
+ font_paths.extend([
153
+ "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
154
+ "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf"
155
+ ])
156
 
157
+ if has_chinese or has_japanese:
158
+ font_paths.extend([
159
+ "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
160
+ "/usr/share/fonts/truetype/arphic/uming.ttc"
161
+ ])
162
+
163
+ if has_korean:
164
+ font_paths.append("/usr/share/fonts/truetype/noto/NotoSansKR-Regular.otf")
165
+
166
+ if has_arabic:
167
+ font_paths.append("/usr/share/fonts/truetype/noto/NotoSansArabic-Regular.ttf")
168
+
169
+ # Default fonts
170
+ font_paths.extend([
171
  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
172
  "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
173
+ "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf"
174
+ ])
 
 
 
175
 
176
  for font_path in font_paths:
177
  try:
 
180
  except (OSError, IOError):
181
  continue
182
 
183
+ # Fallback
184
  try:
185
  return ImageFont.load_default()
186
  except:
187
  return None
188
 
189
+ def smart_translate_with_context(text: str, target_lang: str, source_lang: str = 'auto') -> str:
190
+ """Enhanced translation with better context handling"""
191
  if not text or not text.strip():
192
  return ""
193
 
 
199
  if cache_key in translation_cache:
200
  return translation_cache[cache_key]
201
 
202
+ # Pre-processing for better translation context
203
+ # Handle common signboard patterns
204
+ signboard_patterns = {
205
+ r'\b(no|not|don\'t|do not)\s+(use|mobile|phone|cell)\b': 'prohibition_mobile',
206
+ r'\b(please|kindly)\s+(do not|don\'t)\s+(use|mobile|phone)\b': 'polite_prohibition_mobile',
207
+ r'\b(exit|entrance|entry|way out|way in)\b': 'direction',
208
+ r'\b(toilet|restroom|bathroom|washroom)\b': 'facility',
209
+ r'\b(parking|park|no parking)\b': 'parking',
210
+ r'\b(emergency|fire|safety)\b': 'safety'
211
+ }
212
+
213
+ context_hint = ""
214
+ for pattern, context in signboard_patterns.items():
215
+ if re.search(pattern, cleaned_text.lower()):
216
+ context_hint = f"[Signboard context: {context}] "
217
+ break
218
+
219
+ max_retries = 3
220
  for attempt in range(max_retries):
221
  try:
 
222
  translator = GoogleTranslator(source=source_lang, target=target_lang)
 
223
 
224
+ # Add context hint for better translation
225
+ text_to_translate = context_hint + cleaned_text if context_hint else cleaned_text
226
+ translated = translator.translate(text_to_translate)
227
+
228
+ if translated and translated.strip():
229
+ # Remove context hint from result if it was added
230
+ if context_hint and translated.startswith('['):
231
+ # Try to remove the context hint from translation
232
+ bracket_end = translated.find('] ')
233
+ if bracket_end != -1:
234
+ translated = translated[bracket_end + 2:].strip()
235
+
236
+ # Post-process for common improvements
237
+ translated = post_process_translation(translated, target_lang)
238
 
239
  # Cache successful translation
240
  translation_cache[cache_key] = translated
 
243
  except Exception as e:
244
  print(f"Translation attempt {attempt + 1} failed: {e}")
245
  if attempt < max_retries - 1:
246
+ time.sleep(0.5)
247
+
248
+ return cleaned_text
249
+
250
+ def post_process_translation(translated_text: str, target_lang: str) -> str:
251
+ """Post-process translation for better quality"""
252
+ # Language-specific post-processing
253
+ if target_lang == 'hi': # Hindi
254
+ # Common corrections for Hindi translations
255
+ corrections = {
256
+ 'मत करो': 'न करें', # More polite form
257
+ 'का उपयोग मत करो': 'का उपयोग न करें',
258
+ 'फोन का उपयोग': 'मोबाइल का उपयोग'
259
+ }
260
+
261
+ for old, new in corrections.items():
262
+ translated_text = translated_text.replace(old, new)
263
+
264
+ return translated_text.strip()
265
 
266
+ def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 12, max_size: int = 48) -> int:
267
+ """Calculate optimal font size with better scaling"""
268
  if not text:
269
  return min_size
270
 
271
+ # Estimate character width (varies by language)
272
+ char_width_ratio = 0.7 # More conservative estimate
 
 
273
 
274
+ # For non-Latin scripts, adjust ratio
275
+ if re.search(r'[\u0900-\u097F\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]', text):
276
+ char_width_ratio = 0.9 # Wider characters
277
 
278
+ # Calculate based on width constraint
279
+ width_based_size = int(bbox_width / (len(text) * char_width_ratio))
280
+
281
+ # Calculate based on height constraint (use 80% of available height)
282
+ height_based_size = int(bbox_height * 0.8)
283
+
284
+ # Take the smaller constraint
285
+ optimal_size = min(width_based_size, height_based_size)
286
 
287
  # Apply bounds
288
  return max(min_size, min(optimal_size, max_size))
289
 
290
+ def get_contrasting_color(bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]:
291
+ """Get contrasting text color"""
292
+ r, g, b = bg_color[:3]
 
 
293
  luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
294
 
295
+ if luminance > 0.5:
296
+ return (0, 0, 0) # Black text for light background
 
297
  else:
298
+ return (255, 255, 255) # White text for dark background
299
 
300
+ def extract_dominant_color(image: np.ndarray, bbox: List) -> Tuple[int, int, int]:
301
+ """Extract dominant color from the bounding box region"""
302
  try:
303
  # Get bounding box coordinates
304
+ points = np.array(bbox, dtype=np.int32)
 
 
 
 
 
 
305
 
306
+ # Create mask for the region
307
+ mask = np.zeros(image.shape[:2], dtype=np.uint8)
308
+ cv2.fillPoly(mask, [points], 255)
309
 
310
+ # Extract pixels within the region
311
+ region_pixels = image[mask > 0]
 
 
 
312
 
313
+ if len(region_pixels) > 0:
 
 
 
314
  # Calculate mean color
315
+ mean_color = np.mean(region_pixels, axis=0)
316
+ return tuple(map(int, mean_color))
317
+
318
  except Exception as e:
319
+ print(f"Error extracting color: {e}")
320
 
321
+ return (240, 240, 240) # Default light gray
 
322
 
323
+ def create_enhanced_overlay(image: Image.Image, bbox: List, translated_text: str, bg_opacity: int = 180):
324
+ """Create enhanced overlay with better positioning"""
325
  draw = ImageDraw.Draw(image, 'RGBA')
326
 
327
+ # Convert bbox to integer coordinates
328
+ points = [[int(p[0]), int(p[1])] for p in bbox]
329
+
330
+ # Calculate bounding rectangle
331
+ x_coords = [p[0] for p in points]
332
+ y_coords = [p[1] for p in points]
333
 
334
+ x_min, x_max = min(x_coords), max(x_coords)
335
+ y_min, y_max = min(y_coords), max(y_coords)
336
+
337
+ width = x_max - x_min
338
+ height = y_max - y_min
339
 
340
  # Calculate optimal font size
341
  font_size = calculate_optimal_font_size(translated_text, width, height)
 
 
342
  font = get_font_for_text(translated_text, font_size)
 
 
343
 
344
+ # Extract background color
345
  img_array = np.array(image.convert('RGB'))
346
+ bg_color = extract_dominant_color(img_array, bbox)
347
 
348
+ # Create semi-transparent background
349
+ padding = max(4, font_size // 6)
350
  bg_rect = [
351
+ x_min - padding,
352
+ y_min - padding,
353
+ x_max + padding,
354
+ y_max + padding
355
  ]
356
 
357
+ # Draw background with original color but semi-transparent
358
+ bg_color_with_alpha = bg_color + (bg_opacity,)
359
+ draw.rectangle(bg_rect, fill=bg_color_with_alpha)
360
 
361
+ # Calculate text position (center alignment)
362
  try:
363
  bbox_text = draw.textbbox((0, 0), translated_text, font=font)
364
  text_width = bbox_text[2] - bbox_text[0]
365
  text_height = bbox_text[3] - bbox_text[1]
366
  except:
 
367
  text_width = len(translated_text) * font_size * 0.6
368
  text_height = font_size
369
 
370
+ text_x = x_min + (width - text_width) / 2
371
+ text_y = y_min + (height - text_height) / 2
372
+
373
+ # Get contrasting text color
374
+ text_color = get_contrasting_color(bg_color)
375
+
376
+ # Draw text with slight shadow for better readability
377
+ shadow_offset = max(1, font_size // 20)
378
+ shadow_color = (0, 0, 0) if text_color == (255, 255, 255) else (255, 255, 255)
379
 
380
+ # Draw shadow
381
+ draw.text((text_x + shadow_offset, text_y + shadow_offset), translated_text,
382
+ fill=shadow_color + (100,), font=font)
383
 
384
+ # Draw main text
385
  draw.text((text_x, text_y), translated_text, fill=text_color, font=font)
386
 
387
+ def process_image_enhanced(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
388
+ """Enhanced image processing with better text grouping"""
389
 
390
  if image is None:
391
  return None, "❌ Please upload an image first."
 
402
  if ocr is None:
403
  return image, "❌ Failed to initialize OCR. Please try again."
404
 
405
+ progress(0.3, "🔍 Extracting and grouping text regions...")
406
 
407
  try:
408
+ # Convert PIL image to numpy array
409
  img_array = np.array(image)
410
 
411
+ # Perform OCR with higher confidence threshold
412
+ results = ocr.readtext(img_array, detail=1, paragraph=False)
413
 
414
  if not results:
415
  return image, "ℹ️ No readable text found in the image."
416
 
417
+ # Filter by confidence
418
+ filtered_results = [(bbox, text, conf) for bbox, text, conf in results
419
+ if conf > 0.4 and text.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
  if not filtered_results:
422
  return image, "ℹ️ No text detected with sufficient confidence."
423
 
424
+ progress(0.5, "🔗 Grouping related text regions...")
425
+
426
+ # Group text regions for contextual translation
427
+ grouped_results = group_text_regions(filtered_results)
428
 
429
+ progress(0.6, f"🌐 Translating {len(grouped_results)} text groups...")
430
+
431
+ # Create result image
432
  result_image = image.copy().convert('RGBA')
433
 
434
+ translation_info = []
 
435
 
436
+ for i, (bbox, text, confidence) in enumerate(grouped_results):
437
+ progress(0.6 + (0.3 * i / len(grouped_results)),
438
+ f"Translating group {i+1}/{len(grouped_results)}")
439
 
440
  if text and text.strip():
441
+ # Clean text
442
  cleaned_text = re.sub(r'\s+', ' ', text.strip())
443
 
444
+ # Translate with context
445
+ translated = smart_translate_with_context(cleaned_text, target_lang_code)
446
 
447
+ # Create overlay
448
+ create_enhanced_overlay(result_image, bbox, translated)
449
 
450
+ # Store info
451
+ translation_info.append({
452
  'original': cleaned_text,
453
  'translated': translated,
454
  'confidence': confidence
 
456
 
457
  progress(1.0, "✅ Translation completed!")
458
 
459
+ # Convert to RGB
460
  final_image = result_image.convert('RGB')
461
 
462
+ # Create detailed summary
463
+ summary_lines = [f"🎯 Successfully processed {len(translation_info)} text groups:\n"]
 
464
 
465
+ for i, info in enumerate(translation_info, 1):
466
+ summary_lines.append(f"**Group {i}:**")
467
+ summary_lines.append(f"📝 Original: _{info['original']}_")
468
+ summary_lines.append(f"🌐 Translation: **{info['translated']}**")
469
+ summary_lines.append(f"📊 Confidence: {info['confidence']:.2f}")
470
+ summary_lines.append("")
471
 
472
  summary_text = "\n".join(summary_lines)
473
 
 
478
  print(f"Processing error: {e}")
479
  return image, error_msg
480
 
481
# Enhanced CSS injected into the Gradio app via gr.Blocks(css=custom_css).
# NOTE(review): indentation inside this string is cosmetic for the browser only;
# the selector names (.main-header, .feature-box, .improvement-box, .btn-primary)
# must match the elem_classes / HTML class attributes used in the UI below.
custom_css = """
.gradio-container {
    max-width: 1400px;
    margin: auto;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

.main-header {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 2.8em;
    font-weight: 800;
    margin-bottom: 0.5em;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}

.description {
    text-align: center;
    font-size: 1.2em;
    color: #555;
    margin-bottom: 2em;
    line-height: 1.6;
}

.feature-box {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    padding: 1.5em;
    border-radius: 12px;
    margin: 1.5em 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

.improvement-box {
    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
    padding: 1.2em;
    border-radius: 10px;
    margin: 1em 0;
    border-left: 4px solid #667eea;
}

.btn-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border: none;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 1px;
}
"""
533
 
534
# Create Gradio interface: two-column layout (upload/config | results),
# plus informational HTML panels styled by custom_css.
with gr.Blocks(css=custom_css, title="Enhanced Multilingual Signboard Translator") as demo:

    gr.HTML("""
    <div class="main-header">🌐 Enhanced Multilingual Signboard Translator</div>
    <div class="description">
        Advanced OCR with intelligent text grouping and contextual translation overlay
    </div>
    """)

    # NOTE(review): the Row/Column opener below (original lines 544-545) was
    # missing from the extracted diff view and has been reconstructed from the
    # matching `with gr.Column(scale=1):` sibling — confirm against the full file.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload & Configure")

            input_image = gr.Image(
                label="📷 Upload Signboard Image",
                type="pil",
                height=350
            )

            target_language = gr.Dropdown(
                choices=list(LANG_CODE_MAP.keys()),
                value="Hindi",
                label="🎯 Target Language",
                info="Select language for translation"
            )

            translate_btn = gr.Button(
                "🚀 Translate Signboard",
                variant="primary",
                size="lg",
                elem_classes=["btn-primary"]
            )

        with gr.Column(scale=1):
            gr.Markdown("### 📋 Results")

            output_image = gr.Image(
                label="🖼️ Translated Signboard",
                type="pil",
                height=350
            )

            output_text = gr.Textbox(
                label="📝 Translation Analysis",
                lines=10,
                max_lines=20,
                info="Detailed breakdown of detected and translated text"
            )

    # Event binding: run the OCR + translation pipeline on click.
    translate_btn.click(
        fn=process_image_enhanced,
        inputs=[input_image, target_language],
        outputs=[output_image, output_text],
        show_progress=True
    )

    # Enhanced information sections (static HTML, styled by custom_css).
    gr.HTML("""
    <div class="improvement-box">
        <h3>🚀 Key Improvements in This Version:</h3>
        <ul>
            <li><strong>🧠 Intelligent Text Grouping:</strong> Combines fragmented words into meaningful phrases</li>
            <li><strong>🎯 Contextual Translation:</strong> Uses signboard context for accurate translations</li>
            <li><strong>🌈 Smart Color Preservation:</strong> Maintains original background colors with transparency</li>
            <li><strong>📝 Multi-Script Support:</strong> Enhanced font handling for various languages</li>
            <li><strong>⚡ Optimized Performance:</strong> Better caching and processing algorithms</li>
        </ul>
    </div>
    """)

    gr.HTML("""
    <div class="feature-box">
        <h3>✨ Advanced Features:</h3>
        <ul>
            <li><strong>🔍 Smart OCR:</strong> Groups nearby text elements for better context</li>
            <li><strong>🌐 Context-Aware Translation:</strong> Recognizes signboard patterns for accurate meaning</li>
            <li><strong>🎨 Adaptive Overlays:</strong> Preserves original aesthetics while ensuring readability</li>
            <li><strong>🔤 Multi-Language Support:</strong> Enhanced support for 8+ languages</li>
            <li><strong>📊 Confidence Analysis:</strong> Shows detection confidence for quality assessment</li>
            <li><strong>⚡ Performance Optimized:</strong> Faster processing with intelligent caching</li>
        </ul>
    </div>
    """)
619
 
620
# Script entry point: pre-warm the OCR engine, then serve the Gradio UI.
if __name__ == "__main__":
    print("🔧 Initializing Enhanced OCR Translator...")

    try:
        # Pre-load the EasyOCR reader so the first request doesn't pay the cost.
        initialize_reader()
    except Exception as e:
        # Non-fatal: report the problem but still attempt to launch the app.
        print(f"⚠️ Initialization warning: {e}")
    else:
        print("✅ System ready!")

    # Listen on all interfaces; share=True additionally opens a public tunnel URL.
    demo.launch(share=True, show_error=True, server_name="0.0.0.0")