Amandeep01 committed on
Commit
aa3c16f
·
verified ·
1 Parent(s): 56d6668

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -82
app.py CHANGED
@@ -5,6 +5,7 @@ from PIL import Image, ImageDraw, ImageFont
5
  import numpy as np
6
  import io
7
  import time
 
8
 
9
  # Global variables
10
  reader = None
@@ -56,30 +57,52 @@ def get_default_font(size=20):
56
 
57
def translate_text(text, target_lang):
    """Translate ``text`` into ``target_lang``, with caching and retries.

    Results are memoized in the module-level ``translation_cache`` keyed by
    the text/language pair. The translation service is attempted up to three
    times with a one-second pause between tries; on repeated failure an
    error-marker string containing the original text is returned instead of
    raising, so callers never crash on translation problems.
    """
    # Nothing to translate — mirror that with an empty result.
    if not text or not text.strip():
        return ""

    cache_key = f"{text}|{target_lang}"
    if cache_key in translation_cache:
        return translation_cache[cache_key]

    max_retries = 3
    for attempt in range(max_retries):
        final_attempt = attempt == max_retries - 1
        try:
            result = GoogleTranslator(source='auto', target=target_lang).translate(text)
        except Exception as exc:
            print(f"Translation error (attempt {attempt+1}): {exc}")
            if final_attempt:
                return f"[Translation Error: {text}]"
            time.sleep(1)  # Wait before retry
            continue

        if result:
            translation_cache[cache_key] = result
            return result
        # Service returned an empty result — pause briefly, then retry.
        time.sleep(1)

    return f"[Unable to translate: {text}]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
85
  """Process image with OCR and translation"""
@@ -114,7 +137,7 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
114
  progress(0.6, "Translating text...")
115
 
116
  # Create a copy for overlay
117
- img_pil = image.copy().convert("RGB")
118
  font = get_default_font(size=20)
119
 
120
  # If font creation failed, return with error
@@ -138,16 +161,17 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
138
  top_left, top_right, bottom_right, bottom_left = bbox
139
 
140
  # Calculate text dimensions and position
141
- # Use the original text bounding box size and position
142
  x, y = top_left[0], top_left[1]
143
  width = top_right[0] - top_left[0]
144
  height = bottom_left[1] - top_left[1]
145
 
146
- # Create a rectangle to cover the original text completely
147
- # Add a bit of padding around the original text area
 
 
148
  padding = 4
149
 
150
- # Create solid background to cover original text
151
  draw.rectangle(
152
  [
153
  x - padding,
@@ -155,11 +179,10 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
155
  x + width + padding,
156
  y + height + padding
157
  ],
158
- fill=(0, 0, 0, 255) # Solid black background to cover original text
159
  )
160
 
161
  # Calculate font size to fit within the bounding box
162
- # Start with a reasonable default size and adjust if needed
163
  fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
164
  fontsize = max(fontsize, 12) # Ensure minimum readability
165
 
@@ -177,13 +200,21 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
177
  text_x = x + (width - text_width) / 2
178
  text_y = y + (height - text_height) / 2
179
 
180
- # Draw text with contrasting color
181
- draw.text((text_x, text_y), translated, fill=(255, 0, 0), font=adjusted_font)
 
 
 
 
 
182
 
183
  # Join all translations
184
  all_translations = "\n".join(translations)
185
 
186
- return img_pil, all_translations
 
 
 
187
 
188
  except Exception as e:
189
  import traceback
@@ -193,53 +224,7 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
193
 
194
  # Create Gradio interface
195
# Build the Gradio UI: upload + language controls on the left,
# translated overlay image and extracted text on the right.
with gr.Blocks(title="Enhanced Image Translator") as iface:
    gr.Markdown("# Enhanced Image Translator")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Image")

            with gr.Row():
                target_lang = gr.Dropdown(
                    choices=list(SUPPORTED_LANGUAGES.values()),
                    value="Hindi",
                    label="Translate To",
                )
                overlay_opacity = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Overlay Opacity",
                )

            translate_btn = gr.Button("Translate", variant="primary")

        with gr.Column():
            output_image = gr.Image(type="pil", label="Image with Translated Overlay")
            output_text = gr.Textbox(label="Translated Text Output", lines=10)

    # Wire the button to the OCR + translation pipeline.
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text],
    )

    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Simple and efficient text extraction and translation
    """)
236
 
237
if __name__ == "__main__":
    # Warm up the OCR model so the first request isn't slow; startup must
    # not fail just because the model couldn't be pre-loaded.
    try:
        initialize_reader()
    except Exception as e:
        # BUGFIX: was a bare `except: pass`, which also swallowed
        # SystemExit/KeyboardInterrupt and hid the failure entirely.
        # Log it so a broken OCR setup is visible in the console.
        print(f"OCR reader pre-initialization failed: {e}")

    # Launch the app
    iface.launch()
 
5
  import numpy as np
6
  import io
7
  import time
8
+ import cv2
9
 
10
  # Global variables
11
  reader = None
 
57
 
58
  def translate_text(text, target_lang):
59
  """Translate text with error handling and caching"""
60
+ # ... keep existing code (translate_text function)
61
+
62
def get_dominant_color(image, bbox, padding=4):
    """Return the mean color of the area around a text bounding box.

    Used to pick an overlay background that blends with the pixels
    surrounding detected text.

    Parameters
    ----------
    image : PIL.Image.Image or np.ndarray
        Source image; arrays are expected in RGB (or RGBA/grayscale)
        channel order, as produced by ``np.array(pil_image)``.
    bbox : sequence of four (x, y) points
        OCR-style box: top-left, top-right, bottom-right, bottom-left.
    padding : int, optional
        Extra pixels sampled around the box so the fill matches the
        surrounding background (default 4).

    Returns
    -------
    tuple[int, int, int, int]
        ``(r, g, b, alpha)`` — alpha 230 on success, or the light-grey
        fallback ``(240, 240, 240, 180)`` when sampling fails.
    """
    try:
        # Convert PIL to numpy if needed.
        img_array = image if isinstance(image, np.ndarray) else np.array(image)

        # Extract box geometry (bottom-right is unused for an axis-aligned fill).
        top_left, top_right, _bottom_right, bottom_left = bbox
        x, y = int(top_left[0]), int(top_left[1])
        width = int(top_right[0] - top_left[0])
        height = int(bottom_left[1] - top_left[1])

        # Expand the sampled area slightly, clamped to the image bounds.
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(img_array.shape[1], x + width + padding)
        y2 = min(img_array.shape[0], y + height + padding)

        region = img_array[y1:y2, x1:x2]
        if region.size == 0:
            # Box lies outside the image (or is degenerate) — use fallback.
            return (240, 240, 240, 180)

        # BUGFIX: the previous version ran cv2.COLOR_BGR2RGB on this region,
        # but arrays coming from PIL are already RGB, so the conversion
        # swapped the red and blue channels. No conversion (and no cv2) is
        # needed here.
        if region.ndim == 3:
            # Keep only the color channels (drops alpha for RGBA input).
            pixels = region[..., :3].reshape(-1, 3)
        else:
            # BUGFIX: grayscale input previously reshaped by image *width*
            # and produced meaningless per-column values; replicate the
            # single channel into RGB instead.
            pixels = np.repeat(region.reshape(-1, 1), 3, axis=1)

        dominant = np.mean(pixels, axis=0).astype(int)

        # Alpha 230 keeps the overlay mostly opaque but slightly translucent.
        return (int(dominant[0]), int(dominant[1]), int(dominant[2]), 230)
    except Exception as e:
        print(f"Error getting dominant color: {e}")
        # Default semi-transparent light color.
        return (240, 240, 240, 180)
106
 
107
  def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
108
  """Process image with OCR and translation"""
 
137
  progress(0.6, "Translating text...")
138
 
139
  # Create a copy for overlay
140
+ img_pil = image.copy().convert("RGBA") # Convert to RGBA for transparency support
141
  font = get_default_font(size=20)
142
 
143
  # If font creation failed, return with error
 
161
  top_left, top_right, bottom_right, bottom_left = bbox
162
 
163
  # Calculate text dimensions and position
 
164
  x, y = top_left[0], top_left[1]
165
  width = top_right[0] - top_left[0]
166
  height = bottom_left[1] - top_left[1]
167
 
168
+ # Get dominant color for better background matching
169
+ bg_color = get_dominant_color(img_array, bbox)
170
+
171
+ # Add padding
172
  padding = 4
173
 
174
+ # Create background that matches surrounding area
175
  draw.rectangle(
176
  [
177
  x - padding,
 
179
  x + width + padding,
180
  y + height + padding
181
  ],
182
+ fill=bg_color # Semi-transparent background that matches surrounding colors
183
  )
184
 
185
  # Calculate font size to fit within the bounding box
 
186
  fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
187
  fontsize = max(fontsize, 12) # Ensure minimum readability
188
 
 
200
  text_x = x + (width - text_width) / 2
201
  text_y = y + (height - text_height) / 2
202
 
203
+ # Determine text color based on background brightness
204
+ r, g, b, _ = bg_color
205
+ brightness = (r * 299 + g * 587 + b * 114) / 1000
206
+ text_color = (0, 0, 0, 255) if brightness > 128 else (255, 255, 255, 255) # Black or white based on background
207
+
208
+ # Draw text with appropriate contrast
209
+ draw.text((text_x, text_y), translated, fill=text_color, font=adjusted_font)
210
 
211
  # Join all translations
212
  all_translations = "\n".join(translations)
213
 
214
+ # Convert back to RGB for display
215
+ result_image = img_pil.convert('RGB')
216
+
217
+ return result_image, all_translations
218
 
219
  except Exception as e:
220
  import traceback
 
224
 
225
  # Create Gradio interface
226
  with gr.Blocks(title="Enhanced Image Translator") as iface:
227
+ # ... keep existing code (Gradio interface setup)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  if __name__ == "__main__":
230
+ # ... keep existing code (initialization and app launch)