Amandeep01 committed on
Commit
aa3c16f
·
verified ·
1 Parent(s): 56d6668

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -82
app.py CHANGED
@@ -5,6 +5,7 @@ from PIL import Image, ImageDraw, ImageFont
5
  import numpy as np
6
  import io
7
  import time
 
8
 
9
  # Global variables
10
  reader = None
@@ -56,30 +57,52 @@ def get_default_font(size=20):
56
 
57
def translate_text(text, target_lang):
    """Translate ``text`` into ``target_lang``, with caching and retries.

    Results are memoized in the module-level ``translation_cache`` keyed by
    the text/language pair. The translation service is attempted up to three
    times with a one-second pause between tries; on repeated failure an
    error-marker string containing the original text is returned instead of
    raising, so callers never crash on translation problems.
    """
    # Nothing to translate — mirror that with an empty result.
    if not text or not text.strip():
        return ""

    cache_key = f"{text}|{target_lang}"
    if cache_key in translation_cache:
        return translation_cache[cache_key]

    max_retries = 3
    for attempt in range(max_retries):
        final_attempt = attempt == max_retries - 1
        try:
            result = GoogleTranslator(source='auto', target=target_lang).translate(text)
        except Exception as exc:
            print(f"Translation error (attempt {attempt+1}): {exc}")
            if final_attempt:
                return f"[Translation Error: {text}]"
            time.sleep(1)  # Wait before retry
            continue

        if result:
            translation_cache[cache_key] = result
            return result
        # Service returned an empty result — pause briefly, then retry.
        time.sleep(1)

    return f"[Unable to translate: {text}]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
85
  """Process image with OCR and translation"""
@@ -114,7 +137,7 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
114
  progress(0.6, "Translating text...")
115
 
116
  # Create a copy for overlay
117
- img_pil = image.copy().convert("RGB")
118
  font = get_default_font(size=20)
119
 
120
  # If font creation failed, return with error
@@ -138,16 +161,17 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
138
  top_left, top_right, bottom_right, bottom_left = bbox
139
 
140
  # Calculate text dimensions and position
141
- # Use the original text bounding box size and position
142
  x, y = top_left[0], top_left[1]
143
  width = top_right[0] - top_left[0]
144
  height = bottom_left[1] - top_left[1]
145
 
146
- # Create a rectangle to cover the original text completely
147
- # Add a bit of padding around the original text area
 
 
148
  padding = 4
149
 
150
- # Create solid background to cover original text
151
  draw.rectangle(
152
  [
153
  x - padding,
@@ -155,11 +179,10 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
155
  x + width + padding,
156
  y + height + padding
157
  ],
158
- fill=(0, 0, 0, 255) # Solid black background to cover original text
159
  )
160
 
161
  # Calculate font size to fit within the bounding box
162
- # Start with a reasonable default size and adjust if needed
163
  fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
164
  fontsize = max(fontsize, 12) # Ensure minimum readability
165
 
@@ -177,13 +200,21 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
177
  text_x = x + (width - text_width) / 2
178
  text_y = y + (height - text_height) / 2
179
 
180
- # Draw text with contrasting color
181
- draw.text((text_x, text_y), translated, fill=(255, 0, 0), font=adjusted_font)
 
 
 
 
 
182
 
183
  # Join all translations
184
  all_translations = "\n".join(translations)
185
 
186
- return img_pil, all_translations
 
 
 
187
 
188
  except Exception as e:
189
  import traceback
@@ -193,53 +224,7 @@ def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress(
193
 
194
  # Create Gradio interface
195
# Build the Gradio UI: upload + language controls on the left,
# translated overlay image and extracted text on the right.
with gr.Blocks(title="Enhanced Image Translator") as iface:
    gr.Markdown("# Enhanced Image Translator")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Image")

            with gr.Row():
                target_lang = gr.Dropdown(
                    choices=list(SUPPORTED_LANGUAGES.values()),
                    value="Hindi",
                    label="Translate To",
                )
                overlay_opacity = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Overlay Opacity",
                )

            translate_btn = gr.Button("Translate", variant="primary")

        with gr.Column():
            output_image = gr.Image(type="pil", label="Image with Translated Overlay")
            output_text = gr.Textbox(label="Translated Text Output", lines=10)

    # Wire the button to the OCR + translation pipeline.
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text],
    )

    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Simple and efficient text extraction and translation
    """)
236
 
237
if __name__ == "__main__":
    # Warm up the OCR model so the first request isn't slow; startup must
    # not fail just because the model couldn't be pre-loaded.
    try:
        initialize_reader()
    except Exception as e:
        # BUGFIX: was a bare `except: pass`, which also swallowed
        # SystemExit/KeyboardInterrupt and hid the failure entirely.
        # Log it so a broken OCR setup is visible in the console.
        print(f"OCR reader pre-initialization failed: {e}")

    # Launch the app
    iface.launch()
 
5
  import numpy as np
6
  import io
7
  import time
8
+ import cv2
9
 
10
  # Global variables
11
  reader = None
 
57
 
58
  def translate_text(text, target_lang):
59
  """Translate text with error handling and caching"""
60
+ # ... keep existing code (translate_text function)
61
+
62
def get_dominant_color(image, bbox, padding=4):
    """Return the mean color of the area around a text bounding box.

    Used to pick an overlay background that blends with the pixels
    surrounding detected text.

    Parameters
    ----------
    image : PIL.Image.Image or np.ndarray
        Source image; arrays are expected in RGB (or RGBA/grayscale)
        channel order, as produced by ``np.array(pil_image)``.
    bbox : sequence of four (x, y) points
        OCR-style box: top-left, top-right, bottom-right, bottom-left.
    padding : int, optional
        Extra pixels sampled around the box so the fill matches the
        surrounding background (default 4).

    Returns
    -------
    tuple[int, int, int, int]
        ``(r, g, b, alpha)`` — alpha 230 on success, or the light-grey
        fallback ``(240, 240, 240, 180)`` when sampling fails.
    """
    try:
        # Convert PIL to numpy if needed.
        img_array = image if isinstance(image, np.ndarray) else np.array(image)

        # Extract box geometry (bottom-right is unused for an axis-aligned fill).
        top_left, top_right, _bottom_right, bottom_left = bbox
        x, y = int(top_left[0]), int(top_left[1])
        width = int(top_right[0] - top_left[0])
        height = int(bottom_left[1] - top_left[1])

        # Expand the sampled area slightly, clamped to the image bounds.
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(img_array.shape[1], x + width + padding)
        y2 = min(img_array.shape[0], y + height + padding)

        region = img_array[y1:y2, x1:x2]
        if region.size == 0:
            # Box lies outside the image (or is degenerate) — use fallback.
            return (240, 240, 240, 180)

        # BUGFIX: the previous version ran cv2.COLOR_BGR2RGB on this region,
        # but arrays coming from PIL are already RGB, so the conversion
        # swapped the red and blue channels. No conversion (and no cv2) is
        # needed here.
        if region.ndim == 3:
            # Keep only the color channels (drops alpha for RGBA input).
            pixels = region[..., :3].reshape(-1, 3)
        else:
            # BUGFIX: grayscale input previously reshaped by image *width*
            # and produced meaningless per-column values; replicate the
            # single channel into RGB instead.
            pixels = np.repeat(region.reshape(-1, 1), 3, axis=1)

        dominant = np.mean(pixels, axis=0).astype(int)

        # Alpha 230 keeps the overlay mostly opaque but slightly translucent.
        return (int(dominant[0]), int(dominant[1]), int(dominant[2]), 230)
    except Exception as e:
        print(f"Error getting dominant color: {e}")
        # Default semi-transparent light color.
        return (240, 240, 240, 180)
106
 
107
  def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
108
  """Process image with OCR and translation"""
 
137
  progress(0.6, "Translating text...")
138
 
139
  # Create a copy for overlay
140
+ img_pil = image.copy().convert("RGBA") # Convert to RGBA for transparency support
141
  font = get_default_font(size=20)
142
 
143
  # If font creation failed, return with error
 
161
  top_left, top_right, bottom_right, bottom_left = bbox
162
 
163
  # Calculate text dimensions and position
 
164
  x, y = top_left[0], top_left[1]
165
  width = top_right[0] - top_left[0]
166
  height = bottom_left[1] - top_left[1]
167
 
168
+ # Get dominant color for better background matching
169
+ bg_color = get_dominant_color(img_array, bbox)
170
+
171
+ # Add padding
172
  padding = 4
173
 
174
+ # Create background that matches surrounding area
175
  draw.rectangle(
176
  [
177
  x - padding,
 
179
  x + width + padding,
180
  y + height + padding
181
  ],
182
+ fill=bg_color # Semi-transparent background that matches surrounding colors
183
  )
184
 
185
  # Calculate font size to fit within the bounding box
 
186
  fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
187
  fontsize = max(fontsize, 12) # Ensure minimum readability
188
 
 
200
  text_x = x + (width - text_width) / 2
201
  text_y = y + (height - text_height) / 2
202
 
203
+ # Determine text color based on background brightness
204
+ r, g, b, _ = bg_color
205
+ brightness = (r * 299 + g * 587 + b * 114) / 1000
206
+ text_color = (0, 0, 0, 255) if brightness > 128 else (255, 255, 255, 255) # Black or white based on background
207
+
208
+ # Draw text with appropriate contrast
209
+ draw.text((text_x, text_y), translated, fill=text_color, font=adjusted_font)
210
 
211
  # Join all translations
212
  all_translations = "\n".join(translations)
213
 
214
+ # Convert back to RGB for display
215
+ result_image = img_pil.convert('RGB')
216
+
217
+ return result_image, all_translations
218
 
219
  except Exception as e:
220
  import traceback
 
224
 
225
  # Create Gradio interface
226
  with gr.Blocks(title="Enhanced Image Translator") as iface:
227
+ # ... keep existing code (Gradio interface setup)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  if __name__ == "__main__":
230
+ # ... keep existing code (initialization and app launch)