Spaces:

Amandeep01
/

Signboard_Overlay_Project

Sleeping

App Files Files Community

Amandeep01 committed on May 13, 2025

Commit

6eefa77

verified ·

1 Parent(s): 0ca326d

Update app.py

Browse files

Files changed (1) hide show

app.py +217 -29

app.py CHANGED Viewed

@@ -1,41 +1,229 @@
 import gradio as gr
 import easyocr
-from libretranslatepy import LibreTranslateAPI
-from PIL import Image
 import numpy as np
-import cv2
-# Initialize OCR reader and translator
-reader = easyocr.Reader(['en', 'hi'])
-lt = LibreTranslateAPI("https://libretranslate.de")
-def process_image(img):
-    # Convert PIL image to NumPy array
-    img_array = np.array(img)
-    img_gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
-    result = reader.readtext(img_gray)
-    extracted_text = ""
-    translated_text = ""
-    for detection in result:
-        text = detection[1]
-        extracted_text += text + " "
-    # Translate extracted text to Hindi
-    if extracted_text.strip():
-        translated_text = lt.translate(extracted_text, "en", "hi")
-    return extracted_text.strip(), translated_text.strip()
-# Gradio interface
-iface = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil"),
-    outputs=["text", "text"],
-    title="Image Text Translator (English to Hindi)",
-    description="Upload an image with English text, and get Hindi translation using LibreTranslate (no API key needed)."
-)
-iface.launch()

 import gradio as gr
 import easyocr
+from deep_translator import GoogleTranslator
+from PIL import Image, ImageDraw, ImageFont
 import numpy as np
+import os
+import time
+import functools
+import torch
+from pathlib import Path
+# Constants
+# Primary overlay font; get_font() falls back to other paths if it cannot be loaded
+FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
+# Translation targets offered in the UI: translator language code -> display name
+SUPPORTED_LANGUAGES = {
+    'en': 'English',
+    'hi': 'Hindi',
+    'mr': 'Marathi',
+    'ne': 'Nepali',
+    'es': 'Spanish',
+    'fr': 'French',
+    'de': 'German',
+    'ja': 'Japanese',
+    'ko': 'Korean',
+    'zh-CN': 'Chinese (Simplified)',
+    'ar': 'Arabic',
+    'ru': 'Russian',
+    'pt': 'Portuguese',
+    'it': 'Italian',
+}
+# Languages loaded into the single shared EasyOCR reader.
+# EasyOCR selects one recognition model per Reader, so only languages that
+# share a model can be listed together: the Devanagari languages (hi, mr, ne)
+# combine with English, while ja, ko, ch_sim, ar and ru each require a
+# separate Reader. Listing them all at once (or using 'zh_cn', which is not an
+# EasyOCR code; Simplified Chinese is 'ch_sim') makes easyocr.Reader raise
+# ValueError before any image is processed.
+OCR_LANGUAGES = ['en', 'hi', 'mr', 'ne']
+# Global cache for translations to avoid repeated API calls
+translation_cache = {}
+# Shared OCR reader; stays None until get_reader() creates it on first use
+reader = None
+def get_reader(progress=None):
+    """Return the shared EasyOCR reader, creating it on the first call.
+
+    Args:
+        progress: Optional callable taking (fraction, message); when given,
+            it is notified before and after the model is loaded.
+
+    Returns:
+        The module-level ``reader`` instance.
+    """
+    global reader
+    # Already built on an earlier call: reuse it and skip the progress updates
+    if reader is not None:
+        return reader
+    if progress:
+        progress(0.1, "Loading OCR model...")
+    # Run on the GPU only when torch reports CUDA as available, else on CPU
+    use_gpu = torch.cuda.is_available()
+    reader = easyocr.Reader(OCR_LANGUAGES, gpu=use_gpu)
+    if progress:
+        progress(0.3, "OCR model loaded successfully")
+    return reader
+def get_font(size=20):
+    """Return a font of the requested size, trying each known path in order.
+
+    Args:
+        size: Point size passed to ``ImageFont.truetype``.
+
+    Returns:
+        The first TrueType font that loads, or Pillow's default font when
+        none of the candidate paths can be loaded.
+    """
+    # Preferred font first, then the fallbacks, in priority order
+    candidate_paths = (
+        FONT_PATH,
+        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
+        "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
+    )
+    for candidate in candidate_paths:
+        try:
+            return ImageFont.truetype(candidate, size=size)
+        except OSError:
+            # This path could not be loaded; move on to the next candidate
+            continue
+    # Last resort: use default font
+    return ImageFont.load_default()
+def translate_text(text, target_lang, source_lang='auto'):
+    """Translate ``text`` into ``target_lang``, reusing cached results.
+
+    Args:
+        text: String to translate.
+        target_lang: Language code handed to GoogleTranslator as ``target``.
+        source_lang: Language code handed to GoogleTranslator as ``source``.
+
+    Returns:
+        "" for blank input, the translator's result on success, or
+        "[Translation Error]" when the translator raises.
+    """
+    if not text.strip():
+        return ""
+    key = f"{text}|{source_lang}|{target_lang}"
+    # Serve a previously stored result without calling the translator again
+    try:
+        return translation_cache[key]
+    except KeyError:
+        pass
+    try:
+        translator = GoogleTranslator(source=source_lang, target=target_lang)
+        result = translator.translate(text)
+        translation_cache[key] = result
+    except Exception as e:
+        # Best effort: report the failure and hand back a placeholder
+        # (failures are not cached, so a later call retries)
+        print(f"Translation error: {e}")
+        return "[Translation Error]"
+    return result
+def draw_bbox_with_translation(img_pil, bbox, translated_text, font, opacity=0.7, text_color=(255, 0, 0)):
+    """Draw ``translated_text`` on a translucent black box above a text region.
+
+    Args:
+        img_pil: Image to draw on; it is modified in place.
+        bbox: Sequence of points; only the first point (x, y) is used as anchor.
+        translated_text: Text to render.
+        font: Font used both for measuring and for drawing the text.
+        opacity: Background opacity, scaled to a 0-255 alpha value.
+        text_color: Fill colour of the text.
+
+    Returns:
+        The same ``img_pil`` object that was passed in.
+    """
+    canvas = ImageDraw.Draw(img_pil, 'RGBA')
+    # Measured from the origin, so the right/bottom edges serve as width/height
+    label_w, label_h = canvas.textbbox((0, 0), translated_text, font=font)[2:]
+    anchor = bbox[0]
+    x = anchor[0]
+    # Place the label 10px above the region, clamped to the top of the image
+    y = max(0, anchor[1] - label_h - 10)
+    # Backdrop extends 5px beyond the text on every side for readability
+    pad = 5
+    backdrop = (x - pad, y - pad, x + label_w + pad, y + label_h + pad)
+    canvas.rectangle(backdrop, fill=(0, 0, 0, int(255 * opacity)))
+    canvas.text((x, y), translated_text, fill=text_color, font=font)
+    return img_pil
+def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
+    """Run OCR on an image, translate each detected text and overlay the result.
+
+    Args:
+        image: PIL image to process, or None when nothing was uploaded.
+        target_lang: Target language, given either as a code from
+            SUPPORTED_LANGUAGES (e.g. 'hi') or as a display name
+            (e.g. 'Hindi'); display names and codes are matched
+            case-insensitively.
+        overlay_opacity: Opacity of the background drawn behind each label.
+        progress: Callable taking (fraction, message) used to report progress.
+
+    Returns:
+        A tuple ``(image, text)``: the annotated copy of the image plus one
+        "original → translation" line per detected text, or the unmodified
+        input (None if no image was given) plus an explanatory message when
+        nothing could be processed.
+    """
+    if image is None:
+        return None, "Please upload an image"
+    # Resolve the target language to a translator code first and reject only
+    # when that fails, so validation and resolution accept the same inputs
+    # (previously validation was case-sensitive while resolution was not).
+    lang_code = None
+    if isinstance(target_lang, str):
+        wanted = target_lang.strip()
+        if wanted in SUPPORTED_LANGUAGES:
+            lang_code = wanted
+        else:
+            wanted_lower = wanted.lower()
+            lang_code = next(
+                (
+                    code
+                    for code, name in SUPPORTED_LANGUAGES.items()
+                    if wanted_lower in (name.lower(), code.lower())
+                ),
+                None,
+            )
+    if lang_code is None:
+        return image, f"Unsupported target language. Supported languages: {', '.join(SUPPORTED_LANGUAGES.values())}"
+    progress(0.05, "Preparing image...")
+    # Convert to numpy array
+    img_array = np.array(image)
+    # Get OCR reader
+    ocr = get_reader(progress)
+    progress(0.4, "Detecting text...")
+    results = ocr.readtext(img_array)
+    if not results:
+        return image, "No text detected in the image"
+    progress(0.6, "Translating detected text...")
+    # Draw on an RGB copy so the caller's image is left untouched
+    img_pil = image.copy().convert("RGB")
+    # Font size scales with image width, never below 20
+    font = get_font(size=max(20, int(img_pil.width / 40)))
+    final_translations = []
+    total = len(results)
+    # Process each text region
+    for idx, (bbox, text, prob) in enumerate(results):
+        # The translation phase spans the 0.6-0.9 range of the progress bar
+        progress(0.6 + 0.3 * (idx / total), f"Translating text {idx+1}/{total}")
+        if not text.strip():
+            continue
+        translated = translate_text(text, lang_code)
+        final_translations.append(f"{text} → {translated}")
+        # Draw translation with improved overlay
+        img_pil = draw_bbox_with_translation(
+            img_pil,
+            bbox,
+            translated,
+            font,
+            opacity=overlay_opacity
+        )
+    progress(1.0, "Completed!")
+    full_text = "\n".join(final_translations)
+    return img_pil, full_text
+# Gradio UI: inputs in one column, results in the other, wired to process_image
+with gr.Blocks(title="Enhanced Image Translator") as iface:  # the app object launched at the bottom of the file
+    gr.Markdown("# Enhanced Image Translator")
+    gr.Markdown("Extract & translate text from images with improved overlay visualization")
+    with gr.Row():
+        with gr.Column():  # input controls
+            input_image = gr.Image(type="pil", label="Upload Image")  # type="pil": process_image receives a PIL image
+            with gr.Row():
+                target_lang = gr.Dropdown(
+                    choices=list(SUPPORTED_LANGUAGES.values()),  # display names, not codes; process_image maps them back
+                    value="Hindi",
+                    label="Translate To"
+                )
+                overlay_opacity = gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.7,  # same value as process_image's overlay_opacity default
+                    step=0.1,
+                    label="Overlay Opacity"
+                )
+            submit_btn = gr.Button("Translate", variant="primary")
+        with gr.Column():  # results
+            output_image = gr.Image(type="pil", label="Image with Translated Overlay")
+            output_text = gr.Textbox(label="Translated Text Output", lines=10)
+    submit_btn.click(
+        fn=process_image,
+        inputs=[input_image, target_lang, overlay_opacity],  # positional order matches process_image's parameters
+        outputs=[output_image, output_text]  # matches the (image, text) tuple process_image returns
+    )
+    gr.Examples(
+        examples=[
+            ["examples/hindi_sample.jpg", "English"],
+            ["examples/english_sample.jpg", "Hindi"],
+        ],
+        inputs=[input_image, target_lang],  # no opacity input here, so process_image uses its 0.7 default
+        outputs=[output_image, output_text],
+        fn=process_image,
+        cache_examples=True,  # NOTE(review): presumably runs the examples ahead of time, so both sample files must exist - confirm
+    )
+    gr.Markdown("""
+    ## Features
+    - Supports multiple languages for translation
+    - Semi-transparent overlays for better readability
+    - Caching for faster repeated translations
+    - Responsive font sizing based on image dimensions
+    """)
+if __name__ == "__main__":  # start the server only when run as a script, not on import
+    iface.launch()