# Hugging Face Space: Amandeep01 / Signboard_Overlay_Project
# Space status: Sleeping
# File size: 11,199 Bytes

import gradio as gr
import easyocr
from deep_translator import GoogleTranslator
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import io
import time
import cv2

# Module-level state shared by every request
reader = None  # EasyOCR reader; stays None until initialize_reader() builds it
translation_cache = {}  # "<text>|<target_lang>" -> translated string

# Language codes offered by the app, mapped to their display names.
# The same codes are handed to both EasyOCR and Google Translator.
SUPPORTED_LANGUAGES = dict(
    en='English',
    hi='Hindi',
    mr='Marathi',
    ne='Nepali',
)

def initialize_reader():
    """Return the shared EasyOCR reader, creating it on the first call.

    The instance is kept in the module-level ``reader`` so later calls reuse
    it. If construction fails, the error is printed and None is returned;
    ``reader`` stays None, so the next call tries again.
    """
    global reader
    if reader is not None:
        return reader

    ocr_languages = ['en', 'hi', 'mr', 'ne']
    try:
        # CPU-only reader limited to the languages the app offers
        reader = easyocr.Reader(ocr_languages, gpu=False)
    except Exception as e:
        print(f"Error initializing EasyOCR: {e}")
        return None
    return reader

def get_default_font(size=20):
    """Load the first available TrueType font from a list of known paths.

    Args:
        size: Point size requested for the TrueType font.

    Returns:
        The first font that loads, Pillow's built-in default font when none
        of the candidate files can be opened, or None (after printing the
        error) if anything else goes wrong.
    """
    candidate_paths = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
    )
    try:
        for font_path in candidate_paths:
            try:
                loaded = ImageFont.truetype(font_path, size=size)
            except OSError:
                # Font file missing or unreadable: move on to the next candidate
                continue
            return loaded

        # None of the candidates could be opened
        return ImageFont.load_default()
    except Exception as e:
        print(f"Font error: {e}")
        return None

def translate_text(text, target_lang):
    """Translate ``text`` into ``target_lang`` with caching and retries.

    Args:
        text: Source text; the source language is auto-detected.
        target_lang: Target language code passed to GoogleTranslator.

    Returns:
        - "" when ``text`` is empty or only whitespace.
        - The translated string on success (also stored in
          ``translation_cache``).
        - "[Translation Error: <text>]" when the final attempt raised.
        - "[Unable to translate: <text>]" when the final attempt returned an
          empty result.
    """
    if not text or not text.strip():
        return ""

    # Use cache if available
    cache_key = f"{text}|{target_lang}"
    if cache_key in translation_cache:
        return translation_cache[cache_key]

    max_retries = 3
    retry_delay = 1  # seconds to wait between attempts
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            translated = GoogleTranslator(source='auto', target=target_lang).translate(text)
        except Exception as e:
            print(f"Translation error (attempt {attempt+1}): {e}")
            if is_last_attempt:
                return f"[Translation Error: {text}]"
        else:
            if translated:
                translation_cache[cache_key] = translated
                return translated

        # Only wait when another attempt follows; sleeping after the final
        # attempt would just delay the failure message.
        if not is_last_attempt:
            time.sleep(retry_delay)

    return f"[Unable to translate: {text}]"

def get_dominant_color(image, bbox, padding=4):
    """Return the mean color of the image region around a text box.

    Args:
        image: PIL image or numpy array. Channel order is taken as-is, so an
            RGB input yields an RGB result (no channel swapping is done).
        bbox: Sequence of (x, y) corner points of the detected text.
        padding: Pixels added on every side of the box before sampling.

    Returns:
        ``(r, g, b, alpha)`` as Python ints with alpha 230. For grayscale
        input the gray level is used for all three channels. When the region
        is empty or an error occurs, ``(240, 240, 240, 180)`` is returned.
    """
    fallback = (240, 240, 240, 180)
    try:
        img_array = image if isinstance(image, np.ndarray) else np.array(image)

        # Axis-aligned extent of all corners, so skewed boxes are handled too
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]

        # Clamp to the image; the lower clamp on the far edges prevents a
        # negative value from being read as a "from the end" slice index.
        x1 = max(0, min(xs) - padding)
        y1 = max(0, min(ys) - padding)
        x2 = min(img_array.shape[1], max(0, max(xs) + padding))
        y2 = min(img_array.shape[0], max(0, max(ys) + padding))

        region = img_array[y1:y2, x1:x2]
        if region.size == 0:
            return fallback

        if region.ndim == 2:
            # Grayscale image: a single intensity for all three channels
            level = int(np.mean(region))
            return (level, level, level, 230)

        channel_means = np.mean(region.reshape(-1, region.shape[-1]), axis=0).astype(int)
        if channel_means.size < 3:
            # e.g. gray + alpha: use the first channel as the intensity
            level = int(channel_means[0])
            return (level, level, level, 230)

        # Fixed alpha keeps the background slightly translucent
        return (int(channel_means[0]), int(channel_means[1]), int(channel_means[2]), 230)
    except Exception as e:
        print(f"Error getting dominant color: {e}")
        # Return a default semi-transparent light color
        return fallback

def _resolve_language_code(target_lang):
    """Map a language code or display name to a SUPPORTED_LANGUAGES code, or None."""
    if not isinstance(target_lang, str):
        return None
    if target_lang in SUPPORTED_LANGUAGES:
        return target_lang
    wanted = target_lang.strip().lower()
    return next(
        (code for code, name in SUPPORTED_LANGUAGES.items() if name.lower() == wanted),
        None,
    )


def _bbox_bounds(bbox):
    """Return (left, top, right, bottom) of the axis-aligned box enclosing ``bbox``."""
    xs = [float(point[0]) for point in bbox]
    ys = [float(point[1]) for point in bbox]
    return min(xs), min(ys), max(xs), max(ys)


def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
    """Detect text in an image, translate it, and draw the translation on top.

    Args:
        image: PIL image to process (None when nothing was uploaded).
        target_lang: Language code or display name from SUPPORTED_LANGUAGES.
        overlay_opacity: Opacity of the background painted behind each
            translation, from 0.0 (invisible) to 1.0 (opaque). Values outside
            that range are clamped; unparsable values fall back to 0.7.
        progress: Gradio progress callback.

    Returns:
        ``(result_image, message)``. On success the image carries the
        overlay and the message lists one "original → translated" pair per
        line. On failure the input image is returned unchanged together
        with an explanatory message.
    """
    if image is None:
        return None, "Please upload an image"

    # Accept either a language code or its display name
    lang_code = _resolve_language_code(target_lang)
    if lang_code is None:
        return image, f"Unsupported language. Supported: {', '.join(SUPPORTED_LANGUAGES.values())}"

    # Turn the requested opacity into an 8-bit alpha value
    try:
        opacity = float(overlay_opacity)
    except (TypeError, ValueError):
        opacity = 0.7
    overlay_alpha = int(round(min(1.0, max(0.0, opacity)) * 255))

    progress(0.1, "Initializing...")

    # Initialize OCR reader
    ocr = initialize_reader()
    if ocr is None:
        return image, "Failed to initialize OCR. Please try again."

    progress(0.3, "Detecting text...")

    try:
        # Convert to numpy array for OCR
        img_array = np.array(image)
        results = ocr.readtext(img_array)

        if not results:
            return image, "No text detected in the image"

        progress(0.6, "Translating text...")

        font = get_default_font(size=20)

        # If font creation failed, return with error
        if font is None:
            return image, "Error loading fonts. Processing without overlay."

        # Pillow alpha-blends fills only when an RGB image is drawn on in
        # 'RGBA' mode. Drawing on an RGBA image overwrites the pixels, and the
        # alpha would then be dropped when converting back to RGB, leaving an
        # opaque box. convert() returns a new image, so the input is untouched.
        img_pil = image.convert("RGB")
        draw = ImageDraw.Draw(img_pil, 'RGBA')

        fonts_by_size = {20: font}  # avoid reloading the same font per region
        padding = 4
        total = len(results)

        # Process detected text
        translations = []
        for i, (bbox, text, _prob) in enumerate(results):
            if not text or not text.strip():
                continue

            progress(0.6 + (0.4 * (i / total)), f"Translating text {i+1}/{total}")

            translated = translate_text(text, lang_code)
            translations.append(f"{text} → {translated}")

            # Enclosing rectangle of all four corners; using only two corners
            # gives a negative width/height for skewed detections.
            left, top, right, bottom = _bbox_bounds(bbox)
            width = right - left
            height = bottom - top

            # Sample the surrounding color, then apply the requested opacity
            r, g, b, _ = get_dominant_color(img_array, bbox)
            bg_color = (r, g, b, overlay_alpha)

            draw.rectangle(
                [left - padding, top - padding, right + padding, bottom + padding],
                fill=bg_color,
            )

            # Shrink the font for long text, but never below a readable size
            fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
            fontsize = max(fontsize, 12)

            adjusted_font = fonts_by_size.get(fontsize)
            if adjusted_font is None:
                adjusted_font = get_default_font(size=fontsize) or font
                fonts_by_size[fontsize] = adjusted_font

            # textbbox reports where the ink lands relative to the anchor;
            # subtract that offset so the ink itself is centered in the box.
            ink_left, ink_top, ink_right, ink_bottom = draw.textbbox(
                (0, 0), translated, font=adjusted_font
            )
            text_width = ink_right - ink_left
            text_height = ink_bottom - ink_top
            text_x = left + (width - text_width) / 2 - ink_left
            text_y = top + (height - text_height) / 2 - ink_top

            # Black text on light backgrounds, white text on dark ones
            brightness = (r * 299 + g * 587 + b * 114) / 1000
            text_color = (0, 0, 0, 255) if brightness > 128 else (255, 255, 255, 255)

            draw.text((text_x, text_y), translated, fill=text_color, font=adjusted_font)

        # Join all translations
        all_translations = "\n".join(translations)

        # img_pil is already RGB, so it can be displayed directly
        return img_pil, all_translations

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in process_image: {str(e)}\n{error_details}")
        return image, f"Error processing image: {str(e)}"

# Gradio UI: inputs and controls in the first column, results in the second
with gr.Blocks(title="Multilingual Signboard Translator with Image Overlay") as iface:
    gr.Markdown("# Multilingual Signboard Translator with Image Overlay")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Upload Image", type="pil")

            with gr.Row():
                # The dropdown shows display names; process_image maps them to codes
                target_lang = gr.Dropdown(
                    label="Translate To",
                    choices=[name for name in SUPPORTED_LANGUAGES.values()],
                    value="Hindi",
                )
                overlay_opacity = gr.Slider(
                    label="Overlay Opacity", minimum=0.1, maximum=1.0, step=0.1, value=0.7
                )

            translate_btn = gr.Button("Translate", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="Image with Translated Overlay", type="pil")
            output_text = gr.Textbox(label="Translated Text Output", lines=10)

    # Run the OCR + translation pipeline when the button is pressed
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text],
    )

    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Simple and efficient text extraction and translation
    """)

if __name__ == "__main__":
    # Warm up the OCR model at startup to avoid a delay on the first request.
    # This is best-effort: a failure is reported but must not stop the app,
    # since process_image calls initialize_reader() again on each request.
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt and SystemExit still propagate.
    try:
        initialize_reader()
    except Exception as e:
        print(f"OCR warm-up failed, will retry on first request: {e}")

    # Launch the app
    iface.launch()