"""Gradio app: OCR text in an image, translate it, and overlay the result."""

import io
import time
import traceback

import cv2
import easyocr
import gradio as gr
import numpy as np
from deep_translator import GoogleTranslator
from PIL import Image, ImageDraw, ImageFont

# Global variables
reader = None
translation_cache = {}

# Upper bound on cached translations so a long-running server does not grow
# its memory without limit; the oldest entry is evicted first.
MAX_CACHE_ENTRIES = 1024

# Define supported languages - use codes that both EasyOCR and Google
# Translator support
SUPPORTED_LANGUAGES = {
    'en': 'English',
    'hi': 'Hindi',
    'mr': 'Marathi',
    'ne': 'Nepali',
}

# Candidate fonts, tried in order. Missing files are skipped.
_LATIN_FONT_PATHS = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
    "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
)

# NOTE(review): paths follow the usual Debian/Ubuntu font package layout and
# these fonts are assumed to carry Devanagari glyphs - verify on the
# deployment image. Correct shaping of conjuncts may additionally require
# Pillow to be built with libraqm - TODO confirm.
_DEVANAGARI_FONT_PATHS = (
    "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Bold.ttf",
    "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
    "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf",
    "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
)


def initialize_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    Returns:
        The module-level ``easyocr.Reader`` instance, or ``None`` when
        construction failed. A failure is not cached, so the next call
        retries.
    """
    global reader
    if reader is None:
        try:
            # Only load essential languages to reduce memory usage
            reader = easyocr.Reader(['en', 'hi', 'mr', 'ne'], gpu=False)
        except Exception as e:
            print(f"Error initializing EasyOCR: {e}")
            return None
    return reader


def _contains_devanagari(text):
    """Return True when *text* has a character in the Devanagari block."""
    return any("\u0900" <= ch <= "\u097f" for ch in text)


def get_default_font(size=20, text=None):
    """Load the first available TrueType font from a list of system paths.

    Args:
        size: Font size passed to ``ImageFont.truetype``.
        text: Optional string that will be rendered. When it contains
            Devanagari characters, Devanagari-capable fonts are tried before
            the general list so the glyphs are not drawn as empty boxes.

    Returns:
        A Pillow font object; ``ImageFont.load_default()`` when no listed
        file can be opened; ``None`` if even that raises.
    """
    try:
        if text and _contains_devanagari(text):
            font_paths = _DEVANAGARI_FONT_PATHS + _LATIN_FONT_PATHS
        else:
            font_paths = _LATIN_FONT_PATHS

        for path in font_paths:
            try:
                return ImageFont.truetype(path, size=size)
            except OSError:
                continue

        # If all fail, use default font
        return ImageFont.load_default()
    except Exception as e:
        print(f"Font error: {e}")
        return None


def _remember_translation(cache_key, translated):
    """Store a translation, evicting the oldest entry when the cache is full."""
    if len(translation_cache) >= MAX_CACHE_ENTRIES:
        # Dicts preserve insertion order, so the first key is the oldest.
        translation_cache.pop(next(iter(translation_cache)), None)
    translation_cache[cache_key] = translated


def translate_text(text, target_lang):
    """Translate *text* into *target_lang* with caching and retries.

    Args:
        text: Source text; the source language is auto-detected.
        target_lang: Target language code passed to ``GoogleTranslator``.

    Returns:
        The translated string; ``""`` for empty/blank input;
        ``"[Translation Error: ...]"`` when the final attempt raised; or
        ``"[Unable to translate: ...]"`` when every attempt returned an
        empty result.
    """
    if not text or not text.strip():
        return ""

    # A tuple key cannot collide the way a "text|lang" string could when the
    # text itself contains the separator.
    cache_key = (text, target_lang)
    cached = translation_cache.get(cache_key)
    if cached is not None:
        return cached

    max_retries = 3
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            translated = GoogleTranslator(
                source='auto', target=target_lang
            ).translate(text)
        except Exception as e:
            print(f"Translation error (attempt {attempt+1}): {e}")
            if is_last_attempt:
                return f"[Translation Error: {text}]"
        else:
            if translated:
                _remember_translation(cache_key, translated)
                return translated

        # Pause only between attempts, never after the final one.
        if not is_last_attempt:
            time.sleep(1)

    return f"[Unable to translate: {text}]"


def _bbox_bounds(bbox):
    """Return (left, top, right, bottom) of the axis-aligned box around *bbox*.

    Using min/max over every corner keeps the box well-formed for skewed or
    rotated quadrilaterals, where corner order alone could yield a negative
    width or height.
    """
    xs = [float(point[0]) for point in bbox]
    ys = [float(point[1]) for point in bbox]
    return min(xs), min(ys), max(xs), max(ys)


def get_dominant_color(image, bbox, padding=4):
    """Return the mean colour of the padded region around a text box.

    Args:
        image: A PIL image or a numpy array in RGB / RGBA / grayscale
            channel order (i.e. what ``np.array(pil_image)`` produces).
        bbox: Four (x, y) corner points of the detected text.
        padding: Pixels added on every side of the box before sampling.

    Returns:
        An ``(r, g, b, a)`` tuple of ints. Alpha is 230 for a sampled
        colour; ``(240, 240, 240, 180)`` is returned when the region is
        empty or an error occurs.
    """
    try:
        img_array = image if isinstance(image, np.ndarray) else np.array(image)

        left, top, right, bottom = _bbox_bounds(bbox)

        # Expand the area slightly to capture surrounding colours, clamped
        # to the image so slices never wrap around with negative indices.
        x1 = max(0, int(left) - padding)
        y1 = max(0, int(top) - padding)
        x2 = max(x1, min(img_array.shape[1], int(right) + padding))
        y2 = max(y1, min(img_array.shape[0], int(bottom) + padding))

        region = img_array[y1:y2, x1:x2]
        if region.size == 0:
            # Fallback if region is empty
            return (240, 240, 240, 180)

        # Grayscale image: a single intensity stands in for all channels.
        if region.ndim == 2:
            level = int(region.mean())
            return (level, level, level, 230)

        # The array originates from PIL and is already RGB(A); no BGR swap.
        channel_means = region.reshape(-1, region.shape[-1]).mean(axis=0)
        if channel_means.shape[0] < 3:
            level = int(channel_means[0])
            return (level, level, level, 230)

        red, green, blue = (int(value) for value in channel_means[:3])
        return (red, green, blue, 230)
    except Exception as e:
        print(f"Error getting dominant color: {e}")
        # Return a default semi-transparent light color
        return (240, 240, 240, 180)


def _resolve_language(target_lang):
    """Map a language code or display name to a supported code, else None."""
    if not isinstance(target_lang, str):
        return None
    if target_lang in SUPPORTED_LANGUAGES:
        return target_lang
    wanted = target_lang.lower()
    return next(
        (code for code, name in SUPPORTED_LANGUAGES.items()
         if name.lower() == wanted),
        None,
    )


def _opacity_to_alpha(overlay_opacity, default=0.7):
    """Convert an opacity in [0, 1] to an 8-bit alpha value (0-255)."""
    try:
        opacity = float(overlay_opacity)
    except (TypeError, ValueError):
        opacity = default
    opacity = min(1.0, max(0.0, opacity))
    return int(round(opacity * 255))


def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
    """Run OCR on *image*, translate each text region and draw an overlay.

    Args:
        image: PIL image supplied by the Gradio image component.
        target_lang: Language code or display name from
            ``SUPPORTED_LANGUAGES``.
        overlay_opacity: Opacity (0-1) of the background rectangle drawn
            over each detected text region.
        progress: Gradio progress tracker.

    Returns:
        ``(image, message)``: the annotated RGB image together with one
        ``"original → translated"`` line per region, or the unmodified
        input image with an error/status message.
    """
    if image is None:
        return None, "Please upload an image"

    # Validate target language (accepts either a code or a display name)
    lang_code = _resolve_language(target_lang)
    if lang_code is None:
        return image, f"Unsupported language. Supported: {', '.join(SUPPORTED_LANGUAGES.values())}"

    progress(0.1, "Initializing...")

    # Initialize OCR reader
    ocr = initialize_reader()
    if ocr is None:
        return image, "Failed to initialize OCR. Please try again."

    progress(0.3, "Detecting text...")

    try:
        # Work on an RGB copy: Pillow only alpha-blends RGBA fills when the
        # target image is RGB, and a single RGB array serves both OCR and
        # colour sampling regardless of the upload's original mode.
        canvas = image.convert("RGB")
        img_array = np.array(canvas)
        results = ocr.readtext(img_array)

        if not results:
            return image, "No text detected in the image"

        progress(0.6, "Translating text...")

        font = get_default_font(size=20)

        # If font creation failed, return with error
        if font is None:
            return image, "Error loading fonts. Processing without overlay."

        draw = ImageDraw.Draw(canvas, 'RGBA')
        overlay_alpha = _opacity_to_alpha(overlay_opacity)
        padding = 4
        total = len(results)

        # Process detected text
        translations = []
        for i, (bbox, text, _prob) in enumerate(results):
            if not text or not text.strip():
                continue

            progress(0.6 + (0.4 * (i / total)), f"Translating text {i+1}/{total}")

            translated = translate_text(text, lang_code)
            translations.append(f"{text} → {translated}")

            left, top, right, bottom = _bbox_bounds(bbox)
            width = right - left
            height = bottom - top

            # Background colour matches the surroundings; its transparency
            # comes from the user-selected opacity.
            red, green, blue = get_dominant_color(img_array, bbox, padding)[:3]
            draw.rectangle(
                [
                    left - padding,
                    top - padding,
                    right + padding,
                    bottom + padding,
                ],
                fill=(red, green, blue, overlay_alpha),
            )

            # Calculate font size to fit within the bounding box
            fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
            fontsize = max(fontsize, 12)  # Ensure minimum readability

            adjusted_font = get_default_font(size=fontsize, text=translated)
            if adjusted_font is None:
                adjusted_font = font

            # textbbox reports where the ink lands relative to the origin;
            # subtracting that offset centres the glyphs, not the origin.
            ink_left, ink_top, ink_right, ink_bottom = draw.textbbox(
                (0, 0), translated, font=adjusted_font
            )
            text_width = ink_right - ink_left
            text_height = ink_bottom - ink_top
            text_x = left + (width - text_width) / 2 - ink_left
            text_y = top + (height - text_height) / 2 - ink_top

            # Black or white text depending on perceived background brightness
            brightness = (red * 299 + green * 587 + blue * 114) / 1000
            text_color = (0, 0, 0, 255) if brightness > 128 else (255, 255, 255, 255)

            draw.text((text_x, text_y), translated, fill=text_color, font=adjusted_font)

        # Join all translations
        all_translations = "\n".join(translations)

        return canvas, all_translations

    except Exception as e:
        error_details = traceback.format_exc()
        print(f"Error in process_image: {str(e)}\n{error_details}")
        return image, f"Error processing image: {str(e)}"


# Create Gradio interface
with gr.Blocks(title="Multilingual Signboard Translator with Image Overlay") as iface:
    gr.Markdown("# Multilingual Signboard Translator with Image Overlay")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Image")

            with gr.Row():
                target_lang = gr.Dropdown(
                    choices=list(SUPPORTED_LANGUAGES.values()),
                    value="Hindi",
                    label="Translate To"
                )
                overlay_opacity = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Overlay Opacity"
                )

            translate_btn = gr.Button("Translate", variant="primary")

        with gr.Column():
            output_image = gr.Image(type="pil", label="Image with Translated Overlay")
            output_text = gr.Textbox(label="Translated Text Output", lines=10)

    # Connect the button to the processing function
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text]
    )

    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Simple and efficient text extraction and translation
    """)


if __name__ == "__main__":
    # Initialize OCR model at startup to avoid delay on first request.
    # initialize_reader() already reports its own failures and returns None,
    # so no blanket except is needed here (a bare except would also swallow
    # KeyboardInterrupt/SystemExit).
    if initialize_reader() is None:
        print("Warning: OCR reader not ready at startup; it will be retried on the first request.")

    # Launch the app
    iface.launch()