# app.py — multilingual signboard translator (Gradio Space)
# (Removed Hugging Face web-page residue — "Amandeep01's picture / Update app.py /
# d1d3e5b verified" — which was scrape junk, not valid Python.)
import gradio as gr
import easyocr
from deep_translator import GoogleTranslator
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import io
import time
import cv2
# Module-level shared state.
reader = None            # lazily-created EasyOCR reader (see initialize_reader)
translation_cache = {}   # maps "text|target_lang" -> translated string

# Languages offered in the UI: codes accepted by both EasyOCR and
# deep_translator's GoogleTranslator, mapped to their display names.
SUPPORTED_LANGUAGES = dict(
    en='English',
    hi='Hindi',
    mr='Marathi',
    ne='Nepali',
)
def initialize_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    The reader is built CPU-only with just the four supported languages
    to keep memory usage down.  Returns ``None`` if construction fails.
    """
    global reader
    if reader is not None:
        return reader
    try:
        reader = easyocr.Reader(['en', 'hi', 'mr', 'ne'], gpu=False)
    except Exception as exc:
        print(f"Error initializing EasyOCR: {exc}")
        return None
    return reader
def get_default_font(size=20):
    """Return a usable PIL font at *size*, trying common Linux font paths.

    Falls back to Pillow's built-in bitmap font when no TrueType font is
    found, and to ``None`` only on an unexpected error.
    """
    candidates = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
    )
    try:
        for path in candidates:
            try:
                return ImageFont.truetype(path, size=size)
            except OSError:
                # Font file missing on this host — try the next candidate.
                continue
        # No TrueType font available; use Pillow's default bitmap font.
        return ImageFont.load_default()
    except Exception as exc:
        print(f"Font error: {exc}")
        return None
def translate_text(text, target_lang):
    """Translate *text* into *target_lang*, with caching and retries.

    Returns "" for empty input.  On repeated failure, returns a bracketed
    error string containing the original text instead of raising.
    """
    if not text or not text.strip():
        return ""

    # Only successful (truthy) translations are ever cached, so a plain
    # .get() with a None check is equivalent to a membership test.
    cache_key = f"{text}|{target_lang}"
    cached = translation_cache.get(cache_key)
    if cached is not None:
        return cached

    attempts = 3
    for attempt in range(attempts):
        try:
            result = GoogleTranslator(source='auto', target=target_lang).translate(text)
            if result:
                translation_cache[cache_key] = result
                return result
            time.sleep(1)  # empty result — pause briefly, then retry
        except Exception as exc:
            print(f"Translation error (attempt {attempt+1}): {exc}")
            if attempt == attempts - 1:
                return f"[Translation Error: {text}]"
            time.sleep(1)  # back off before the next attempt
    return f"[Unable to translate: {text}]"
def get_dominant_color(image, bbox, padding=4):
    """Return the mean RGBA color of the area around a detected text box.

    Parameters
    ----------
    image : PIL.Image or np.ndarray
        Source image.  When it originates from ``np.array(PIL.Image)`` (as in
        process_image) the channel order is RGB.
    bbox : sequence of four (x, y) points
        EasyOCR-style corners: top-left, top-right, bottom-right, bottom-left.
    padding : int
        Extra pixels sampled around the box to capture surrounding color.

    Returns
    -------
    tuple
        (r, g, b, alpha) with alpha=230, or a light-gray fallback
        (240, 240, 240, 180) when the region is empty or an error occurs.
    """
    try:
        img_array = image if isinstance(image, np.ndarray) else np.array(image)

        # Bounding box geometry (EasyOCR corner order).
        top_left, top_right, bottom_right, bottom_left = bbox
        x, y = int(top_left[0]), int(top_left[1])
        width = int(top_right[0] - top_left[0])
        height = int(bottom_left[1] - top_left[1])

        # Expand the sampled area slightly, clamped to the image bounds.
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(img_array.shape[1], x + width + padding)
        y2 = min(img_array.shape[0], y + height + padding)

        region = img_array[y1:y2, x1:x2]
        if region.size == 0:
            # Box falls entirely outside the image — fall back to light gray.
            return (240, 240, 240, 180)

        # BUG FIX: the array comes from np.array(PIL.Image) and is already RGB;
        # the previous unconditional cv2.COLOR_BGR2RGB conversion swapped the
        # red and blue channels, producing wrong overlay colors.
        pixels = region.reshape(-1, region.shape[-1])
        dominant = np.mean(pixels, axis=0).astype(int)

        # Alpha 230 gives a mostly-opaque background patch behind the text.
        return (int(dominant[0]), int(dominant[1]), int(dominant[2]), 230)
    except Exception as e:
        print(f"Error getting dominant color: {e}")
        return (240, 240, 240, 180)
def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
    """Detect text in *image*, translate it, and draw translated overlays.

    Parameters
    ----------
    image : PIL.Image or None
        Uploaded image; ``None`` yields an error message.
    target_lang : str
        A SUPPORTED_LANGUAGES code ('hi') or display name ('Hindi').
    overlay_opacity : float
        0.1–1.0 multiplier applied to the overlay background alpha.
    progress : gr.Progress
        Gradio progress reporter.

    Returns
    -------
    (PIL.Image, str)
        The image with overlays and the joined translations, or the original
        image plus an error message on failure.
    """
    if image is None:
        return None, "Please upload an image"

    # The UI dropdown sends display names; accept either a name or a code.
    if target_lang not in SUPPORTED_LANGUAGES:
        target_lang = next((code for code, name in SUPPORTED_LANGUAGES.items()
                            if name.lower() == target_lang.lower()), None)
        if not target_lang:
            return image, f"Unsupported language. Supported: {', '.join(SUPPORTED_LANGUAGES.values())}"

    progress(0.1, "Initializing...")
    ocr = initialize_reader()
    if ocr is None:
        return image, "Failed to initialize OCR. Please try again."

    progress(0.3, "Detecting text...")
    try:
        img_array = np.array(image)  # RGB array for OCR and color sampling
        results = ocr.readtext(img_array)
        if not results:
            return image, "No text detected in the image"

        progress(0.6, "Translating text...")
        # RGBA copy so the overlay rectangles can be semi-transparent.
        img_pil = image.copy().convert("RGBA")
        font = get_default_font(size=20)
        if font is None:
            return image, "Error loading fonts. Processing without overlay."
        draw = ImageDraw.Draw(img_pil, 'RGBA')

        translations = []
        for i, (bbox, text, prob) in enumerate(results):
            if not (text and text.strip()):
                continue
            progress(0.6 + (0.4 * (i / len(results))), f"Translating text {i+1}/{len(results)}")

            translated = translate_text(text, target_lang)
            # BUG FIX: original joined source and translation with no
            # separator (f"{text}{translated}"), which was unreadable.
            translations.append(f"{text} → {translated}")

            # Bounding box geometry (EasyOCR corner order).
            top_left, top_right, bottom_right, bottom_left = bbox
            x, y = top_left[0], top_left[1]
            width = top_right[0] - top_left[0]
            height = bottom_left[1] - top_left[1]

            # Background patch colored like the surrounding area.
            r, g, b, base_alpha = get_dominant_color(img_array, bbox)
            # BUG FIX: honor the UI opacity slider — overlay_opacity was
            # accepted but never used before.
            alpha = max(0, min(255, int(base_alpha * overlay_opacity)))
            bg_color = (r, g, b, alpha)

            padding = 4
            draw.rectangle(
                [x - padding, y - padding, x + width + padding, y + height + padding],
                fill=bg_color
            )

            # Shrink the font so the translation roughly fits the box,
            # but never below 12pt for readability.
            fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
            fontsize = max(fontsize, 12)
            adjusted_font = get_default_font(size=fontsize)
            if adjusted_font is None:
                adjusted_font = font

            # Center the translated text inside the bounding box.
            text_size = draw.textbbox((0, 0), translated, font=adjusted_font)
            text_width = text_size[2] - text_size[0]
            text_height = text_size[3] - text_size[1]
            text_x = x + (width - text_width) / 2
            text_y = y + (height - text_height) / 2

            # Pick black or white text based on perceived background brightness
            # (ITU-R BT.601 luma weights).
            brightness = (r * 299 + g * 587 + b * 114) / 1000
            text_color = (0, 0, 0, 255) if brightness > 128 else (255, 255, 255, 255)
            draw.text((text_x, text_y), translated, fill=text_color, font=adjusted_font)

        all_translations = "\n".join(translations)
        # Back to RGB for display in the Gradio image component.
        return img_pil.convert('RGB'), all_translations
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in process_image: {str(e)}\n{error_details}")
        return image, f"Error processing image: {str(e)}"
# Create Gradio interface: two-column layout with the upload/controls on the
# left and the translated image + text output on the right.
with gr.Blocks(title="Multilingual Signboard Translator with Image Overlay") as iface:
    gr.Markdown("# Multilingual Signboard Translator with Image Overlay")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")
    with gr.Row():
        with gr.Column():
            # Input side: image upload, target language, and overlay opacity.
            input_image = gr.Image(type="pil", label="Upload Image")
            with gr.Row():
                # NOTE: choices are display names ("Hindi"), not language
                # codes — process_image maps names back to codes.
                target_lang = gr.Dropdown(
                    choices=list(SUPPORTED_LANGUAGES.values()),
                    value="Hindi",
                    label="Translate To"
                )
                overlay_opacity = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Overlay Opacity"
                )
            translate_btn = gr.Button("Translate", variant="primary")
        with gr.Column():
            # Output side: annotated image and the raw translation list.
            output_image = gr.Image(type="pil", label="Image with Translated Overlay")
            output_text = gr.Textbox(label="Translated Text Output", lines=10)
    # Connect the button to the processing function
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text]
    )
    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Simple and efficient text extraction and translation
    """)
if __name__ == "__main__":
    # Warm up the OCR model at startup so the first request is not slow.
    # Failure here is non-fatal: initialize_reader() will retry on demand.
    try:
        initialize_reader()
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt; narrow to Exception.
        pass
    # Launch the app
    iface.launch()