|
|
import gradio as gr |
|
|
import easyocr |
|
|
from deep_translator import GoogleTranslator |
|
|
from PIL import Image, ImageDraw, ImageFont |
|
|
import numpy as np |
|
|
import io |
|
|
import time |
|
|
import cv2 |
|
|
|
|
|
|
|
|
# Shared EasyOCR reader. Stays None until initialize_reader() manages to
# build one, after which the same instance is reused.
reader = None

# Memo of completed translations, keyed by "<source text>|<target language>".
translation_cache = dict()

# Translation targets: language code -> display name. The display names are
# what the UI dropdown lists, in this insertion order.
SUPPORTED_LANGUAGES = dict(
    en='English',
    hi='Hindi',
    mr='Marathi',
    ne='Nepali',
)
|
|
|
|
|
def initialize_reader():
    """Return the shared EasyOCR reader, creating it on the first call.

    The reader is stored in the module-level ``reader`` global so later calls
    reuse it. It is created on CPU (``gpu=False``) for English, Hindi,
    Marathi and Nepali.

    Returns:
        The reader instance, or None if construction raised (the error is
        printed and ``reader`` stays None so a later call can try again).
    """
    global reader

    # Fast path: a reader was already built by an earlier call.
    if reader is not None:
        return reader

    try:
        reader = easyocr.Reader(['en', 'hi', 'mr', 'ne'], gpu=False)
    except Exception as init_error:
        print(f"Error initializing EasyOCR: {init_error}")
        return None

    return reader
|
|
|
|
|
def get_default_font(size=20):
    """Load a TrueType font at ``size``, falling back to Pillow's built-in font.

    A fixed list of system font paths is tried in order and the first one
    that loads is returned. If none can be opened, Pillow's built-in default
    font is used instead.

    Args:
        size: Requested font size.

    Returns:
        A Pillow font object, or None if an unexpected error occurred (the
        error is printed).
    """
    font_paths = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
    ]

    try:
        for path in font_paths:
            try:
                return ImageFont.truetype(path, size=size)
            except OSError:
                # Font file missing or unreadable; try the next candidate.
                continue

        # No system font could be loaded. Honour the requested size when the
        # installed Pillow accepts it (``size`` was added to load_default in
        # Pillow 10.1); older releases raise TypeError for the unknown
        # keyword, in which case the fixed-size built-in font is used.
        try:
            return ImageFont.load_default(size=size)
        except TypeError:
            return ImageFont.load_default()
    except Exception as e:
        print(f"Font error: {e}")
        return None
|
|
|
|
|
def translate_text(text, target_lang, *, max_retries=3, retry_delay=1):
    """Translate ``text`` into ``target_lang`` with retries and caching.

    Successful translations are stored in the module-level
    ``translation_cache`` and returned from there on later calls.

    Args:
        text: Source text. None, empty or whitespace-only input yields "".
        target_lang: Target language code handed to GoogleTranslator.
        max_retries: Maximum number of translation attempts.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        The translated string on success;
        ``"[Translation Error: <text>]"`` if the final attempt raised;
        ``"[Unable to translate: <text>]"`` if the final attempt returned an
        empty result (or no attempt was made).
    """
    if not text or not text.strip():
        return ""

    cache_key = f"{text}|{target_lang}"
    if cache_key in translation_cache:
        return translation_cache[cache_key]

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            translated = GoogleTranslator(source='auto', target=target_lang).translate(text)
        except Exception as e:
            print(f"Translation error (attempt {attempt+1}): {e}")
            if is_last_attempt:
                return f"[Translation Error: {text}]"
        else:
            if translated:
                translation_cache[cache_key] = translated
                return translated

        # Back off only when another attempt will follow; sleeping after the
        # final attempt would just delay the failure result.
        if not is_last_attempt:
            time.sleep(retry_delay)

    return f"[Unable to translate: {text}]"
|
|
|
|
|
def get_dominant_color(image, bbox, padding=4):
    """Return the mean colour of the padded ``bbox`` region as an RGBA tuple.

    The region is the rectangle spanned by the top-left corner, the
    top-left/top-right x distance and the top-left/bottom-left y distance,
    grown by ``padding`` pixels on every side and clipped to the image.

    Channel order is taken as-is from the input: this file passes arrays made
    with ``np.array`` from PIL images, which are already RGB, so no channel
    swap is applied.

    Args:
        image: A numpy array (H x W or H x W x C) or any object ``np.array``
            can convert, such as a PIL image.
        bbox: Four corner points ordered top-left, top-right, bottom-right,
            bottom-left, each indexable as ``(x, y)``.
        padding: Extra pixels sampled around the box.

    Returns:
        ``(r, g, b, 230)`` with the mean colour of the region. Grayscale
        input repeats the gray level for all three channels.
        ``(240, 240, 240, 180)`` is returned when the region is empty or any
        error occurs (errors are printed).
    """
    fallback = (240, 240, 240, 180)
    try:
        img_array = image if isinstance(image, np.ndarray) else np.array(image)

        top_left, top_right, bottom_right, bottom_left = bbox
        x, y = int(top_left[0]), int(top_left[1])
        width = int(top_right[0] - top_left[0])
        height = int(bottom_left[1] - top_left[1])

        # Clip to the image. The upper bounds are also kept >= the lower
        # bounds so a box left of / above the image gives an empty slice
        # instead of a negative stop index that would wrap around.
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = max(x1, min(img_array.shape[1], x + width + padding))
        y2 = max(y1, min(img_array.shape[0], y + height + padding))

        region = img_array[y1:y2, x1:x2]
        if region.size == 0:
            return fallback

        if region.ndim == 2:
            # Grayscale image: one intensity value stands in for R, G and B.
            gray = int(region.mean())
            return (gray, gray, gray, 230)

        mean_color = region.reshape(-1, region.shape[-1]).mean(axis=0).astype(int)
        if mean_color.size < 3:
            # Single-channel or gray+alpha layout: use the first channel.
            gray = int(mean_color[0])
            return (gray, gray, gray, 230)

        # Any channel past the third (e.g. alpha) is ignored.
        return (int(mean_color[0]), int(mean_color[1]), int(mean_color[2]), 230)
    except Exception as e:
        print(f"Error getting dominant color: {e}")
        return fallback
|
|
|
|
|
def _resolve_language_code(target_lang):
    """Map a language code or display name to a SUPPORTED_LANGUAGES code, else None."""
    if not isinstance(target_lang, str):
        return None
    if target_lang in SUPPORTED_LANGUAGES:
        return target_lang
    wanted = target_lang.lower()
    return next(
        (code for code, name in SUPPORTED_LANGUAGES.items() if name.lower() == wanted),
        None,
    )


def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
    """Run OCR on ``image``, translate each detected text and overlay the result.

    Args:
        image: PIL image to process, or None when nothing was uploaded.
        target_lang: A SUPPORTED_LANGUAGES code ("hi") or display name
            ("Hindi", matched case-insensitively).
        overlay_opacity: Opacity of the box drawn behind each translation,
            from 0 (invisible) to 1 (opaque); values outside that range are
            clamped.
        progress: Gradio progress tracker, called with a fraction and a
            status message.

    Returns:
        A ``(image, text)`` tuple:
        - ``(None, message)`` when no image was given;
        - ``(original image, message)`` for an unsupported language, OCR or
          font initialisation failure, no detected text, or any processing
          error (the traceback is printed);
        - ``(RGB image with overlays, "source → translation" lines)`` on
          success.
    """
    if image is None:
        return None, "Please upload an image"

    # Accepts either a code or a display name; anything else (including a
    # cleared dropdown, which arrives as None) is reported as unsupported.
    target_lang = _resolve_language_code(target_lang)
    if not target_lang:
        return image, f"Unsupported language. Supported: {', '.join(SUPPORTED_LANGUAGES.values())}"

    progress(0.1, "Initializing...")

    ocr = initialize_reader()
    if ocr is None:
        return image, "Failed to initialize OCR. Please try again."

    progress(0.3, "Detecting text...")

    try:
        img_array = np.array(image)
        results = ocr.readtext(img_array)

        if not results:
            return image, "No text detected in the image"

        progress(0.6, "Translating text...")

        base = image.convert("RGBA")
        font = get_default_font(size=20)
        if font is None:
            return image, "Error loading fonts. Processing without overlay."

        # Boxes and text go on a separate transparent layer that is
        # alpha-composited onto the image at the end. Drawing straight onto
        # an RGBA image overwrites pixels instead of blending them, so the
        # requested opacity would otherwise be lost.
        overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
        draw = ImageDraw.Draw(overlay)

        opacity = 0.7 if overlay_opacity is None else float(overlay_opacity)
        box_alpha = int(round(255 * min(1.0, max(0.0, opacity))))

        padding = 4
        fonts_by_size = {20: font}  # avoid reloading a font for every box
        translations = []
        total = len(results)

        for i, (bbox, text, _prob) in enumerate(results):
            if not (text and text.strip()):
                continue

            progress(0.6 + (0.4 * (i / total)), f"Translating text {i+1}/{total}")

            translated = translate_text(text, target_lang)
            translations.append(f"{text} → {translated}")

            # Use the axis-aligned bounding rectangle of the four corners so
            # rotated boxes cannot produce a negative width or height.
            xs = [float(point[0]) for point in bbox]
            ys = [float(point[1]) for point in bbox]
            x, y = min(xs), min(ys)
            width, height = max(xs) - x, max(ys) - y
            aligned_box = [
                (x, y),
                (x + width, y),
                (x + width, y + height),
                (x, y + height),
            ]

            r, g, b, _ = get_dominant_color(img_array, aligned_box)

            draw.rectangle(
                [
                    x - padding,
                    y - padding,
                    x + width + padding,
                    y + height + padding,
                ],
                fill=(r, g, b, box_alpha),
            )

            # Shrink the font for long translations, but never below 12.
            fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
            fontsize = max(fontsize, 12)

            adjusted_font = fonts_by_size.get(fontsize)
            if adjusted_font is None:
                adjusted_font = get_default_font(size=fontsize) or font
                fonts_by_size[fontsize] = adjusted_font

            # textbbox reports where the ink lands relative to the anchor;
            # subtracting its origin centres the ink itself in the box.
            left, top, right, bottom = draw.textbbox((0, 0), translated, font=adjusted_font)
            text_x = x + (width - (right - left)) / 2 - left
            text_y = y + (height - (bottom - top)) / 2 - top

            # Black text on light backgrounds, white text on dark ones.
            brightness = (r * 299 + g * 587 + b * 114) / 1000
            text_color = (0, 0, 0, 255) if brightness > 128 else (255, 255, 255, 255)

            draw.text((text_x, text_y), translated, fill=text_color, font=adjusted_font)

        all_translations = "\n".join(translations)
        result_image = Image.alpha_composite(base, overlay).convert('RGB')

        return result_image, all_translations

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in process_image: {str(e)}\n{error_details}")
        return image, f"Error processing image: {str(e)}"
|
|
|
|
|
|
|
|
# Gradio UI: upload and options on the left, translated results on the right.
with gr.Blocks(title="Multilingual Signboard Translator with Image Overlay") as iface:
    gr.Markdown("# Multilingual Signboard Translator with Image Overlay")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")

    with gr.Row():
        # Left column: source image, options and the action button.
        with gr.Column():
            input_image = gr.Image(label="Upload Image", type="pil")

            with gr.Row():
                # The dropdown lists display names; process_image maps the
                # chosen name back to its language code.
                target_lang = gr.Dropdown(
                    label="Translate To",
                    choices=[*SUPPORTED_LANGUAGES.values()],
                    value="Hindi",
                )
                overlay_opacity = gr.Slider(
                    label="Overlay Opacity",
                    minimum=0.1,
                    maximum=1.0,
                    step=0.1,
                    value=0.7,
                )

            translate_btn = gr.Button("Translate", variant="primary")

        # Right column: annotated image and the list of translations.
        with gr.Column():
            output_image = gr.Image(label="Image with Translated Overlay", type="pil")
            output_text = gr.Textbox(label="Translated Text Output", lines=10)

    # Wire the button to the OCR + translation pipeline.
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text],
    )

    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Simple and efficient text extraction and translation
    """)
|
|
|
|
|
if __name__ == "__main__":
    # Build the OCR reader before serving. initialize_reader() already
    # reports its own construction errors and returns None; anything else
    # that goes wrong is logged instead of silently discarded. A failure
    # here is not fatal because process_image() calls initialize_reader()
    # again on each request.
    try:
        initialize_reader()
    except Exception as e:
        print(f"OCR warm-up failed: {e}")

    iface.launch()