# app.py — multilingual signboard translator (Gradio Space)
# (Removed Hugging Face web-page residue — "Amandeep01's picture / Update app.py /
# d1d3e5b verified" — which was scrape junk, not valid Python.)
import gradio as gr
import easyocr
from deep_translator import GoogleTranslator
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import io
import time
import cv2
# Module-level shared state.
reader = None            # lazily-created EasyOCR reader (see initialize_reader)
translation_cache = {}   # maps "text|target_lang" -> translated string

# Languages offered in the UI: codes accepted by both EasyOCR and
# deep_translator's GoogleTranslator, mapped to their display names.
SUPPORTED_LANGUAGES = dict(
    en='English',
    hi='Hindi',
    mr='Marathi',
    ne='Nepali',
)
def initialize_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    The reader is built CPU-only with just the four supported languages
    to keep memory usage down.  Returns ``None`` if construction fails.
    """
    global reader
    if reader is not None:
        return reader
    try:
        reader = easyocr.Reader(['en', 'hi', 'mr', 'ne'], gpu=False)
    except Exception as exc:
        print(f"Error initializing EasyOCR: {exc}")
        return None
    return reader
def get_default_font(size=20):
    """Return a usable PIL font at *size*, trying common Linux font paths.

    Falls back to Pillow's built-in bitmap font when no TrueType font is
    found, and to ``None`` only on an unexpected error.
    """
    candidates = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
    )
    try:
        for path in candidates:
            try:
                return ImageFont.truetype(path, size=size)
            except OSError:
                # Font file missing on this host — try the next candidate.
                continue
        # No TrueType font available; use Pillow's default bitmap font.
        return ImageFont.load_default()
    except Exception as exc:
        print(f"Font error: {exc}")
        return None
def translate_text(text, target_lang):
    """Translate *text* into *target_lang*, with caching and retries.

    Returns "" for empty input.  On repeated failure, returns a bracketed
    error string containing the original text instead of raising.
    """
    if not text or not text.strip():
        return ""

    # Only successful (truthy) translations are ever cached, so a plain
    # .get() with a None check is equivalent to a membership test.
    cache_key = f"{text}|{target_lang}"
    cached = translation_cache.get(cache_key)
    if cached is not None:
        return cached

    attempts = 3
    for attempt in range(attempts):
        try:
            result = GoogleTranslator(source='auto', target=target_lang).translate(text)
            if result:
                translation_cache[cache_key] = result
                return result
            time.sleep(1)  # empty result — pause briefly, then retry
        except Exception as exc:
            print(f"Translation error (attempt {attempt+1}): {exc}")
            if attempt == attempts - 1:
                return f"[Translation Error: {text}]"
            time.sleep(1)  # back off before the next attempt
    return f"[Unable to translate: {text}]"
def get_dominant_color(image, bbox, padding=4):
    """Return the mean RGBA color of the area around a detected text box.

    Parameters
    ----------
    image : PIL.Image or np.ndarray
        Source image.  When it originates from ``np.array(PIL.Image)`` (as in
        process_image) the channel order is RGB.
    bbox : sequence of four (x, y) points
        EasyOCR-style corners: top-left, top-right, bottom-right, bottom-left.
    padding : int
        Extra pixels sampled around the box to capture surrounding color.

    Returns
    -------
    tuple
        (r, g, b, alpha) with alpha=230, or a light-gray fallback
        (240, 240, 240, 180) when the region is empty or an error occurs.
    """
    try:
        img_array = image if isinstance(image, np.ndarray) else np.array(image)

        # Bounding box geometry (EasyOCR corner order).
        top_left, top_right, bottom_right, bottom_left = bbox
        x, y = int(top_left[0]), int(top_left[1])
        width = int(top_right[0] - top_left[0])
        height = int(bottom_left[1] - top_left[1])

        # Expand the sampled area slightly, clamped to the image bounds.
        x1 = max(0, x - padding)
        y1 = max(0, y - padding)
        x2 = min(img_array.shape[1], x + width + padding)
        y2 = min(img_array.shape[0], y + height + padding)

        region = img_array[y1:y2, x1:x2]
        if region.size == 0:
            # Box falls entirely outside the image — fall back to light gray.
            return (240, 240, 240, 180)

        # BUG FIX: the array comes from np.array(PIL.Image) and is already RGB;
        # the previous unconditional cv2.COLOR_BGR2RGB conversion swapped the
        # red and blue channels, producing wrong overlay colors.
        pixels = region.reshape(-1, region.shape[-1])
        dominant = np.mean(pixels, axis=0).astype(int)

        # Alpha 230 gives a mostly-opaque background patch behind the text.
        return (int(dominant[0]), int(dominant[1]), int(dominant[2]), 230)
    except Exception as e:
        print(f"Error getting dominant color: {e}")
        return (240, 240, 240, 180)
def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
    """Detect text in *image*, translate it, and draw translated overlays.

    Parameters
    ----------
    image : PIL.Image or None
        Uploaded image; ``None`` yields an error message.
    target_lang : str
        A SUPPORTED_LANGUAGES code ('hi') or display name ('Hindi').
    overlay_opacity : float
        0.1–1.0 multiplier applied to the overlay background alpha.
    progress : gr.Progress
        Gradio progress reporter.

    Returns
    -------
    (PIL.Image, str)
        The image with overlays and the joined translations, or the original
        image plus an error message on failure.
    """
    if image is None:
        return None, "Please upload an image"

    # The UI dropdown sends display names; accept either a name or a code.
    if target_lang not in SUPPORTED_LANGUAGES:
        target_lang = next((code for code, name in SUPPORTED_LANGUAGES.items()
                            if name.lower() == target_lang.lower()), None)
        if not target_lang:
            return image, f"Unsupported language. Supported: {', '.join(SUPPORTED_LANGUAGES.values())}"

    progress(0.1, "Initializing...")
    ocr = initialize_reader()
    if ocr is None:
        return image, "Failed to initialize OCR. Please try again."

    progress(0.3, "Detecting text...")
    try:
        img_array = np.array(image)  # RGB array for OCR and color sampling
        results = ocr.readtext(img_array)
        if not results:
            return image, "No text detected in the image"

        progress(0.6, "Translating text...")
        # RGBA copy so the overlay rectangles can be semi-transparent.
        img_pil = image.copy().convert("RGBA")
        font = get_default_font(size=20)
        if font is None:
            return image, "Error loading fonts. Processing without overlay."
        draw = ImageDraw.Draw(img_pil, 'RGBA')

        translations = []
        for i, (bbox, text, prob) in enumerate(results):
            if not (text and text.strip()):
                continue
            progress(0.6 + (0.4 * (i / len(results))), f"Translating text {i+1}/{len(results)}")

            translated = translate_text(text, target_lang)
            # BUG FIX: original joined source and translation with no
            # separator (f"{text}{translated}"), which was unreadable.
            translations.append(f"{text} → {translated}")

            # Bounding box geometry (EasyOCR corner order).
            top_left, top_right, bottom_right, bottom_left = bbox
            x, y = top_left[0], top_left[1]
            width = top_right[0] - top_left[0]
            height = bottom_left[1] - top_left[1]

            # Background patch colored like the surrounding area.
            r, g, b, base_alpha = get_dominant_color(img_array, bbox)
            # BUG FIX: honor the UI opacity slider — overlay_opacity was
            # accepted but never used before.
            alpha = max(0, min(255, int(base_alpha * overlay_opacity)))
            bg_color = (r, g, b, alpha)

            padding = 4
            draw.rectangle(
                [x - padding, y - padding, x + width + padding, y + height + padding],
                fill=bg_color
            )

            # Shrink the font so the translation roughly fits the box,
            # but never below 12pt for readability.
            fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
            fontsize = max(fontsize, 12)
            adjusted_font = get_default_font(size=fontsize)
            if adjusted_font is None:
                adjusted_font = font

            # Center the translated text inside the bounding box.
            text_size = draw.textbbox((0, 0), translated, font=adjusted_font)
            text_width = text_size[2] - text_size[0]
            text_height = text_size[3] - text_size[1]
            text_x = x + (width - text_width) / 2
            text_y = y + (height - text_height) / 2

            # Pick black or white text based on perceived background brightness
            # (ITU-R BT.601 luma weights).
            brightness = (r * 299 + g * 587 + b * 114) / 1000
            text_color = (0, 0, 0, 255) if brightness > 128 else (255, 255, 255, 255)
            draw.text((text_x, text_y), translated, fill=text_color, font=adjusted_font)

        all_translations = "\n".join(translations)
        # Back to RGB for display in the Gradio image component.
        return img_pil.convert('RGB'), all_translations
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in process_image: {str(e)}\n{error_details}")
        return image, f"Error processing image: {str(e)}"
# Create Gradio interface: two-column layout with the upload/controls on the
# left and the translated image + text output on the right.
with gr.Blocks(title="Multilingual Signboard Translator with Image Overlay") as iface:
    gr.Markdown("# Multilingual Signboard Translator with Image Overlay")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")
    with gr.Row():
        with gr.Column():
            # Input side: image upload, target language, and overlay opacity.
            input_image = gr.Image(type="pil", label="Upload Image")
            with gr.Row():
                # NOTE: choices are display names ("Hindi"), not language
                # codes — process_image maps names back to codes.
                target_lang = gr.Dropdown(
                    choices=list(SUPPORTED_LANGUAGES.values()),
                    value="Hindi",
                    label="Translate To"
                )
                overlay_opacity = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Overlay Opacity"
                )
            translate_btn = gr.Button("Translate", variant="primary")
        with gr.Column():
            # Output side: annotated image and the raw translation list.
            output_image = gr.Image(type="pil", label="Image with Translated Overlay")
            output_text = gr.Textbox(label="Translated Text Output", lines=10)
    # Connect the button to the processing function
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text]
    )
    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Simple and efficient text extraction and translation
    """)
if __name__ == "__main__":
    # Warm up the OCR model at startup so the first request is not slow.
    # Failure here is non-fatal: initialize_reader() will retry on demand.
    try:
        initialize_reader()
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt; narrow to Exception.
        pass
    # Launch the app
    iface.launch()