Spaces:

Amandeep01
/

Sign_Win

Sleeping

App Files Files Community

Amandeep01 committed on May 27, 2025

Commit

3dafe75

verified ·

1 Parent(s): 6133149

Create app.py

Browse files

Files changed (1) hide show

app.py +441 -0

app.py ADDED Viewed

	@@ -0,0 +1,441 @@

+import gradio as gr
+import easyocr
+from deep_translator import GoogleTranslator
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+import cv2
+import time
+import re
+from typing import Tuple, List, Optional
+import io
+import os
# Module-level state shared by the OCR and translation helpers.
reader = None  # EasyOCR reader, created lazily on first use
translation_cache = {}  # "text|source|target" -> translated text

# Languages the app works with, keyed by their two-letter code.
SUPPORTED_LANGUAGES = dict(en='English', hi='Hindi', mr='Marathi')

# Display name -> language code handed to the translator; this is simply the
# inverse of SUPPORTED_LANGUAGES, kept in the same order.
LANG_CODE_MAP = {name: code for code, name in SUPPORTED_LANGUAGES.items()}
def initialize_reader():
    """Return the shared EasyOCR reader, building it the first time it is needed.

    The reader is stored in the module-level ``reader`` variable so later
    calls reuse it. If construction fails, the error is printed, ``None`` is
    returned and ``reader`` stays unset, so the next call tries again.
    """
    global reader
    if reader is not None:
        return reader
    try:
        # CPU-only reader covering English, Hindi and Marathi
        reader = easyocr.Reader(['en', 'hi', 'mr'], gpu=False, verbose=False)
        print("EasyOCR initialized successfully")
    except Exception as e:
        print(f"Error initializing EasyOCR: {e}")
        return None
    return reader
def get_font_for_text(text: str, target_size: int = 20) -> Optional[ImageFont.FreeTypeFont]:
    """Pick a font able to render *text* at roughly *target_size*.

    Text containing Devanagari characters (U+0900-U+097F) is matched against
    a Devanagari-first font list; everything else against a Latin list. The
    first candidate file that exists and loads wins.

    Args:
        text: The string that will be drawn with the font.
        target_size: Requested font size.

    Returns:
        The loaded font. When no candidate file is usable, Pillow's built-in
        default font is returned instead (sized to *target_size* when the
        installed Pillow supports it). ``None`` only if even that fails.
    """
    # Check if text contains Devanagari script (Hindi/Marathi)
    has_devanagari = bool(re.search(r'[\u0900-\u097F]', text))
    # Font paths for different scripts
    devanagari_fonts = [
        "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
        "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Bold.ttf",
        "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
    ]
    english_fonts = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
    ]
    font_paths = devanagari_fonts if has_devanagari else english_fonts
    for font_path in font_paths:
        # Skip files that are not installed before asking Pillow to load them
        if not os.path.exists(font_path):
            continue
        try:
            return ImageFont.truetype(font_path, size=target_size)
        except OSError:
            # Unreadable or corrupt font file: try the next candidate
            continue
    # Fallback to the built-in font. Newer Pillow releases accept a size for
    # it; older ones reject the keyword, so retry without it.
    try:
        return ImageFont.load_default(size=target_size)
    except Exception:
        pass  # sized default unavailable; fall through to the unsized one
    try:
        return ImageFont.load_default()
    except Exception as e:
        print(f"Error loading default font: {e}")
        return None
def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Translate *text* into *target_lang*, remembering earlier answers.

    Whitespace runs are collapsed before translating. Successful results are
    stored in ``translation_cache`` under ``"text|source|target"``. Up to
    three attempts are made, pausing half a second after a failed attempt
    that is not the last one.

    Returns:
        ``""`` for empty or blank input, the translation on success, or
        ``"[Translation failed: <text>]"`` when every attempt fails.
    """
    if not (text and text.strip()):
        return ""

    normalized = re.sub(r'\s+', ' ', text.strip())
    key = f"{normalized}|{source_lang}|{target_lang}"
    try:
        return translation_cache[key]
    except KeyError:
        pass  # not translated yet

    attempts = 3
    for attempt_no in range(1, attempts + 1):
        try:
            result = GoogleTranslator(source=source_lang, target=target_lang).translate(normalized)
            if result and result.strip():
                result = result.strip()
                translation_cache[key] = result
                return result
        except Exception as e:
            print(f"Translation attempt {attempt_no} failed: {e}")
            if attempt_no < attempts:
                time.sleep(0.5)

    return f"[Translation failed: {normalized}]"
def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 10, max_size: int = 50) -> int:
    """Estimate a font size that lets *text* fit inside a bounding box.

    Two limits are computed and the smaller one is used:

    * width limit: the box width divided by the text length, assuming each
      character is about 0.6 times as wide as the font size;
    * height limit: 70% of the box height.

    Args:
        text: The string to be drawn.
        bbox_width: Width of the available box in pixels.
        bbox_height: Height of the available box in pixels.
        min_size: Lower bound for the result.
        max_size: Upper bound for the result.

    Returns:
        The estimated size clamped to ``[min_size, max_size]``;
        ``min_size`` when *text* is empty.
    """
    if not text:
        return min_size

    char_width_ratio = 0.6  # approximate character width to font size ratio
    height_fill_ratio = 0.7  # share of the box height the text may occupy

    width_based_size = int(bbox_width / (len(text) * char_width_ratio))
    height_based_size = int(bbox_height * height_fill_ratio)

    # The tighter constraint wins, then the result is clamped to the bounds
    optimal_size = min(width_based_size, height_based_size)
    return max(min_size, min(optimal_size, max_size))
def get_text_color_with_contrast(background_color: Tuple[int, int, int]) -> Tuple[int, int, int, int]:
    """Choose opaque white or black text, whichever contrasts with the background.

    Only the first three components of *background_color* are read. Their
    weighted brightness (0.299 R + 0.587 G + 0.114 B, scaled to 0..1) decides
    the result: below 0.5 gives white text, otherwise black.
    """
    red, green, blue = background_color[:3]
    brightness = (0.299 * red + 0.587 * green + 0.114 * blue) / 255
    is_dark = brightness < 0.5
    return (255, 255, 255, 255) if is_dark else (0, 0, 0, 255)
def extract_background_color(image: np.ndarray, bbox: List, expand_factor: float = 1.2) -> Tuple[int, int, int, int]:
    """Average the pixels in and around a text box to get a backdrop colour.

    The box is grown about its centre by *expand_factor*, clipped to the
    image, and the per-channel mean of that patch is returned with an alpha
    of 220 appended. If the patch is empty or anything goes wrong, the error
    (if any) is printed and ``(240, 240, 240, 200)` is returned.

    Args:
        image: Pixel array indexed as ``[row, column, channel]``.
        bbox: Four corner points in the order top-left, top-right,
            bottom-right, bottom-left; each point is ``(x, y)``.
        expand_factor: Scale applied to the box width and height.
    """
    fallback = (240, 240, 240, 200)
    try:
        tl, tr, _br, bl = bbox

        # Centre of the box and half-extent of the enlarged sampling window
        cx = (tl[0] + tr[0]) / 2
        cy = (tl[1] + bl[1]) / 2
        half_w = abs(tr[0] - tl[0]) * expand_factor / 2
        half_h = abs(bl[1] - tl[1]) * expand_factor / 2

        # Clip the window to the image bounds
        left = max(0, int(cx - half_w))
        top = max(0, int(cy - half_h))
        right = min(image.shape[1], int(cx + half_w))
        bottom = min(image.shape[0], int(cy + half_h))

        patch = image[top:bottom, left:right]
        if patch.size > 0:
            channel_means = patch.reshape(-1, patch.shape[-1]).mean(axis=0)
            return tuple(int(c) for c in channel_means) + (220,)
    except Exception as e:
        print(f"Error extracting background color: {e}")
    return fallback
def create_smart_overlay(image: Image.Image, bbox: List, original_text: str, translated_text: str) -> None:
    """Paint *translated_text* over the region of *image* described by *bbox*.

    The region is first covered with a rectangle filled with the colour
    sampled by ``extract_background_color``; the translation is then drawn
    centred inside it, in white or black depending on that colour.

    Args:
        image: Image to draw on; it is modified in place.
        bbox: Four corner points in the order top-left, top-right,
            bottom-right, bottom-left; each point is ``(x, y)``.
        original_text: The detected source text. Currently unused; kept so
            the signature stays stable for callers.
        translated_text: The text to draw.
    """
    draw = ImageDraw.Draw(image, 'RGBA')
    # Extract bounding box coordinates
    top_left, top_right, bottom_right, bottom_left = bbox
    # Axis-aligned rectangle enclosing the (possibly skewed) quadrilateral
    x = int(min(top_left[0], bottom_left[0]))
    y = int(min(top_left[1], top_right[1]))
    width = int(max(top_right[0], bottom_right[0]) - x)
    height = int(max(bottom_left[1], bottom_right[1]) - y)
    # Calculate optimal font size
    font_size = calculate_optimal_font_size(translated_text, width, height)
    # Get appropriate font
    font = get_font_for_text(translated_text, font_size)
    if font is None:
        font = get_font_for_text(translated_text, 14)  # Fallback size
    # Get background color from image
    img_array = np.array(image.convert('RGB'))
    bg_color = extract_background_color(img_array, bbox)
    # Create background rectangle with padding
    padding = max(2, font_size // 8)
    bg_rect = [
        x - padding,
        y - padding,
        x + width + padding,
        y + height + padding
    ]
    # Cover the original text with the sampled colour
    draw.rectangle(bg_rect, fill=bg_color)
    # Measure the rendered text so it can be centred
    try:
        bbox_text = draw.textbbox((0, 0), translated_text, font=font)
        text_width = bbox_text[2] - bbox_text[0]
        text_height = bbox_text[3] - bbox_text[1]
    except Exception:
        # Measurement unavailable (e.g. older PIL versions without textbbox):
        # fall back to a rough estimate. Catching Exception rather than using
        # a bare except lets KeyboardInterrupt/SystemExit propagate.
        text_width = len(translated_text) * font_size * 0.6
        text_height = font_size
    # Center the text
    text_x = x + (width - text_width) / 2
    text_y = y + (height - text_height) / 2
    # Get optimal text color
    text_color = get_text_color_with_contrast(bg_color[:3])
    # Draw the translated text
    draw.text((text_x, text_y), translated_text, fill=text_color, font=font)
def process_image(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
    """Detect text in *image*, translate it and paint the translations over it.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.
        target_language: Display name of the target language; must be a key
            of ``LANG_CODE_MAP``.
        progress: Callable invoked with a completion fraction and a status
            message as the work advances.

    Returns:
        An ``(image, message)`` pair. On success the image is an RGB copy
        carrying the translated overlays and the message is a per-region
        summary. On any failure the unmodified input image (or ``None``) is
        returned together with a message describing the problem.
    """
    if image is None:
        return None, "❌ Please upload an image first."
    if target_language not in LANG_CODE_MAP:
        return image, f"❌ Unsupported target language: {target_language}"
    target_lang_code = LANG_CODE_MAP[target_language]
    progress(0.1, "🔧 Initializing OCR engine...")
    # Initialize OCR
    ocr = initialize_reader()
    if ocr is None:
        return image, "❌ Failed to initialize OCR. Please try again."
    progress(0.3, "🔍 Extracting text from image...")
    try:
        # Convert PIL image to numpy array for OCR
        img_array = np.array(image)
        results = ocr.readtext(img_array, paragraph=True, width_ths=0.7, height_ths=0.7)
        if not results:
            return image, "ℹ️ No readable text found in the image."
        # Paragraph mode yields (bbox, text) items without a confidence
        # score, while the regular mode yields (bbox, text, confidence).
        # Accept both shapes; items without a score cannot be filtered and
        # are kept.
        filtered_results = []
        for item in results:
            bbox, text = item[0], item[1]
            confidence = item[2] if len(item) > 2 else None
            if confidence is None or confidence > 0.5:
                filtered_results.append((bbox, text, confidence))
        if not filtered_results:
            return image, "ℹ️ No text detected with sufficient confidence."
        progress(0.5, f"🌐 Translating {len(filtered_results)} text regions...")
        # Create a copy of the image for overlay
        result_image = image.copy().convert('RGBA')
        # Process each detected text region
        translations_info = []
        for i, (bbox, text, confidence) in enumerate(filtered_results):
            # Update progress (translation covers the 0.5 - 0.9 range)
            progress(0.5 + (0.4 * i / len(filtered_results)), f"Translating region {i+1}/{len(filtered_results)}")
            if text and text.strip():
                # Clean the extracted text
                cleaned_text = re.sub(r'\s+', ' ', text.strip())
                # Translate the text
                translated = smart_translate(cleaned_text, target_lang_code)
                # Create overlay on image
                create_smart_overlay(result_image, bbox, cleaned_text, translated)
                # Store translation info
                translations_info.append({
                    'original': cleaned_text,
                    'translated': translated,
                    'confidence': confidence
                })
        progress(1.0, "✅ Translation completed!")
        # Convert back to RGB for final output
        final_image = result_image.convert('RGB')
        # Create summary text
        summary_lines = [f"🎯 Successfully processed {len(translations_info)} text regions:\n"]
        for i, info in enumerate(translations_info, 1):
            score = info['confidence']
            score_label = f"{score:.2f}" if score is not None else "n/a"
            summary_lines.append(f"{i}. Original: {info['original']}")
            summary_lines.append(f"   Translation: {info['translated']}")
            summary_lines.append(f"   Confidence: {score_label}\n")
        summary_text = "\n".join(summary_lines)
        return final_image, summary_text
    except Exception as e:
        # Top-level boundary for the UI: report the problem instead of crashing
        error_msg = f"❌ Error processing image: {str(e)}"
        print(f"Processing error: {e}")
        return image, error_msg
# Stylesheet handed to gr.Blocks: page width, gradient title, description
# text and the feature box.
custom_css = """
.gradio-container {
    max-width: 1200px;
    margin: auto;
}
.main-header {
    text-align: center;
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 0.5em;
}
.description {
    text-align: center;
    font-size: 1.1em;
    color: #666;
    margin-bottom: 2em;
}
.feature-box {
    background: #f8f9fa;
    padding: 1em;
    border-radius: 8px;
    margin: 1em 0;
}
"""

# Static HTML shown at the top of the page (title and tagline).
_HEADER_HTML = """
        <div class="main-header">🌐 Multilingual Signboard Translator</div>
        <div class="description">
            Extract and translate text from images with intelligent overlay technology
        </div>
    """

# Static HTML shown below the controls (feature list).
_FEATURES_HTML = """
        <div class="feature-box">
            <h3>✨ Key Features:</h3>
            <ul>
                <li><strong>🎯 Smart OCR:</strong> Advanced text detection with confidence filtering</li>
                <li><strong>🌐 Multilingual Support:</strong> English ↔ Hindi ↔ Marathi translation</li>
                <li><strong>🎨 Intelligent Overlay:</strong> Context-aware text positioning and sizing</li>
                <li><strong>🔧 Adaptive Fonts:</strong> Script-specific font selection for better readability</li>
                <li><strong>⚡ Optimized Performance:</strong> Efficient processing with caching</li>
            </ul>
        </div>
    """

# Page layout: header, a two-column row (inputs on the left, results on the
# right), the click handler, then the feature list.
with gr.Blocks(css=custom_css, title="Multilingual Signboard Translator") as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Row():
        # Left column: image upload, language choice and the action button
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload & Configure")
            input_image = gr.Image(label="📷 Upload Image", type="pil", height=300)
            target_language = gr.Dropdown(
                choices=list(LANG_CODE_MAP),
                value="Hindi",
                label="🎯 Translate To",
                info="Select target language for translation",
            )
            translate_btn = gr.Button("🚀 Translate Text", variant="primary", size="lg")

        # Right column: annotated image and the textual summary
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Results")
            output_image = gr.Image(label="🖼️ Translated Image", type="pil", height=300)
            output_text = gr.Textbox(
                label="📝 Translation Details",
                lines=8,
                max_lines=15,
                info="Detailed translation information",
            )

    # Clicking the button runs process_image on the two inputs and fills
    # both outputs.
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_language],
        outputs=[output_image, output_text],
        show_progress=True,
    )

    gr.HTML(_FEATURES_HTML)
if __name__ == "__main__":
    # Pre-initialize OCR for faster first-time usage
    print("🔧 Pre-initializing OCR engine...")
    try:
        ocr_ready = initialize_reader() is not None
    except Exception as e:
        ocr_ready = False
        print(f"⚠️ OCR initialization warning: {e}")
    # initialize_reader reports failure by returning None, so only announce
    # readiness when a reader actually came back.
    if ocr_ready:
        print("✅ OCR engine ready!")
    else:
        print("⚠️ OCR engine not ready; initialization will be retried on the first request.")
    # Launch the application. `show_tips` is intentionally not passed: it
    # only toggled a console hint and newer Gradio releases reject it.
    demo.launch(
        share=False,
        show_error=True
    )