Spaces:

Amandeep01
/

Sign_Win

Sleeping

App Files Community

Amandeep01 committed on May 27, 2025

Commit

eed6b37

verified ·

1 Parent(s): e786136

Update app.py

Browse files

Files changed (1) hide show

app.py +216 -487

app.py CHANGED Viewed

@@ -6,203 +6,60 @@ import numpy as np
 import cv2
 import time
 import re
-from typing import Tuple, List, Optional, Dict
 import io
 import os
-from collections import defaultdict
-import math
 # Global variables
 reader = None
 translation_cache = {}
-# Define supported languages
 SUPPORTED_LANGUAGES = {
     'en': 'English',
-    'hi': 'Hindi',
-    'es': 'Spanish',
-    'fr': 'French',
-    'de': 'German',
-    'ja': 'Japanese',
-    'ko': 'Korean',
-    'zh': 'Chinese'
 }
 # Language code mapping for Google Translator
 LANG_CODE_MAP = {
     'English': 'en',
-    'Hindi': 'hi',
-    'Spanish': 'es',
-    'French': 'fr',
-    'German': 'de',
-    'Japanese': 'ja',
-    'Korean': 'ko',
-    'Chinese': 'zh'
 }
 def initialize_reader():
-    """Initialize EasyOCR reader with fallback options"""
     global reader
     if reader is None:
-        # Try different initialization strategies
-        init_strategies = [
-            (['en', 'hi'], "English and Hindi"),
-            (['en'], "English only"),
-            (['en', 'hi'], "English and Hindi with verbose"),
-        ]
-        for i, (languages, description) in enumerate(init_strategies):
-            try:
-                print(f"Attempting OCR initialization: {description}")
-                verbose_setting = True if i == 2 else False
-                reader = easyocr.Reader(
-                    languages,
-                    gpu=False,
-                    verbose=verbose_setting,
-                    download_enabled=True,
-                    detector=True,
-                    recognizer=True
-                )
-                print(f"✅ EasyOCR initialized successfully with {description}")
-                return reader
-            except ImportError as e:
-                print(f"❌ Import error: {e}")
-                continue
-            except Exception as e:
-                print(f"❌ Initialization attempt {i+1} failed: {e}")
-                if i < len(init_strategies) - 1:
-                    print("Trying alternative approach...")
-                    continue
-                else:
-                    print("All initialization strategies failed")
-        # If all strategies fail, return None
-        reader = None
-        print("❌ Could not initialize EasyOCR with any strategy")
     return reader
-def calculate_distance(box1, box2):
-    """Calculate distance between two bounding boxes"""
-    # Get center points
-    center1 = [(box1[0][0] + box1[2][0]) / 2, (box1[0][1] + box1[2][1]) / 2]
-    center2 = [(box2[0][0] + box2[2][0]) / 2, (box2[0][1] + box2[2][1]) / 2]
-    return math.sqrt((center1[0] - center2[0])**2 + (center1[1] - center2[1])**2)
-def are_boxes_on_same_line(box1, box2, tolerance=20):
-    """Check if two bounding boxes are on the same horizontal line"""
-    # Get y-coordinates (vertical positions)
-    y1_avg = (box1[0][1] + box1[2][1]) / 2
-    y2_avg = (box2[0][1] + box2[2][1]) / 2
-    return abs(y1_avg - y2_avg) <= tolerance
-def group_text_regions(ocr_results, line_tolerance=25, proximity_threshold=50):
-    """Group OCR results into meaningful text blocks"""
-    if not ocr_results:
-        return []
-    # Sort by vertical position first, then horizontal
-    sorted_results = sorted(ocr_results, key=lambda x: (x[0][0][1], x[0][0][0]))
-    grouped_lines = []
-    current_line = [sorted_results[0]]
-    for i in range(1, len(sorted_results)):
-        current_box = sorted_results[i][0]
-        prev_box = current_line[-1][0]
-        # Check if boxes are on the same line
-        if are_boxes_on_same_line(current_box, prev_box, line_tolerance):
-            # Check proximity (not too far apart horizontally)
-            if calculate_distance(current_box, prev_box) <= proximity_threshold:
-                current_line.append(sorted_results[i])
-            else:
-                # Start new line if too far apart
-                grouped_lines.append(current_line)
-                current_line = [sorted_results[i]]
-        else:
-            # Different line
-            grouped_lines.append(current_line)
-            current_line = [sorted_results[i]]
-    # Don't forget the last line
-    if current_line:
-        grouped_lines.append(current_line)
-    # Merge text within each line
-    merged_groups = []
-    for line in grouped_lines:
-        if len(line) == 1:
-            merged_groups.append(line[0])
-        else:
-            # Sort by horizontal position within the line
-            line.sort(key=lambda x: x[0][0][0])
-            # Merge text
-            merged_text = ' '.join([item[1] for item in line])
-            # Create combined bounding box
-            all_points = []
-            for item in line:
-                all_points.extend(item[0])
-            # Find min/max coordinates
-            x_coords = [point[0] for point in all_points]
-            y_coords = [point[1] for point in all_points]
-            min_x, max_x = min(x_coords), max(x_coords)
-            min_y, max_y = min(y_coords), max(y_coords)
-            # Create new bounding box
-            merged_bbox = [[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]
-            # Use average confidence
-            avg_confidence = sum([item[2] for item in line]) / len(line)
-            merged_groups.append((merged_bbox, merged_text, avg_confidence))
-    return merged_groups
 def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFont:
-    """Get appropriate font based on text content"""
-    # Check for different scripts
     has_devanagari = bool(re.search(r'[\u0900-\u097F]', text))
-    has_chinese = bool(re.search(r'[\u4e00-\u9fff]', text))
-    has_japanese = bool(re.search(r'[\u3040-\u309f\u30a0-\u30ff]', text))
-    has_korean = bool(re.search(r'[\uac00-\ud7af]', text))
-    has_arabic = bool(re.search(r'[\u0600-\u06ff]', text))
     # Font paths for different scripts
-    font_paths = []
-    if has_devanagari:
-        font_paths.extend([
-            "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
-            "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf"
-        ])
-    if has_chinese or has_japanese:
-        font_paths.extend([
-            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
-            "/usr/share/fonts/truetype/arphic/uming.ttc"
-        ])
-    if has_korean:
-        font_paths.append("/usr/share/fonts/truetype/noto/NotoSansKR-Regular.otf")
-    if has_arabic:
-        font_paths.append("/usr/share/fonts/truetype/noto/NotoSansArabic-Regular.ttf")
-    # Default fonts
-    font_paths.extend([
         "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
         "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
-        "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf"
-    ])
     for font_path in font_paths:
         try:
@@ -211,14 +68,14 @@ def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFon
         except (OSError, IOError):
             continue
-    # Fallback
     try:
         return ImageFont.load_default()
     except:
         return None
-def smart_translate_with_context(text: str, target_lang: str, source_lang: str = 'auto') -> str:
-    """Enhanced translation with better context handling"""
     if not text or not text.strip():
         return ""
@@ -230,42 +87,16 @@ def smart_translate_with_context(text: str, target_lang: str, source_lang: str =
     if cache_key in translation_cache:
         return translation_cache[cache_key]
-    # Pre-processing for better translation context
-    # Handle common signboard patterns
-    signboard_patterns = {
-        r'\b(no|not|don\'t|do not)\s+(use|mobile|phone|cell)\b': 'prohibition_mobile',
-        r'\b(please|kindly)\s+(do not|don\'t)\s+(use|mobile|phone)\b': 'polite_prohibition_mobile',
-        r'\b(exit|entrance|entry|way out|way in)\b': 'direction',
-        r'\b(toilet|restroom|bathroom|washroom)\b': 'facility',
-        r'\b(parking|park|no parking)\b': 'parking',
-        r'\b(emergency|fire|safety)\b': 'safety'
-    }
-    context_hint = ""
-    for pattern, context in signboard_patterns.items():
-        if re.search(pattern, cleaned_text.lower()):
-            context_hint = f"[Signboard context: {context}] "
-            break
-    max_retries = 3
     for attempt in range(max_retries):
         try:
             translator = GoogleTranslator(source=source_lang, target=target_lang)
-            # Add context hint for better translation
-            text_to_translate = context_hint + cleaned_text if context_hint else cleaned_text
-            translated = translator.translate(text_to_translate)
-            if translated and translated.strip():
-                # Remove context hint from result if it was added
-                if context_hint and translated.startswith('['):
-                    # Try to remove the context hint from translation
-                    bracket_end = translated.find('] ')
-                    if bracket_end != -1:
-                        translated = translated[bracket_end + 2:].strip()
-                # Post-process for common improvements
-                translated = post_process_translation(translated, target_lang)
                 # Cache successful translation
                 translation_cache[cache_key] = translated
@@ -274,149 +105,137 @@ def smart_translate_with_context(text: str, target_lang: str, source_lang: str =
         except Exception as e:
             print(f"Translation attempt {attempt + 1} failed: {e}")
             if attempt < max_retries - 1:
-                time.sleep(0.5)
-    return cleaned_text
-def post_process_translation(translated_text: str, target_lang: str) -> str:
-    """Post-process translation for better quality"""
-    # Language-specific post-processing
-    if target_lang == 'hi':  # Hindi
-        # Common corrections for Hindi translations
-        corrections = {
-            'मत करो': 'न करें',  # More polite form
-            'का उपयोग मत करो': 'का उपयोग न करें',
-            'फोन का उपयोग': 'मोबाइल का उपयोग'
-        }
-        for old, new in corrections.items():
-            translated_text = translated_text.replace(old, new)
-    return translated_text.strip()
-def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 12, max_size: int = 48) -> int:
-    """Calculate optimal font size with better scaling"""
     if not text:
         return min_size
-    # Estimate character width (varies by language)
-    char_width_ratio = 0.7  # More conservative estimate
-    # For non-Latin scripts, adjust ratio
-    if re.search(r'[\u0900-\u097F\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]', text):
-        char_width_ratio = 0.9  # Wider characters
-    # Calculate based on width constraint
-    width_based_size = int(bbox_width / (len(text) * char_width_ratio))
-    # Calculate based on height constraint (use 80% of available height)
-    height_based_size = int(bbox_height * 0.8)
-    # Take the smaller constraint
-    optimal_size = min(width_based_size, height_based_size)
     # Apply bounds
     return max(min_size, min(optimal_size, max_size))
-def get_contrasting_color(bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]:
-    """Get contrasting text color"""
-    r, g, b = bg_color[:3]
     luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
-    if luminance > 0.5:
-        return (0, 0, 0)  # Black text for light background
     else:
-        return (255, 255, 255)  # White text for dark background
-def extract_dominant_color(image: np.ndarray, bbox: List) -> Tuple[int, int, int]:
-    """Extract dominant color from the bounding box region"""
     try:
         # Get bounding box coordinates
-        points = np.array(bbox, dtype=np.int32)
-        # Create mask for the region
-        mask = np.zeros(image.shape[:2], dtype=np.uint8)
-        cv2.fillPoly(mask, [points], 255)
-        # Extract pixels within the region
-        region_pixels = image[mask > 0]
-        if len(region_pixels) > 0:
             # Calculate mean color
-            mean_color = np.mean(region_pixels, axis=0)
-            return tuple(map(int, mean_color))
     except Exception as e:
-        print(f"Error extracting color: {e}")
-    return (240, 240, 240)  # Default light gray
-def create_enhanced_overlay(image: Image.Image, bbox: List, translated_text: str, bg_opacity: int = 180):
-    """Create enhanced overlay with better positioning"""
     draw = ImageDraw.Draw(image, 'RGBA')
-    # Convert bbox to integer coordinates
-    points = [[int(p[0]), int(p[1])] for p in bbox]
-    # Calculate bounding rectangle
-    x_coords = [p[0] for p in points]
-    y_coords = [p[1] for p in points]
-    x_min, x_max = min(x_coords), max(x_coords)
-    y_min, y_max = min(y_coords), max(y_coords)
-    width = x_max - x_min
-    height = y_max - y_min
     # Calculate optimal font size
     font_size = calculate_optimal_font_size(translated_text, width, height)
     font = get_font_for_text(translated_text, font_size)
-    # Extract background color
     img_array = np.array(image.convert('RGB'))
-    bg_color = extract_dominant_color(img_array, bbox)
-    # Create semi-transparent background
-    padding = max(4, font_size // 6)
     bg_rect = [
-        x_min - padding,
-        y_min - padding,
-        x_max + padding,
-        y_max + padding
     ]
-    # Draw background with original color but semi-transparent
-    bg_color_with_alpha = bg_color + (bg_opacity,)
-    draw.rectangle(bg_rect, fill=bg_color_with_alpha)
-    # Calculate text position (center alignment)
     try:
         bbox_text = draw.textbbox((0, 0), translated_text, font=font)
         text_width = bbox_text[2] - bbox_text[0]
         text_height = bbox_text[3] - bbox_text[1]
     except:
         text_width = len(translated_text) * font_size * 0.6
         text_height = font_size
-    text_x = x_min + (width - text_width) / 2
-    text_y = y_min + (height - text_height) / 2
-    # Get contrasting text color
-    text_color = get_contrasting_color(bg_color)
-    # Draw text with slight shadow for better readability
-    shadow_offset = max(1, font_size // 20)
-    shadow_color = (0, 0, 0) if text_color == (255, 255, 255) else (255, 255, 255)
-    # Draw shadow
-    draw.text((text_x + shadow_offset, text_y + shadow_offset), translated_text,
-              fill=shadow_color + (100,), font=font)
-    # Draw main text
     draw.text((text_x, text_y), translated_text, fill=text_color, font=font)
-def process_image_enhanced(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
-    """Enhanced image processing with better text grouping"""
     if image is None:
         return None, "❌ Please upload an image first."
@@ -428,106 +247,73 @@ def process_image_enhanced(image: Image.Image, target_language: str, progress=gr
     progress(0.1, "🔧 Initializing OCR engine...")
-    # Initialize OCR with better error handling
-    try:
-        ocr = initialize_reader()
-        if ocr is None:
-            return image, """❌ OCR initialization failed. This might be due to:
-• Missing system dependencies
-• Network issues downloading models
-• Insufficient memory
-Please try refreshing the page or contact support."""
-        # Test OCR with a simple operation
-        test_array = np.array(image.convert('RGB'))
-        if test_array.size == 0:
-            return image, "❌ Invalid image format. Please upload a valid image file."
-    except Exception as e:
-        error_details = str(e)
-        return image, f"""❌ OCR Setup Error: {error_details}
-Possible solutions:
-• Refresh the browser and try again
-• Upload a different image format (JPG/PNG)
-• Check if the image is not corrupted
-Technical details: {type(e).__name__}"""
-    progress(0.3, "🔍 Extracting and grouping text regions...")
     try:
-        # Convert PIL image to numpy array with error handling
-        img_array = np.array(image.convert('RGB'))
-        if img_array is None or img_array.size == 0:
-            return image, "❌ Error processing image. Please try a different image."
-        print(f"Image shape: {img_array.shape}")
-        # Perform OCR with error handling and fallback options
-        try:
-            results = ocr.readtext(img_array, detail=1, paragraph=False, width_ths=0.7, height_ths=0.7)
-        except Exception as ocr_error:
-            print(f"Primary OCR failed: {ocr_error}")
-            # Fallback: try with different parameters
-            try:
-                results = ocr.readtext(img_array, detail=1)
-            except Exception as fallback_error:
-                print(f"Fallback OCR failed: {fallback_error}")
-                return image, f"""❌ OCR Processing Failed: {str(ocr_error)}
-Troubleshooting:
-• Image might be too complex or low quality
-• Try uploading a clearer image
-• Ensure text is clearly visible
-Fallback error: {str(fallback_error)}"""
-        if not results:
-            return image, """ℹ️ No readable text found in the image.
-Tips for better results:
-• Ensure text is clearly visible and well-lit
-• Upload higher resolution images
-• Make sure text is not too small or blurry"""
-        # Filter by confidence
-        filtered_results = [(bbox, text, conf) for bbox, text, conf in results
-                          if conf > 0.4 and text.strip()]
         if not filtered_results:
             return image, "ℹ️ No text detected with sufficient confidence."
-        progress(0.5, "🔗 Grouping related text regions...")
-        # Group text regions for contextual translation
-        grouped_results = group_text_regions(filtered_results)
-        progress(0.6, f"🌐 Translating {len(grouped_results)} text groups...")
-        # Create result image
         result_image = image.copy().convert('RGBA')
-        translation_info = []
-        for i, (bbox, text, confidence) in enumerate(grouped_results):
-            progress(0.6 + (0.3 * i / len(grouped_results)),
-                    f"Translating group {i+1}/{len(grouped_results)}")
             if text and text.strip():
-                # Clean text
                 cleaned_text = re.sub(r'\s+', ' ', text.strip())
-                # Translate with context
-                translated = smart_translate_with_context(cleaned_text, target_lang_code)
-                # Create overlay
-                create_enhanced_overlay(result_image, bbox, translated)
-                # Store info
-                translation_info.append({
                     'original': cleaned_text,
                     'translated': translated,
                     'confidence': confidence
@@ -535,18 +321,17 @@ Tips for better results:
         progress(1.0, "✅ Translation completed!")
-        # Convert to RGB
         final_image = result_image.convert('RGB')
-        # Create detailed summary
-        summary_lines = [f"🎯 Successfully processed {len(translation_info)} text groups:\n"]
-        for i, info in enumerate(translation_info, 1):
-            summary_lines.append(f"**Group {i}:**")
-            summary_lines.append(f"📝 Original: _{info['original']}_")
-            summary_lines.append(f"🌐 Translation: **{info['translated']}**")
-            summary_lines.append(f"📊 Confidence: {info['confidence']:.2f}")
-            summary_lines.append("")
         summary_text = "\n".join(summary_lines)
@@ -557,66 +342,42 @@ Tips for better results:
         print(f"Processing error: {e}")
         return image, error_msg
-# Enhanced CSS
 custom_css = """
 .gradio-container {
-    max-width: 1400px;
     margin: auto;
-    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
 }
 .main-header {
     text-align: center;
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
     -webkit-background-clip: text;
     -webkit-text-fill-color: transparent;
-    background-clip: text;
-    font-size: 2.8em;
-    font-weight: 800;
     margin-bottom: 0.5em;
-    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
 }
 .description {
     text-align: center;
-    font-size: 1.2em;
-    color: #555;
     margin-bottom: 2em;
-    line-height: 1.6;
 }
 .feature-box {
-    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
-    padding: 1.5em;
-    border-radius: 12px;
-    margin: 1.5em 0;
-    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
-}
-.improvement-box {
-    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
-    padding: 1.2em;
-    border-radius: 10px;
     margin: 1em 0;
-    border-left: 4px solid #667eea;
-}
-.btn-primary {
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    border: none;
-    font-weight: 600;
-    text-transform: uppercase;
-    letter-spacing: 1px;
 }
 """
-# Create Gradio interface
-with gr.Blocks(css=custom_css, title="Enhanced Multilingual Signboard Translator") as demo:
     gr.HTML("""
-        <div class="main-header">🌐 Enhanced Multilingual Signboard Translator</div>
         <div class="description">
-            Advanced OCR with intelligent text grouping and contextual translation overlay
         </div>
     """)
@@ -625,105 +386,73 @@ with gr.Blocks(css=custom_css, title="Enhanced Multilingual Signboard Translator
             gr.Markdown("### 📤 Upload & Configure")
             input_image = gr.Image(
-                label="📷 Upload Signboard Image",
                 type="pil",
-                height=350
             )
             target_language = gr.Dropdown(
                 choices=list(LANG_CODE_MAP.keys()),
                 value="Hindi",
-                label="🎯 Target Language",
-                info="Select language for translation"
             )
             translate_btn = gr.Button(
-                "🚀 Translate Signboard",
                 variant="primary",
-                size="lg",
-                elem_classes=["btn-primary"]
             )
         with gr.Column(scale=1):
-            gr.Markdown("### 📋 Results")
             output_image = gr.Image(
-                label="🖼️ Translated Signboard",
-                type="pil",
-                height=350
             )
             output_text = gr.Textbox(
-                label="📝 Translation Analysis",
-                lines=10,
-                max_lines=20,
-                info="Detailed breakdown of detected and translated text"
             )
     # Event binding
     translate_btn.click(
-        fn=process_image_enhanced,
         inputs=[input_image, target_language],
         outputs=[output_image, output_text],
         show_progress=True
     )
-    # Enhanced information sections
-    gr.HTML("""
-        <div class="improvement-box">
-            <h3>🚀 Key Improvements in This Version:</h3>
-            <ul>
-                <li><strong>🧠 Intelligent Text Grouping:</strong> Combines fragmented words into meaningful phrases</li>
-                <li><strong>🎯 Contextual Translation:</strong> Uses signboard context for accurate translations</li>
-                <li><strong>🌈 Smart Color Preservation:</strong> Maintains original background colors with transparency</li>
-                <li><strong>📝 Multi-Script Support:</strong> Enhanced font handling for various languages</li>
-                <li><strong>⚡ Optimized Performance:</strong> Better caching and processing algorithms</li>
-            </ul>
-        </div>
-    """)
     gr.HTML("""
         <div class="feature-box">
-            <h3>✨ Advanced Features:</h3>
             <ul>
-                <li><strong>🔍 Smart OCR:</strong> Groups nearby text elements for better context</li>
-                <li><strong>🌐 Context-Aware Translation:</strong> Recognizes signboard patterns for accurate meaning</li>
-                <li><strong>🎨 Adaptive Overlays:</strong> Preserves original aesthetics while ensuring readability</li>
-                <li><strong>🔤 Multi-Language Support:</strong> Enhanced support for 8+ languages</li>
-                <li><strong>📊 Confidence Analysis:</strong> Shows detection confidence for quality assessment</li>
-                <li><strong>⚡ Performance Optimized:</strong> Faster processing with intelligent caching</li>
             </ul>
         </div>
     """)
 if __name__ == "__main__":
-    print("🔧 Initializing Enhanced OCR Translator...")
-    print("System Information:")
-    print(f"Python version: {os.sys.version}")
-    print(f"NumPy version: {np.__version__}")
-    # Pre-initialize with detailed logging
     try:
-        print("Starting OCR initialization...")
-        ocr_reader = initialize_reader()
-        if ocr_reader:
-            print("✅ OCR System ready!")
-        else:
-            print("⚠️ OCR initialization failed - will retry when needed")
     except Exception as e:
-        print(f"⚠️ Pre-initialization error: {e}")
-        print("OCR will be initialized on first use")
-    # Launch with better error handling
-    try:
-        demo.launch(
-            share=True,
-            show_error=True,
-            server_name="0.0.0.0",
-            server_port=7860,
-            enable_queue=True
-        )
-    except Exception as e:
-        print(f"Launch error: {e}")
-        # Fallback launch
-        demo.launch()

 import cv2
 import time
 import re
+from typing import Tuple, List, Optional
 import io
 import os
 # Global variables
 reader = None
 translation_cache = {}
+# Define supported languages with better language detection
 SUPPORTED_LANGUAGES = {
     'en': 'English',
+    'hi': 'Hindi'
 }
 # Language code mapping for Google Translator
 LANG_CODE_MAP = {
     'English': 'en',
+    'Hindi': 'hi'
 }
 def initialize_reader():
+    """Initialize EasyOCR reader with optimized language support"""
     global reader
     if reader is None:
+        try:
+            # Initialize with English and Hindi only for faster loading
+            reader = easyocr.Reader(['en', 'hi'], gpu=False, verbose=False, download_enabled=True)
+            print("EasyOCR initialized successfully")
+        except Exception as e:
+            print(f"Error initializing EasyOCR: {e}")
+            return None
     return reader
 def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFont:
+    """Get appropriate font based on text content and size"""
+    # Check if text contains Devanagari script (Hindi/Marathi)
     has_devanagari = bool(re.search(r'[\u0900-\u097F]', text))
     # Font paths for different scripts
+    devanagari_fonts = [
+        "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
+        "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Bold.ttf",
+        "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf",
+        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+    ]
+    english_fonts = [
         "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
         "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
+        "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf",
+        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+    ]
+    font_paths = devanagari_fonts if has_devanagari else english_fonts
     for font_path in font_paths:
         try:
         except (OSError, IOError):
             continue
+    # Fallback to default font
     try:
         return ImageFont.load_default()
     except:
         return None
+def smart_translate(text: str, target_lang: str, source_lang: str = 'auto') -> str:
+    """Enhanced translation with context awareness and caching"""
     if not text or not text.strip():
         return ""
     if cache_key in translation_cache:
         return translation_cache[cache_key]
+    max_retries = 2  # Reduced retries for faster response
     for attempt in range(max_retries):
         try:
+            # Use GoogleTranslator with better error handling
             translator = GoogleTranslator(source=source_lang, target=target_lang)
+            translated = translator.translate(cleaned_text)
+            if translated and translated.strip() and translated != cleaned_text:
+                # Post-process translation for better readability
+                translated = translated.strip()
                 # Cache successful translation
                 translation_cache[cache_key] = translated
         except Exception as e:
             print(f"Translation attempt {attempt + 1} failed: {e}")
             if attempt < max_retries - 1:
+                time.sleep(0.3)  # Shorter wait time
+    return cleaned_text  # Return original text if translation fails
+def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 10, max_size: int = 50) -> int:
+    """Calculate optimal font size based on bounding box dimensions and text length"""
     if not text:
         return min_size
+    # Base calculation on text length and available space
+    char_width_ratio = 0.6  # Approximate character width to height ratio
+    estimated_char_width = bbox_height * char_width_ratio
+    calculated_size = int(bbox_width / (len(text) * char_width_ratio))
+    # Consider height constraint
+    height_based_size = int(bbox_height * 0.7)  # Use 70% of available height
+    # Take the smaller of the two constraints
+    optimal_size = min(calculated_size, height_based_size)
     # Apply bounds
     return max(min_size, min(optimal_size, max_size))
+def get_text_color_with_contrast(background_color: Tuple[int, int, int]) -> Tuple[int, int, int, int]:
+    """Calculate optimal text color based on background for maximum contrast"""
+    r, g, b = background_color[:3]
+    # Calculate luminance using standard formula
     luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
+    # Return white for dark backgrounds, black for light backgrounds
+    if luminance < 0.5:
+        return (255, 255, 255, 255)  # White text
     else:
+        return (0, 0, 0, 255)  # Black text
+def extract_background_color(image: np.ndarray, bbox: List, expand_factor: float = 1.2) -> Tuple[int, int, int, int]:
+    """Extract representative background color from around the text region"""
     try:
         # Get bounding box coordinates
+        top_left, top_right, bottom_right, bottom_left = bbox
+        # Calculate center and dimensions
+        center_x = (top_left[0] + top_right[0]) / 2
+        center_y = (top_left[1] + bottom_left[1]) / 2
+        width = abs(top_right[0] - top_left[0])
+        height = abs(bottom_left[1] - top_left[1])
+        # Expand region for better color sampling
+        expanded_width = width * expand_factor
+        expanded_height = height * expand_factor
+        # Calculate expanded coordinates
+        x1 = max(0, int(center_x - expanded_width / 2))
+        y1 = max(0, int(center_y - expanded_height / 2))
+        x2 = min(image.shape[1], int(center_x + expanded_width / 2))
+        y2 = min(image.shape[0], int(center_y + expanded_height / 2))
+        # Extract region
+        region = image[y1:y2, x1:x2]
+        if region.size > 0:
             # Calculate mean color
+            mean_color = np.mean(region.reshape(-1, region.shape[-1]), axis=0)
+            return tuple(map(int, mean_color)) + (220,)  # Add alpha for semi-transparency
     except Exception as e:
+        print(f"Error extracting background color: {e}")
+    # Default background color
+    return (240, 240, 240, 200)
def create_smart_overlay(image: Image.Image, bbox: List, original_text: str, translated_text: str) -> None:
    """Cover a detected text region with a filled box and draw the translation on it.

    The image is modified in place; nothing is returned.

    Args:
        image: Image to draw on. Drawing uses an ``'RGBA'`` draw context so
            the semi-transparent fill is blended with the existing pixels.
        bbox: Four ``(x, y)`` corner points, unpacked as top-left, top-right,
            bottom-right, bottom-left.
        original_text: The recognised source text. Currently unused; kept for
            interface compatibility.
        translated_text: Text to render, centred inside the region.
    """
    draw = ImageDraw.Draw(image, 'RGBA')

    # Axis-aligned extent of the (possibly skewed) quadrilateral.
    top_left, top_right, bottom_right, bottom_left = bbox
    x = int(min(top_left[0], bottom_left[0]))
    y = int(min(top_left[1], top_right[1]))
    width = int(max(top_right[0], bottom_right[0]) - x)
    height = int(max(bottom_left[1], bottom_right[1]) - y)

    # Pick a font size that fits the region, then a font for the text.
    font_size = calculate_optimal_font_size(translated_text, width, height)
    font = get_font_for_text(translated_text, font_size)
    if font is None:
        font = get_font_for_text(translated_text, 14)  # Fallback size

    # Fill colour is sampled from the image around the region.
    img_array = np.array(image.convert('RGB'))
    bg_color = extract_background_color(img_array, bbox)

    # Background rectangle, slightly larger than the region.
    padding = max(2, font_size // 8)
    bg_rect = [
        x - padding,
        y - padding,
        x + width + padding,
        y + height + padding,
    ]
    draw.rectangle(bg_rect, fill=bg_color)

    # Measure the rendered text so it can be centred.
    try:
        bbox_text = draw.textbbox((0, 0), translated_text, font=font)
        text_width = bbox_text[2] - bbox_text[0]
        text_height = bbox_text[3] - bbox_text[1]
    except Exception:
        # Fallback for older PIL versions (or fonts textbbox cannot measure):
        # rough estimate from the character count. `Exception` rather than a
        # bare `except` so KeyboardInterrupt/SystemExit still propagate.
        text_width = len(translated_text) * font_size * 0.6
        text_height = font_size

    text_x = x + (width - text_width) / 2
    text_y = y + (height - text_height) / 2

    # Choose a text colour that contrasts with the fill's RGB components.
    text_color = get_text_color_with_contrast(bg_color[:3])
    draw.text((text_x, text_y), translated_text, fill=text_color, font=font)
+def process_image(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
+    """Main image processing function with enhanced OCR and translation"""
     # Pipeline: initialise OCR -> read text regions -> normalise/filter the
     # results -> translate each region -> draw an overlay per region on a
     # copy of the image -> build a human-readable summary.
     # Every early exit returns the untouched input image (or None when no
     # image was supplied) together with a status message.
     # `progress` is called as progress(fraction, message) at each stage.
     if image is None:
         return None, "❌ Please upload an image first."
     progress(0.1, "🔧 Initializing OCR engine...")
+    # Initialize OCR
+    ocr = initialize_reader()
+    if ocr is None:
+        return image, "❌ Failed to initialize OCR. Please try again."
+    progress(0.3, "🔍 Extracting text from image...")
     try:
+        # Convert PIL image to numpy array for OCR
+        img_array = np.array(image)
+        # Perform OCR with simplified parameters
+        results = ocr.readtext(img_array)
+        if not results:
+            return image, "ℹ️ No readable text found in the image."
+        print(f"OCR Results format: {results[0] if results else 'Empty'}")
+        # Handle different OCR result formats
         # Every entry is normalised to a (bbox, text, confidence) triple so
         # the rest of the function can treat results uniformly.
+        processed_results = []
+        for result in results:
+            if len(result) == 3:
+                # Standard format: (bbox, text, confidence)
+                bbox, text, confidence = result
+                processed_results.append((bbox, text, confidence))
+            elif len(result) == 2:
+                # Alternative format: (bbox, text) - assume high confidence
+                bbox, text = result
+                processed_results.append((bbox, text, 0.8))
+            else:
+                print(f"Unexpected result format: {result}")
+                continue
+        # Filter results by confidence and text quality
+        filtered_results = []
+        for bbox, text, confidence in processed_results:
+            if text and text.strip() and confidence > 0.3:  # Lower threshold for better detection
+                filtered_results.append((bbox, text, confidence))
         if not filtered_results:
             return image, "ℹ️ No text detected with sufficient confidence."
+        progress(0.5, f"🌐 Translating {len(filtered_results)} text regions...")
+        # Create a copy of the image for overlay
         # The copy keeps the caller's image unmodified; overlays are drawn
         # onto `result_image` only.
         result_image = image.copy().convert('RGBA')
+        # Process each detected text region
+        translations_info = []
+        for i, (bbox, text, confidence) in enumerate(filtered_results):
+            # Update progress
             # Region index is mapped onto the 0.5-0.9 slice of the bar.
+            progress(0.5 + (0.4 * i / len(filtered_results)), f"Translating region {i+1}/{len(filtered_results)}")
             # This check repeats the filter above, which already dropped
             # empty / whitespace-only text.
             if text and text.strip():
+                # Clean the extracted text
                 # Collapses any run of whitespace into a single space.
                 cleaned_text = re.sub(r'\s+', ' ', text.strip())
+                # Translate the text
                 # NOTE(review): `target_lang_code` is not assigned anywhere in
                 # the visible part of this function - presumably derived from
                 # `target_language` on a line missing from this view; confirm.
+                translated = smart_translate(cleaned_text, target_lang_code)
+                # Create overlay on image
+                create_smart_overlay(result_image, bbox, cleaned_text, translated)
+                # Store translation info
+                translations_info.append({
                     'original': cleaned_text,
                     'translated': translated,
                     'confidence': confidence
         progress(1.0, "✅ Translation completed!")
+        # Convert back to RGB for final output
         final_image = result_image.convert('RGB')
+        # Create summary text
         # One numbered entry per translated region: original text,
         # translation and OCR confidence (two decimals).
+        summary_lines = []
+        summary_lines.append(f"🎯 Successfully processed {len(translations_info)} text regions:\n")
+        for i, info in enumerate(translations_info, 1):
+            summary_lines.append(f"{i}. Original: {info['original']}")
+            summary_lines.append(f"   Translation: {info['translated']}")
+            summary_lines.append(f"   Confidence: {info['confidence']:.2f}\n")
         summary_text = "\n".join(summary_lines)
         # NOTE(review): the success `return`, the `except ... as e:` header and
         # the `error_msg` assignment are not visible in this view; the two
         # lines below appear to belong to that error handler - confirm.
         print(f"Processing error: {e}")
         return image, error_msg
+# Custom CSS for better UI
 # Passed to gr.Blocks(css=...) below. Styles the overall container width,
 # the gradient page title (.main-header), the subtitle (.description) and
 # the feature list panel (.feature-box) used by the gr.HTML snippets.
 custom_css = """
 .gradio-container {
+    max-width: 1200px;
     margin: auto;
 }
 .main-header {
     text-align: center;
+    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
     -webkit-background-clip: text;
     -webkit-text-fill-color: transparent;
+    font-size: 2.5em;
+    font-weight: bold;
     margin-bottom: 0.5em;
 }
 .description {
     text-align: center;
+    font-size: 1.1em;
+    color: #666;
     margin-bottom: 2em;
 }
 .feature-box {
+    background: #f8f9fa;
+    padding: 1em;
+    border-radius: 8px;
     margin: 1em 0;
 }
 """
+# Create the Gradio interface
 # `demo` is the top-level Blocks app launched from the __main__ guard.
+with gr.Blocks(css=custom_css, title="Multilingual Signboard Translator") as demo:
     # Page header: title and one-line description, styled by custom_css.
     gr.HTML("""
+        <div class="main-header">🌐 Multilingual Signboard Translator</div>
         <div class="description">
+            Extract and translate text from images with intelligent overlay technology
         </div>
     """)
             # NOTE(review): the deeper indentation here implies enclosing
             # `with gr.Row():` / `with gr.Column(...):` lines that are not
             # visible in this view - confirm against the full file.
             gr.Markdown("### 📤 Upload & Configure")
             # type="pil" so process_image receives a PIL image.
             input_image = gr.Image(
+                label="📷 Upload Image",
                 type="pil",
+                height=300
             )
             # NOTE(review): the default "Hindi" must be one of the
             # LANG_CODE_MAP keys offered as choices - confirm the map
             # still contains it.
             target_language = gr.Dropdown(
                 choices=list(LANG_CODE_MAP.keys()),
                 value="Hindi",
+                label="🎯 Translate To",
+                info="Select target language for translation"
             )
             translate_btn = gr.Button(
+                "🚀 Translate Text",
                 variant="primary",
+                size="lg"
             )
         # Output column: translated image plus the textual summary.
         with gr.Column(scale=1):
+            gr.Markdown("### 📤 Results")
             output_image = gr.Image(
+                label="🖼️ Translated Image",
+                type="pil",
+                height=300
             )
             output_text = gr.Textbox(
+                label="📝 Translation Details",
+                lines=8,
+                max_lines=15,
+                info="Detailed translation information"
             )
     # Event binding
     # Clicking the button calls process_image(input_image, target_language)
     # and routes its two return values to the output image and textbox.
     translate_btn.click(
+        fn=process_image,
         inputs=[input_image, target_language],
         outputs=[output_image, output_text],
         show_progress=True
     )
+    # Feature information
     gr.HTML("""
         <div class="feature-box">
+            <h3>✨ Key Features:</h3>
             <ul>
+                <li><strong>🎯 Smart OCR:</strong> Advanced text detection with confidence filtering</li>
+                <li><strong>🌐 Bilingual Support:</strong> English ↔ Hindi translation</li>
+                <li><strong>🎨 Intelligent Overlay:</strong> Context-aware text positioning and sizing</li>
+                <li><strong>🔧 Adaptive Fonts:</strong> Script-specific font selection for better readability</li>
+                <li><strong>⚡ Optimized Performance:</strong> Fast processing with caching</li>
             </ul>
         </div>
     """)
 if __name__ == "__main__":
+    # Pre-initialize OCR for faster first-time usage
+    print("🔧 Pre-initializing OCR engine...")
     try:
+        initialize_reader()
+        print("✅ OCR engine ready!")
     except Exception as e:
+        print(f"⚠️ OCR initialization warning: {e}")
+    # Launch the application
+    demo.launch(
+        share=False,
+        show_error=True
+    )