Update app.py
app.py CHANGED

# Top-of-file imports (lines 1-5, unchanged by this commit and therefore not
# shown in the diff) are assumed from usage: gradio as gr, easyocr,
# GoogleTranslator from deep_translator, Image/ImageDraw/ImageFont from PIL,
# and numpy as np.
import cv2
import time
import re
from typing import Tuple, List, Optional, Dict
import io
import os
from collections import defaultdict
import math

# Global variables
reader = None
translation_cache = {}

# Define supported languages
SUPPORTED_LANGUAGES = {
    'en': 'English',
    'hi': 'Hindi',
    'es': 'Spanish',
    'fr': 'French',
    'de': 'German',
    'ja': 'Japanese',
    'ko': 'Korean',
    'zh': 'Chinese'
}

# Language code mapping for Google Translator
LANG_CODE_MAP = {
    'English': 'en',
    'Hindi': 'hi',
    'Spanish': 'es',
    'French': 'fr',
    'German': 'de',
    'Japanese': 'ja',
    'Korean': 'ko',
    'Chinese': 'zh'
}
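
# Note: SUPPORTED_LANGUAGES / LANG_CODE_MAP describe the *target* languages the
# translator can produce; the EasyOCR reader below only reads English/Hindi
# *source* text. (The removed side of this diff shows the previous version
# offered English and Hindi as the only targets as well.)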

def initialize_reader():
    """Initialize EasyOCR reader with fallback options"""
    global reader
    if reader is None:
        # Try different initialization strategies
        init_strategies = [
            (['en', 'hi'], "English and Hindi"),
            (['en'], "English only"),
            (['en', 'hi'], "English and Hindi with verbose"),
        ]

        for i, (languages, description) in enumerate(init_strategies):
            try:
                print(f"Attempting OCR initialization: {description}")
                verbose_setting = True if i == 2 else False

                reader = easyocr.Reader(
                    languages,
                    gpu=False,
                    verbose=verbose_setting,
                    download_enabled=True,
                    detector=True,
                    recognizer=True
                )
                print(f"✅ EasyOCR initialized successfully with {description}")
                return reader

            except ImportError as e:
                print(f"❌ Import error: {e}")
                continue
            except Exception as e:
                print(f"❌ Initialization attempt {i+1} failed: {e}")
                if i < len(init_strategies) - 1:
                    print("Trying alternative approach...")
                    continue
                else:
                    print("All initialization strategies failed")

        # If all strategies fail, return None
        reader = None
        print("❌ Could not initialize EasyOCR with any strategy")

    return reader
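
# Note: the three strategies above differ only in language set and the verbose
# flag; EasyOCR downloads its detection/recognition models on first use
# (download_enabled=True), so a cold start can take noticeably longer than
# subsequent runs.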

def calculate_distance(box1, box2):
    """Calculate the distance between the centers of two bounding boxes"""
    # Get center points (EasyOCR corners are ordered [tl, tr, br, bl])
    center1 = [(box1[0][0] + box1[2][0]) / 2, (box1[0][1] + box1[2][1]) / 2]
    center2 = [(box2[0][0] + box2[2][0]) / 2, (box2[0][1] + box2[2][1]) / 2]

    return math.sqrt((center1[0] - center2[0])**2 + (center1[1] - center2[1])**2)

def are_boxes_on_same_line(box1, box2, tolerance=20):
    """Check if two bounding boxes are on the same horizontal line"""
    # Compare average y-coordinates (vertical positions)
    y1_avg = (box1[0][1] + box1[2][1]) / 2
    y2_avg = (box2[0][1] + box2[2][1]) / 2

    return abs(y1_avg - y2_avg) <= tolerance
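
# Worked example (hypothetical boxes):
#   box1 = [[0, 0], [100, 0], [100, 40], [0, 40]]     -> center (50, 20)
#   box2 = [[110, 2], [190, 2], [190, 38], [110, 38]] -> center (150, 20)
#   calculate_distance(box1, box2) = sqrt(100**2 + 0**2) = 100.0
#   are_boxes_on_same_line(box1, box2) -> True, since |20 - 20| <= 20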

def group_text_regions(ocr_results, line_tolerance=25, proximity_threshold=50):
    """Group OCR results into meaningful text blocks"""
    if not ocr_results:
        return []

    # Sort by vertical position first, then horizontal
    sorted_results = sorted(ocr_results, key=lambda x: (x[0][0][1], x[0][0][0]))

    grouped_lines = []
    current_line = [sorted_results[0]]

    for i in range(1, len(sorted_results)):
        current_box = sorted_results[i][0]
        prev_box = current_line[-1][0]

        # Check if boxes are on the same line
        if are_boxes_on_same_line(current_box, prev_box, line_tolerance):
            # Check proximity (not too far apart horizontally)
            if calculate_distance(current_box, prev_box) <= proximity_threshold:
                current_line.append(sorted_results[i])
            else:
                # Start a new line if too far apart
                grouped_lines.append(current_line)
                current_line = [sorted_results[i]]
        else:
            # Different line
            grouped_lines.append(current_line)
            current_line = [sorted_results[i]]

    # Don't forget the last line
    if current_line:
        grouped_lines.append(current_line)

    # Merge text within each line
    merged_groups = []
    for line in grouped_lines:
        if len(line) == 1:
            merged_groups.append(line[0])
        else:
            # Sort by horizontal position within the line
            line.sort(key=lambda x: x[0][0][0])

            # Merge text
            merged_text = ' '.join([item[1] for item in line])

            # Create combined bounding box
            all_points = []
            for item in line:
                all_points.extend(item[0])

            # Find min/max coordinates
            x_coords = [point[0] for point in all_points]
            y_coords = [point[1] for point in all_points]

            min_x, max_x = min(x_coords), max(x_coords)
            min_y, max_y = min(y_coords), max(y_coords)

            # Create new bounding box
            merged_bbox = [[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]

            # Use average confidence
            avg_confidence = sum([item[2] for item in line]) / len(line)

            merged_groups.append((merged_bbox, merged_text, avg_confidence))

    return merged_groups
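
# Example (hypothetical fragments of a single sign line):
#   results = [
#       ([[10, 10], [45, 10], [45, 30], [10, 30]], "NO", 0.9),
#       ([[48, 11], [100, 11], [100, 31], [48, 31]], "PARKING", 0.8),
#   ]
#   group_text_regions(results)
#   -> [([[10, 10], [100, 10], [100, 31], [10, 31]], "NO PARKING", 0.85)]
# Note that the proximity check uses centroid distance, so two wide adjacent
# words can exceed the default 50 px threshold even when their edges touch.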

def get_font_for_text(text: str, target_size: int = 20) -> ImageFont.FreeTypeFont:
    """Get an appropriate font based on the scripts present in the text"""
    # Check for different scripts
    has_devanagari = bool(re.search(r'[\u0900-\u097F]', text))
    has_chinese = bool(re.search(r'[\u4e00-\u9fff]', text))
    has_japanese = bool(re.search(r'[\u3040-\u309f\u30a0-\u30ff]', text))
    has_korean = bool(re.search(r'[\uac00-\ud7af]', text))
    has_arabic = bool(re.search(r'[\u0600-\u06ff]', text))

    # Font paths for different scripts
    font_paths = []

    if has_devanagari:
        font_paths.extend([
            "/usr/share/fonts/truetype/noto/NotoSansDevanagari-Regular.ttf",
            "/usr/share/fonts/truetype/lohit-devanagari/Lohit-Devanagari.ttf"
        ])

    if has_chinese or has_japanese:
        font_paths.extend([
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/arphic/uming.ttc"
        ])

    if has_korean:
        font_paths.append("/usr/share/fonts/truetype/noto/NotoSansKR-Regular.otf")

    if has_arabic:
        font_paths.append("/usr/share/fonts/truetype/noto/NotoSansArabic-Regular.ttf")

    # Default fonts
    font_paths.extend([
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/noto/NotoSans-Bold.ttf"
    ])

    for font_path in font_paths:
        try:
            # (Unchanged lines elided by the diff; loading the font is the only
            # operation the try/except can be guarding, so this is the natural
            # reconstruction.)
            return ImageFont.truetype(font_path, target_size)
        except (OSError, IOError):
            continue

    # Fallback
    try:
        return ImageFont.load_default()
    except:
        return None

def smart_translate_with_context(text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Enhanced translation with better context handling"""
    if not text or not text.strip():
        return ""

    # (Unchanged lines 225-229 are elided by the diff; they must normalize the
    # text and build the cache key used below. A plausible reconstruction:)
    cleaned_text = re.sub(r'\s+', ' ', text.strip())
    cache_key = f"{cleaned_text}_{source_lang}_{target_lang}"

    if cache_key in translation_cache:
        return translation_cache[cache_key]

    # Pre-processing for better translation context:
    # handle common signboard patterns
    signboard_patterns = {
        r'\b(no|not|don\'t|do not)\s+(use|mobile|phone|cell)\b': 'prohibition_mobile',
        r'\b(please|kindly)\s+(do not|don\'t)\s+(use|mobile|phone)\b': 'polite_prohibition_mobile',
        r'\b(exit|entrance|entry|way out|way in)\b': 'direction',
        r'\b(toilet|restroom|bathroom|washroom)\b': 'facility',
        r'\b(parking|park|no parking)\b': 'parking',
        r'\b(emergency|fire|safety)\b': 'safety'
    }

    context_hint = ""
    for pattern, context in signboard_patterns.items():
        if re.search(pattern, cleaned_text.lower()):
            context_hint = f"[Signboard context: {context}] "
            break

    max_retries = 3
    for attempt in range(max_retries):
        try:
            translator = GoogleTranslator(source=source_lang, target=target_lang)

            # Add the context hint for better translation
            text_to_translate = context_hint + cleaned_text if context_hint else cleaned_text
            translated = translator.translate(text_to_translate)

            if translated and translated.strip():
                # Remove the context hint from the result if it was added
                if context_hint and translated.startswith('['):
                    # Try to remove the context hint from the translation
                    bracket_end = translated.find('] ')
                    if bracket_end != -1:
                        translated = translated[bracket_end + 2:].strip()

                # Post-process for common improvements
                translated = post_process_translation(translated, target_lang)

                # Cache successful translation
                translation_cache[cache_key] = translated
                # (Unchanged line elided by the diff; returning the translated
                # string is the only sensible next step.)
                return translated

        except Exception as e:
            print(f"Translation attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(0.5)

    return cleaned_text
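
# Example flow (hypothetical input): smart_translate_with_context("No mobile phones", "hi")
#   1. The prohibition pattern above matches, so "[Signboard context:
#      prohibition_mobile] " is prepended before calling GoogleTranslator.
#   2. The bracketed hint is stripped back out of the result, the Hindi
#      corrections in post_process_translation() are applied, and the final
#      string is cached, so a repeated region with the same text skips the
#      network round-trip entirely.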

def post_process_translation(translated_text: str, target_lang: str) -> str:
    """Post-process a translation for better quality"""
    # Language-specific post-processing
    if target_lang == 'hi':  # Hindi
        # Common corrections for Hindi translations
        corrections = {
            'मत करो': 'न करें',  # More polite form
            'का उपयोग मत करो': 'का उपयोग न करें',
            'फोन का उपयोग': 'मोबाइल का उपयोग'
        }

        for old, new in corrections.items():
            translated_text = translated_text.replace(old, new)

    return translated_text.strip()

def calculate_optimal_font_size(text: str, bbox_width: int, bbox_height: int, min_size: int = 12, max_size: int = 48) -> int:
    """Calculate an optimal font size with better scaling"""
    if not text:
        return min_size

    # Estimate character width as a fraction of the font size (varies by language)
    char_width_ratio = 0.7  # Conservative estimate for Latin scripts

    # For non-Latin scripts, adjust the ratio
    if re.search(r'[\u0900-\u097F\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]', text):
        char_width_ratio = 0.9  # Wider characters

    # Size implied by the width constraint
    width_based_size = int(bbox_width / (len(text) * char_width_ratio))

    # Size implied by the height constraint (use 80% of available height)
    height_based_size = int(bbox_height * 0.8)

    # Take the smaller constraint
    optimal_size = min(width_based_size, height_based_size)

    # Apply bounds
    return max(min_size, min(optimal_size, max_size))
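
# Worked example: calculate_optimal_font_size("EXIT", 120, 40)
#   width_based_size  = int(120 / (4 * 0.7)) = 42
#   height_based_size = int(40 * 0.8)        = 32
#   min(42, 32) = 32, already within [12, 48] -> returns 32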

def get_contrasting_color(bg_color: Tuple[int, int, int]) -> Tuple[int, int, int]:
    """Get a contrasting text color for the given background"""
    r, g, b = bg_color[:3]

    # Calculate luminance using the standard Rec. 601 weights
    luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255

    if luminance > 0.5:
        return (0, 0, 0)  # Black text for light backgrounds
    else:
        return (255, 255, 255)  # White text for dark backgrounds
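
# Worked example: a light blue background (200, 220, 240)
#   luminance = (0.299*200 + 0.587*220 + 0.114*240) / 255 ≈ 216.3 / 255 ≈ 0.85
#   0.85 > 0.5 -> returns (0, 0, 0), i.e. black text on the light background.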

def extract_dominant_color(image: np.ndarray, bbox: List) -> Tuple[int, int, int]:
    """Extract the dominant color from the bounding box region"""
    try:
        # Get bounding box coordinates
        points = np.array(bbox, dtype=np.int32)

        # Create a mask for the region
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, [points], 255)

        # Extract pixels within the region
        region_pixels = image[mask > 0]

        if len(region_pixels) > 0:
            # Calculate mean color
            mean_color = np.mean(region_pixels, axis=0)
            return tuple(map(int, mean_color))

    except Exception as e:
        print(f"Error extracting color: {e}")

    return (240, 240, 240)  # Default light gray
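
# Example (hypothetical): for a bbox covering a solid red sign panel, the
# polygon mask selects only pixels inside the quadrilateral, so np.mean returns
# roughly the panel color, e.g. (200, 30, 30), instead of averaging in the
# unrelated background around the sign.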

def create_enhanced_overlay(image: Image.Image, bbox: List, translated_text: str, bg_opacity: int = 180):
    """Create an enhanced overlay with better positioning"""
    draw = ImageDraw.Draw(image, 'RGBA')

    # Convert bbox to integer coordinates
    points = [[int(p[0]), int(p[1])] for p in bbox]

    # Calculate the bounding rectangle
    x_coords = [p[0] for p in points]
    y_coords = [p[1] for p in points]

    x_min, x_max = min(x_coords), max(x_coords)
    y_min, y_max = min(y_coords), max(y_coords)

    width = x_max - x_min
    height = y_max - y_min

    # Calculate optimal font size
    font_size = calculate_optimal_font_size(translated_text, width, height)
    font = get_font_for_text(translated_text, font_size)

    # Extract background color
    img_array = np.array(image.convert('RGB'))
    bg_color = extract_dominant_color(img_array, bbox)

    # Create semi-transparent background
    padding = max(4, font_size // 6)
    bg_rect = [
        x_min - padding,
        y_min - padding,
        x_max + padding,
        y_max + padding
    ]

    # Draw the background with the original color but semi-transparent
    bg_color_with_alpha = bg_color + (bg_opacity,)
    draw.rectangle(bg_rect, fill=bg_color_with_alpha)

    # Calculate text position (center alignment)
    try:
        bbox_text = draw.textbbox((0, 0), translated_text, font=font)
        text_width = bbox_text[2] - bbox_text[0]
        text_height = bbox_text[3] - bbox_text[1]
    except:
        # Fallback for older PIL versions without textbbox
        text_width = len(translated_text) * font_size * 0.6
        text_height = font_size

    text_x = x_min + (width - text_width) / 2
    text_y = y_min + (height - text_height) / 2

    # Get a contrasting text color
    text_color = get_contrasting_color(bg_color)

    # Draw the text with a slight shadow for better readability
    shadow_offset = max(1, font_size // 20)
    shadow_color = (0, 0, 0) if text_color == (255, 255, 255) else (255, 255, 255)

    # Draw shadow
    draw.text((text_x + shadow_offset, text_y + shadow_offset), translated_text,
              fill=shadow_color + (100,), font=font)

    # Draw main text
    draw.text((text_x, text_y), translated_text, fill=text_color, font=font)

def process_image_enhanced(image: Image.Image, target_language: str, progress=gr.Progress()) -> Tuple[Optional[Image.Image], str]:
    """Enhanced image processing with better text grouping"""

    if image is None:
        return None, "❌ Please upload an image first."

    # (Unchanged lines 423-427 are elided by the diff; they must map the
    # dropdown label to a translator code, since target_lang_code is used
    # below. A plausible reconstruction:)
    target_lang_code = LANG_CODE_MAP.get(target_language, 'hi')

    progress(0.1, "🔧 Initializing OCR engine...")

    # Initialize OCR with better error handling
    try:
        ocr = initialize_reader()
        if ocr is None:
            return image, """❌ OCR initialization failed. This might be due to:
• Missing system dependencies
• Network issues downloading models
• Insufficient memory

Please try refreshing the page or contact support."""

        # Test OCR input with a simple operation
        test_array = np.array(image.convert('RGB'))
        if test_array.size == 0:
            return image, "❌ Invalid image format. Please upload a valid image file."

    except Exception as e:
        error_details = str(e)
        return image, f"""❌ OCR Setup Error: {error_details}

Possible solutions:
• Refresh the browser and try again
• Upload a different image format (JPG/PNG)
• Check that the image is not corrupted

Technical details: {type(e).__name__}"""

    progress(0.3, "🔍 Extracting and grouping text regions...")

    try:
        # Convert the PIL image to a numpy array with error handling
        img_array = np.array(image.convert('RGB'))

        if img_array is None or img_array.size == 0:
            return image, "❌ Error processing image. Please try a different image."

        print(f"Image shape: {img_array.shape}")

        # Perform OCR with error handling and fallback options
        try:
            results = ocr.readtext(img_array, detail=1, paragraph=False, width_ths=0.7, height_ths=0.7)
        except Exception as ocr_error:
            print(f"Primary OCR failed: {ocr_error}")
            # Fallback: try with default parameters
            try:
                results = ocr.readtext(img_array, detail=1)
            except Exception as fallback_error:
                print(f"Fallback OCR failed: {fallback_error}")
                return image, f"""❌ OCR Processing Failed: {str(ocr_error)}

Troubleshooting:
• The image might be too complex or low quality
• Try uploading a clearer image
• Ensure the text is clearly visible

Fallback error: {str(fallback_error)}"""

        if not results:
            return image, """ℹ️ No readable text found in the image.

Tips for better results:
• Ensure text is clearly visible and well-lit
• Upload higher resolution images
• Make sure the text is not too small or blurry"""

        # Filter by confidence
        filtered_results = [(bbox, text, conf) for bbox, text, conf in results
                            if conf > 0.4 and text.strip()]

        if not filtered_results:
            return image, "ℹ️ No text detected with sufficient confidence."

        progress(0.5, "🔗 Grouping related text regions...")

        # Group text regions for contextual translation
        grouped_results = group_text_regions(filtered_results)

        progress(0.6, f"🌐 Translating {len(grouped_results)} text groups...")

        # Create the result image
        result_image = image.copy().convert('RGBA')

        translation_info = []

        for i, (bbox, text, confidence) in enumerate(grouped_results):
            progress(0.6 + (0.3 * i / len(grouped_results)),
                     f"Translating group {i+1}/{len(grouped_results)}")

            if text and text.strip():
                # Clean text
                cleaned_text = re.sub(r'\s+', ' ', text.strip())

                # Translate with context
                translated = smart_translate_with_context(cleaned_text, target_lang_code)

                # Create overlay
                create_enhanced_overlay(result_image, bbox, translated)

                # Store info
                translation_info.append({
                    'original': cleaned_text,
                    'translated': translated,
                    'confidence': confidence
                })

        progress(1.0, "✅ Translation completed!")

        # Convert to RGB
        final_image = result_image.convert('RGB')

        # Create a detailed summary
        summary_lines = [f"🎯 Successfully processed {len(translation_info)} text groups:\n"]

        for i, info in enumerate(translation_info, 1):
            summary_lines.append(f"**Group {i}:**")
            summary_lines.append(f"📝 Original: _{info['original']}_")
            summary_lines.append(f"🌐 Translation: **{info['translated']}**")
            summary_lines.append(f"📊 Confidence: {info['confidence']:.2f}")
            summary_lines.append("")

        summary_text = "\n".join(summary_lines)

        # (Unchanged lines 553-556 are elided by the diff; the function must
        # return here and open the except block that the two prints/returns
        # below belong to. A plausible reconstruction:)
        return final_image, summary_text

    except Exception as e:
        error_msg = f"❌ Processing failed: {str(e)}"
        print(f"Processing error: {e}")
        return image, error_msg
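
# Headless usage sketch (hypothetical file names; note the progress callback
# defaults to gr.Progress(), which only reports inside a Gradio event, so
# calling this outside the UI is an untested assumption):
#   from PIL import Image
#   img = Image.open("signboard.jpg")
#   translated_img, report = process_image_enhanced(img, "Hindi")
#   translated_img.save("signboard_hi.jpg")
#   print(report)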

# Enhanced CSS
custom_css = """
.gradio-container {
    max-width: 1400px;
    margin: auto;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

.main-header {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 2.8em;
    font-weight: 800;
    margin-bottom: 0.5em;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}

.description {
    text-align: center;
    font-size: 1.2em;
    color: #555;
    margin-bottom: 2em;
    line-height: 1.6;
}

.feature-box {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    padding: 1.5em;
    border-radius: 12px;
    margin: 1.5em 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

.improvement-box {
    background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
    padding: 1.2em;
    border-radius: 10px;
    margin: 1em 0;
    border-left: 4px solid #667eea;
}

.btn-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border: none;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 1px;
}
"""

# Create Gradio interface
with gr.Blocks(css=custom_css, title="Enhanced Multilingual Signboard Translator") as demo:

    gr.HTML("""
    <div class="main-header">🌐 Enhanced Multilingual Signboard Translator</div>
    <div class="description">
        Advanced OCR with intelligent text grouping and contextual translation overlay
    </div>
    """)

    # (Unchanged lines 623-624 are elided by the diff; a row/column layout must
    # open here, matching the gr.Column(scale=1) shown further below.)
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload & Configure")

            input_image = gr.Image(
                label="📷 Upload Signboard Image",
                type="pil",
                height=350
            )

            target_language = gr.Dropdown(
                choices=list(LANG_CODE_MAP.keys()),
                value="Hindi",
                label="🎯 Target Language",
                info="Select language for translation"
            )

            translate_btn = gr.Button(
                "🚀 Translate Signboard",
                variant="primary",
                size="lg",
                elem_classes=["btn-primary"]
            )

        with gr.Column(scale=1):
            gr.Markdown("### 📋 Results")

            output_image = gr.Image(
                label="🖼️ Translated Signboard",
                type="pil",
                height=350
            )

            output_text = gr.Textbox(
                label="📝 Translation Analysis",
                lines=10,
                max_lines=20,
                info="Detailed breakdown of detected and translated text"
            )

    # Event binding
    translate_btn.click(
        fn=process_image_enhanced,
        inputs=[input_image, target_language],
        outputs=[output_image, output_text],
        show_progress=True
    )

    # Enhanced information sections
    gr.HTML("""
    <div class="improvement-box">
        <h3>🚀 Key Improvements in This Version:</h3>
        <ul>
            <li><strong>🧠 Intelligent Text Grouping:</strong> Combines fragmented words into meaningful phrases</li>
            <li><strong>🎯 Contextual Translation:</strong> Uses signboard context for accurate translations</li>
            <li><strong>🌈 Smart Color Preservation:</strong> Maintains original background colors with transparency</li>
            <li><strong>📝 Multi-Script Support:</strong> Enhanced font handling for various languages</li>
            <li><strong>⚡ Optimized Performance:</strong> Better caching and processing algorithms</li>
        </ul>
    </div>
    """)

    gr.HTML("""
    <div class="feature-box">
        <h3>✨ Advanced Features:</h3>
        <ul>
            <li><strong>🔍 Smart OCR:</strong> Groups nearby text elements for better context</li>
            <li><strong>🌐 Context-Aware Translation:</strong> Recognizes signboard patterns for accurate meaning</li>
            <li><strong>🎨 Adaptive Overlays:</strong> Preserves original aesthetics while ensuring readability</li>
            <li><strong>🔤 Multi-Language Support:</strong> Enhanced support for 8+ languages</li>
            <li><strong>📊 Confidence Analysis:</strong> Shows detection confidence for quality assessment</li>
            <li><strong>⚡ Performance Optimized:</strong> Faster processing with intelligent caching</li>
        </ul>
    </div>
    """)

if __name__ == "__main__":
    print("🔧 Initializing Enhanced OCR Translator...")
    print("System Information:")
    print(f"Python version: {os.sys.version}")
    print(f"NumPy version: {np.__version__}")

    # Pre-initialize with detailed logging
    try:
        print("Starting OCR initialization...")
        ocr_reader = initialize_reader()
        if ocr_reader:
            print("✅ OCR System ready!")
        else:
            print("⚠️ OCR initialization failed - will retry when needed")
    except Exception as e:
        print(f"⚠️ Pre-initialization error: {e}")
        print("OCR will be initialized on first use")

    # Launch with better error handling
    try:
        demo.launch(
            share=True,
            show_error=True,
            server_name="0.0.0.0",
            server_port=7860,
            enable_queue=True  # Accepted by Gradio 3.x; newer releases reject it, which the fallback below handles
        )
    except Exception as e:
        print(f"Launch error: {e}")
        # Fallback launch
        demo.launch()
|