File size: 11,199 Bytes
5923804
6aae403
6eefa77
 
a92f7ce
97eb386
 
aa3c16f
5923804
97eb386
 
 
 
 
6eefa77
 
 
 
97eb386
6eefa77
5923804
97eb386
 
6eefa77
 
97eb386
 
 
 
 
 
6eefa77
5923804
97eb386
 
6eefa77
97eb386
 
 
6eefa77
56d6668
 
 
6eefa77
 
97eb386
6eefa77
97eb386
6eefa77
 
 
97eb386
6eefa77
97eb386
 
 
4509f95
97eb386
 
1d83b49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3c16f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5923804
6eefa77
 
 
 
 
97eb386
 
 
 
 
 
 
6eefa77
97eb386
6eefa77
97eb386
 
 
 
6eefa77
97eb386
6eefa77
97eb386
 
 
 
6eefa77
97eb386
 
 
 
 
 
aa3c16f
97eb386
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56d6668
 
97eb386
56d6668
 
 
 
 
aa3c16f
 
 
 
56d6668
97eb386
aa3c16f
97eb386
56d6668
 
 
 
 
 
aa3c16f
97eb386
 
56d6668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3c16f
 
 
 
 
 
 
97eb386
 
 
 
aa3c16f
 
 
 
97eb386
 
 
 
 
 
6eefa77
97eb386
d1d3e5b
 
1d83b49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6eefa77
 
1d83b49
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
import gradio as gr
import easyocr
from deep_translator import GoogleTranslator
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import io
import time
import cv2

# Module-level state shared by the functions below
reader = None  # EasyOCR reader; stays None until initialize_reader() builds it
translation_cache = dict()  # "text|target_lang" -> translated text, filled by translate_text()

# Languages offered for translation, keyed by language code.
# The codes are meant to be valid for both EasyOCR and Google Translator.
SUPPORTED_LANGUAGES = dict(
    en='English',
    hi='Hindi',
    mr='Marathi',
    ne='Nepali',
)

def initialize_reader():
    """Return the shared EasyOCR reader, building it on the first call.

    The reader is stored in the module-level ``reader`` variable so later
    calls reuse it. If construction fails, the error is printed, ``None`` is
    returned and ``reader`` stays ``None`` so a later call tries again.
    """
    global reader

    # Already built: hand back the cached instance
    if reader is not None:
        return reader

    # Keep the language list short to limit memory usage; CPU only
    ocr_languages = ['en', 'hi', 'mr', 'ne']
    try:
        reader = easyocr.Reader(ocr_languages, gpu=False)
    except Exception as exc:
        print(f"Error initializing EasyOCR: {exc}")
        return None
    return reader

def get_default_font(size=20):
    """Load the first available TrueType font from a fixed list of system paths.

    Args:
        size: Point size requested for the TrueType font.

    Returns:
        The first font that loads; PIL's built-in default font when none of
        the paths can be opened; ``None`` if an unexpected error occurs.
    """
    # Candidate fonts, tried in this order
    candidate_paths = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
    )

    try:
        for font_path in candidate_paths:
            try:
                loaded = ImageFont.truetype(font_path, size=size)
            except OSError:
                # Font file missing or unreadable: move on to the next path
                pass
            else:
                return loaded

        # Nothing on the list could be opened
        return ImageFont.load_default()
    except Exception as err:
        print(f"Font error: {err}")
        return None

def translate_text(text, target_lang):
    """Translate ``text`` into ``target_lang`` with caching and retries.

    Successful translations are stored in the module-level
    ``translation_cache`` so repeated strings are translated only once.

    Args:
        text: Text to translate; the source language is auto-detected.
        target_lang: Target language code handed to ``GoogleTranslator``.

    Returns:
        The translated text. ``""`` for empty or whitespace-only input.
        ``"[Translation Error: <text>]"`` when the last attempt raised, or
        ``"[Unable to translate: <text>]"`` when the attempts ended with an
        empty result.
    """
    if not text or not text.strip():
        return ""

    # Use cache if available
    cache_key = f"{text}|{target_lang}"
    if cache_key in translation_cache:
        return translation_cache[cache_key]

    max_retries = 3
    retry_delay = 1  # seconds to wait between attempts

    for attempt in range(1, max_retries + 1):
        is_last_attempt = attempt == max_retries
        try:
            translated = GoogleTranslator(source='auto', target=target_lang).translate(text)
        except Exception as e:
            print(f"Translation error (attempt {attempt}): {e}")
            if is_last_attempt:
                return f"[Translation Error: {text}]"
        else:
            if translated:
                translation_cache[cache_key] = translated
                return translated

        # Pause only when another attempt follows; sleeping after the final
        # attempt would just delay the failure result.
        if not is_last_attempt:
            time.sleep(retry_delay)

    return f"[Unable to translate: {text}]"

def get_dominant_color(image, bbox, padding=4):
    """Return the mean colour of the area around a text box as an RGBA tuple.

    Args:
        image: A numpy array or anything ``np.array`` can convert (e.g. a PIL
            image). Colour arrays are read channel-last in R, G, B order,
            which is what ``np.array`` yields for a PIL RGB/RGBA image, so no
            channel swap is applied. 2-D arrays are treated as grayscale.
        bbox: Sequence of ``(x, y)`` corner points of the text box.
        padding: Pixels added on every side so surrounding colours are
            included in the sample.

    Returns:
        ``(r, g, b, 230)`` holding the mean colour of the padded region, or
        the fallback ``(240, 240, 240, 180)`` when the region is empty or any
        error occurs.
    """
    fallback = (240, 240, 240, 180)
    try:
        img_array = image if isinstance(image, np.ndarray) else np.array(image)

        # Axis-aligned bounds over all corners, so rotated boxes still work
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]

        # Clamp to the image; keeping the upper bound >= the lower bound stops
        # a negative value from being read as a from-the-end slice index.
        x1 = max(0, min(xs) - padding)
        y1 = max(0, min(ys) - padding)
        x2 = max(x1, min(img_array.shape[1], max(xs) + padding))
        y2 = max(y1, min(img_array.shape[0], max(ys) + padding))

        region = img_array[y1:y2, x1:x2]
        if region.size == 0:
            return fallback

        if region.ndim == 2:
            # Grayscale: use the single intensity for all three channels
            gray = int(region.mean())
            return (gray, gray, gray, 230)

        channel_means = region.reshape(-1, region.shape[-1]).mean(axis=0)
        if channel_means.shape[0] < 3:
            # Fewer than three channels: treat the first one as intensity
            gray = int(channel_means[0])
            return (gray, gray, gray, 230)

        # Any channel beyond the first three (e.g. alpha) is ignored
        red, green, blue = (int(value) for value in channel_means[:3])
        return (red, green, blue, 230)
    except Exception as e:
        print(f"Error getting dominant color: {e}")
        # Best effort: fall back to a default semi-transparent light colour
        return fallback

def _resolve_language_code(target_lang):
    """Map a language code or display name to a key of SUPPORTED_LANGUAGES, else None."""
    if not isinstance(target_lang, str):
        return None
    if target_lang in SUPPORTED_LANGUAGES:
        return target_lang
    wanted = target_lang.strip().lower()
    return next(
        (code for code, name in SUPPORTED_LANGUAGES.items() if name.lower() == wanted),
        None,
    )


def _overlay_alpha(overlay_opacity, default=0.7):
    """Convert a 0-1 opacity into a 0-255 alpha value, clamping out-of-range input."""
    try:
        opacity = float(overlay_opacity)
    except (TypeError, ValueError):
        opacity = default
    return int(round(255 * min(max(opacity, 0.0), 1.0)))


def _draw_translation(draw, bbox, translated, fill_rgb, alpha, fallback_font, padding=4):
    """Draw one background box and the centred translated text onto the overlay."""
    # Axis-aligned bounds over all corners keep the rectangle valid for rotated boxes
    xs = [float(point[0]) for point in bbox]
    ys = [float(point[1]) for point in bbox]
    left, top = min(xs), min(ys)
    width, height = max(xs) - left, max(ys) - top

    draw.rectangle(
        [left - padding, top - padding, left + width + padding, top + height + padding],
        fill=(*fill_rgb, alpha),
    )

    # Shrink the font for long strings, but never below a readable size
    fontsize = min(20, int(width / (len(translated) * 0.5))) if translated else 20
    fontsize = max(fontsize, 12)
    font = get_default_font(size=fontsize)
    if font is None:
        font = fallback_font

    # textbbox reports where the glyphs land relative to the anchor; subtract
    # that offset so the glyphs themselves are centred in the box.
    ink_left, ink_top, ink_right, ink_bottom = draw.textbbox((0, 0), translated, font=font)
    text_x = left + (width - (ink_right - ink_left)) / 2 - ink_left
    text_y = top + (height - (ink_bottom - ink_top)) / 2 - ink_top

    # Black text on light backgrounds, white text on dark ones
    red, green, blue = fill_rgb
    brightness = (red * 299 + green * 587 + blue * 114) / 1000
    text_color = (0, 0, 0, 255) if brightness > 128 else (255, 255, 255, 255)

    draw.text((text_x, text_y), translated, fill=text_color, font=font)


def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
    """Run OCR on an image, translate the detected text and overlay the result.

    Args:
        image: PIL image to process, or ``None`` when nothing was uploaded.
        target_lang: Target language as a code or display name from
            ``SUPPORTED_LANGUAGES``.
        overlay_opacity: Opacity of the background boxes, 0 (invisible) to
            1 (opaque); values outside that range are clamped.
        progress: Gradio progress tracker, called with a fraction and a
            message.

    Returns:
        ``(image, text)``. On success: an RGB copy of the input with the
        translated overlay, and one ``"original -> translated"`` line per
        detected text. On failure: the untouched input image (``None`` if no
        image was given) and an explanatory message.
    """
    if image is None:
        return None, "Please upload an image"

    # Accept either a language code or its display name (the UI sends names)
    lang_code = _resolve_language_code(target_lang)
    if not lang_code:
        return image, f"Unsupported language. Supported: {', '.join(SUPPORTED_LANGUAGES.values())}"

    progress(0.1, "Initializing...")

    ocr = initialize_reader()
    if ocr is None:
        return image, "Failed to initialize OCR. Please try again."

    progress(0.3, "Detecting text...")

    try:
        # Normalise to RGB so OCR and colour sampling always see three channels
        img_array = np.array(image.convert("RGB"))
        results = ocr.readtext(img_array)

        if not results:
            return image, "No text detected in the image"

        progress(0.6, "Translating text...")

        font = get_default_font(size=20)
        if font is None:
            return image, "Error loading fonts. Processing without overlay."

        # Draw on a separate transparent layer and composite it afterwards:
        # drawing straight onto an RGBA image replaces pixels instead of
        # blending, which makes the boxes opaque whatever alpha is used.
        base = image.convert("RGBA")
        overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
        draw = ImageDraw.Draw(overlay)
        alpha = _overlay_alpha(overlay_opacity)

        translations = []
        total = len(results)
        for i, (bbox, text, _prob) in enumerate(results):
            if not text or not text.strip():
                continue

            progress(0.6 + (0.4 * (i / total)), f"Translating text {i+1}/{total}")

            translated = translate_text(text, lang_code)
            translations.append(f"{text} -> {translated}")

            # Box colour comes from the surrounding pixels; its alpha from the slider
            fill_rgb = tuple(get_dominant_color(img_array, bbox)[:3])
            _draw_translation(draw, bbox, translated, fill_rgb, alpha, font)

        result_image = Image.alpha_composite(base, overlay).convert('RGB')
        return result_image, "\n".join(translations)

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error in process_image: {str(e)}\n{error_details}")
        return image, f"Error processing image: {str(e)}"

# Build the Gradio UI: inputs in the first column, results in the second
with gr.Blocks(title="Multilingual Signboard Translator with Image Overlay") as iface:
    gr.Markdown("# Multilingual Signboard Translator with Image Overlay")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")

    with gr.Row():
        # Input column: image upload, translation options and the trigger button
        with gr.Column():
            input_image = gr.Image(label="Upload Image", type="pil")

            with gr.Row():
                # The dropdown lists display names; process_image maps them to codes
                target_lang = gr.Dropdown(
                    label="Translate To",
                    value="Hindi",
                    choices=[*SUPPORTED_LANGUAGES.values()],
                )
                overlay_opacity = gr.Slider(
                    label="Overlay Opacity", minimum=0.1, maximum=1.0, step=0.1, value=0.7
                )

            translate_btn = gr.Button("Translate", variant="primary")

        # Output column: annotated image and the translated text
        with gr.Column():
            output_image = gr.Image(label="Image with Translated Overlay", type="pil")
            output_text = gr.Textbox(lines=10, label="Translated Text Output")

    # Clicking the button runs process_image on the three inputs
    translate_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text],
    )

    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Simple and efficient text extraction and translation
    """)

if __name__ == "__main__":
    # Warm up the OCR model at startup to avoid a delay on the first request.
    # Best effort: a failure must not stop the UI from starting, because
    # process_image calls initialize_reader() again on each request. Catch
    # Exception only, so KeyboardInterrupt/SystemExit still stop the program.
    try:
        initialize_reader()
    except Exception as exc:
        print(f"OCR warm-up failed; will retry on first request: {exc}")

    # Launch the app
    iface.launch()