Amandeep01 committed on
Commit
6eefa77
·
verified ·
1 Parent(s): 0ca326d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -29
app.py CHANGED
@@ -1,41 +1,229 @@
1
  import gradio as gr
2
  import easyocr
3
- from libretranslatepy import LibreTranslateAPI
4
- from PIL import Image
5
  import numpy as np
6
- import cv2
 
 
 
 
7
 
8
- # Initialize OCR reader and translator
9
- reader = easyocr.Reader(['en', 'hi'])
10
- lt = LibreTranslateAPI("https://libretranslate.de")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- def process_image(img):
13
- # Convert PIL image to NumPy array
14
- img_array = np.array(img)
15
- img_gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
16
 
17
- result = reader.readtext(img_gray)
 
18
 
19
- extracted_text = ""
20
- translated_text = ""
21
 
22
- for detection in result:
23
- text = detection[1]
24
- extracted_text += text + " "
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # Translate extracted text to Hindi
27
- if extracted_text.strip():
28
- translated_text = lt.translate(extracted_text, "en", "hi")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- return extracted_text.strip(), translated_text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- # Gradio interface
33
- iface = gr.Interface(
34
- fn=process_image,
35
- inputs=gr.Image(type="pil"),
36
- outputs=["text", "text"],
37
- title="Image Text Translator (English to Hindi)",
38
- description="Upload an image with English text, and get Hindi translation using LibreTranslate (no API key needed)."
39
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import easyocr
3
+ from deep_translator import GoogleTranslator
4
+ from PIL import Image, ImageDraw, ImageFont
5
  import numpy as np
6
+ import os
7
+ import time
8
+ import functools
9
+ import torch
10
+ from pathlib import Path
11
 
12
# Constants
FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"

# Translation targets offered in the UI: language code -> display name.
SUPPORTED_LANGUAGES = {
    'en': 'English',
    'hi': 'Hindi',
    'mr': 'Marathi',
    'ne': 'Nepali',
    'es': 'Spanish',
    'fr': 'French',
    'de': 'German',
    'ja': 'Japanese',
    'ko': 'Korean',
    'zh-CN': 'Chinese (Simplified)',
    'ar': 'Arabic',
    'ru': 'Russian',
    'pt': 'Portuguese',
    'it': 'Italian',
}

# OCR Language Support - languages loaded into the EasyOCR reader.
# NOTE: EasyOCR only allows languages sharing a recognition model to be
# combined in one Reader (CJK, Arabic and Cyrillic scripts cannot be mixed
# with Devanagari).  The previous list included 'ja', 'ko', 'ar', 'ru' and
# 'zh_cn' (an invalid code - EasyOCR uses 'ch_sim'), which makes
# easyocr.Reader(...) raise at start-up.  Restrict to the mutually
# compatible English + Devanagari group.
OCR_LANGUAGES = ['en', 'hi', 'mr', 'ne']

# Global cache for translations to avoid repeated API calls
translation_cache = {}

# EasyOCR reader instance, created lazily on first use (model loading is
# slow and memory-hungry) - see get_reader().
reader = None
39
 
40
def get_reader(progress=None):
    """Return the shared EasyOCR reader, creating it on first call.

    Args:
        progress: optional Gradio progress callback; when provided it is
            invoked with (fraction, message) around model loading.
    """
    global reader
    if reader is not None:
        return reader

    if progress:
        progress(0.1, "Loading OCR model...")

    # Use the GPU when one is available; otherwise fall back to CPU
    # (e.g. memory-limited deployments).
    reader = easyocr.Reader(OCR_LANGUAGES, gpu=torch.cuda.is_available())

    if progress:
        progress(0.3, "OCR model loaded successfully")
    return reader
54
 
55
def get_font(size=20):
    """Return a bold TrueType font at *size*, with graceful fallbacks.

    Tries the preferred DejaVu font first, then a couple of common
    system alternatives, and finally Pillow's built-in bitmap font so
    the caller always receives a usable font object.
    """
    candidates = [
        FONT_PATH,
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
    ]
    for path in candidates:
        try:
            return ImageFont.truetype(path, size=size)
        except OSError:
            continue

    # Last resort: Pillow's default font.
    return ImageFont.load_default()
74
 
75
# Cap on cached translations so the module-level cache cannot grow
# without bound in a long-running app.
_MAX_CACHE_ENTRIES = 1024

def translate_text(text, target_lang, source_lang='auto'):
    """Translate *text* into *target_lang*, caching successful results.

    Args:
        text: source text; blank/whitespace-only input short-circuits to "".
        target_lang: target language code understood by GoogleTranslator.
        source_lang: source language code, 'auto' to let the service detect.

    Returns:
        The translated string, "" for blank input, or the sentinel
        "[Translation Error]" when the translation service fails
        (errors are not cached, so a later call can retry).
    """
    if not text.strip():
        return ""

    cache_key = f"{text}|{source_lang}|{target_lang}"
    if cache_key in translation_cache:
        return translation_cache[cache_key]

    try:
        translated = GoogleTranslator(source=source_lang, target=target_lang).translate(text)
    except Exception as e:
        print(f"Translation error: {e}")
        return "[Translation Error]"

    # Simple bound: reset the cache when it fills up rather than leak memory.
    if len(translation_cache) >= _MAX_CACHE_ENTRIES:
        translation_cache.clear()
    translation_cache[cache_key] = translated
    return translated
91
 
92
def draw_bbox_with_translation(img_pil, bbox, translated_text, font, opacity=0.7, text_color=(255, 0, 0)):
    """Overlay *translated_text* just above an OCR bounding box.

    A semi-transparent black rectangle is drawn behind the text so it
    stays readable on busy backgrounds.

    Args:
        img_pil: PIL image to draw on (mutated in place and returned).
        bbox: EasyOCR box - four (x, y) corner points, top-left first.
        translated_text: label to render.
        font: PIL ImageFont used for rendering.
        opacity: background opacity in [0, 1].
        text_color: RGB tuple for the label text.
    """
    draw = ImageDraw.Draw(img_pil, 'RGBA')

    # Measure the rendered label (right/bottom of its bbox at the origin).
    text_w, text_h = draw.textbbox((0, 0), translated_text, font=font)[2:]

    # Anchor the label just above the detected region, clamped to the top edge.
    x = bbox[0][0]
    y = max(0, bbox[0][1] - text_h - 10)

    # Semi-transparent backdrop for readability.
    pad = 5
    draw.rectangle(
        (x - pad, y - pad, x + text_w + pad, y + text_h + pad),
        fill=(0, 0, 0, int(255 * opacity)),
    )

    draw.text((x, y), translated_text, fill=text_color, font=font)
    return img_pil
114
 
115
def process_image(image, target_lang, overlay_opacity=0.7, progress=gr.Progress()):
    """Run OCR on *image*, translate each detected snippet, and overlay
    the translations on a copy of the image.

    Args:
        image: uploaded PIL image, or None.
        target_lang: target language as a code or a display name from
            SUPPORTED_LANGUAGES.
        overlay_opacity: opacity of the label backgrounds in [0.1, 1.0].
        progress: Gradio progress reporter (gradio injects this).

    Returns:
        A (PIL image, text) pair: the annotated image plus the
        "original → translation" lines, or the untouched input plus a
        status message on early exit.
    """
    if image is None:
        return None, "Please upload an image"

    # Reject anything that is neither a known code nor a known name.
    if target_lang not in SUPPORTED_LANGUAGES and target_lang not in SUPPORTED_LANGUAGES.values():
        return image, f"Unsupported target language. Supported languages: {', '.join(SUPPORTED_LANGUAGES.values())}"

    # Map a display name (e.g. "Hindi") back to its code ("hi");
    # fall through unchanged when target_lang is already a code.
    lang_code = next(
        (code for code, name in SUPPORTED_LANGUAGES.items()
         if name.lower() == target_lang.lower()),
        target_lang,
    )

    progress(0.05, "Preparing image...")
    img_array = np.array(image)

    # Get OCR reader (lazy-loads the model on first use).
    ocr = get_reader(progress)

    progress(0.4, "Detecting text...")
    results = ocr.readtext(img_array)
    if not results:
        return image, "No text detected in the image"

    progress(0.6, "Translating detected text...")

    # Work on an RGB copy so the overlay never touches the upload.
    img_pil = image.copy().convert("RGB")
    # Scale the font with the image width so labels stay legible.
    font = get_font(size=max(20, int(img_pil.width / 40)))

    final_translations = []
    for idx, (bbox, text, prob) in enumerate(results):
        progress(0.6 + 0.3 * (idx / len(results)), f"Translating text {idx+1}/{len(results)}")

        if not text.strip():
            continue

        translated = translate_text(text, lang_code)
        final_translations.append(f"{text} → {translated}")

        # Draw the translation with a semi-transparent overlay.
        img_pil = draw_bbox_with_translation(
            img_pil, bbox, translated, font, opacity=overlay_opacity
        )

    progress(1.0, "Completed!")
    return img_pil, "\n".join(final_translations)
173
+
174
# Gradio UI with enhanced features
with gr.Blocks(title="Enhanced Image Translator") as iface:
    gr.Markdown("# Enhanced Image Translator")
    gr.Markdown("Extract & translate text from images with improved overlay visualization")

    with gr.Row():
        # Left column: inputs and controls.
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Image")

            with gr.Row():
                target_lang = gr.Dropdown(
                    choices=list(SUPPORTED_LANGUAGES.values()),
                    value="Hindi",
                    label="Translate To"
                )
                overlay_opacity = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Overlay Opacity"
                )

            submit_btn = gr.Button("Translate", variant="primary")

        # Right column: results.
        with gr.Column():
            output_image = gr.Image(type="pil", label="Image with Translated Overlay")
            output_text = gr.Textbox(label="Translated Text Output", lines=10)

    # Wire the button to the OCR + translation pipeline.
    submit_btn.click(
        fn=process_image,
        inputs=[input_image, target_lang, overlay_opacity],
        outputs=[output_image, output_text]
    )

    # Pre-cached example runs (overlay_opacity falls back to its default).
    # NOTE(review): cache_examples=True executes process_image at startup;
    # these example files must exist under examples/ or launch will fail.
    gr.Examples(
        examples=[
            ["examples/hindi_sample.jpg", "English"],
            ["examples/english_sample.jpg", "Hindi"],
        ],
        inputs=[input_image, target_lang],
        outputs=[output_image, output_text],
        fn=process_image,
        cache_examples=True,
    )

    gr.Markdown("""
    ## Features
    - Supports multiple languages for translation
    - Semi-transparent overlays for better readability
    - Caching for faster repeated translations
    - Responsive font sizing based on image dimensions
    """)

if __name__ == "__main__":
    iface.launch()