Amandeep01 commited on
Commit
32d6440
·
verified ·
1 Parent(s): 2dba53f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -228
app.py CHANGED
@@ -1,253 +1,117 @@
 
 
1
  import gradio as gr
2
- import cv2
3
- import numpy as np
4
- import pytesseract
5
- from PIL import Image, ImageDraw, ImageFont
6
  import torch
7
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
- import re
 
9
 
10
class UltimateTravelOCR:
    """OCR-and-translate pipeline for signboard images.

    Flow: preprocess (grayscale/threshold/denoise via OpenCV) -> extract text
    with Tesseract (eng+hin) -> translate each line with a cached
    Helsinki-NLP opus-mt model -> overlay translations back onto the image.
    """

    def __init__(self):
        # Tesseract configuration for multiple languages
        # (--oem 3: default engine, --psm 6: assume a uniform block of text)
        self.tesseract_config = r'--oem 3 --psm 6 -l eng+hin'

        # Translation model cache, keyed by "src-tgt" language pair so each
        # Marian model/tokenizer is downloaded and loaded at most once.
        self.translation_models = {}
        self.translation_tokenizers = {}

    def preprocess_image(self, image):
        """
        Advanced image preprocessing for better OCR accuracy.

        Args:
            image: BGR image array (cv2 convention — note COLOR_BGR2GRAY below).
        Returns:
            Single-channel denoised binary image suitable for Tesseract.
        """
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Apply adaptive thresholding (local Gaussian-weighted threshold,
        # block size 11, constant 2) — handles uneven signboard lighting.
        thresh = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

        # Denoise
        denoised = cv2.fastNlMeansDenoising(thresh, None, 10, 7, 21)

        return denoised

    def extract_text(self, preprocessed_image):
        """
        Advanced text extraction using Tesseract.

        Returns:
            list[str]: non-empty cleaned lines of recognized text.
        """
        # Extract text using Tesseract
        text = pytesseract.image_to_string(
            preprocessed_image,
            config=self.tesseract_config
        )

        # Clean and process extracted text
        def clean_text(txt):
            # Remove special characters and extra whitespace
            txt = re.sub(r'[^\w\s]', '', txt)
            txt = ' '.join(txt.split())
            return txt

        # Split text into lines and clean
        # NOTE(review): clean_text runs twice per line (filter + map).
        lines = text.split('\n')
        cleaned_lines = [clean_text(line) for line in lines if clean_text(line)]

        return cleaned_lines

    def get_text_regions(self, preprocessed_image):
        """
        Detect text regions with precise bounding boxes.

        Returns:
            list[tuple[int, int, int, int]]: (x, y, w, h) boxes.
        """
        # Find contours
        contours, _ = cv2.findContours(
            preprocessed_image,
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE
        )

        # Filter and process contours
        text_regions = []
        for contour in contours:
            # Filter contours by area to remove noise
            area = cv2.contourArea(contour)
            if 100 < area < 10000:  # Adjust these thresholds as needed
                x, y, w, h = cv2.boundingRect(contour)
                text_regions.append((x, y, w, h))

        # NOTE(review): contour order is not guaranteed to match the order of
        # text lines from extract_text — pairing the two (see
        # overlay_translations) may mislabel regions; verify on real inputs.
        return text_regions

    def _load_translation_model(self, src_lang, tgt_lang):
        """
        Load and cache translation models.

        Returns:
            (model, tokenizer) pair, or (None, None) if loading failed.
        """
        model_key = f"{src_lang}-{tgt_lang}"

        if model_key not in self.translation_models:
            try:
                model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
                tokenizer = AutoTokenizer.from_pretrained(model_name)
                model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

                self.translation_models[model_key] = model
                self.translation_tokenizers[model_key] = tokenizer
            except Exception as e:
                # Best-effort: callers fall back to the untranslated text.
                print(f"Translation model loading error: {e}")
                return None, None

        return self.translation_models[model_key], self.translation_tokenizers[model_key]

    def translate_text(self, text, target_lang):
        """
        Advanced text translation with fallback mechanisms.

        Returns the original text unchanged when the model cannot be loaded
        or translation raises.
        """
        try:
            # Determine source language (default to English)
            # NOTE(review): source language is hard-coded to 'en' even though
            # OCR runs with eng+hin — Hindi input is translated as if English.
            src_lang = 'en'

            # Load translation model
            model, tokenizer = self._load_translation_model(src_lang, target_lang)

            if not model or not tokenizer:
                return text

            # Prepare and translate
            inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

            with torch.no_grad():
                outputs = model.generate(**inputs)

            translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
            return translated

        except Exception as e:
            print(f"Translation error for '{text}': {e}")
            return text

    def overlay_translations(self, original_image, preprocessed_image, text_regions, lines, target_lang):
        """
        Overlay translated text with advanced rendering.

        NOTE(review): preprocessed_image is accepted but never used here.
        """
        # Convert to PIL for drawing
        pil_image = Image.fromarray(original_image)
        draw = ImageDraw.Draw(pil_image)

        # Load a robust font
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 25)
        except IOError:
            font = ImageFont.load_default()

        # Translate and overlay each text region
        for (x, y, w, h), text in zip(text_regions, lines):
            # Skip empty texts
            if not text.strip():
                continue

            # Translate text
            translated_text = self.translate_text(text, target_lang)

            # Draw bounding box
            draw.rectangle(
                [x, y, x+w, y+h],
                outline='red',
                width=2
            )

            # Position translation text (just above the box, clamped to top)
            text_position = (x, max(0, y - 35))

            # Draw semi-transparent background
            # NOTE(review): the RGBA fill only blends if pil_image is RGBA;
            # on RGB images the alpha component is ignored — confirm intent.
            text_bbox = draw.textbbox(text_position, translated_text, font=font)
            draw.rectangle(
                text_bbox,
                fill=(0, 0, 0, 128)  # Semi-transparent black
            )

            # Draw translated text
            draw.text(
                text_position,
                translated_text,
                fill='white',
                font=font
            )

        return np.array(pil_image)

    def process_image(self, image, target_lang):
        """
        Comprehensive image processing pipeline.

        Args:
            image: PIL image (or array-like) from the Gradio upload, or None.
            target_lang: target language code for translation.
        Returns:
            Annotated image array, the unmodified original on failure,
            or None when no image was provided.
        """
        if image is None:
            return None

        try:
            # Convert to numpy if needed
            # NOTE(review): if this first line raises, original_image is
            # unbound in the except handler below -> NameError masks the
            # real error.
            original_image = np.array(image)

            # Preprocess image
            preprocessed_image = self.preprocess_image(original_image)

            # Extract text
            lines = self.extract_text(preprocessed_image)

            if not lines:
                print("No text detected in the image.")
                return original_image

            # Get text regions
            text_regions = self.get_text_regions(preprocessed_image)

            # Ensure we have enough regions: fall back to fixed-height
            # horizontal strips when contour detection found too few boxes.
            if len(text_regions) < len(lines):
                text_regions = [(0, i*30, original_image.shape[1], 30) for i in range(len(lines))]

            # Overlay translations
            result_image = self.overlay_translations(
                original_image,
                preprocessed_image,
                text_regions[:len(lines)],
                lines,
                target_lang
            )

            return result_image

        except Exception as e:
            print(f"Comprehensive processing error: {e}")
            return original_image
221
 
222
# Shared translator instance used by the Gradio callback below.
ocr_translator = UltimateTravelOCR()

# Gradio Interface
def create_interface():
    """Assemble and return the Gradio Blocks UI for the translator."""
    with gr.Blocks() as ui:
        gr.Markdown("# 🌍 Ultimate TravelOCR: Multilingual Signboard Translator")

        with gr.Row():
            uploaded_image = gr.Image(type="pil", label="Upload Signboard Image")
            language_choice = gr.Dropdown(
                label="Target Language",
                choices=['en', 'hi', 'fr', 'de', 'es'],
                value="hi",
            )

        run_button = gr.Button("Translate & Overlay")
        result_view = gr.Image(label="Translated Output")

        # Wire the button to the OCR-and-translate pipeline.
        run_button.click(
            fn=ocr_translator.process_image,
            inputs=[uploaded_image, language_choice],
            outputs=result_view,
        )

    return ui

# Launch the app
demo = create_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
 
252
# Script entry point: start the Gradio server only when run directly.
if __name__ == "__main__":
    demo.launch()
 
1
+ # File: app.py
2
+ import os
3
  import gradio as gr
 
 
 
 
4
  import torch
5
+ from PIL import Image
6
+ import pytesseract
7
+ from transformers import MarianMTModel, MarianTokenizer
8
 
9
class HindiSignboardTranslator:
    """End-to-end Hindi signboard translator.

    Pipeline: Tesseract OCR (``lang='hin'``) extracts Devanagari text from an
    uploaded image, then the Helsinki-NLP ``opus-mt-hi-en`` MarianMT model
    translates it to English.
    """

    def __init__(self):
        # OCR Configuration: only override pytesseract's binary path when the
        # conventional Linux location actually exists — the previous
        # unconditional assignment broke hosts where tesseract lives
        # elsewhere on PATH (macOS/Windows/custom containers).
        tesseract_path = '/usr/bin/tesseract'
        if os.path.exists(tesseract_path):
            pytesseract.pytesseract.tesseract_cmd = tesseract_path

        # Translation Model (fetched from the Hugging Face hub on first run).
        model_name = 'Helsinki-NLP/opus-mt-hi-en'
        self.model = MarianMTModel.from_pretrained(model_name)
        self.tokenizer = MarianTokenizer.from_pretrained(model_name)

    def extract_text(self, image):
        """
        Extract text from Hindi signboard image.

        Args:
            image (PIL.Image): Input image.
        Returns:
            str | None: Extracted Hindi text (stripped), or None on OCR failure.
        """
        try:
            # Tesseract expects RGB input; uploads may be RGBA, palette, etc.
            if image.mode != 'RGB':
                image = image.convert('RGB')

            # Extract text using Tesseract's Hindi language pack.
            hindi_text = pytesseract.image_to_string(image, lang='hin')
            return hindi_text.strip()
        except Exception as e:
            # Best-effort: callers treat None as "no text extracted".
            print(f"OCR Error: {e}")
            return None

    def translate_text(self, hindi_text):
        """
        Translate Hindi text to English.

        Args:
            hindi_text (str): Input Hindi text.
        Returns:
            str | None: Translated English text, or None on failure.
        """
        try:
            # Truncate overly long OCR output so the model's maximum sequence
            # length is never exceeded (which would raise inside generate()).
            inputs = self.tokenizer(
                hindi_text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )
            # Inference only — skip autograd bookkeeping.
            with torch.no_grad():
                outputs = self.model.generate(**inputs)
            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            print(f"Translation Error: {e}")
            return None

    def translate_signboard(self, image):
        """
        Complete pipeline for signboard translation.

        Args:
            image (PIL.Image): Signboard image.
        Returns:
            dict: always contains ``status``, ``original_text`` and
                ``translated_text``; ``message`` is added on error.
        """
        # Extract text via OCR
        hindi_text = self.extract_text(image)

        if not hindi_text:
            return {
                "status": "error",
                "message": "Could not extract text from image",
                "original_text": "",
                "translated_text": ""
            }

        # Translate to English
        english_text = self.translate_text(hindi_text)

        # Bug fix: previously the status was "success" even when translation
        # failed. Report an error status while keeping the placeholder text
        # that the UI callback displays.
        if english_text is None:
            return {
                "status": "error",
                "message": "Translation failed",
                "original_text": hindi_text,
                "translated_text": "Translation failed"
            }

        return {
            "status": "success",
            "original_text": hindi_text,
            "translated_text": english_text
        }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
# Initialize the translator once at import time; MarianMT model/tokenizer
# loading (and any hub download) happens here, before the UI starts.
translator = HindiSignboardTranslator()
87
 
88
# Gradio Interface
def translate_image(image):
    """Gradio callback: map an uploaded image to (original, translated) text.

    Returns a pair of empty strings when no image was provided.
    """
    if image is None:
        return "", ""

    outcome = translator.translate_signboard(image)
    return outcome['original_text'], outcome['translated_text']
98
 
99
# Create Gradio Interface
# Only advertise example images that actually exist next to app.py —
# gr.Interface raises when an `examples` entry points at a missing file,
# which previously crashed startup if the sample JPEGs were not deployed.
_candidate_examples = [
    ["example_signboard1.jpg"],
    ["example_signboard2.jpg"]
]
_examples = [e for e in _candidate_examples if os.path.exists(e[0])] or None

iface = gr.Interface(
    fn=translate_image,
    inputs=gr.Image(type="pil", label="Upload Hindi Signboard"),
    outputs=[
        gr.Textbox(label="Original Hindi Text"),
        gr.Textbox(label="English Translation")
    ],
    title="Hindi Signboard Translator",
    description="Upload a Hindi signboard image to extract and translate its text.",
    examples=_examples
)
114
 
115
# Launch the app
# Script entry point: start the Gradio server only when executed directly.
if __name__ == "__main__":
    iface.launch()