Amandeep01 commited on
Commit
caefe8c
·
verified ·
1 Parent(s): 1ecd105

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -111
app.py CHANGED
@@ -1,127 +1,163 @@
1
- # File: app.py
2
- import os
3
  import gradio as gr
 
 
 
 
4
  import torch
5
- from PIL import Image
6
- import pytesseract
7
- from transformers import MarianMTModel, MarianTokenizer
8
 
9
- class HindiSignboardTranslator:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def __init__(self):
11
- # OCR Configuration
12
- pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract' # Adjust path as needed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- # Translation Model
15
- model_name = 'Helsinki-NLP/opus-mt-hi-en'
16
- self.model = MarianMTModel.from_pretrained(model_name)
17
- self.tokenizer = MarianTokenizer.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- def extract_text(self, image):
20
- """
21
- Extract text from Hindi signboard image
22
- Args:
23
- image (PIL.Image): Input image
24
- Returns:
25
- str: Extracted Hindi text
26
- """
 
 
 
 
 
 
 
27
  try:
28
- # Ensure image is in RGB mode
29
- if image.mode != 'RGB':
30
- image = image.convert('RGB')
 
 
 
 
31
 
32
- # Extract text using Tesseract
33
- hindi_text = pytesseract.image_to_string(image, lang='hin')
34
- return hindi_text.strip()
35
- except Exception as e:
36
- print(f"OCR Error: {e}")
37
- return None
38
-
39
- def translate_text(self, hindi_text):
40
- """
41
- Translate Hindi text to English
42
- Args:
43
- hindi_text (str): Input Hindi text
44
- Returns:
45
- str: Translated English text
46
- """
47
- try:
48
- # Handle empty or None input
49
- if not hindi_text:
50
- return "No text detected"
51
 
52
- # Tokenize and translate
53
- inputs = self.tokenizer(hindi_text, return_tensors="pt", padding=True)
54
- outputs = self.model.generate(**inputs)
55
- english_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
56
- return english_text
57
- except Exception as e:
58
- print(f"Translation Error: {e}")
59
- return "Translation failed"
60
 
61
- def translate_signboard(self, image):
62
- """
63
- Complete pipeline for signboard translation
64
- Args:
65
- image (PIL.Image): Signboard image
66
- Returns:
67
- dict: Translation results
68
- """
69
- # Validate input
70
- if image is None:
71
- return {
72
- "status": "error",
73
- "message": "No image provided",
74
- "original_text": "",
75
- "translated_text": ""
76
- }
77
-
78
- # Extract text via OCR
79
- hindi_text = self.extract_text(image)
80
-
81
- if not hindi_text:
82
- return {
83
- "status": "error",
84
- "message": "Could not extract text from image",
85
- "original_text": "",
86
- "translated_text": ""
87
- }
88
-
89
- # Translate to English
90
- english_text = self.translate_text(hindi_text)
91
-
92
- return {
93
- "status": "success",
94
- "original_text": hindi_text,
95
- "translated_text": english_text
96
- }
97
-
98
- # Initialize the translator
99
- translator = HindiSignboardTranslator()
100
 
101
  # Gradio Interface
102
- def translate_image(image):
103
- """
104
- Gradio-friendly translation function
105
- """
106
- if image is None:
107
- return "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- result = translator.translate_signboard(image)
110
- return result['original_text'], result['translated_text']
111
-
112
- # Create Gradio Interface
113
- iface = gr.Interface(
114
- fn=translate_image,
115
- inputs=gr.Image(type="pil", label="Upload Hindi Signboard"),
116
- outputs=[
117
- gr.Textbox(label="Original Hindi Text"),
118
- gr.Textbox(label="English Translation")
119
- ],
120
- title="Hindi Signboard Translator",
121
- description="Upload a Hindi signboard image to extract and translate its text.",
122
- # Removed example images
123
- )
124
 
125
  # Launch the app
 
 
126
  if __name__ == "__main__":
127
- iface.launch()
 
 
 
1
  import gradio as gr
2
+ import easyocr
3
+ import numpy as np
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
  import torch
 
 
 
7
 
8
# Simplified Language Mapping
# ISO-639-1 (2-letter) codes -> ISO-639-2/T (3-letter) codes.
# NOTE(review): LANG_MAP is not referenced anywhere in the visible code —
# confirm whether it is dead code or consumed by an unseen module.
LANG_MAP = {
    'en': 'eng',
    'hi': 'hin',
    'mr': 'mar',
    'fr': 'fra',
    'de': 'deu',
    'es': 'spa'
}

# Initialize OCR Reader with optimized languages
# Loaded once at import time (model download on first run); CPU-only
# (gpu=False). OCR recognizes only English and Hindi scripts regardless
# of the chosen translation target.
ocr_reader = easyocr.Reader(['en', 'hi'], gpu=False)
20
+
21
# Translation Model Cache
class TranslationCache:
    """Lazily loads and memoizes Helsinki-NLP MarianMT models per language pair.

    Models are fetched on first request for a (src, tgt) pair and reused on
    every subsequent call, so repeated translations do not reload weights.
    """

    def __init__(self):
        # Keyed by "src-tgt"; populated only after a successful load.
        self.models = {}
        self.tokenizers = {}

    def get_model(self, src_lang, tgt_lang):
        """Return (model, tokenizer) for the language pair, loading on first use.

        Returns (None, None) when the checkpoint cannot be loaded; the
        failure is logged and nothing is cached, so a later call retries.
        """
        model_key = f"{src_lang}-{tgt_lang}"

        # Fast path: already loaded.
        if model_key in self.models:
            return self.models[model_key], self.tokenizers[model_key]

        model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        except Exception as e:
            print(f"Error loading translation model {model_key}: {e}")
            return None, None

        self.models[model_key] = model
        self.tokenizers[model_key] = tokenizer
        return model, tokenizer


# Global translation cache
translation_cache = TranslationCache()
46
+
47
def detect_language(text):
    """Heuristically detect the source language of *text* by script.

    Returns 'hi' when any character lies in the Devanagari Unicode block
    (U+0900–U+097F), otherwise 'en'.

    NOTE(review): Marathi also uses Devanagari, so 'mr' text is reported
    as 'hi' — confirm this is acceptable for translation-model selection.
    """
    for ch in text:
        if '\u0900' <= ch <= '\u097F':
            return 'hi'
    return 'en'
53
+
54
def translate_text(text, src_lang, tgt_lang):
    """Translate *text* from src_lang to tgt_lang with a cached MarianMT model.

    Args:
        text (str): Text to translate.
        src_lang (str): Source language code (any case; only the first two
            characters are used, matching Helsinki-NLP model naming).
        tgt_lang (str): Target language code (same normalization).

    Returns:
        str: The translated text, or the original *text* unchanged on any
        failure or no-op: missing/unloadable model, tokenization or
        generation error, empty input, or identical source and target.
    """
    try:
        # Ensure language codes match model requirements
        src_lang = src_lang.lower()[:2]
        tgt_lang = tgt_lang.lower()[:2]

        # Nothing to translate for blank input, and a same-language request
        # would otherwise try to download a nonexistent opus-mt-xx-xx
        # checkpoint before falling back — return early instead.
        if not text.strip() or src_lang == tgt_lang:
            return text

        # Get model and tokenizer (lazily loaded, memoized)
        model, tokenizer = translation_cache.get_model(src_lang, tgt_lang)
        if not model or not tokenizer:
            return text  # Fallback to original text if model fails

        # Prepare inputs; truncate to the model's supported context length
        inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

        # Generate translation without tracking gradients
        with torch.no_grad():
            outputs = model.generate(**inputs)

        # Decode translation
        translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return translated
    except Exception as e:
        print(f"Translation error: {e}")
        return text
80
+
81
def process_image(image, target_lang):
    """OCR a signboard image, translate each detected text region, and
    overlay the translations on the image.

    Args:
        image (PIL.Image | None): Uploaded image from the Gradio component.
        target_lang (str): Target language code from the dropdown.

    Returns:
        numpy.ndarray on success (the annotated image), or a plain error
        string on failure.
        NOTE(review): this mixed str/ndarray return feeds a gr.Image output
        — confirm Gradio handles the string case gracefully.
    """
    if image is None:
        return "Please upload an image."

    try:
        # Convert image to numpy (easyocr and PIL round-trip both use this)
        image_np = np.array(image)

        # Perform OCR with confidence filtering
        # NOTE(review): verify `threshold=` is a valid readtext kwarg for the
        # pinned easyocr version (detector params are typically
        # text_threshold/low_text/link_threshold).
        results = ocr_reader.readtext(image_np, threshold=0.3, low_text=0.4)

        if not results:
            return "No clear text detected in the image."

        # Prepare PIL image for drawing
        pil_img = Image.fromarray(image_np)
        draw = ImageDraw.Draw(pil_img)

        # Use a more universal font; fall back to PIL's bitmap default when
        # the DejaVu path does not exist on the host.
        try:
            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 20)
        except IOError:
            font = ImageFont.load_default()

        # Process each detected text region: (bbox, text, confidence)
        for detection in results:
            bbox, text, confidence = detection

            # Detect source language from the script of the OCR'd text
            src_lang = detect_language(text)

            # Translate text (falls back to the original string on failure)
            translated_text = translate_text(text, src_lang, target_lang)

            # Convert bbox (4 corner points) to integer pixel coordinates
            bbox = np.array(bbox).astype(int)

            # Draw bounding box
            # NOTE(review): ImageDraw.polygon's `width` kwarg requires a
            # recent Pillow (9.4+) — confirm the pinned version supports it.
            draw.polygon(bbox.reshape(-1, 2).tolist(), outline='red', width=2)

            # Draw translated text just above the box's top-left corner;
            # the fixed -25px offset can fall outside the image for text
            # near the top edge.
            text_bbox = bbox[0]  # Top-left corner
            draw.text((text_bbox[0], text_bbox[1] - 25),
                      translated_text,
                      fill='yellow',
                      font=font)

        return np.array(pil_img)

    except Exception as e:
        print(f"Processing error: {e}")
        return f"An error occurred: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  # Gradio Interface
136
def create_interface():
    """Build and return the Gradio Blocks UI for the signboard translator."""
    with gr.Blocks() as app:
        gr.Markdown("# 🌍 TravelOCR: Multilingual Signboard Translator")

        # Input row: image upload alongside the target-language selector.
        with gr.Row():
            uploaded_image = gr.Image(type="pil", label="Upload Signboard Image")
            target_language = gr.Dropdown(
                choices=["en", "hi", "fr", "de", "es"],
                value="en",
                label="Target Language",
            )

        run_button = gr.Button("Translate & Overlay")
        annotated_output = gr.Image(label="Translated Output")

        # Wire the button to the OCR+translate pipeline.
        run_button.click(
            fn=process_image,
            inputs=[uploaded_image, target_language],
            outputs=annotated_output,
        )

    return app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
# Launch the app
# The interface is constructed at import time — presumably so hosting
# environments that import app.py (e.g. Spaces) can find `demo`; verify.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()