Spaces:

Amandeep01
/

Signboard_Overlay_Project

Sleeping

App Files Community

Amandeep01 committed on May 12, 2025

Commit

32d6440

verified ·

1 Parent(s): 2dba53f

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -228

app.py CHANGED Viewed

@@ -1,253 +1,117 @@
 import gradio as gr
-import cv2
-import numpy as np
-import pytesseract
-from PIL import Image, ImageDraw, ImageFont
 import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import re
-class UltimateTravelOCR:
     def __init__(self):
-        # Tesseract configuration for multiple languages
-        self.tesseract_config = r'--oem 3 --psm 6 -l eng+hin'
-        # Translation model cache
-        self.translation_models = {}
-        self.translation_tokenizers = {}
-    def preprocess_image(self, image):
-        """
-        Advanced image preprocessing for better OCR accuracy
-        """
-        # Convert to grayscale
-        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-        # Apply adaptive thresholding
-        thresh = cv2.adaptiveThreshold(
-            gray, 255,
-            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-            cv2.THRESH_BINARY, 11, 2
-        )
-        # Denoise
-        denoised = cv2.fastNlMeansDenoising(thresh, None, 10, 7, 21)
-        return denoised
-    def extract_text(self, preprocessed_image):
-        """
-        Advanced text extraction using Tesseract
-        """
-        # Extract text using Tesseract
-        text = pytesseract.image_to_string(
-            preprocessed_image,
-            config=self.tesseract_config
-        )
-        # Clean and process extracted text
-        def clean_text(txt):
-            # Remove special characters and extra whitespace
-            txt = re.sub(r'[^\w\s]', '', txt)
-            txt = ' '.join(txt.split())
-            return txt
-        # Split text into lines and clean
-        lines = text.split('\n')
-        cleaned_lines = [clean_text(line) for line in lines if clean_text(line)]
-        return cleaned_lines
-    def get_text_regions(self, preprocessed_image):
         """
-        Detect text regions with precise bounding boxes
-        """
-        # Find contours
-        contours, _ = cv2.findContours(
-            preprocessed_image,
-            cv2.RETR_EXTERNAL,
-            cv2.CHAIN_APPROX_SIMPLE
-        )
-        # Filter and process contours
-        text_regions = []
-        for contour in contours:
-            # Filter contours by area to remove noise
-            area = cv2.contourArea(contour)
-            if 100 < area < 10000:  # Adjust these thresholds as needed
-                x, y, w, h = cv2.boundingRect(contour)
-                text_regions.append((x, y, w, h))
-        return text_regions
-    def _load_translation_model(self, src_lang, tgt_lang):
-        """
-        Load and cache translation models
-        """
-        model_key = f"{src_lang}-{tgt_lang}"
-        if model_key not in self.translation_models:
-            try:
-                model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
-                tokenizer = AutoTokenizer.from_pretrained(model_name)
-                model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-                self.translation_models[model_key] = model
-                self.translation_tokenizers[model_key] = tokenizer
-            except Exception as e:
-                print(f"Translation model loading error: {e}")
-                return None, None
-        return self.translation_models[model_key], self.translation_tokenizers[model_key]
-    def translate_text(self, text, target_lang):
-        """
-        Advanced text translation with fallback mechanisms
         """
         try:
-            # Determine source language (default to English)
-            src_lang = 'en'
-            # Load translation model
-            model, tokenizer = self._load_translation_model(src_lang, target_lang)
-            if not model or not tokenizer:
-                return text
-            # Prepare and translate
-            inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
-            with torch.no_grad():
-                outputs = model.generate(**inputs)
-            translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            return translated
         except Exception as e:
-            print(f"Translation error for '{text}': {e}")
-            return text
-    def overlay_translations(self, original_image, preprocessed_image, text_regions, lines, target_lang):
         """
-        Overlay translated text with advanced rendering
         """
-        # Convert to PIL for drawing
-        pil_image = Image.fromarray(original_image)
-        draw = ImageDraw.Draw(pil_image)
-        # Load a robust font
         try:
-            font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 25)
-        except IOError:
-            font = ImageFont.load_default()
-        # Translate and overlay each text region
-        for (x, y, w, h), text in zip(text_regions, lines):
-            # Skip empty texts
-            if not text.strip():
-                continue
-            # Translate text
-            translated_text = self.translate_text(text, target_lang)
-            # Draw bounding box
-            draw.rectangle(
-                [x, y, x+w, y+h],
-                outline='red',
-                width=2
-            )
-            # Position translation text
-            text_position = (x, max(0, y - 35))
-            # Draw semi-transparent background
-            text_bbox = draw.textbbox(text_position, translated_text, font=font)
-            draw.rectangle(
-                text_bbox,
-                fill=(0, 0, 0, 128)  # Semi-transparent black
-            )
-            # Draw translated text
-            draw.text(
-                text_position,
-                translated_text,
-                fill='white',
-                font=font
-            )
-        return np.array(pil_image)
-    def process_image(self, image, target_lang):
         """
-        Comprehensive image processing pipeline
         """
-        if image is None:
-            return None
-        try:
-            # Convert to numpy if needed
-            original_image = np.array(image)
-            # Preprocess image
-            preprocessed_image = self.preprocess_image(original_image)
-            # Extract text
-            lines = self.extract_text(preprocessed_image)
-            if not lines:
-                print("No text detected in the image.")
-                return original_image
-            # Get text regions
-            text_regions = self.get_text_regions(preprocessed_image)
-            # Ensure we have enough regions
-            if len(text_regions) < len(lines):
-                text_regions = [(0, i*30, original_image.shape[1], 30) for i in range(len(lines))]
-            # Overlay translations
-            result_image = self.overlay_translations(
-                original_image,
-                preprocessed_image,
-                text_regions[:len(lines)],
-                lines,
-                target_lang
-            )
-            return result_image
-        except Exception as e:
-            print(f"Comprehensive processing error: {e}")
-            return original_image
-# Create global OCR translator instance
-ocr_translator = UltimateTravelOCR()
 # Gradio Interface
-def create_interface():
-    with gr.Blocks() as demo:
-        gr.Markdown("# 🌍 Ultimate TravelOCR: Multilingual Signboard Translator")
-        with gr.Row():
-            image_input = gr.Image(type="pil", label="Upload Signboard Image")
-            lang_dropdown = gr.Dropdown(
-                label="Target Language",
-                choices=['en', 'hi', 'fr', 'de', 'es'],
-                value="hi"
-            )
-        translate_btn = gr.Button("Translate & Overlay")
-        output_img = gr.Image(label="Translated Output")
-        translate_btn.click(
-            fn=ocr_translator.process_image,
-            inputs=[image_input, lang_dropdown],
-            outputs=output_img
-        )
-    return demo
-# Launch the app
-demo = create_interface()
 if __name__ == "__main__":
-    demo.launch()

+# File: app.py
+import os
 import gradio as gr
 import torch
+from PIL import Image
+import pytesseract
+from transformers import MarianMTModel, MarianTokenizer
+class HindiSignboardTranslator:
     def __init__(self):
+        # OCR Configuration
+        pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Adjust path as needed
+        # Translation Model
+        model_name = 'Helsinki-NLP/opus-mt-hi-en'
+        self.model = MarianMTModel.from_pretrained(model_name)
+        self.tokenizer = MarianTokenizer.from_pretrained(model_name)
+    def extract_text(self, image):
         """
+        Extract text from Hindi signboard image
+        Args:
+            image (PIL.Image): Input image
+        Returns:
+            str: Extracted Hindi text
         """
         try:
+            # Ensure image is in RGB mode
+            if image.mode != 'RGB':
+                image = image.convert('RGB')
+            # Extract text using Tesseract
+            hindi_text = pytesseract.image_to_string(image, lang='hin')
+            return hindi_text.strip()
         except Exception as e:
+            print(f"OCR Error: {e}")
+            return None
+    def translate_text(self, hindi_text):
         """
+        Translate Hindi text to English
+        Args:
+            hindi_text (str): Input Hindi text
+        Returns:
+            str: Translated English text
         """
         try:
+            # Tokenize and translate
+            inputs = self.tokenizer(hindi_text, return_tensors="pt", padding=True)
+            outputs = self.model.generate(**inputs)
+            english_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            return english_text
+        except Exception as e:
+            print(f"Translation Error: {e}")
+            return None
+    def translate_signboard(self, image):
         """
+        Complete pipeline for signboard translation
+        Args:
+            image (PIL.Image): Signboard image
+        Returns:
+            dict: Translation results
         """
+        # Extract text via OCR
+        hindi_text = self.extract_text(image)
+        if not hindi_text:
+            return {
+                "status": "error",
+                "message": "Could not extract text from image",
+                "original_text": "",
+                "translated_text": ""
+            }
+        # Translate to English
+        english_text = self.translate_text(hindi_text)
+        return {
+            "status": "success",
+            "original_text": hindi_text,
+            "translated_text": english_text or "Translation failed"
+        }
+# Initialize the translator
+translator = HindiSignboardTranslator()
 # Gradio Interface
+def translate_image(image):
+    """
+    Gradio-friendly translation function
+    """
+    if image is None:
+        return "", ""
+    result = translator.translate_signboard(image)
+    return result['original_text'], result['translated_text']
+# Create Gradio Interface
+iface = gr.Interface(
+    fn=translate_image,
+    inputs=gr.Image(type="pil", label="Upload Hindi Signboard"),
+    outputs=[
+        gr.Textbox(label="Original Hindi Text"),
+        gr.Textbox(label="English Translation")
+    ],
+    title="Hindi Signboard Translator",
+    description="Upload a Hindi signboard image to extract and translate its text.",
+    examples=[
+        ["example_signboard1.jpg"],
+        ["example_signboard2.jpg"]
+    ]
+)
+# Launch the app
 if __name__ == "__main__":
+    iface.launch()