mamakobe committed on
Commit
d0d33dd
·
verified ·
1 Parent(s): abfa033

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +300 -0
app.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ================================================================
2
+ # GRADIO UI FOR LUHYA MULTILINGUAL TRANSLATION MODEL
3
+ # ================================================================
4
+
5
+ import gradio as gr
6
+ import torch
7
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
8
+ import time
9
+ import json
10
+
11
+ class LuhyaTranslationInterface:
12
+ """Gradio interface for Luhya translation model"""
13
+
14
+ def __init__(self, model_name: str):
15
+ self.model_name = model_name
16
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
+
18
+ # Load model and tokenizer
19
+ print(f"Loading model: {model_name}")
20
+ self.tokenizer = M2M100Tokenizer.from_pretrained(model_name)
21
+ self.model = M2M100ForConditionalGeneration.from_pretrained(model_name)
22
+ self.model.to(self.device)
23
+ self.model.eval()
24
+
25
+ # Language and dialect mappings
26
+ self.languages = {
27
+ "English": "en",
28
+ "Swahili": "sw",
29
+ "Luhya (General)": "luy"
30
+ }
31
+
32
+ self.dialects = {
33
+ "Bukusu": "luy_bukusu",
34
+ "Wanga": "luy_wanga",
35
+ "Kisa": "luy_kisa",
36
+ "Maragoli": "luy_maragoli",
37
+ "Tachoni": "luy_tachoni",
38
+ "Kabras": "luy_kabras",
39
+ "Tsotso": "luy_tsotso",
40
+ "Marachi": "luy_marachi",
41
+ "Luwanga": "luy_luwanga"
42
+ }
43
+
44
+ # Example translations for quick testing
45
+ self.examples = [
46
+ ["Good morning", "English", "Tsotso", "Basic greeting"],
47
+ ["Hello, how are you?", "English", "Bukusu", "Common question"],
48
+ ["Thank you very much", "English", "Wanga", "Gratitude expression"],
49
+ ["What is your name?", "English", "Maragoli", "Personal question"],
50
+ ["I love you", "English", "Kabras", "Emotional expression"],
51
+ ["Where are you going?", "English", "Tachoni", "Direction question"]
52
+ ]
53
+
54
+ def translate_text(self, text: str, source_lang: str, target_dialect: str, max_length: int = 128):
55
+ """Translate text using the model"""
56
+
57
+ if not text.strip():
58
+ return "Please enter some text to translate.", "", 0.0
59
+
60
+ try:
61
+ start_time = time.time()
62
+
63
+ # Map language names to codes
64
+ source_code = self.languages.get(source_lang, "en")
65
+ target_code = self.dialects.get(target_dialect, "luy_bukusu")
66
+
67
+ # Set tokenizer languages
68
+ self.tokenizer.src_lang = source_code if source_code in ["en", "sw"] else "sw"
69
+ self.tokenizer.tgt_lang = "sw" # Use Swahili as base target
70
+
71
+ # Prepare input text with dialect token
72
+ if source_code != "en":
73
+ # For non-English input, add source dialect token
74
+ input_text = text
75
+ else:
76
+ # For English input, add target dialect token to guide translation
77
+ input_text = f"<{target_code}> {text}"
78
+
79
+ # Tokenize
80
+ inputs = self.tokenizer(input_text, return_tensors="pt", max_length=max_length, truncation=True).to(self.device)
81
+
82
+ # Generate translation
83
+ with torch.no_grad():
84
+ outputs = self.model.generate(
85
+ **inputs,
86
+ max_length=max_length,
87
+ num_beams=4,
88
+ early_stopping=True,
89
+ pad_token_id=self.tokenizer.pad_token_id,
90
+ eos_token_id=self.tokenizer.eos_token_id,
91
+ do_sample=False,
92
+ temperature=1.0
93
+ )
94
+
95
+ # Decode result
96
+ translation = self.tokenizer.decode(outputs[0], skip_special_tokens=False)
97
+ translation = translation.replace('<s>', '').replace('</s>', '').strip()
98
+
99
+ # Calculate translation time
100
+ translation_time = time.time() - start_time
101
+
102
+ # Simple confidence score based on presence of target dialect token and length
103
+ confidence = self.calculate_confidence(translation, target_code, text)
104
+
105
+ return translation, f"Translation completed in {translation_time:.2f} seconds", confidence
106
+
107
+ except Exception as e:
108
+ return f"Translation error: {str(e)}", "Error occurred during translation", 0.0
109
+
110
+ def calculate_confidence(self, translation: str, target_code: str, source_text: str) -> float:
111
+ """Calculate a simple confidence score for the translation"""
112
+ score = 0.0
113
+
114
+ # Check if target dialect token is present
115
+ if f"<{target_code}>" in translation:
116
+ score += 0.4
117
+
118
+ # Check if translation is not just copying source
119
+ if source_text.lower() not in translation.lower():
120
+ score += 0.3
121
+
122
+ # Check reasonable length
123
+ words = translation.split()
124
+ if 1 <= len(words) <= 15:
125
+ score += 0.2
126
+
127
+ # Check for repetitive patterns
128
+ if not (".)" in translation or "..." in translation):
129
+ score += 0.1
130
+
131
+ return min(1.0, score)
132
+
133
+ def create_interface(self):
134
+ """Create the Gradio interface"""
135
+
136
+ # Custom CSS for better styling
137
+ css = """
138
+ .gradio-container {
139
+ font-family: 'Arial', sans-serif;
140
+ }
141
+ .title {
142
+ text-align: center;
143
+ color: #2E8B57;
144
+ margin-bottom: 20px;
145
+ }
146
+ .description {
147
+ text-align: center;
148
+ color: #666;
149
+ margin-bottom: 30px;
150
+ }
151
+ .confidence-high { color: #28a745; }
152
+ .confidence-medium { color: #ffc107; }
153
+ .confidence-low { color: #dc3545; }
154
+ """
155
+
156
+ # Create interface
157
+ with gr.Blocks(css=css, title="Luhya Multilingual Translator") as demo:
158
+
159
+ # Header
160
+ gr.HTML("""
161
+ <div class="title">
162
+ <h1>🌍 Luhya Multilingual Translation Model</h1>
163
+ </div>
164
+ <div class="description">
165
+ <p>Translate between English, Swahili, and various Luhya dialects including Bukusu, Wanga, Maragoli, and more.</p>
166
+ <p><em>This model supports bidirectional translation and dialect-specific outputs.</em></p>
167
+ </div>
168
+ """)
169
+
170
+ # Main interface
171
+ with gr.Row():
172
+ with gr.Column(scale=1):
173
+ # Input section
174
+ gr.HTML("<h3>πŸ“ Input</h3>")
175
+
176
+ input_text = gr.Textbox(
177
+ label="Text to translate",
178
+ placeholder="Enter text in English, Swahili, or Luhya...",
179
+ lines=3,
180
+ max_lines=5
181
+ )
182
+
183
+ with gr.Row():
184
+ source_lang = gr.Dropdown(
185
+ choices=list(self.languages.keys()),
186
+ label="Source Language",
187
+ value="English"
188
+ )
189
+
190
+ target_dialect = gr.Dropdown(
191
+ choices=list(self.dialects.keys()),
192
+ label="Target Dialect",
193
+ value="Bukusu"
194
+ )
195
+
196
+ translate_btn = gr.Button("πŸ”„ Translate", variant="primary", size="lg")
197
+
198
+ with gr.Column(scale=1):
199
+ # Output section
200
+ gr.HTML("<h3>✨ Translation</h3>")
201
+
202
+ output_text = gr.Textbox(
203
+ label="Translated text",
204
+ lines=3,
205
+ max_lines=5,
206
+ interactive=False
207
+ )
208
+
209
+ with gr.Row():
210
+ status_text = gr.Textbox(
211
+ label="Status",
212
+ interactive=False,
213
+ scale=2
214
+ )
215
+
216
+ confidence_score = gr.Number(
217
+ label="Confidence",
218
+ interactive=False,
219
+ scale=1
220
+ )
221
+
222
+ # Examples section
223
+ gr.HTML("<h3>πŸ’‘ Try these examples:</h3>")
224
+
225
+ examples_component = gr.Examples(
226
+ examples=self.examples,
227
+ inputs=[input_text, source_lang, target_dialect, gr.Textbox(visible=False)],
228
+ outputs=[output_text, status_text, confidence_score],
229
+ fn=lambda t, s, d, _: self.translate_text(t, s, d),
230
+ cache_examples=False
231
+ )
232
+
233
+ # Information section
234
+ with gr.Accordion("ℹ️ Model Information", open=False):
235
+ gr.HTML(f"""
236
+ <div style="padding: 15px;">
237
+ <h4>Model Details</h4>
238
+ <ul>
239
+ <li><strong>Base Model:</strong> facebook/m2m100_418M</li>
240
+ <li><strong>Model Repository:</strong> <a href="https://huggingface.co/{self.model_name}" target="_blank">{self.model_name}</a></li>
241
+ <li><strong>Supported Languages:</strong> English, Swahili</li>
242
+ <li><strong>Supported Dialects:</strong> Bukusu, Wanga, Kisa, Maragoli, Tachoni, Kabras, Tsotso, Marachi, Luwanga</li>
243
+ <li><strong>Training:</strong> Fine-tuned on community-sourced Luhya translations</li>
244
+ </ul>
245
+
246
+ <h4>Usage Tips</h4>
247
+ <ul>
248
+ <li>Keep sentences reasonably short (under 100 words) for best results</li>
249
+ <li>The model works best with common phrases and everyday language</li>
250
+ <li>Confidence scores indicate model certainty about the translation</li>
251
+ <li>Try different dialects to see variations in translation</li>
252
+ </ul>
253
+
254
+ <h4>Cultural Context</h4>
255
+ <p>This model was developed to support Luhya language preservation and accessibility.
256
+ Luhya is a group of related Bantu languages spoken in western Kenya by the Luhya people.</p>
257
+ </div>
258
+ """)
259
+
260
+ # Set up the translation function
261
+ translate_btn.click(
262
+ fn=self.translate_text,
263
+ inputs=[input_text, source_lang, target_dialect],
264
+ outputs=[output_text, status_text, confidence_score]
265
+ )
266
+
267
+ # Footer
268
+ gr.HTML("""
269
+ <div style="text-align: center; margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;">
270
+ <p><strong>Luhya Multilingual Translation Model</strong></p>
271
+ <p>Built with ❀️ for language preservation and community accessibility</p>
272
+ <p><em>Part of the effort to digitize and preserve African languages</em></p>
273
+ </div>
274
+ """)
275
+
276
+ return demo
277
+
278
# ================================================================
# FOR HUGGINGFACE SPACES DEPLOYMENT
# ================================================================

# This is the main file that HuggingFace Spaces will run.
if __name__ == "__main__":
    import os

    # Get model name from environment variable or use default.
    model_name = os.getenv("MODEL_NAME", "mamakobe/luhya-multilingual-m2m100")

    # BUG FIX: the original called `create_luhya_translator_app(model_name)`,
    # which is not defined anywhere in this file and raised NameError on
    # startup. Build the app via the class defined above instead.
    app = LuhyaTranslationInterface(model_name)
    demo = app.create_interface()

    # BUG FIX: `enable_queue` and `show_tips` were removed from
    # Gradio's launch() signature; queueing is now enabled via queue().
    demo.queue()

    # Launch with settings appropriate for HuggingFace Spaces.
    demo.launch(
        server_name="0.0.0.0",  # Bind to all interfaces (required on Spaces)
        server_port=7860,       # Default port for HuggingFace Spaces
        share=False,            # Don't create a public share link on Spaces
        show_error=True         # Show errors in the interface
    )