# Spaces build-status text (paste artifact, not code): "Runtime error" — the
# deploy was failing at startup; see launch() configuration at the bottom.
| # ================================================================ | |
| # GRADIO UI FOR LUHYA MULTILINGUAL TRANSLATION MODEL | |
| # ================================================================ | |
| import gradio as gr | |
| import torch | |
| from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
| import time | |
| import json | |
| class LuhyaTranslationInterface: | |
| """Gradio interface for Luhya translation model""" | |
| def __init__(self, model_name: str): | |
| self.model_name = model_name | |
| self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
| # Load model and tokenizer | |
| print(f"Loading model: {model_name}") | |
| self.tokenizer = M2M100Tokenizer.from_pretrained(model_name) | |
| self.model = M2M100ForConditionalGeneration.from_pretrained(model_name) | |
| self.model.to(self.device) | |
| self.model.eval() | |
| # Language and dialect mappings | |
| self.languages = { | |
| "English": "en", | |
| "Swahili": "sw", | |
| "Luhya (General)": "luy" | |
| } | |
| self.dialects = { | |
| "Bukusu": "luy_bukusu", | |
| "Wanga": "luy_wanga", | |
| "Kisa": "luy_kisa", | |
| "Maragoli": "luy_maragoli", | |
| "Tachoni": "luy_tachoni", | |
| "Kabras": "luy_kabras", | |
| "Tsotso": "luy_tsotso", | |
| "Marachi": "luy_marachi", | |
| "Luwanga": "luy_luwanga" | |
| } | |
| # Example translations for quick testing | |
| self.examples = [ | |
| ["Good morning", "English", "Tsotso", "Basic greeting"], | |
| ["Hello, how are you?", "English", "Bukusu", "Common question"], | |
| ["Thank you very much", "English", "Wanga", "Gratitude expression"], | |
| ["What is your name?", "English", "Maragoli", "Personal question"], | |
| ["I love you", "English", "Kabras", "Emotional expression"], | |
| ["Where are you going?", "English", "Tachoni", "Direction question"] | |
| ] | |
| def translate_text(self, text: str, source_lang: str, target_dialect: str, max_length: int = 128): | |
| """Translate text using the model""" | |
| if not text.strip(): | |
| return "Please enter some text to translate.", "", 0.0 | |
| try: | |
| start_time = time.time() | |
| # Map language names to codes | |
| source_code = self.languages.get(source_lang, "en") | |
| target_code = self.dialects.get(target_dialect, "luy_bukusu") | |
| # Set tokenizer languages | |
| self.tokenizer.src_lang = source_code if source_code in ["en", "sw"] else "sw" | |
| self.tokenizer.tgt_lang = "sw" # Use Swahili as base target | |
| # Prepare input text with dialect token | |
| if source_code != "en": | |
| # For non-English input, add source dialect token | |
| input_text = text | |
| else: | |
| # For English input, add target dialect token to guide translation | |
| input_text = f"<{target_code}> {text}" | |
| # Tokenize | |
| inputs = self.tokenizer(input_text, return_tensors="pt", max_length=max_length, truncation=True).to(self.device) | |
| # Generate translation | |
| with torch.no_grad(): | |
| outputs = self.model.generate( | |
| **inputs, | |
| max_length=max_length, | |
| num_beams=4, | |
| early_stopping=True, | |
| pad_token_id=self.tokenizer.pad_token_id, | |
| eos_token_id=self.tokenizer.eos_token_id, | |
| do_sample=False, | |
| temperature=1.0 | |
| ) | |
| # Decode result | |
| translation = self.tokenizer.decode(outputs[0], skip_special_tokens=False) | |
| translation = translation.replace('<s>', '').replace('</s>', '').strip() | |
| # Calculate translation time | |
| translation_time = time.time() - start_time | |
| # Simple confidence score based on presence of target dialect token and length | |
| confidence = self.calculate_confidence(translation, target_code, text) | |
| return translation, f"Translation completed in {translation_time:.2f} seconds", confidence | |
| except Exception as e: | |
| return f"Translation error: {str(e)}", "Error occurred during translation", 0.0 | |
| def calculate_confidence(self, translation: str, target_code: str, source_text: str) -> float: | |
| """Calculate a simple confidence score for the translation""" | |
| score = 0.0 | |
| # Check if target dialect token is present | |
| if f"<{target_code}>" in translation: | |
| score += 0.4 | |
| # Check if translation is not just copying source | |
| if source_text.lower() not in translation.lower(): | |
| score += 0.3 | |
| # Check reasonable length | |
| words = translation.split() | |
| if 1 <= len(words) <= 15: | |
| score += 0.2 | |
| # Check for repetitive patterns | |
| if not (".)" in translation or "..." in translation): | |
| score += 0.1 | |
| return min(1.0, score) | |
| def create_interface(self): | |
| """Create the Gradio interface""" | |
| # Custom CSS for better styling | |
| css = """ | |
| .gradio-container { | |
| font-family: 'Arial', sans-serif; | |
| } | |
| .title { | |
| text-align: center; | |
| color: #2E8B57; | |
| margin-bottom: 20px; | |
| } | |
| .description { | |
| text-align: center; | |
| color: #666; | |
| margin-bottom: 30px; | |
| } | |
| .confidence-high { color: #28a745; } | |
| .confidence-medium { color: #ffc107; } | |
| .confidence-low { color: #dc3545; } | |
| """ | |
| # Create interface | |
| with gr.Blocks(css=css, title="Luhya Multilingual Translator") as demo: | |
| # Header | |
| gr.HTML(""" | |
| <div class="title"> | |
| <h1>π Luhya Multilingual Translation Model</h1> | |
| </div> | |
| <div class="description"> | |
| <p>Translate between English, Swahili, and various Luhya dialects including Bukusu, Wanga, Maragoli, and more.</p> | |
| <p><em>This model supports bidirectional translation and dialect-specific outputs.</em></p> | |
| </div> | |
| """) | |
| # Main interface | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| # Input section | |
| gr.HTML("<h3>π Input</h3>") | |
| input_text = gr.Textbox( | |
| label="Text to translate", | |
| placeholder="Enter text in English, Swahili, or Luhya...", | |
| lines=3, | |
| max_lines=5 | |
| ) | |
| with gr.Row(): | |
| source_lang = gr.Dropdown( | |
| choices=list(self.languages.keys()), | |
| label="Source Language", | |
| value="English" | |
| ) | |
| target_dialect = gr.Dropdown( | |
| choices=list(self.dialects.keys()), | |
| label="Target Dialect", | |
| value="Bukusu" | |
| ) | |
| translate_btn = gr.Button("π Translate", variant="primary", size="lg") | |
| with gr.Column(scale=1): | |
| # Output section | |
| gr.HTML("<h3>β¨ Translation</h3>") | |
| output_text = gr.Textbox( | |
| label="Translated text", | |
| lines=3, | |
| max_lines=5, | |
| interactive=False | |
| ) | |
| with gr.Row(): | |
| status_text = gr.Textbox( | |
| label="Status", | |
| interactive=False, | |
| scale=2 | |
| ) | |
| confidence_score = gr.Number( | |
| label="Confidence", | |
| interactive=False, | |
| scale=1 | |
| ) | |
| # Examples section | |
| gr.HTML("<h3>π‘ Try these examples:</h3>") | |
| examples_component = gr.Examples( | |
| examples=self.examples, | |
| inputs=[input_text, source_lang, target_dialect, gr.Textbox(visible=False)], | |
| outputs=[output_text, status_text, confidence_score], | |
| fn=lambda t, s, d, _: self.translate_text(t, s, d), | |
| cache_examples=False | |
| ) | |
| # Information section | |
| with gr.Accordion("βΉοΈ Model Information", open=False): | |
| gr.HTML(f""" | |
| <div style="padding: 15px;"> | |
| <h4>Model Details</h4> | |
| <ul> | |
| <li><strong>Base Model:</strong> facebook/m2m100_418M</li> | |
| <li><strong>Model Repository:</strong> <a href="https://huggingface.co/{self.model_name}" target="_blank">{self.model_name}</a></li> | |
| <li><strong>Supported Languages:</strong> English, Swahili</li> | |
| <li><strong>Supported Dialects:</strong> Bukusu, Wanga, Kisa, Maragoli, Tachoni, Kabras, Tsotso, Marachi, Luwanga</li> | |
| <li><strong>Training:</strong> Fine-tuned on community-sourced Luhya translations</li> | |
| </ul> | |
| <h4>Usage Tips</h4> | |
| <ul> | |
| <li>Keep sentences reasonably short (under 100 words) for best results</li> | |
| <li>The model works best with common phrases and everyday language</li> | |
| <li>Confidence scores indicate model certainty about the translation</li> | |
| <li>Try different dialects to see variations in translation</li> | |
| </ul> | |
| <h4>Cultural Context</h4> | |
| <p>This model was developed to support Luhya language preservation and accessibility. | |
| Luhya is a group of related Bantu languages spoken in western Kenya by the Luhya people.</p> | |
| </div> | |
| """) | |
| # Set up the translation function | |
| translate_btn.click( | |
| fn=self.translate_text, | |
| inputs=[input_text, source_lang, target_dialect], | |
| outputs=[output_text, status_text, confidence_score] | |
| ) | |
| # Footer | |
| gr.HTML(""" | |
| <div style="text-align: center; margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 10px;"> | |
| <p><strong>Luhya Multilingual Translation Model</strong></p> | |
| <p>Built with β€οΈ for language preservation and community accessibility</p> | |
| <p><em>Part of the effort to digitize and preserve African languages</em></p> | |
| </div> | |
| """) | |
| return demo | |
| # ================================================================ | |
| # STANDALONE GRADIO APP | |
| # ================================================================ | |
def create_luhya_translator_app(model_name: str = "your-username/luhya-multilingual-m2m100"):
    """Build the Gradio demo for the given Luhya translation checkpoint.

    Args:
        model_name: HuggingFace Hub repo id of the fine-tuned model.

    Returns:
        The assembled (not yet launched) Gradio Blocks demo.
    """
    # Loading happens inside the interface constructor; this is just glue.
    return LuhyaTranslationInterface(model_name).create_interface()
| # ================================================================ | |
| # FOR HUGGINGFACE SPACES DEPLOYMENT | |
| # ================================================================ | |
| # This is the main file that HuggingFace Spaces will run | |
if __name__ == "__main__":
    import os

    # Model repo comes from a Spaces variable/secret, with a published default.
    model_name = os.getenv("MODEL_NAME", "mamakobe/luhya-multilingual-m2m100")

    # Create the app (loads model + tokenizer).
    demo = create_luhya_translator_app(model_name)

    # Enable request queueing for better behavior under concurrent users.
    # In current Gradio this is configured via .queue(), not launch().
    demo.queue()

    # FIX: the original passed show_tips=True and enable_queue=True to
    # launch(); both keyword arguments were removed from Blocks.launch()
    # in recent Gradio releases and raise TypeError at startup — the
    # likely cause of the "Runtime error" status on the Space.
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces — required on Spaces
        server_port=7860,       # default port for HuggingFace Spaces
        share=False,            # no public tunnel needed when hosted on Spaces
        show_error=True,        # surface exceptions in the interface
    )