# ================================================================
# GRADIO UI FOR LUHYA MULTILINGUAL TRANSLATION MODEL
# ================================================================
import gradio as gr
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import time
import json
class LuhyaTranslationInterface:
    """Gradio interface for Luhya translation model"""

    def __init__(self, model_name: str):
        # Keep the checkpoint id around for display/debugging.
        self.model_name = model_name
        # Prefer GPU when one is visible, otherwise fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Pull tokenizer and model weights from the hub (or local cache),
        # move the model to the chosen device, and freeze it for inference.
        print(f"Loading model: {model_name}")
        self.tokenizer = M2M100Tokenizer.from_pretrained(model_name)
        self.model = M2M100ForConditionalGeneration.from_pretrained(model_name)
        self.model.to(self.device)
        self.model.eval()

        # Display-name -> internal code mappings used by the UI dropdowns.
        self.languages = {
            "English": "en",
            "Swahili": "sw",
            "Luhya (General)": "luy",
        }
        # Every dialect code is simply "luy_" + lowercased display name,
        # so the table can be generated rather than written out by hand.
        dialect_names = [
            "Bukusu", "Wanga", "Kisa", "Maragoli", "Tachoni",
            "Kabras", "Tsotso", "Marachi", "Luwanga",
        ]
        self.dialects = {name: f"luy_{name.lower()}" for name in dialect_names}

        # Canned [text, source language, target dialect, description] rows
        # offered as one-click examples in the UI.
        self.examples = [
            ["Good morning", "English", "Tsotso", "Basic greeting"],
            ["Hello, how are you?", "English", "Bukusu", "Common question"],
            ["Thank you very much", "English", "Wanga", "Gratitude expression"],
            ["What is your name?", "English", "Maragoli", "Personal question"],
            ["I love you", "English", "Kabras", "Emotional expression"],
            ["Where are you going?", "English", "Tachoni", "Direction question"],
        ]
def translate_text(self, text: str, source_lang: str, target_dialect: str, max_length: int = 128):
"""Translate text using the model"""
if not text.strip():
return "Please enter some text to translate.", "", 0.0
try:
start_time = time.time()
# Map language names to codes
source_code = self.languages.get(source_lang, "en")
target_code = self.dialects.get(target_dialect, "luy_bukusu")
# Set tokenizer languages
self.tokenizer.src_lang = source_code if source_code in ["en", "sw"] else "sw"
self.tokenizer.tgt_lang = "sw" # Use Swahili as base target
# Prepare input text with dialect token
if source_code != "en":
# For non-English input, add source dialect token
input_text = text
else:
# For English input, add target dialect token to guide translation
input_text = f"<{target_code}> {text}"
# Tokenize
inputs = self.tokenizer(input_text, return_tensors="pt", max_length=max_length, truncation=True).to(self.device)
# Generate translation
with torch.no_grad():
outputs = self.model.generate(
**inputs,
max_length=max_length,
num_beams=4,
early_stopping=True,
pad_token_id=self.tokenizer.pad_token_id,
eos_token_id=self.tokenizer.eos_token_id,
do_sample=False,
temperature=1.0
)
# Decode result
translation = self.tokenizer.decode(outputs[0], skip_special_tokens=False)
translation = translation.replace('', '').replace('', '').strip()
# Calculate translation time
translation_time = time.time() - start_time
# Simple confidence score based on presence of target dialect token and length
confidence = self.calculate_confidence(translation, target_code, text)
return translation, f"Translation completed in {translation_time:.2f} seconds", confidence
except Exception as e:
return f"Translation error: {str(e)}", "Error occurred during translation", 0.0
def calculate_confidence(self, translation: str, target_code: str, source_text: str) -> float:
"""Calculate a simple confidence score for the translation"""
score = 0.0
# Check if target dialect token is present
if f"<{target_code}>" in translation:
score += 0.4
# Check if translation is not just copying source
if source_text.lower() not in translation.lower():
score += 0.3
# Check reasonable length
words = translation.split()
if 1 <= len(words) <= 15:
score += 0.2
# Check for repetitive patterns
if not (".)" in translation or "..." in translation):
score += 0.1
return min(1.0, score)
def create_interface(self):
"""Create the Gradio interface"""
# Custom CSS for better styling
css = """
.gradio-container {
font-family: 'Arial', sans-serif;
}
.title {
text-align: center;
color: #2E8B57;
margin-bottom: 20px;
}
.description {
text-align: center;
color: #666;
margin-bottom: 30px;
}
.confidence-high { color: #28a745; }
.confidence-medium { color: #ffc107; }
.confidence-low { color: #dc3545; }
"""
# Create interface
with gr.Blocks(css=css, title="Luhya Multilingual Translator") as demo:
# Header
gr.HTML("""
Translate between English, Swahili, and various Luhya dialects including Bukusu, Wanga, Maragoli, and more.
This model supports bidirectional translation and dialect-specific outputs.
This model was developed to support Luhya language preservation and accessibility. Luhya is a group of related Bantu languages spoken in western Kenya by the Luhya people.
Luhya Multilingual Translation Model
Built with ❤️ for language preservation and community accessibility
Part of the effort to digitize and preserve African languages