# carsa_api / translation_engine.py
# athmontech: Remove Hausa language support - model discontinued (commit 130ce6d)
"""
Translation Engine for Carsa AI
A comprehensive translation engine that supports translation from English
to multiple African languages using Helsinki-NLP models.
Supported Languages:
- Twi (Akan) - 'twi'
- Ga - 'ga'
- Ewe - 'ewe'
- Igbo - 'igbo'
- Swahili - 'swahili'
- Amharic - 'amharic'
- Zulu - 'zulu'
- Xhosa - 'xhosa'
Author: Carsa AI Team
Version: 1.0.0
"""
import torch
from transformers import pipeline
import logging
import time
# Configure logging.
# NOTE: basicConfig runs at import time and sets the root logger's level to
# INFO; per the stdlib docs it does nothing if the root logger already has
# handlers, so an application that configured logging earlier is not overridden.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the engine class and the main() smoke test.
logger = logging.getLogger(__name__)
class TranslationEngine:
    """
    Translation engine from English to several African languages.

    Each supported language maps to one Helsinki-NLP OPUS-MT checkpoint that
    is wrapped in a ``transformers`` translation pipeline. The languages in
    ``critical_languages`` are loaded when the engine is constructed; every
    other supported language is loaded the first time it is requested through
    ``translate``.
    """

    def __init__(self):
        """
        Initialize the engine and eagerly load the critical language models.

        A critical model that fails to load is logged and skipped, not
        raised, so construction succeeds even when models are unavailable.
        ``translate`` attempts the load again on demand.
        """
        # transformers pipelines use 0 for the first CUDA device and -1 for CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        device_name = "GPU" if torch.cuda.is_available() else "CPU"
        logger.info(f"Translation Engine using device: {device_name}")

        # Language code -> Hugging Face model id.
        # Hausa (Helsinki-NLP/opus-mt-en-ha) was removed: model discontinued.
        # Yoruba is disabled: Helsinki-NLP/opus-mt-en-yo does not exist.
        self.language_models = {
            "twi": "Helsinki-NLP/opus-mt-en-tw",
            "ga": "Helsinki-NLP/opus-mt-en-gaa",
            "ewe": "Helsinki-NLP/opus-mt-en-ee",
            "igbo": "Helsinki-NLP/opus-mt-en-ig",
            "swahili": "Helsinki-NLP/opus-mt-en-sw",
            "amharic": "Helsinki-NLP/opus-mt-en-am",
            "zulu": "Helsinki-NLP/opus-mt-en-zu",
            "xhosa": "Helsinki-NLP/opus-mt-en-xh",
        }

        # Language code -> loaded pipeline; filled lazily.
        self.translators = {}

        # Models loaded up front (the ones the Flutter app primarily uses).
        self.critical_languages = ["twi", "ga", "ewe"]
        self._load_critical_models()
        logger.info("Translation Engine initialized successfully!")

    @staticmethod
    def _preview(text, limit=50):
        """Return ``text`` cut to ``limit`` characters, with '...' appended if it was cut."""
        return f"{text[:limit]}{'...' if len(text) > limit else ''}"

    def _load_critical_models(self):
        """
        Load every model listed in ``self.critical_languages``.

        Best-effort: the remaining languages are still attempted after a
        failure, so one missing model does not stop the others from loading.
        """
        for lang in self.critical_languages:
            try:
                # _load_single_model logs its own failures and returns False;
                # the except below is only a safety net for unexpected errors.
                self._load_single_model(lang)
            except Exception as e:
                logger.error(f"Failed to load critical model for '{lang}': {e}")
                # Don't raise - continue with the other models.
                continue

    def _load_single_model(self, language_code):
        """
        Load the translation pipeline for one language and cache it.

        Args:
            language_code (str): Key of ``self.language_models`` to load.

        Returns:
            bool: True if the model is loaded (now or previously), False if
            the language is unsupported or loading raised an exception.
        """
        if language_code in self.translators:
            return True
        if language_code not in self.language_models:
            logger.warning(f"Language '{language_code}' not supported")
            return False

        try:
            model_name = self.language_models[language_code]
            logger.info(f"Loading model for '{language_code}': {model_name}")

            # Pipeline task names follow the "translation_XX_to_YY" pattern.
            if language_code == "twi":
                task = "translation_en_to_tw"
            else:
                task = f"translation_en_to_{language_code}"

            # Half precision on GPU, full precision on CPU.
            translator = pipeline(
                task,
                model=model_name,
                device=self.device,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            )
            self.translators[language_code] = translator
            logger.info(f"✅ Successfully loaded model for '{language_code}'")
            return True
        except Exception as e:
            logger.error(f"❌ Failed to load model for '{language_code}': {e}")
            return False

    def translate(self, text, target_language):
        """
        Translate text from English to the specified target language.

        The target language is matched case-insensitively and with
        surrounding whitespace ignored. Its model is loaded on demand if it
        is not loaded yet.

        Args:
            text (str): The English text to translate.
            target_language (str): Target language code.

        Returns:
            str: The translated text.

        Raises:
            ValueError: If ``text`` or ``target_language`` is empty, is not a
                string, or names an unsupported language.
            RuntimeError: If the model cannot be loaded or translation fails.
        """
        # Input validation. Non-string values are rejected with ValueError so
        # callers never see an AttributeError from .strip()/.lower().
        if not text or (isinstance(text, str) and not text.strip()):
            raise ValueError("Text cannot be empty")
        if not isinstance(text, str):
            raise ValueError("Text must be a string")
        if not target_language:
            raise ValueError("Target language cannot be empty")
        if not isinstance(target_language, str):
            raise ValueError("Target language must be a string")

        target_language = target_language.lower().strip()

        if target_language not in self.language_models:
            supported = ", ".join(self.language_models.keys())
            raise ValueError(
                f"Language '{target_language}' not supported. Supported languages: {supported}"
            )

        # Load model if not already loaded.
        if target_language not in self.translators:
            logger.info(f"Loading model for '{target_language}' on demand...")
            if not self._load_single_model(target_language):
                raise RuntimeError(f"Failed to load translation model for '{target_language}'")

        try:
            translator = self.translators[target_language]
            logger.info(f"Translating to {target_language}: '{self._preview(text)}'")

            # perf_counter is monotonic, so the elapsed time cannot be
            # distorted by wall-clock adjustments.
            start_time = time.perf_counter()
            result = translator(text)
            translation_time = time.perf_counter() - start_time

            # Take 'translation_text' from the first item of a list result;
            # otherwise fall back to the string form of whatever came back.
            if isinstance(result, list) and result:
                translated_text = result[0].get('translation_text', '')
            else:
                translated_text = str(result)

            logger.info(
                f"Translation completed in {translation_time:.2f}s: "
                f"'{self._preview(translated_text)}'"
            )
            return translated_text
        except Exception as e:
            logger.error(f"Translation failed for '{target_language}': {e}")
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Translation failed: {str(e)}") from e

    def get_supported_languages(self):
        """
        Get list of supported languages.

        Returns:
            list: Supported language codes (keys of ``language_models``).
        """
        return list(self.language_models.keys())

    def get_loaded_languages(self):
        """
        Get list of currently loaded languages.

        Returns:
            list: Language codes whose model is currently loaded.
        """
        return list(self.translators.keys())

    def is_language_supported(self, language_code):
        """
        Check if a language is supported.

        Uses the same normalization as ``translate`` (case-insensitive,
        surrounding whitespace ignored).

        Args:
            language_code (str): Language code to check.

        Returns:
            bool: True if supported; False otherwise, including for
            non-string input.
        """
        if not isinstance(language_code, str):
            return False
        return language_code.lower().strip() in self.language_models

    def get_engine_info(self):
        """
        Get information about the translation engine.

        Returns:
            dict: Engine name, version, device ("GPU"/"CPU", from the current
            CUDA availability), supported and loaded language codes, and
            their counts.
        """
        return {
            "engine": "Translation Engine",
            "version": "1.0.0",
            "device": "GPU" if torch.cuda.is_available() else "CPU",
            "supported_languages": self.get_supported_languages(),
            "loaded_languages": self.get_loaded_languages(),
            "total_models": len(self.language_models),
            "loaded_models": len(self.translators),
        }
def main():
    """
    Example usage and smoke test of the Translation Engine.

    Builds an engine, logs its info, and translates one sample sentence into
    each of the engine's critical languages. Every failure is logged rather
    than raised, so the function always returns normally.
    """
    try:
        # Initialize the engine
        logger.info("Testing Translation Engine...")
        engine = TranslationEngine()

        # Log engine info
        info = engine.get_engine_info()
        logger.info(f"Engine Info: {info}")

        # Test translations for critical languages. Iterate the engine's own
        # list instead of a hard-coded one: "hausa" is no longer in
        # language_models, so testing it failed on every run.
        test_text = "Hello, how are you today? This is a test of the translation system."
        for lang in engine.critical_languages:
            try:
                translated = engine.translate(test_text, lang)
                logger.info(f"🎯 {lang.upper()}: {translated}")
            except Exception as e:
                logger.error(f"❌ Failed to translate to {lang}: {e}")

        logger.info("🎉 Translation Engine testing completed!")
    except Exception as e:
        logger.error(f"❌ Translation Engine test failed: {e}")
# Run the smoke test only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()