"""
Translation Engine for Carsa AI

A comprehensive translation engine that supports translation from English
to multiple African languages using Helsinki-NLP models.

Supported Languages:
- Twi (Akan) - 'twi'
- Ga - 'ga'
- Ewe - 'ewe'
- Igbo - 'igbo'
- Swahili - 'swahili'
- Amharic - 'amharic'
- Zulu - 'zulu'
- Xhosa - 'xhosa'

Author: Carsa AI Team
Version: 1.0.0
"""
|
|
|
|
|
import logging
import time

import torch
from transformers import pipeline
|
|
|
|
|
|
|
|
# Configure root logging at INFO so model-loading and translation progress
# messages are emitted by default. basicConfig() is a no-op when the root
# logger already has handlers, so an embedding application's setup wins.
logging.basicConfig(level=logging.INFO)

# Module-level logger used by TranslationEngine and main().
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
class TranslationEngine:
    """
    A production-ready translation engine for African languages.

    This class provides translation capabilities from English to various
    African languages using pre-trained Helsinki-NLP models wrapped in
    ``transformers`` translation pipelines.

    Pipelines for the languages in ``critical_languages`` are loaded when the
    engine is constructed; any other supported language is loaded the first
    time ``translate`` is asked for it.

    Attributes:
        device: Pipeline device index: ``0`` when CUDA is available, else ``-1``.
        language_models: Maps each supported language code to its model name.
        translators: Maps each loaded language code to its pipeline object.
        critical_languages: Language codes loaded eagerly at construction.
    """

    def __init__(self):
        """
        Initialize the Translation Engine with support for multiple African languages.

        Model-loading failures are logged, not raised: construction succeeds
        even when a critical model cannot be loaded, and ``translate`` retries
        the load on demand.
        """
        cuda_available = torch.cuda.is_available()
        self.device = 0 if cuda_available else -1
        device_name = "GPU" if cuda_available else "CPU"
        logger.info(f"Translation Engine using device: {device_name}")

        self.language_models = {
            "twi": "Helsinki-NLP/opus-mt-en-tw",
            "ga": "Helsinki-NLP/opus-mt-en-gaa",
            "ewe": "Helsinki-NLP/opus-mt-en-ee",
            "igbo": "Helsinki-NLP/opus-mt-en-ig",
            "swahili": "Helsinki-NLP/opus-mt-en-sw",
            "amharic": "Helsinki-NLP/opus-mt-en-am",
            "zulu": "Helsinki-NLP/opus-mt-en-zu",
            "xhosa": "Helsinki-NLP/opus-mt-en-xh",
        }

        # Cache of loaded pipelines, keyed by language code.
        self.translators = {}

        self.critical_languages = ["twi", "ga", "ewe"]
        self._load_critical_models()

        # Surface partial initialization instead of reporting plain success.
        missing = [
            lang for lang in self.critical_languages
            if lang not in self.translators
        ]
        if missing:
            logger.warning(
                "Critical models not loaded (will be retried on demand): "
                + ", ".join(missing)
            )

        logger.info("Translation Engine initialized successfully!")

    @staticmethod
    def _normalize_code(language_code):
        """Return the stripped, lower-cased code, or "" for non-string input."""
        if not isinstance(language_code, str):
            return ""
        return language_code.strip().lower()

    @staticmethod
    def _preview(text, limit=50):
        """Return at most ``limit`` characters of ``text`` for log output."""
        return f"{text[:limit]}{'...' if len(text) > limit else ''}"

    def _load_critical_models(self):
        """Load the most important models that your Flutter app uses."""
        for lang in self.critical_languages:
            # _load_single_model() reports failures through its return value
            # and its own log line; the except clause is a safety net so one
            # bad model can never abort loading of the remaining ones.
            try:
                self._load_single_model(lang)
            except Exception as e:
                logger.error(f"Failed to load critical model for '{lang}': {e}")

    def _load_single_model(self, language_code):
        """
        Load a single translation model.

        Args:
            language_code (str): The language code to load

        Returns:
            bool: True if the model is loaded (or was already loaded),
                False if the language is unsupported or loading failed.
        """
        if language_code in self.translators:
            return True

        if language_code not in self.language_models:
            logger.warning(f"Language '{language_code}' not supported")
            return False

        try:
            model_name = self.language_models[language_code]
            logger.info(f"Loading model for '{language_code}': {model_name}")

            # NOTE(review): only 'twi' is mapped to its model suffix ('tw');
            # every other language uses the full code in the task name.
            # Confirm whether the pipeline depends on this suffix.
            if language_code == "twi":
                task = "translation_en_to_tw"
            else:
                task = f"translation_en_to_{language_code}"

            translator = pipeline(
                task,
                model=model_name,
                device=self.device,
                # Half precision on GPU, full precision on CPU.
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            )

            self.translators[language_code] = translator
            logger.info(f"✅ Successfully loaded model for '{language_code}'")
            return True

        except Exception as e:
            logger.error(f"❌ Failed to load model for '{language_code}': {e}")
            return False

    def translate(self, text, target_language):
        """
        Translate text from English to the specified target language.

        Args:
            text (str): The English text to translate
            target_language (str): Target language code (case-insensitive,
                surrounding whitespace ignored)

        Returns:
            str: The translated text

        Raises:
            ValueError: If input parameters are invalid or the language is
                not supported
            RuntimeError: If the model cannot be loaded or translation fails
        """
        # Reject non-strings explicitly so callers get the documented
        # ValueError rather than an AttributeError from .strip()/.lower().
        if text is not None and not isinstance(text, str):
            raise ValueError("Text must be a string")
        if not text or not text.strip():
            raise ValueError("Text cannot be empty")

        if target_language is not None and not isinstance(target_language, str):
            raise ValueError("Target language must be a string")
        if not target_language:
            raise ValueError("Target language cannot be empty")

        target_language = self._normalize_code(target_language)

        if target_language not in self.language_models:
            supported = ", ".join(self.language_models)
            raise ValueError(f"Language '{target_language}' not supported. Supported languages: {supported}")

        # Non-critical languages are loaded lazily on first use.
        if target_language not in self.translators:
            logger.info(f"Loading model for '{target_language}' on demand...")
            if not self._load_single_model(target_language):
                raise RuntimeError(f"Failed to load translation model for '{target_language}'")

        # The whole translation path is wrapped so callers only ever see
        # RuntimeError for failures past validation.
        try:
            translator = self.translators[target_language]

            logger.info(f"Translating to {target_language}: '{self._preview(text)}'")

            # perf_counter() is monotonic, so the elapsed time cannot go
            # negative if the wall clock is adjusted mid-translation.
            start_time = time.perf_counter()
            result = translator(text)
            translation_time = time.perf_counter() - start_time

            # The result is expected to be a list of dicts carrying
            # 'translation_text'; anything else is stringified as a fallback.
            if isinstance(result, list) and result:
                translated_text = result[0].get('translation_text', '')
            else:
                translated_text = str(result)

            logger.info(f"Translation completed in {translation_time:.2f}s: '{self._preview(translated_text)}'")

            return translated_text

        except Exception as e:
            logger.error(f"Translation failed for '{target_language}': {e}")
            # Chain the original exception so its traceback is preserved.
            raise RuntimeError(f"Translation failed: {str(e)}") from e

    def get_supported_languages(self):
        """
        Get list of supported languages.

        Returns:
            list: List of supported language codes
        """
        return list(self.language_models)

    def get_loaded_languages(self):
        """
        Get list of currently loaded languages.

        Returns:
            list: List of loaded language codes
        """
        return list(self.translators)

    def is_language_supported(self, language_code):
        """
        Check if a language is supported.

        The code is normalized the same way ``translate`` normalizes it
        (stripped and lower-cased); non-string input is reported as
        unsupported.

        Args:
            language_code (str): Language code to check

        Returns:
            bool: True if supported, False otherwise
        """
        return self._normalize_code(language_code) in self.language_models

    def get_engine_info(self):
        """
        Get information about the translation engine.

        Returns:
            dict: Engine information including supported and loaded languages
        """
        return {
            "engine": "Translation Engine",
            "version": "1.0.0",
            "device": "GPU" if torch.cuda.is_available() else "CPU",
            "supported_languages": self.get_supported_languages(),
            "loaded_languages": self.get_loaded_languages(),
            "total_models": len(self.language_models),
            "loaded_models": len(self.translators),
        }
|
|
|
|
|
|
|
|
def main():
    """
    Example usage and testing of the Translation Engine.

    Builds an engine, logs its info dict, then translates one sample sentence
    into several languages. Every failure is logged rather than raised, so
    this function always returns normally.
    """
    try:
        logger.info("Testing Translation Engine...")
        engine = TranslationEngine()

        info = engine.get_engine_info()
        logger.info(f"Engine Info: {info}")

        test_text = "Hello, how are you today? This is a test of the translation system."

        # "hausa" is not in the engine's model table, so that iteration
        # exercises the unsupported-language error path.
        for lang in ["twi", "ga", "ewe", "hausa"]:
            try:
                translated = engine.translate(test_text, lang)
            except Exception as e:
                # One failing language must not stop the remaining ones.
                logger.error(f"❌ Failed to translate to {lang}: {e}")
            else:
                logger.info(f"🎯 {lang.upper()}: {translated}")

        logger.info("🎉 Translation Engine testing completed!")

    except Exception as e:
        # Top-level boundary of the demo: report and return instead of crashing.
        logger.error(f"❌ Translation Engine test failed: {e}")
|
|
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|