|
|
from transformers import pipeline |
|
|
import torch |
|
|
from config import Config |
|
|
from typing import Dict, Any |
|
|
import re |
|
|
|
|
|
|
|
|
# Two-letter language codes accepted as a translation target, mapped to the
# English name of the language. Being listed here does not guarantee that a
# translation model is available for the language.
LANGUAGE_CODES = {
    "en": "English",
    "fr": "French",
    "de": "German",
    "es": "Spanish",
    "pt": "Portuguese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "nl": "Dutch",
    "sv": "Swedish",
    "da": "Danish",
    "no": "Norwegian",
    "fi": "Finnish",
    "pl": "Polish",
    "cs": "Czech",
    "hu": "Hungarian",
    "ro": "Romanian",
    "bg": "Bulgarian",
    "hr": "Croatian",
    "sk": "Slovak",
    "sl": "Slovenian",
    "et": "Estonian",
    "lv": "Latvian",
    "lt": "Lithuanian",
    "mt": "Maltese",
    "el": "Greek",
    "tr": "Turkish",
}
|
|
|
|
|
|
|
|
# Hugging Face model id to load for each target language code. Every id has
# the form "Helsinki-NLP/opus-mt-it-<code>" (Italian source), so the table is
# generated from the ordered list of target codes.
HELSINKI_MODELS = {
    code: f"Helsinki-NLP/opus-mt-it-{code}"
    for code in (
        "en", "fr", "de", "es", "pt", "ru",
        "nl", "sv", "da", "no", "fi", "pl",
        "cs", "hu", "ro", "bg", "hr", "sk",
        "sl", "et", "lv", "lt", "el", "tr",
    )
}
|
|
|
|
|
|
|
|
class TranslationService:
    """Translate Italian text into other languages with Helsinki-NLP models.

    A Hugging Face ``translation`` pipeline is created per target language the
    first time it is requested and then kept in ``self.translators``. Before
    translating, HTML-like tags and ``{...}`` tokens are swapped for opaque
    markers so the model does not rewrite them; the markers are swapped back
    afterwards.
    """

    # Anything shaped like an HTML tag / anything wrapped in curly braces.
    _HTML_PATTERN = r'<[^>]+>'
    _BRACE_PATTERN = r'\{[^}]+\}'

    def __init__(self, device: str = "cpu"):
        """Remember the device name and start with an empty pipeline cache.

        Args:
            device: When equal to ``"cuda"`` pipelines are created with
                ``device=0`` and float16 weights; any other value uses
                ``device=-1`` and float32.
        """
        self.device = device
        # target language code -> loaded translation pipeline
        self.translators = {}

    def _get_translator(self, target_language: str):
        """Return the pipeline for *target_language*, loading it on first use.

        Returns:
            The cached or freshly created pipeline, or ``None`` when no model
            is mapped to the language or the model cannot be loaded. Load
            failures are not cached, so a later call tries again.
        """
        if target_language in self.translators:
            return self.translators[target_language]

        model_name = HELSINKI_MODELS.get(target_language)
        if model_name is None:
            return None

        on_gpu = self.device == "cuda"
        try:
            self.translators[target_language] = pipeline(
                "translation",
                model=model_name,
                device=0 if on_gpu else -1,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
            )
        except Exception as e:
            # Best effort: report the problem and let the caller fall back.
            print(f"Modello non disponibile per {target_language}, usando fallback: {e}")
            return None
        return self.translators[target_language]

    def _extract_placeholders(self, text: str) -> tuple[str, Dict[str, str]]:
        """Replace HTML tags and ``{...}`` tokens in *text* with markers.

        Tags become ``HTMLTAG0``, ``HTMLTAG1``, ... and brace tokens become
        ``PLACEHOLDER0``, ``PLACEHOLDER1``, ..., numbered in order of
        appearance.

        Returns:
            A ``(processed_text, placeholders)`` pair where ``placeholders``
            maps each marker to the original fragment it stands for.
        """
        placeholders = {}
        processed_text = text
        # Both kinds are located in the *original* text; tags are substituted
        # first, so a brace token that sits inside a tag is recorded but never
        # substituted on its own (the whole tag is already protected).
        for prefix, pattern in (
            ("HTMLTAG", self._HTML_PATTERN),
            ("PLACEHOLDER", self._BRACE_PATTERN),
        ):
            for index, fragment in enumerate(re.findall(pattern, text)):
                marker = f"{prefix}{index}"
                placeholders[marker] = fragment
                # Replace only the first remaining occurrence so repeated
                # fragments each receive their own marker.
                processed_text = processed_text.replace(fragment, marker, 1)
        return processed_text, placeholders

    def _restore_placeholders(self, text: str, placeholders: Dict[str, str]) -> str:
        """Put the original fragments back in place of their markers.

        All markers are substituted in a single pass and longer markers are
        tried first, so ``HTMLTAG1`` can never match the beginning of
        ``HTMLTAG10`` and restored fragments are never rescanned.

        NOTE(review): as before, at most one space directly before and one
        directly after each marker is removed together with the marker. This
        also drops spaces that were present in the source text around a tag
        or token - confirm that this is the intended behavior.
        """
        if not placeholders:
            return text
        markers = sorted(placeholders, key=len, reverse=True)
        marker_re = re.compile(
            " ?(" + "|".join(re.escape(marker) for marker in markers) + ") ?"
        )
        return marker_re.sub(lambda found: placeholders[found.group(1)], text)

    def translate_text(self, text: str, target_language: str) -> str:
        """Translate *text* from Italian into *target_language*.

        Args:
            text: Source text; may contain HTML tags and ``{...}`` tokens.
            target_language: A key of ``LANGUAGE_CODES``.

        Returns:
            The translated text with tags/tokens restored and surrounding
            whitespace stripped. Empty or whitespace-only input is returned
            unchanged. When no translator is available the result is
            ``"[TRANSLATION NOT AVAILABLE: <text>]"``; when translation fails
            it is ``"[TRANSLATION ERROR: <text>]"``.

        Raises:
            ValueError: If *target_language* is not in ``LANGUAGE_CODES``.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        # Nothing to translate: do not send blank input to the model and do
        # not wrap it in a fallback message.
        if not text.strip():
            return text

        translator = self._get_translator(target_language)
        if translator is None:
            return f"[TRANSLATION NOT AVAILABLE: {text}]"

        clean_text, placeholders = self._extract_placeholders(text)
        try:
            result = translator(clean_text, max_length=512)
            if isinstance(result, list) and len(result) > 0:
                translated_text = result[0]['translation_text']
            else:
                translated_text = str(result)
            translated_text = self._restore_placeholders(translated_text, placeholders)
            return translated_text.strip()
        except Exception as e:
            # Best effort: a failing item must not abort a whole locale.
            print(f"Errore durante la traduzione: {e}")
            return f"[TRANSLATION ERROR: {text}]"

    def translate_locale_object(self, locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
        """Translate every top-level string value of *locale_data*.

        Non-string values (including nested containers) are copied to the
        result unchanged.

        Raises:
            ValueError: If *target_language* is not in ``LANGUAGE_CODES``.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        return {
            key: (
                self.translate_text(content, target_language)
                if isinstance(content, str)
                else content
            )
            for key, content in locale_data.items()
        }
|
|
|
|
|
|
|
|
|
|
|
# Module-wide TranslationService instance; stays None until first requested.
translation_service = None


def get_translation_service():
    """Return the shared TranslationService, creating it on the first call.

    The instance is built with the ``DEVICE`` attribute of a fresh ``Config``
    and stored in the module-level ``translation_service`` variable.
    """
    global translation_service
    if translation_service is not None:
        return translation_service
    translation_service = TranslationService(device=Config().DEVICE)
    return translation_service
|
|
|
|
|
def translate_locale(locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
    """Translate a locale object using the shared translation service.

    Thin convenience wrapper that forwards both arguments to
    ``translate_locale_object`` on the service returned by
    ``get_translation_service()`` and returns its result.
    """
    return get_translation_service().translate_locale_object(
        locale_data, target_language
    )
|
|
|
|
|
def get_supported_languages():
    """Return the language codes that have an entry in ``HELSINKI_MODELS``.

    The result is a new list, in the mapping's insertion order.
    """
    return list(HELSINKI_MODELS)
|
|
|