locale_translate_server / chat_service.py
Daniele
wip
d14edbf
raw
history blame
7.35 kB
from transformers import pipeline
import torch
from config import Config
from typing import Dict, Any
import re
# Language codes accepted as translation targets, mapped to their English
# names. This table is what translate_text / translate_locale_object validate
# against. It is wider than HELSINKI_MODELS: codes listed here without a model
# entry (ja, ko, zh, ar, hi, mt) pass validation but have no translator.
LANGUAGE_CODES = {
    "en": "English",
    "fr": "French",
    "de": "German",
    "es": "Spanish",
    "pt": "Portuguese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "nl": "Dutch",
    "sv": "Swedish",
    "da": "Danish",
    "no": "Norwegian",
    "fi": "Finnish",
    "pl": "Polish",
    "cs": "Czech",
    "hu": "Hungarian",
    "ro": "Romanian",
    "bg": "Bulgarian",
    "hr": "Croatian",
    "sk": "Slovak",
    "sl": "Slovenian",
    "et": "Estonian",
    "lv": "Latvian",
    "lt": "Lithuanian",
    "mt": "Maltese",
    "el": "Greek",
    "tr": "Turkish",
}
# Helsinki-NLP translation model per target language (Italian -> target).
# Every entry follows the "Helsinki-NLP/opus-mt-it-<code>" naming scheme, so
# the table is derived from the ordered list of target codes. Nothing here
# checks that a given model can actually be loaded.
HELSINKI_MODELS = {
    code: f"Helsinki-NLP/opus-mt-it-{code}"
    for code in (
        "en", "fr", "de", "es", "pt", "ru",
        "nl", "sv", "da", "no", "fi", "pl",
        "cs", "hu", "ro", "bg", "hr", "sk",
        "sl", "et", "lv", "lt", "el", "tr",
    )
}
class TranslationService:
    """Translate Italian text into other languages via Helsinki-NLP pipelines.

    One ``transformers`` translation pipeline is created per target language
    on first use and kept in ``self.translators``. HTML tags and ``{...}``
    spans are swapped for opaque tokens before the text is handed to the
    model and are put back into the translated output afterwards.
    """

    # Matches either an HTML-like tag or a brace-delimited span. Using one
    # pattern lets the text be scanned once, left to right, so a brace inside
    # a tag (or a tag inside braces) is captured as part of the outer span
    # instead of being registered twice.
    _PROTECTED_SPAN = re.compile(r'<[^>]+>|\{[^}]+\}')

    def __init__(self, device: str = "cpu"):
        """Create a service bound to *device* ("cuda" selects GPU 0, else CPU)."""
        self.device = device
        self.translators = {}  # target language code -> loaded pipeline

    def _get_translator(self, target_language: str):
        """Return the pipeline for *target_language*, loading it on first use.

        Returns:
            The cached or newly created pipeline, or ``None`` when the
            language has no entry in ``HELSINKI_MODELS`` or the model fails
            to load. Failures are not cached, so a later call tries again.
        """
        if target_language in self.translators:
            return self.translators[target_language]

        model_name = HELSINKI_MODELS.get(target_language)
        if model_name is None:
            return None

        use_cuda = self.device == "cuda"
        try:
            translator = pipeline(
                "translation",
                model=model_name,
                device=0 if use_cuda else -1,
                torch_dtype=torch.float16 if use_cuda else torch.float32,
            )
        except Exception as e:
            # Best-effort: report the problem and let the caller fall back.
            print(f"Modello non disponibile per {target_language}, usando fallback: {e}")
            return None

        self.translators[target_language] = translator
        return translator

    def _extract_placeholders(self, text: str) -> tuple[str, Dict[str, str]]:
        """Replace HTML tags and ``{...}`` spans in *text* with opaque tokens.

        Tags become ``HTMLTAG<n>`` and brace spans become ``PLACEHOLDER<n>``,
        each numbered from 0 in order of appearance.

        Returns:
            A ``(masked_text, mapping)`` pair where *mapping* maps every token
            that was inserted into *masked_text* to the original span.
        """
        placeholders: Dict[str, str] = {}
        counters = {"HTMLTAG": 0, "PLACEHOLDER": 0}

        def _mask(match):
            original = match.group(0)
            prefix = "HTMLTAG" if original.startswith("<") else "PLACEHOLDER"
            token = f"{prefix}{counters[prefix]}"
            counters[prefix] += 1
            placeholders[token] = original
            return token

        masked_text = self._PROTECTED_SPAN.sub(_mask, text)
        return masked_text, placeholders

    def _restore_placeholders(self, text: str, placeholders: Dict[str, str]) -> str:
        """Put the original spans back in place of their tokens in *text*.

        At most one space on each side of a token is dropped together with
        the token, matching the original "remove extra spaces around
        placeholders" intent.
        NOTE(review): this also removes a legitimate single space between a
        word and a token ("Hello PLACEHOLDER0" -> "Hello{name}") - confirm
        that this is the desired output.
        """
        if not placeholders:
            return text

        # Longest tokens first so "HTMLTAG10" is never matched as "HTMLTAG1"
        # followed by a stray "0". Substituting in a single pass also means
        # restored content is never rescanned for tokens.
        tokens = sorted(placeholders, key=len, reverse=True)
        token_pattern = re.compile(" ?(" + "|".join(map(re.escape, tokens)) + ") ?")
        return token_pattern.sub(lambda m: placeholders[m.group(1)], text)

    def translate_text(self, text: str, target_language: str) -> str:
        """Translate *text* from Italian into *target_language*.

        Returns:
            The translated, stripped text. Empty or whitespace-only input is
            returned unchanged without calling the model. When no translator
            is available the result is ``"[TRANSLATION NOT AVAILABLE: ...]"``,
            and when translation raises it is ``"[TRANSLATION ERROR: ...]"``,
            both wrapping the original text.

        Raises:
            ValueError: if *target_language* is not a key of ``LANGUAGE_CODES``.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        # Nothing to translate; avoid feeding an empty prompt to the model.
        if not text.strip():
            return text

        clean_text, placeholders = self._extract_placeholders(text)

        translator = self._get_translator(target_language)
        if translator is None:
            # Fallback: keep the original text, wrapped in a visible marker.
            return f"[TRANSLATION NOT AVAILABLE: {text}]"

        try:
            result = translator(clean_text, max_length=512)
            if isinstance(result, list) and result:
                translated_text = result[0]['translation_text']
            else:
                translated_text = str(result)
            translated_text = self._restore_placeholders(translated_text, placeholders)
            return translated_text.strip()
        except Exception as e:
            # Best-effort boundary: one bad string must not abort a whole locale.
            print(f"Errore durante la traduzione: {e}")
            return f"[TRANSLATION ERROR: {text}]"

    def translate_locale_object(self, locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
        """Translate every top-level string value of *locale_data*.

        Non-string values (including nested containers) are copied to the
        result unchanged; keys are never translated.

        Raises:
            ValueError: if *target_language* is not a key of ``LANGUAGE_CODES``.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        return {
            key: self.translate_text(content, target_language) if isinstance(content, str) else content
            for key, content in locale_data.items()
        }
# Module-wide translation service instance, created on first request.
translation_service = None


def get_translation_service():
    """Return the shared TranslationService, creating it on the first call.

    The instance is built with the ``DEVICE`` value of a fresh ``Config()``
    and stored in the module-level ``translation_service`` for reuse.
    """
    global translation_service
    if translation_service is not None:
        return translation_service

    translation_service = TranslationService(device=Config().DEVICE)
    return translation_service
def translate_locale(locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
    """Translate a locale object using the shared translation service.

    Thin convenience wrapper: obtains the service through
    ``get_translation_service()`` and delegates to its
    ``translate_locale_object`` method with the same arguments.
    """
    return get_translation_service().translate_locale_object(locale_data, target_language)
def get_supported_languages():
    """Return the language codes that have an entry in ``HELSINKI_MODELS``.

    The codes are returned as a new list, in the table's definition order.
    """
    return list(HELSINKI_MODELS)