# TranslateMyBook / translator_engine.py
# Adjoumani's picture
# Create translator_engine.py
# c6312a3 verified
# 4. translator_engine.py
"""
Moteur de traduction multi-engines avec fallback intelligent
"""
import time
import logging
from typing import List, Optional, Dict, Any
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor, as_completed
import streamlit as st
# Import des différents moteurs
try:
import translators as ts
except ImportError:
ts = None
try:
from googletrans import Translator as GoogleTranslator
except ImportError:
GoogleTranslator = None
try:
import deepl
except ImportError:
deepl = None
try:
import openai
except ImportError:
openai = None
try:
import anthropic
except ImportError:
anthropic = None
from utils import RateLimiter, TranslationCache
class TranslationEngine(ABC):
    """Abstract base class shared by every translation engine.

    Concrete engines implement ``check_availability`` and ``translate``;
    this class adds a logger, a rate limiter and a retry wrapper.
    """

    def __init__(self, name: str):
        """Store the engine name and probe its availability.

        Args:
            name: Human-readable engine name, also used as the logger suffix.
        """
        self.name = name
        self.logger = logging.getLogger(f"Engine.{name}")
        self.rate_limiter = RateLimiter()
        # check_availability() runs right here, so a subclass must assign
        # every attribute that its check_availability() reads *before*
        # calling super().__init__().
        self.is_available = self.check_availability()

    @abstractmethod
    def check_availability(self) -> bool:
        """Return True when the engine can be used."""

    @abstractmethod
    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate ``text`` and return the result, or None on failure."""

    def translate_with_retry(self, text: str, source_lang: str, target_lang: str,
                             max_retries: int = 3) -> Optional[str]:
        """Call ``translate`` up to ``max_retries`` times with backoff.

        An attempt fails when ``translate`` raises *or* returns an empty
        result. Each failed attempt is reported to the rate limiter and is
        followed by an exponential pause (1s, 2s, 4s, ...) unless it was the
        last attempt.

        Args:
            text: Text to translate.
            source_lang: Source language code.
            target_lang: Target language code.
            max_retries: Maximum number of attempts; values <= 0 make no
                attempt at all.

        Returns:
            The first non-empty translation, or None when every attempt failed.
        """
        for attempt in range(max_retries):
            try:
                self.rate_limiter.wait()
                result = self.translate(text, source_lang, target_lang)
                if result:
                    self.rate_limiter.reset_errors()
                    return result
                # Engines may swallow their own errors and return None; that
                # must count as a failed attempt too, otherwise the retries
                # fire back-to-back with no backoff.
                self.logger.warning(f"Tentative {attempt + 1}/{max_retries} sans résultat")
            except Exception as e:
                self.logger.warning(f"Tentative {attempt + 1}/{max_retries} échouée: {e}")
            self.rate_limiter.register_error()
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
        return None
class TranslatorsEngine(TranslationEngine):
    """Engine that delegates to one provider of the ``translators`` library."""

    def __init__(self, provider: str = 'google'):
        # The provider is stored first because the engine name passed to the
        # base class is derived from it.
        self.provider = provider
        super().__init__(f"Translators-{provider}")

    def check_availability(self) -> bool:
        """Return True when the optional ``translators`` import succeeded."""
        return ts is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate ``text`` through the configured provider.

        Language codes (including ``'auto'``) are forwarded unchanged.
        Returns None when the engine is unavailable or the call fails.
        """
        if not self.is_available:
            return None

        request_options = {
            "translator": self.provider,
            "from_language": source_lang,
            "to_language": target_lang,
            "timeout": 30,
        }
        try:
            return ts.translate_text(text, **request_options)
        except Exception as e:
            self.logger.error(f"Erreur traduction {self.provider}: {e}")
            return None
class GoogleTransEngine(TranslationEngine):
    """Google Translate engine backed by the ``googletrans`` package."""

    def __init__(self):
        super().__init__("GoogleTrans")
        # Build the client only when the optional import succeeded.
        self.translator = None
        if GoogleTranslator:
            self.translator = GoogleTranslator()

    def check_availability(self) -> bool:
        """Return True when the optional ``googletrans`` import succeeded."""
        return GoogleTranslator is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate ``text``; return None when unavailable or on error.

        ``source_lang`` is forwarded as ``src`` unchanged (``'auto'`` stays
        ``'auto'``).
        """
        if not self.is_available:
            return None

        try:
            translated = self.translator.translate(text, src=source_lang, dest=target_lang)
            return translated.text
        except Exception as e:
            self.logger.error(f"Erreur GoogleTrans: {e}")
            return None
class DeepLEngine(TranslationEngine):
    """DeepL engine (requires an API key)."""

    # Target codes that need an explicit regional variant.
    # NOTE(review): the DeepL client is understood to reject the bare "EN"
    # and "PT" target codes; "PT-BR" is picked here by analogy with the
    # existing "EN-US" default -- confirm the preferred Portuguese variant.
    _TARGET_VARIANTS = {'EN': 'EN-US', 'PT': 'PT-BR'}

    def __init__(self, api_key: Optional[str] = None):
        """Create the DeepL client when a key and the library are present.

        Args:
            api_key: DeepL API key; without it the engine is unavailable.
        """
        # These attributes must exist before super().__init__() because the
        # base constructor calls check_availability(), which reads
        # self.translator.
        self.api_key = api_key
        self.translator = None
        init_error = None
        if api_key and deepl:
            try:
                self.translator = deepl.Translator(api_key)
            except Exception as e:
                # Best effort: the engine just stays unavailable, but the
                # reason is logged once the logger exists.
                init_error = e
        super().__init__("DeepL")
        if init_error is not None:
            self.logger.warning(f"Initialisation DeepL impossible: {init_error}")

    def check_availability(self) -> bool:
        """Return True when a DeepL client was created."""
        return self.translator is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate ``text`` with DeepL.

        The target code is upper-cased and mapped to a regional variant when
        one is listed in ``_TARGET_VARIANTS``. A source of ``'auto'`` is sent
        as ``None``; any other source code is upper-cased.

        Returns:
            The translated text, or None when unavailable or on error.
        """
        if not self.is_available:
            return None
        try:
            target_code = target_lang.upper()
            target_code = self._TARGET_VARIANTS.get(target_code, target_code)
            result = self.translator.translate_text(
                text,
                source_lang=None if source_lang == 'auto' else source_lang.upper(),
                target_lang=target_code,
            )
            return result.text
        except Exception as e:
            self.logger.error(f"Erreur DeepL: {e}")
            return None
class OpenAIEngine(TranslationEngine):
    """OpenAI GPT engine (requires an API key)."""

    # Language codes mapped to the full names used in the prompt.
    _LANG_NAMES = {
        'en': 'English', 'fr': 'French', 'es': 'Spanish',
        'de': 'German', 'it': 'Italian', 'pt': 'Portuguese',
        'ru': 'Russian', 'ja': 'Japanese', 'ko': 'Korean',
        'zh': 'Chinese', 'ar': 'Arabic', 'hi': 'Hindi',
    }
    # Bounds for the completion budget derived from the input length.
    # NOTE(review): 4096 is assumed to be a safe completion cap for the
    # configured chat model -- confirm if a different model is used.
    MIN_COMPLETION_TOKENS = 64
    MAX_COMPLETION_TOKENS = 4096

    def __init__(self, api_key: Optional[str] = None, model: str = "gpt-3.5-turbo"):
        """Configure the OpenAI library.

        Args:
            api_key: OpenAI API key; without it the engine is unavailable.
            model: Chat model name sent with every request.
        """
        # These attributes must exist before super().__init__() because the
        # base constructor calls check_availability(), which reads
        # self.api_key.
        self.api_key = api_key
        self.model = model
        self._client = None
        init_error = None
        if api_key and openai:
            openai.api_key = api_key  # used by the legacy module-level API
            if hasattr(openai, "OpenAI"):
                # openai>=1.0 exposes a client class and no longer supports
                # the module-level ChatCompletion interface.
                try:
                    self._client = openai.OpenAI(api_key=api_key)
                except Exception as e:
                    init_error = e
        super().__init__("OpenAI")
        if init_error is not None:
            self.logger.warning(f"Initialisation OpenAI impossible: {init_error}")

    def check_availability(self) -> bool:
        """Return True when a key was given and ``openai`` is importable."""
        return self.api_key is not None and openai is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate ``text`` with a chat completion.

        ``source_lang`` is not sent to the model; only the target language
        name appears in the prompt.

        Returns:
            The stripped model answer, or None when the engine is
            unavailable, the call fails or the answer is empty.
        """
        if not self.is_available:
            return None
        try:
            target_name = self._LANG_NAMES.get(target_lang, target_lang)
            prompt = f"Translate the following text to {target_name}. Only provide the translation, no explanations:\n\n{text}"
            # Generous estimate (2 tokens per character), clamped so that
            # very short inputs still get room and long ones stay in bounds.
            max_tokens = max(
                self.MIN_COMPLETION_TOKENS,
                min(len(text) * 2, self.MAX_COMPLETION_TOKENS),
            )
            request = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": "You are a professional translator. Provide accurate translations while preserving the original meaning and tone."},
                    {"role": "user", "content": prompt},
                ],
                "temperature": 0.3,
                "max_tokens": max_tokens,
            }
            if self._client is not None:
                response = self._client.chat.completions.create(**request)
            else:
                response = openai.ChatCompletion.create(**request)
            content = response.choices[0].message.content
            return content.strip() if content else None
        except Exception as e:
            self.logger.error(f"Erreur OpenAI: {e}")
            return None
class AnthropicEngine(TranslationEngine):
    """Anthropic Claude engine (requires an API key)."""

    # Language codes mapped to the full names used in the prompt.
    _LANG_NAMES = {
        'en': 'English', 'fr': 'French', 'es': 'Spanish',
        'de': 'German', 'it': 'Italian', 'pt': 'Portuguese',
        'ru': 'Russian', 'ja': 'Japanese', 'ko': 'Korean',
        'zh': 'Chinese', 'ar': 'Arabic', 'hi': 'Hindi',
    }
    # Bounds for the output budget derived from the input length.
    # NOTE(review): 4096 is understood to be the output-token limit of the
    # default model; the API rejects larger max_tokens values -- confirm
    # when passing another model.
    MIN_OUTPUT_TOKENS = 64
    MAX_OUTPUT_TOKENS = 4096

    def __init__(self, api_key: Optional[str] = None,
                 model: str = "claude-3-sonnet-20240229"):
        """Create the Anthropic client when a key and the library are present.

        Args:
            api_key: Anthropic API key; without it the engine is unavailable.
            model: Model name sent with every request.
        """
        # These attributes must exist before super().__init__() because the
        # base constructor calls check_availability(), which reads
        # self.client.
        self.api_key = api_key
        self.model = model
        self.client = None
        init_error = None
        if api_key and anthropic:
            try:
                self.client = anthropic.Anthropic(api_key=api_key)
            except Exception as e:
                # Best effort: the engine just stays unavailable, but the
                # reason is logged once the logger exists.
                init_error = e
        super().__init__("Anthropic")
        if init_error is not None:
            self.logger.warning(f"Initialisation Anthropic impossible: {init_error}")

    def check_availability(self) -> bool:
        """Return True when an Anthropic client was created."""
        return self.client is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate ``text`` with a single-message request.

        ``source_lang`` is not sent to the model; only the target language
        name appears in the prompt.

        Returns:
            The text of the first content item, or None when the engine is
            unavailable or the call fails.
        """
        if not self.is_available:
            return None
        try:
            target_name = self._LANG_NAMES.get(target_lang, target_lang)
            # Generous estimate (2 tokens per character), clamped so that
            # very short inputs still get room and long ones stay in bounds.
            max_tokens = max(
                self.MIN_OUTPUT_TOKENS,
                min(len(text) * 2, self.MAX_OUTPUT_TOKENS),
            )
            message = self.client.messages.create(
                model=self.model,
                max_tokens=max_tokens,
                temperature=0.3,
                messages=[
                    {
                        "role": "user",
                        "content": f"Translate this text to {target_name}. Provide only the translation:\n\n{text}",
                    }
                ],
            )
            return message.content[0].text
        except Exception as e:
            self.logger.error(f"Erreur Anthropic: {e}")
            return None
class MultiEngineTranslator:
    """Main coordinator that falls back from one engine to the next."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Build the cache and the ordered engine list.

        Args:
            config: Optional settings. Keys read here: ``deepl_api_key``,
                ``openai_api_key``, ``openai_model``, ``anthropic_api_key``.
        """
        self.config = config or {}
        self.cache = TranslationCache()
        self.logger = logging.getLogger("MultiEngineTranslator")
        self.engines = []
        self._initialize_engines()

    def _initialize_engines(self):
        """Instantiate every engine and keep the available ones.

        Free engines are appended in declaration order. Each API engine is
        inserted at index 0, so the resulting priority is Anthropic, OpenAI,
        DeepL, then the free engines.
        """
        # Free engines (translators library)
        for provider in ['google', 'bing', 'yandex', 'baidu']:
            engine = TranslatorsEngine(provider)
            if engine.is_available:
                self.engines.append(engine)

        # Alternative Google Translate client
        google_engine = GoogleTransEngine()
        if google_engine.is_available:
            self.engines.append(google_engine)

        # API engines (only when a key is configured)
        if self.config.get('deepl_api_key'):
            deepl_engine = DeepLEngine(self.config['deepl_api_key'])
            if deepl_engine.is_available:
                self.engines.insert(0, deepl_engine)  # High priority

        if self.config.get('openai_api_key'):
            openai_engine = OpenAIEngine(
                self.config['openai_api_key'],
                self.config.get('openai_model', 'gpt-3.5-turbo')
            )
            if openai_engine.is_available:
                self.engines.insert(0, openai_engine)

        if self.config.get('anthropic_api_key'):
            anthropic_engine = AnthropicEngine(self.config['anthropic_api_key'])
            if anthropic_engine.is_available:
                self.engines.insert(0, anthropic_engine)

        self.logger.info(f"Moteurs disponibles: {[e.name for e in self.engines]}")

    def translate(self, text: str, source_lang: str = 'auto', target_lang: str = 'fr') -> str:
        """Translate ``text`` with automatic fallback between engines.

        Empty or whitespace-only input is returned unchanged. The cache is
        consulted for every engine first; otherwise engines are tried in
        priority order and the first non-empty result is cached and
        returned.

        Returns:
            The translation, or the original text when every engine failed.
        """
        if not text or not text.strip():
            return text

        # Look for a cached translation from any engine
        for engine in self.engines:
            cached = self.cache.get(text, source_lang, target_lang, engine.name)
            if cached:
                self.logger.debug(f"Traduction trouvée en cache ({engine.name})")
                return cached

        # Try each engine in order
        for engine in self.engines:
            self.logger.info(f"Tentative avec {engine.name}")
            try:
                result = engine.translate_with_retry(text, source_lang, target_lang)
                if result:
                    self.cache.set(text, result, source_lang, target_lang, engine.name)
                    return result
            except Exception as e:
                self.logger.warning(f"Échec {engine.name}: {e}")
                continue

        # Every engine failed: hand back the original text
        self.logger.error("Tous les moteurs ont échoué, retour du texte original")
        return text

    def translate_batch(self, texts: List[str], source_lang: str = 'auto',
                        target_lang: str = 'fr', max_workers: int = 3) -> List[str]:
        """Translate several texts in parallel, preserving input order.

        Args:
            texts: Texts to translate.
            source_lang: Source language code.
            target_lang: Target language code.
            max_workers: Thread count; values below 1 are raised to 1
                because ThreadPoolExecutor rejects them.

        Returns:
            One entry per input text; an entry falls back to the original
            text when its translation raised.
        """
        if not texts:
            return []
        if max_workers is not None and max_workers < 1:
            max_workers = 1

        results = [None] * len(texts)
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {
                executor.submit(self.translate, text, source_lang, target_lang): i
                for i, text in enumerate(texts)
            }
            for future in as_completed(futures):
                index = futures[future]
                try:
                    results[index] = future.result()
                except Exception as e:
                    self.logger.error(f"Erreur traduction batch index {index}: {e}")
                    results[index] = texts[index]
        return results

    def get_available_engines(self) -> List[str]:
        """Return the names of the engines in priority order."""
        return [engine.name for engine in self.engines]

    def estimate_cost(self, char_count: int) -> Dict[str, float]:
        """Estimate the translation cost for the paid APIs in use.

        Rates are the hard-coded approximations below; token counts assume
        about 4 characters per token.

        Returns:
            Mapping of engine name to estimated cost, containing only the
            paid engines present in ``self.engines``.
        """
        engine_names = {engine.name for engine in self.engines}
        token_count = char_count / 4
        costs = {}

        # DeepL: ~20 EUR per 1M characters
        if 'DeepL' in engine_names:
            costs['DeepL'] = (char_count / 1_000_000) * 20

        # OpenAI GPT-3.5: ~$0.002 per 1K tokens
        if 'OpenAI' in engine_names:
            costs['OpenAI'] = (token_count / 1000) * 0.002

        # Anthropic Claude: ~$0.003 per 1K tokens
        if 'Anthropic' in engine_names:
            costs['Anthropic'] = (token_count / 1000) * 0.003

        return costs