Spaces:
Sleeping
Sleeping
| # 4. translator_engine.py | |
| """ | |
| Moteur de traduction multi-engines avec fallback intelligent | |
| """ | |
| import time | |
| import logging | |
| from typing import List, Optional, Dict, Any | |
| from abc import ABC, abstractmethod | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| import streamlit as st | |
| # Import des différents moteurs | |
| try: | |
| import translators as ts | |
| except ImportError: | |
| ts = None | |
| try: | |
| from googletrans import Translator as GoogleTranslator | |
| except ImportError: | |
| GoogleTranslator = None | |
| try: | |
| import deepl | |
| except ImportError: | |
| deepl = None | |
| try: | |
| import openai | |
| except ImportError: | |
| openai = None | |
| try: | |
| import anthropic | |
| except ImportError: | |
| anthropic = None | |
| from utils import RateLimiter, TranslationCache | |
class TranslationEngine(ABC):
    """Abstract base class for translation engines.

    Subclasses implement :meth:`check_availability` and :meth:`translate`;
    this base class supplies rate limiting and retry with exponential
    backoff.  The two hook methods are now marked ``@abstractmethod`` (the
    original left them as plain ``pass`` stubs, so a subclass that forgot to
    implement ``check_availability`` would silently end up with
    ``is_available = None``).
    """

    def __init__(self, name: str):
        """Set up logging/rate limiting and probe engine availability.

        NOTE: subclasses must assign any instance state read by
        check_availability() *before* calling super().__init__(), because
        the availability probe runs here.
        """
        self.name = name
        self.logger = logging.getLogger(f"Engine.{name}")
        self.rate_limiter = RateLimiter()
        self.is_available = self.check_availability()

    @abstractmethod
    def check_availability(self) -> bool:
        """Return True if the engine is usable (library/credentials present)."""

    @abstractmethod
    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate ``text``; return the translation or None on failure."""

    def translate_with_retry(self, text: str, source_lang: str, target_lang: str,
                             max_retries: int = 3) -> Optional[str]:
        """Translate with automatic retries.

        Each failing attempt registers an error with the rate limiter; after
        an exception the next attempt is delayed by ``2 ** attempt`` seconds
        (exponential backoff).  Returns None when every attempt fails or
        yields an empty result.
        """
        for attempt in range(max_retries):
            try:
                self.rate_limiter.wait()
                result = self.translate(text, source_lang, target_lang)
                if result:
                    self.rate_limiter.reset_errors()
                    return result
            except Exception as e:
                self.logger.warning(f"Tentative {attempt + 1}/{max_retries} échouée: {e}")
                self.rate_limiter.register_error()
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # exponential backoff
        return None
class TranslatorsEngine(TranslationEngine):
    """Engine backed by the ``translators`` library (multi-provider)."""

    def __init__(self, provider: str = 'google'):
        # self.provider must be set before super().__init__(), which calls
        # check_availability() during construction.
        self.provider = provider
        super().__init__(f"Translators-{provider}")

    def check_availability(self) -> bool:
        # Usable iff the optional top-of-file ``import translators`` succeeded.
        return ts is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate via the configured provider.

        Returns None when the library is missing or the provider errors.
        'auto' is forwarded unchanged — the library performs its own
        detection.  (The original contained a dead no-op branch that
        "converted" 'auto' to 'auto'; removed.)
        """
        if not self.is_available:
            return None
        try:
            return ts.translate_text(
                text,
                translator=self.provider,
                from_language=source_lang,
                to_language=target_lang,
                timeout=30
            )
        except Exception as e:
            self.logger.error(f"Erreur traduction {self.provider}: {e}")
            return None
class GoogleTransEngine(TranslationEngine):
    """Google Translate engine via the ``googletrans`` package."""

    def __init__(self):
        super().__init__("GoogleTrans")
        # Safe to build after super().__init__(): check_availability() only
        # inspects the module-level GoogleTranslator class, not this attribute.
        self.translator = GoogleTranslator() if GoogleTranslator else None

    def check_availability(self) -> bool:
        # Usable iff the optional ``googletrans`` import succeeded.
        return GoogleTranslator is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Return the translated text, or None when unavailable / on error."""
        if not self.is_available:
            return None
        try:
            src = 'auto' if source_lang == 'auto' else source_lang
            translated = self.translator.translate(text, src=src, dest=target_lang)
            return translated.text
        except Exception as e:
            self.logger.error(f"Erreur GoogleTrans: {e}")
            return None
class DeepLEngine(TranslationEngine):
    """DeepL engine (requires an API key)."""

    def __init__(self, api_key: str = None):
        # BUGFIX: attributes must be assigned *before* super().__init__(),
        # because the base constructor calls check_availability(), which reads
        # self.translator.  The original order raised AttributeError on every
        # construction.
        self.api_key = api_key
        self.translator = None
        if api_key and deepl:
            try:
                self.translator = deepl.Translator(api_key)
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt /
                # SystemExit are no longer swallowed; a bad key simply leaves
                # the engine unavailable.
                pass
        super().__init__("DeepL")

    def check_availability(self) -> bool:
        # Available only once a deepl.Translator was successfully created.
        return self.translator is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate via DeepL; returns None when unavailable or on API error."""
        if not self.is_available:
            return None
        try:
            # DeepL wants upper-case codes, and plain 'EN' is deprecated in
            # favour of a regional variant.
            target_lang_deepl = target_lang.upper()
            if target_lang_deepl == 'EN':
                target_lang_deepl = 'EN-US'
            result = self.translator.translate_text(
                text,
                source_lang=None if source_lang == 'auto' else source_lang.upper(),
                target_lang=target_lang_deepl
            )
            return result.text
        except Exception as e:
            self.logger.error(f"Erreur DeepL: {e}")
            return None
class OpenAIEngine(TranslationEngine):
    """OpenAI GPT engine (requires an API key).

    NOTE(review): uses the pre-1.0 ``openai.ChatCompletion`` interface —
    confirm the pinned openai package version before upgrading.
    """

    # Language code -> English name, hoisted so the dict is not rebuilt on
    # every translate() call.  Unknown codes fall back to the raw code.
    _LANG_NAMES = {
        'en': 'English', 'fr': 'French', 'es': 'Spanish',
        'de': 'German', 'it': 'Italian', 'pt': 'Portuguese',
        'ru': 'Russian', 'ja': 'Japanese', 'ko': 'Korean',
        'zh': 'Chinese', 'ar': 'Arabic', 'hi': 'Hindi'
    }

    def __init__(self, api_key: str = None, model: str = "gpt-3.5-turbo"):
        # BUGFIX: attributes must be assigned *before* super().__init__(),
        # because the base constructor calls check_availability(), which reads
        # self.api_key.  The original order raised AttributeError on every
        # construction.
        self.api_key = api_key
        self.model = model
        if api_key and openai:
            openai.api_key = api_key
        super().__init__("OpenAI")

    def check_availability(self) -> bool:
        # Needs both the library and a configured key.
        return self.api_key is not None and openai is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate via a chat completion; returns None when unavailable or on error."""
        if not self.is_available:
            return None
        try:
            target_name = self._LANG_NAMES.get(target_lang, target_lang)
            prompt = f"Translate the following text to {target_name}. Only provide the translation, no explanations:\n\n{text}"
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a professional translator. Provide accurate translations while preserving the original meaning and tone."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,           # low temperature for faithful output
                max_tokens=len(text) * 2   # generous rough estimate of output size
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            self.logger.error(f"Erreur OpenAI: {e}")
            return None
class AnthropicEngine(TranslationEngine):
    """Anthropic Claude engine (requires an API key)."""

    # Language code -> English name, hoisted so the dict is not rebuilt on
    # every translate() call.  Unknown codes fall back to the raw code.
    _LANG_NAMES = {
        'en': 'English', 'fr': 'French', 'es': 'Spanish',
        'de': 'German', 'it': 'Italian', 'pt': 'Portuguese',
        'ru': 'Russian', 'ja': 'Japanese', 'ko': 'Korean',
        'zh': 'Chinese', 'ar': 'Arabic', 'hi': 'Hindi'
    }

    def __init__(self, api_key: str = None):
        # BUGFIX: attributes must be assigned *before* super().__init__(),
        # because the base constructor calls check_availability(), which reads
        # self.client.  The original order raised AttributeError on every
        # construction.
        self.api_key = api_key
        self.client = None
        if api_key and anthropic:
            try:
                self.client = anthropic.Anthropic(api_key=api_key)
            except Exception:
                # Narrowed from a bare ``except:``; a bad key simply leaves
                # the engine unavailable.
                pass
        super().__init__("Anthropic")

    def check_availability(self) -> bool:
        # Available only once an Anthropic client was successfully created.
        return self.client is not None

    def translate(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Translate via the Messages API; returns None when unavailable or on error."""
        if not self.is_available:
            return None
        try:
            target_name = self._LANG_NAMES.get(target_lang, target_lang)
            message = self.client.messages.create(
                model="claude-3-sonnet-20240229",
                max_tokens=len(text) * 2,  # generous rough estimate of output size
                temperature=0.3,           # low temperature for faithful output
                messages=[
                    {
                        "role": "user",
                        "content": f"Translate this text to {target_name}. Provide only the translation:\n\n{text}"
                    }
                ]
            )
            return message.content[0].text
        except Exception as e:
            self.logger.error(f"Erreur Anthropic: {e}")
            return None
class MultiEngineTranslator:
    """Top-level manager: tries each configured engine with fallback."""

    def __init__(self, config: Dict[str, Any] = None):
        """Build the engine list from ``config`` (API keys, model names)."""
        self.config = config or {}
        self.cache = TranslationCache()
        self.logger = logging.getLogger("MultiEngineTranslator")
        self.engines = []
        self._initialize_engines()

    def _initialize_engines(self):
        """Register every usable engine; paid APIs get front-of-list priority."""
        # Free engines: the ``translators`` providers, then googletrans.
        candidates = [TranslatorsEngine(p) for p in ('google', 'bing', 'yandex', 'baidu')]
        candidates.append(GoogleTransEngine())
        self.engines.extend(c for c in candidates if c.is_available)
        # Paid engines, each inserted at index 0 when a key is configured and
        # the client initialised, so the final order is Anthropic, OpenAI,
        # DeepL, then the free engines.
        if self.config.get('deepl_api_key'):
            paid = DeepLEngine(self.config['deepl_api_key'])
            if paid.is_available:
                self.engines.insert(0, paid)
        if self.config.get('openai_api_key'):
            paid = OpenAIEngine(
                self.config['openai_api_key'],
                self.config.get('openai_model', 'gpt-3.5-turbo')
            )
            if paid.is_available:
                self.engines.insert(0, paid)
        if self.config.get('anthropic_api_key'):
            paid = AnthropicEngine(self.config['anthropic_api_key'])
            if paid.is_available:
                self.engines.insert(0, paid)
        self.logger.info(f"Moteurs disponibles: {[e.name for e in self.engines]}")

    def translate(self, text: str, source_lang: str = 'auto', target_lang: str = 'fr') -> str:
        """Translate ``text`` with automatic fallback across engines.

        Blank input is returned untouched.  The cache is consulted first
        (one entry per engine); otherwise engines are tried in priority
        order, and the original text is returned when all of them fail.
        """
        if not text or not text.strip():
            return text
        for engine in self.engines:
            hit = self.cache.get(text, source_lang, target_lang, engine.name)
            if hit:
                self.logger.debug(f"Traduction trouvée en cache ({engine.name})")
                return hit
        for engine in self.engines:
            self.logger.info(f"Tentative avec {engine.name}")
            try:
                translated = engine.translate_with_retry(text, source_lang, target_lang)
                if translated:
                    self.cache.set(text, translated, source_lang, target_lang, engine.name)
                    return translated
            except Exception as e:
                self.logger.warning(f"Échec {engine.name}: {e}")
        self.logger.error("Tous les moteurs ont échoué, retour du texte original")
        return text

    def translate_batch(self, texts: List[str], source_lang: str = 'auto',
                        target_lang: str = 'fr', max_workers: int = 3) -> List[str]:
        """Translate several texts concurrently, preserving input order.

        A text whose translation raised is passed through unchanged.
        """
        results: List[Optional[str]] = [None] * len(texts)
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            pending = {}
            for idx, item in enumerate(texts):
                pending[pool.submit(self.translate, item, source_lang, target_lang)] = idx
            for done in as_completed(pending):
                idx = pending[done]
                try:
                    results[idx] = done.result()
                except Exception as e:
                    self.logger.error(f"Erreur traduction batch index {idx}: {e}")
                    results[idx] = texts[idx]
        return results

    def get_available_engines(self) -> List[str]:
        """Names of usable engines, in the priority order they will be tried."""
        names = []
        for engine in self.engines:
            names.append(engine.name)
        return names

    def estimate_cost(self, char_count: int) -> Dict[str, float]:
        """Estimate the cost of translating ``char_count`` characters per paid API."""
        active = {engine.name for engine in self.engines}
        estimates: Dict[str, float] = {}
        if 'DeepL' in active:
            # DeepL: ~20 EUR per 1M characters
            estimates['DeepL'] = (char_count / 1_000_000) * 20
        tokens = char_count / 4  # rough heuristic: ~4 characters per token
        if 'OpenAI' in active:
            # GPT-3.5: ~$0.002 per 1K tokens
            estimates['OpenAI'] = (tokens / 1000) * 0.002
        if 'Anthropic' in active:
            # Claude: ~$0.003 per 1K tokens
            estimates['Anthropic'] = (tokens / 1000) * 0.003
        return estimates