# BabelSlide_2.0/core/base_translator.py
# Uploaded by Marek4321 ("Upload 14 files", commit 1df1e0b, verified)
from abc import ABC, abstractmethod
from typing import Dict, Any
import re
from utils.constants import STRICT_TRANSLATION_PROMPT, UNWANTED_PATTERNS
from core.exceptions import TranslationError
class BaseTranslator(ABC):
    """Abstract base class for all translators.

    Subclasses supply the provider-specific pieces (`_validate_api_key`,
    `_make_translation_request`, `provider_name`); this class owns the shared
    flow: skip blank input, call the provider, strip unwanted additions from
    the response, and report failures as `TranslationError`.
    """

    def __init__(self, api_key: str):
        """Store the API key and validate it immediately.

        Args:
            api_key: Credential for the translation provider.

        Raises:
            Whatever the subclass's `_validate_api_key` raises.
        """
        self.api_key = api_key
        self._validate_api_key()

    @abstractmethod
    def _validate_api_key(self) -> None:
        """Validate API key format and accessibility."""
        pass

    @abstractmethod
    def _make_translation_request(self, text: str, source_lang: str, target_lang: str) -> str:
        """Make the actual API request for translation."""
        pass

    def translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
        """Translate text with strict post-processing to remove LLM commentary.

        Args:
            text: Text to translate.
            source_lang: Source language code.
            target_lang: Target language code.

        Returns:
            Clean translated text without LLM commentary. Empty,
            whitespace-only, or None input is returned unchanged and the
            provider is not called.

        Raises:
            TranslationError: If the provider request or the cleanup step
                raises any exception; the original exception is attached as
                the cause.
        """
        # Guard against None as well as blank strings: there is nothing to
        # translate, and calling .strip() on None would escape as a raw
        # AttributeError instead of a TranslationError.
        if not text or not text.strip():
            return text

        try:
            # Get translation from API
            translated = self._make_translation_request(text, source_lang, target_lang)
            # Clean the response from unwanted LLM additions
            return self._clean_translation_output(translated)
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible in
            # tracebacks as the direct cause.
            raise TranslationError(f"Translation failed: {str(e)}") from e

    def _clean_translation_output(self, output: str) -> str:
        """Remove common LLM commentary and formatting artifacts.

        Args:
            output: Raw output from LLM.

        Returns:
            Cleaned translation text: every pattern in UNWANTED_PATTERNS
            removed (case-insensitive, multiline), runs of three or more
            newlines collapsed to two, runs of spaces/tabs collapsed to a
            single space, and surrounding whitespace stripped.
        """
        cleaned = output.strip()

        # Apply regex patterns to remove unwanted additions
        for pattern in UNWANTED_PATTERNS:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE | re.MULTILINE)

        # Remove excessive whitespace while preserving intentional formatting
        cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)  # Max 2 consecutive newlines
        cleaned = re.sub(r'[ \t]+', ' ', cleaned)  # Normalize spaces

        return cleaned.strip()

    def get_system_prompt(self, source_lang: str, target_lang: str) -> str:
        """Get the strict system prompt for translation.

        Args:
            source_lang: Source language name.
            target_lang: Target language name.

        Returns:
            STRICT_TRANSLATION_PROMPT formatted with the two language names.
        """
        return STRICT_TRANSLATION_PROMPT.format(
            source_lang=source_lang,
            target_lang=target_lang,
        )

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the name of the translation provider."""
        pass