Spaces:
Sleeping
Sleeping
| from abc import ABC, abstractmethod | |
| from typing import Dict, Any | |
| import re | |
| from utils.constants import STRICT_TRANSLATION_PROMPT, UNWANTED_PATTERNS | |
| from core.exceptions import TranslationError | |
class BaseTranslator(ABC):
    """Abstract base class for all translators.

    Concrete providers must implement ``_validate_api_key``,
    ``_make_translation_request`` and ``provider_name``.  The shared
    ``translate_text`` entry point calls the provider request and then
    post-processes the raw output to strip LLM commentary.
    """

    def __init__(self, api_key: str):
        """Store the API key and run the provider-specific validation.

        Args:
            api_key: Credential handed to the translation provider.
        """
        self.api_key = api_key
        self._validate_api_key()

    @abstractmethod
    def _validate_api_key(self) -> None:
        """Validate API key format and accessibility.

        Called from ``__init__`` right after ``self.api_key`` is set.
        """

    @abstractmethod
    def _make_translation_request(self, text: str, source_lang: str, target_lang: str) -> str:
        """Make the actual API request for translation.

        Args:
            text: Text to translate
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            Raw provider output; ``translate_text`` cleans it afterwards.
        """

    def translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
        """Translate text with strict post-processing to remove LLM commentary.

        Args:
            text: Text to translate
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            Clean translated text without LLM commentary.  Empty or
            whitespace-only input is returned unchanged without calling
            the provider.

        Raises:
            TranslationError: If the request or the clean-up step raises.
        """
        # Nothing to translate: skip the API call and hand the input back as-is.
        if not text.strip():
            return text

        try:
            # Get translation from API
            translated = self._make_translation_request(text, source_lang, target_lang)
            # Clean the response from unwanted LLM additions
            return self._clean_translation_output(translated)
        except Exception as e:
            # Chain the original exception so its traceback stays attached
            # as __cause__ for debugging.
            raise TranslationError(f"Translation failed: {e}") from e

    def _clean_translation_output(self, output: str) -> str:
        """Remove common LLM commentary and formatting artifacts.

        Every regex in ``UNWANTED_PATTERNS`` is applied case-insensitively in
        multi-line mode and its matches are deleted.  Whitespace is then
        normalised.

        Args:
            output: Raw output from LLM

        Returns:
            Cleaned translation text
        """
        cleaned = output.strip()

        # Apply regex patterns to remove unwanted additions
        for pattern in UNWANTED_PATTERNS:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE | re.MULTILINE)

        # Allow at most one blank line between paragraphs.
        cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
        # Collapse every run of spaces/tabs to a single space.  Note that this
        # also flattens leading indentation inside the text.
        cleaned = re.sub(r'[ \t]+', ' ', cleaned)

        return cleaned.strip()

    def get_system_prompt(self, source_lang: str, target_lang: str) -> str:
        """Get the strict system prompt for translation.

        Args:
            source_lang: Source language name
            target_lang: Target language name

        Returns:
            ``STRICT_TRANSLATION_PROMPT`` formatted with both values.
        """
        return STRICT_TRANSLATION_PROMPT.format(
            source_lang=source_lang,
            target_lang=target_lang,
        )

    @abstractmethod
    def provider_name(self) -> str:
        """Return the name of the translation provider."""