# Listing metadata from the web file viewer: file size 3,033 bytes, revision 1df1e0b.
# (Page-status text and the line-number gutter were extraction residue and are omitted.)
from abc import ABC, abstractmethod
from typing import Dict, Any
import re
from utils.constants import STRICT_TRANSLATION_PROMPT, UNWANTED_PATTERNS
from core.exceptions import TranslationError
class BaseTranslator(ABC):
    """Abstract base class for all translators.

    Subclasses supply the provider-specific pieces (``_validate_api_key``,
    ``_make_translation_request`` and ``provider_name``). This class provides
    the shared ``translate_text`` pipeline, which post-processes the raw
    provider response to strip LLM commentary.
    """

    def __init__(self, api_key: str):
        """Store the API key and validate it immediately.

        Args:
            api_key: Credential passed to the translation provider.
        """
        # Assign before validating so the subclass hook can read self.api_key.
        self.api_key = api_key
        self._validate_api_key()

    @abstractmethod
    def _validate_api_key(self) -> None:
        """Validate API key format and accessibility.

        Called from ``__init__`` right after ``self.api_key`` is set.
        """
        pass

    @abstractmethod
    def _make_translation_request(self, text: str, source_lang: str, target_lang: str) -> str:
        """Make the actual API request for translation.

        Args:
            text: Text to translate.
            source_lang: Source language code.
            target_lang: Target language code.

        Returns:
            The raw provider response; ``translate_text`` cleans it afterwards.
        """
        pass

    def translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
        """Translate text with strict post-processing to remove LLM commentary.

        Args:
            text: Text to translate.
            source_lang: Source language code.
            target_lang: Target language code.

        Returns:
            Clean translated text without LLM commentary. Empty or
            whitespace-only input is returned unchanged without calling the
            provider.

        Raises:
            TranslationError: If the request or the clean-up step raises; the
                original exception is attached as ``__cause__``.
        """
        if not text.strip():
            # Nothing to translate: hand the input back untouched and skip the API call.
            return text

        try:
            # Get translation from API
            translated = self._make_translation_request(text, source_lang, target_lang)
            # Clean the response from unwanted LLM additions
            return self._clean_translation_output(translated)
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible in tracebacks.
            raise TranslationError(f"Translation failed: {e}") from e

    def _clean_translation_output(self, output: str) -> str:
        """Remove common LLM commentary and formatting artifacts.

        Args:
            output: Raw output from LLM.

        Returns:
            Cleaned translation text, stripped of leading/trailing whitespace.
        """
        cleaned = output.strip()

        # Apply regex patterns to remove unwanted additions
        for pattern in UNWANTED_PATTERNS:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE | re.MULTILINE)

        # Remove excessive whitespace while preserving intentional formatting
        cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)  # Max 2 consecutive newlines
        cleaned = re.sub(r'[ \t]+', ' ', cleaned)  # Normalize spaces

        # Pattern removal can leave new leading/trailing whitespace; strip again.
        return cleaned.strip()

    def get_system_prompt(self, source_lang: str, target_lang: str) -> str:
        """Get the strict system prompt for translation.

        Args:
            source_lang: Source language name.
            target_lang: Target language name.

        Returns:
            ``STRICT_TRANSLATION_PROMPT`` formatted with both language names.
        """
        return STRICT_TRANSLATION_PROMPT.format(
            source_lang=source_lang,
            target_lang=target_lang
        )

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the name of the translation provider."""
        pass