File size: 3,033 Bytes
1df1e0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from abc import ABC, abstractmethod
from typing import Dict, Any
import re
from utils.constants import STRICT_TRANSLATION_PROMPT, UNWANTED_PATTERNS
from core.exceptions import TranslationError

class BaseTranslator(ABC):
    """Abstract base class for all translators.

    Subclasses provide the provider-specific pieces (``_validate_api_key``,
    ``_make_translation_request`` and ``provider_name``). This class supplies
    the shared ``translate_text`` pipeline: request the translation, then
    strip unwanted LLM commentary using ``UNWANTED_PATTERNS``.
    """

    def __init__(self, api_key: str):
        """Store the API key and validate it immediately.

        Args:
            api_key: Credential passed to the translation provider.

        Raises:
            Whatever the subclass's ``_validate_api_key`` raises.
        """
        self.api_key = api_key
        # Validation runs during construction, so subclasses must be able to
        # validate using only ``self.api_key`` at this point.
        self._validate_api_key()

    @abstractmethod
    def _validate_api_key(self) -> None:
        """Validate API key format and accessibility.

        Called once from ``__init__`` after ``self.api_key`` has been set.
        """
        pass

    @abstractmethod
    def _make_translation_request(self, text: str, source_lang: str, target_lang: str) -> str:
        """Make the actual API request for translation.

        Args:
            text: Text to translate
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            Raw provider output; ``translate_text`` cleans it afterwards.
        """
        pass

    def translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
        """
        Translate text with strict post-processing to remove LLM commentary

        Args:
            text: Text to translate
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            Clean translated text without LLM commentary. Empty or
            whitespace-only input is returned unchanged without calling
            the provider.

        Raises:
            TranslationError: If the request or the clean-up step fails.
                The original exception is attached as ``__cause__``; a
                ``TranslationError`` raised further down is propagated
                unchanged rather than wrapped a second time.
        """
        if not text.strip():
            # Nothing to translate: hand the input back untouched and skip
            # the API call entirely.
            return text

        try:
            # Get translation from API
            translated = self._make_translation_request(text, source_lang, target_lang)

            # Clean the response from unwanted LLM additions
            return self._clean_translation_output(translated)
        except TranslationError:
            # Already the domain error type: re-raise as-is so the message is
            # not prefixed twice and any more specific subclass is preserved.
            raise
        except Exception as e:
            # Explicit chaining keeps the root cause visible in tracebacks.
            raise TranslationError(f"Translation failed: {e}") from e

    def _clean_translation_output(self, output: str) -> str:
        """
        Remove common LLM commentary and formatting artifacts

        Args:
            output: Raw output from LLM

        Returns:
            Cleaned translation text
        """
        cleaned = output.strip()

        # Apply regex patterns to remove unwanted additions. Each pattern is
        # applied case-insensitively and with ^/$ matching at line boundaries.
        for pattern in UNWANTED_PATTERNS:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE | re.MULTILINE)

        # Remove excessive whitespace while preserving intentional formatting
        cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)  # Max 2 consecutive newlines
        cleaned = re.sub(r'[ \t]+', ' ', cleaned)      # Normalize spaces

        return cleaned.strip()

    def get_system_prompt(self, source_lang: str, target_lang: str) -> str:
        """
        Get the strict system prompt for translation

        Args:
            source_lang: Source language name
            target_lang: Target language name

        Returns:
            ``STRICT_TRANSLATION_PROMPT`` formatted with ``source_lang`` and
            ``target_lang``
        """
        return STRICT_TRANSLATION_PROMPT.format(
            source_lang=source_lang,
            target_lang=target_lang
        )

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the name of the translation provider"""
        pass