File size: 5,613 Bytes
1df1e0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
from pathlib import Path
from typing import List, Optional
from core.exceptions import ValidationError, UnsupportedFileError, FileSizeError
from utils.constants import MAX_FILE_SIZE_MB, SUPPORTED_EXTENSIONS, LANGUAGES

class FileValidator:
    """Validator for file uploads and processing parameters.

    All methods are static; the class is used purely as a namespace.
    """

    # Provider names accepted by validate_api_key / validate_translation_params.
    SUPPORTED_PROVIDERS = ("ChatGPT", "DeepSeek")

    # Characters replaced with "_" by sanitize_filename: the punctuation that
    # is reserved in file names plus ASCII control characters 0-31 (a NUL in a
    # name makes open() raise ValueError; Windows rejects the whole range).
    _UNSAFE_FILENAME_CHARS = '<>:"/\\|?*' + ''.join(map(chr, range(32)))
    _FILENAME_TRANSLATION = str.maketrans(
        _UNSAFE_FILENAME_CHARS, '_' * len(_UNSAFE_FILENAME_CHARS)
    )

    @staticmethod
    def validate_file(file_path: Path) -> None:
        """
        Validate uploaded file

        Checks, in order: the path exists, it is a regular file, its
        lower-cased suffix is in SUPPORTED_EXTENSIONS, its size does not
        exceed MAX_FILE_SIZE_MB, and its first KB can be read.

        Args:
            file_path: Path to the file to validate

        Raises:
            ValidationError: If the path is missing, is not a file, or
                cannot be opened/read
            UnsupportedFileError: If file format is not supported
            FileSizeError: If file is too large
        """
        if not file_path.exists():
            raise ValidationError(f"File does not exist: {file_path}")

        if not file_path.is_file():
            raise ValidationError(f"Path is not a file: {file_path}")

        # Extension comparison is case-insensitive (".PDF" == ".pdf").
        extension = file_path.suffix.lower()
        if extension not in SUPPORTED_EXTENSIONS:
            raise UnsupportedFileError(
                f"Unsupported file format: {extension}. "
                f"Supported formats: {', '.join(SUPPORTED_EXTENSIONS)}"
            )

        # Size is compared in MiB (1024 * 1024 bytes).
        file_size_mb = file_path.stat().st_size / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            raise FileSizeError(
                f"File too large: {file_size_mb:.1f}MB. "
                f"Maximum allowed size: {MAX_FILE_SIZE_MB}MB"
            )

        # Probe readability by reading the first KB. Only I/O failures are
        # translated; the original error is chained so the cause stays visible.
        try:
            with open(file_path, 'rb') as f:
                f.read(1024)
        except OSError as e:
            raise ValidationError(f"Cannot read file: {e}") from e

    @staticmethod
    def validate_language(language: str) -> str:
        """
        Validate and normalize language input

        The input is accepted if it is a key of LANGUAGES (returned as-is)
        or equal to one of its values (the matching key is returned).
        Matching is exact and case-sensitive.

        Args:
            language: Language name or code

        Returns:
            Normalized language name

        Raises:
            ValidationError: If language is empty or not supported
        """
        if not language:
            raise ValidationError("Language cannot be empty")

        # Already a language name.
        if language in LANGUAGES:
            return language

        # Otherwise treat it as a code and map it back to its name.
        for name, code in LANGUAGES.items():
            if code == language:
                return name

        raise ValidationError(
            f"Unsupported language: {language}. "
            f"Supported languages: {', '.join(LANGUAGES.keys())}"
        )

    @staticmethod
    def validate_api_key(api_key: str, provider: str) -> None:
        """
        Validate API key format

        The key is stripped of surrounding whitespace before the checks;
        the stripped value is not returned to the caller.

        Args:
            api_key: API key to validate
            provider: API provider name ("ChatGPT" or "DeepSeek")

        Raises:
            ValidationError: If the key is empty, fails the provider's
                format checks, or the provider is unknown
        """
        if not api_key or not api_key.strip():
            raise ValidationError("API key cannot be empty")

        api_key = api_key.strip()

        if provider == "ChatGPT":
            if not api_key.startswith('sk-'):
                raise ValidationError("OpenAI API key must start with 'sk-'")
            if len(api_key) < 20:
                raise ValidationError("OpenAI API key appears too short")

        elif provider == "DeepSeek":
            if len(api_key) < 10:
                raise ValidationError("DeepSeek API key appears too short")

        else:
            raise ValidationError(f"Unknown provider: {provider}")

    @staticmethod
    def validate_translation_params(
        source_lang: str,
        target_lang: str,
        api_provider: str,
        api_key: str
    ) -> tuple[str, str]:
        """
        Validate all translation parameters

        Args:
            source_lang: Source language
            target_lang: Target language
            api_provider: API provider name
            api_key: API key

        Returns:
            Tuple of normalized (source_lang, target_lang)

        Raises:
            ValidationError: If any parameter is invalid
        """
        # Normalize both languages first so that a name and its code
        # referring to the same language are detected as identical.
        norm_source = FileValidator.validate_language(source_lang)
        norm_target = FileValidator.validate_language(target_lang)

        if norm_source == norm_target:
            raise ValidationError("Source and target languages cannot be the same")

        # Validate API provider against the shared list.
        if api_provider not in FileValidator.SUPPORTED_PROVIDERS:
            raise ValidationError(f"Unsupported API provider: {api_provider}")

        # Validate API key
        FileValidator.validate_api_key(api_key, api_provider)

        return norm_source, norm_target

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """
        Sanitize filename for safe file operations

        Replaces each of ``<>:"/\\|?*`` and every ASCII control character
        (0-31) with ``_``, then strips leading/trailing spaces and dots.
        If nothing is left, a default name is returned.

        Args:
            filename: Original filename

        Returns:
            Sanitized filename
        """
        # One pass over the string instead of one replace() per character.
        filename = filename.translate(FileValidator._FILENAME_TRANSLATION)

        # Remove leading/trailing spaces and dots
        filename = filename.strip(' .')

        # Ensure filename is not empty
        if not filename:
            filename = "translated_document"

        return filename