# NOTE(review): the line below appears to be leftover text from the web file viewer this
# module was copied from (status, file size, short hash); it is not part of the program.
# Spaces: Sleeping | File size: 5,613 Bytes | 1df1e0b
import os
from pathlib import Path
from typing import List, Optional
from core.exceptions import ValidationError, UnsupportedFileError, FileSizeError
from utils.constants import MAX_FILE_SIZE_MB, SUPPORTED_EXTENSIONS, LANGUAGES
class FileValidator:
    """Validator for file uploads and processing parameters.

    Every method is a ``@staticmethod``; the class is only a namespace and
    holds no instance state.
    """

    # Providers accepted by validate_api_key() and validate_translation_params().
    # Kept in one place so the two methods cannot drift apart.
    _SUPPORTED_PROVIDERS = ("ChatGPT", "DeepSeek")

    # Minimum plausible key lengths used by validate_api_key().
    _OPENAI_MIN_KEY_LENGTH = 20
    _DEEPSEEK_MIN_KEY_LENGTH = 10

    # Characters replaced by "_" in sanitize_filename(): the punctuation that
    # is reserved in file names, plus ASCII control characters (0-31 and DEL).
    # A NUL byte makes open() raise, and newlines/tabs corrupt paths and logs.
    _UNSAFE_FILENAME_CHARS = '<>:"/\\|?*' + "".join(map(chr, range(32))) + "\x7f"
    _FILENAME_TRANSLATION = str.maketrans(dict.fromkeys(_UNSAFE_FILENAME_CHARS, "_"))

    # Name returned by sanitize_filename() when nothing usable is left.
    _DEFAULT_FILENAME = "translated_document"

    @staticmethod
    def validate_file(file_path: Path) -> None:
        """
        Validate uploaded file

        Checks, in order: the path exists, it is a regular file, its
        lower-cased suffix is in SUPPORTED_EXTENSIONS, its size does not
        exceed MAX_FILE_SIZE_MB, and its first kilobyte can be read.

        Args:
            file_path: Path to the file to validate

        Raises:
            ValidationError: If file is missing, not a file, or unreadable
            UnsupportedFileError: If file format is not supported
            FileSizeError: If file is too large
        """
        if not file_path.exists():
            raise ValidationError(f"File does not exist: {file_path}")
        if not file_path.is_file():
            raise ValidationError(f"Path is not a file: {file_path}")

        # Check file extension (compared in lower case)
        extension = file_path.suffix.lower()
        if extension not in SUPPORTED_EXTENSIONS:
            raise UnsupportedFileError(
                f"Unsupported file format: {extension}. "
                f"Supported formats: {', '.join(SUPPORTED_EXTENSIONS)}"
            )

        # Check file size (bytes converted to MiB)
        file_size_mb = file_path.stat().st_size / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            raise FileSizeError(
                f"File too large: {file_size_mb:.1f}MB. "
                f"Maximum allowed size: {MAX_FILE_SIZE_MB}MB"
            )

        # Check if file is readable by reading the first KB
        try:
            with open(file_path, "rb") as handle:
                handle.read(1024)
        except OSError as exc:
            # Only I/O failures mean "unreadable"; chain the cause so the
            # original traceback is preserved for debugging.
            raise ValidationError(f"Cannot read file: {exc}") from exc

    @staticmethod
    def validate_language(language: str) -> str:
        """
        Validate and normalize language input

        Surrounding whitespace is ignored. An exact match against a language
        name or code in LANGUAGES is tried first; if none is found, the
        lookup is repeated ignoring letter case.

        Args:
            language: Language name or code

        Returns:
            Normalized language name (a key of LANGUAGES)

        Raises:
            ValidationError: If language is empty or not supported
        """
        if not language or not language.strip():
            raise ValidationError("Language cannot be empty")
        candidate = language.strip()

        # Check if it's a valid language name
        if candidate in LANGUAGES:
            return candidate

        # Check if it's a valid language code
        for name, code in LANGUAGES.items():
            if code == candidate:
                return name

        # Fallback: same lookup ignoring case, after the exact checks so that
        # inputs which matched before still resolve to the same name.
        folded = candidate.casefold()
        for name, code in LANGUAGES.items():
            if folded in (str(name).casefold(), str(code).casefold()):
                return name

        raise ValidationError(
            f"Unsupported language: {language}. "
            f"Supported languages: {', '.join(LANGUAGES)}"
        )

    @staticmethod
    def validate_api_key(api_key: str, provider: str) -> None:
        """
        Validate API key format

        Only the shape of the key is checked (prefix and minimum length);
        no request is made to the provider.

        Args:
            api_key: API key to validate (surrounding whitespace is ignored)
            provider: API provider name, "ChatGPT" or "DeepSeek"

        Raises:
            ValidationError: If API key is empty or malformed, or the
                provider is unknown
        """
        if not api_key or not api_key.strip():
            raise ValidationError("API key cannot be empty")
        api_key = api_key.strip()

        if provider == "ChatGPT":
            if not api_key.startswith("sk-"):
                raise ValidationError("OpenAI API key must start with 'sk-'")
            if len(api_key) < FileValidator._OPENAI_MIN_KEY_LENGTH:
                raise ValidationError("OpenAI API key appears too short")
        elif provider == "DeepSeek":
            if len(api_key) < FileValidator._DEEPSEEK_MIN_KEY_LENGTH:
                raise ValidationError("DeepSeek API key appears too short")
        else:
            raise ValidationError(f"Unknown provider: {provider}")

    @staticmethod
    def validate_translation_params(
        source_lang: str,
        target_lang: str,
        api_provider: str,
        api_key: str
    ) -> tuple[str, str]:
        """
        Validate all translation parameters

        Languages are validated first, then the provider, then the API key;
        the first failing check raises.

        Args:
            source_lang: Source language
            target_lang: Target language
            api_provider: API provider name
            api_key: API key

        Returns:
            Tuple of normalized (source_lang, target_lang)

        Raises:
            ValidationError: If any parameter is invalid, or both languages
                normalize to the same name
        """
        # Validate languages
        norm_source = FileValidator.validate_language(source_lang)
        norm_target = FileValidator.validate_language(target_lang)
        if norm_source == norm_target:
            raise ValidationError("Source and target languages cannot be the same")

        # Validate API provider
        if api_provider not in FileValidator._SUPPORTED_PROVIDERS:
            raise ValidationError(f"Unsupported API provider: {api_provider}")

        # Validate API key
        FileValidator.validate_api_key(api_key, api_provider)

        return norm_source, norm_target

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """
        Sanitize filename for safe file operations

        Each of the characters ``<>:"/\\|?*`` and every ASCII control
        character is replaced with "_", then leading/trailing spaces and
        dots are removed.

        Args:
            filename: Original filename

        Returns:
            Sanitized filename, or "translated_document" if nothing remains
        """
        # Replace unsafe characters in a single pass
        cleaned = (filename or "").translate(FileValidator._FILENAME_TRANSLATION)

        # Remove leading/trailing spaces and dots
        cleaned = cleaned.strip(" .")

        # Ensure filename is not empty
        return cleaned or FileValidator._DEFAULT_FILENAME