import os
from pathlib import Path
from typing import List, Optional

from core.exceptions import ValidationError, UnsupportedFileError, FileSizeError
from utils.constants import MAX_FILE_SIZE_MB, SUPPORTED_EXTENSIONS, LANGUAGES


class FileValidator:
    """Validator for file uploads and processing parameters"""

    # Single source of truth for the provider names accepted by
    # validate_translation_params.
    SUPPORTED_PROVIDERS = ("ChatGPT", "DeepSeek")

    # Name used when sanitizing leaves nothing behind.
    _DEFAULT_FILENAME = "translated_document"

    # Maps every character that is unsafe in a filename to '_':
    # the reserved punctuation plus ASCII control characters (0-31 and DEL).
    _UNSAFE_FILENAME_TABLE = str.maketrans(
        dict.fromkeys('<>:"/\\|?*' + "".join(map(chr, range(32))) + "\x7f", "_")
    )

    @staticmethod
    def validate_file(file_path: Path) -> None:
        """
        Validate uploaded file

        Checks, in order: existence, that the path is a regular file,
        the extension, the size, and that the first KB can be read.

        Args:
            file_path: Path to the file to validate (a plain string path
                is also accepted and converted to a Path)

        Raises:
            ValidationError: If file is missing, not a file, or unreadable
            UnsupportedFileError: If file format is not supported
            FileSizeError: If file is too large
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise ValidationError(f"File does not exist: {file_path}")

        if not file_path.is_file():
            raise ValidationError(f"Path is not a file: {file_path}")

        # Check file extension (case-insensitive)
        extension = file_path.suffix.lower()
        if extension not in SUPPORTED_EXTENSIONS:
            raise UnsupportedFileError(
                f"Unsupported file format: {extension}. "
                f"Supported formats: {', '.join(SUPPORTED_EXTENSIONS)}"
            )

        # Check file size; stat() can still fail if the file vanished or
        # permissions changed after the existence check above.
        try:
            size_bytes = file_path.stat().st_size
        except OSError as e:
            raise ValidationError(f"Cannot read file: {e}") from e

        file_size_mb = size_bytes / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            raise FileSizeError(
                f"File too large: {file_size_mb:.1f}MB. "
                f"Maximum allowed size: {MAX_FILE_SIZE_MB}MB"
            )

        # Check if file is readable by reading the first KB
        try:
            with open(file_path, 'rb') as f:
                f.read(1024)
        except OSError as e:
            # Chain the cause so the original traceback is preserved.
            raise ValidationError(f"Cannot read file: {e}") from e

    @staticmethod
    def validate_language(language: str) -> str:
        """
        Validate and normalize language input

        Surrounding whitespace is ignored. The value is matched first
        against the keys of LANGUAGES (names), then against its values
        (codes).

        Args:
            language: Language name or code

        Returns:
            Normalized language name (a key of LANGUAGES)

        Raises:
            ValidationError: If language is empty or not supported
        """
        if not language:
            raise ValidationError("Language cannot be empty")

        # Strip like validate_api_key does, so padded or whitespace-only
        # input is handled consistently.
        candidate = language.strip() if isinstance(language, str) else language
        if not candidate:
            raise ValidationError("Language cannot be empty")

        # Check if it's a valid language name
        if candidate in LANGUAGES:
            return candidate

        # Check if it's a valid language code
        for name, code in LANGUAGES.items():
            if code == candidate:
                return name

        raise ValidationError(
            f"Unsupported language: {language}. "
            f"Supported languages: {', '.join(LANGUAGES)}"
        )

    @staticmethod
    def validate_api_key(api_key: str, provider: str) -> None:
        """
        Validate API key format

        Args:
            api_key: API key to validate (surrounding whitespace is ignored)
            provider: API provider name ("ChatGPT" or "DeepSeek")

        Raises:
            ValidationError: If API key is empty, fails the provider's
                format check, or the provider is unknown
        """
        if not api_key or not api_key.strip():
            raise ValidationError("API key cannot be empty")

        api_key = api_key.strip()

        if provider == "ChatGPT":
            if not api_key.startswith('sk-'):
                raise ValidationError("OpenAI API key must start with 'sk-'")
            if len(api_key) < 20:
                raise ValidationError("OpenAI API key appears too short")
        elif provider == "DeepSeek":
            if len(api_key) < 10:
                raise ValidationError("DeepSeek API key appears too short")
        else:
            raise ValidationError(f"Unknown provider: {provider}")

    @staticmethod
    def validate_translation_params(
        source_lang: str,
        target_lang: str,
        api_provider: str,
        api_key: str
    ) -> tuple[str, str]:
        """
        Validate all translation parameters

        Args:
            source_lang: Source language
            target_lang: Target language
            api_provider: API provider name
            api_key: API key

        Returns:
            Tuple of normalized (source_lang, target_lang)

        Raises:
            ValidationError: If any parameter is invalid
        """
        # Validate languages
        norm_source = FileValidator.validate_language(source_lang)
        norm_target = FileValidator.validate_language(target_lang)

        # Compare the normalized names so "name vs. code" of the same
        # language is also rejected.
        if norm_source == norm_target:
            raise ValidationError("Source and target languages cannot be the same")

        # Validate API provider
        if api_provider not in FileValidator.SUPPORTED_PROVIDERS:
            raise ValidationError(f"Unsupported API provider: {api_provider}")

        # Validate API key
        FileValidator.validate_api_key(api_key, api_provider)

        return norm_source, norm_target

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """
        Sanitize filename for safe file operations

        Replaces the characters <>:"/\\|?* and ASCII control characters
        with '_', then strips leading/trailing spaces and dots.

        Args:
            filename: Original filename (None is treated as empty)

        Returns:
            Sanitized filename, or "translated_document" if nothing is left
        """
        # One pass over the string replaces every unsafe character.
        cleaned = (filename or "").translate(FileValidator._UNSAFE_FILENAME_TABLE)

        # Remove leading/trailing spaces and dots
        cleaned = cleaned.strip(' .')

        # Ensure filename is not empty
        return cleaned or FileValidator._DEFAULT_FILENAME