Spaces:
Sleeping
Sleeping
| import os | |
| from pathlib import Path | |
| from typing import List, Optional | |
| from core.exceptions import ValidationError, UnsupportedFileError, FileSizeError | |
| from utils.constants import MAX_FILE_SIZE_MB, SUPPORTED_EXTENSIONS, LANGUAGES | |
class FileValidator:
    """Validator for file uploads and processing parameters.

    All methods are static: they read no instance state, so they can be
    called on the class (``FileValidator.validate_file(path)``) or on an
    instance interchangeably.
    """

    # API providers accepted by validate_translation_params.
    SUPPORTED_PROVIDERS = ("ChatGPT", "DeepSeek")

    # Characters replaced with "_" by sanitize_filename: the punctuation that
    # is reserved in file names on common platforms, plus ASCII control
    # characters (including NUL), which are never safe inside a file name.
    _UNSAFE_FILENAME_CHARS = '<>:"/\\|?*' + "".join(map(chr, range(32))) + "\x7f"
    _FILENAME_TABLE = str.maketrans(
        _UNSAFE_FILENAME_CHARS, "_" * len(_UNSAFE_FILENAME_CHARS)
    )

    # Name used by sanitize_filename when nothing usable is left.
    _DEFAULT_FILENAME = "translated_document"

    @staticmethod
    def validate_file(file_path: Path) -> None:
        """
        Validate uploaded file

        Checks, in order: the path exists, it is a regular file, its
        extension is in SUPPORTED_EXTENSIONS, its size does not exceed
        MAX_FILE_SIZE_MB, and its first kilobyte can be read.

        Args:
            file_path: Path to the file to validate (a plain string path is
                converted to a Path)

        Raises:
            ValidationError: If file is missing, not a file, or unreadable
            UnsupportedFileError: If file format is not supported
            FileSizeError: If file is too large
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise ValidationError(f"File does not exist: {file_path}")
        if not file_path.is_file():
            raise ValidationError(f"Path is not a file: {file_path}")

        # Check file extension (compared in lower case)
        extension = file_path.suffix.lower()
        if extension not in SUPPORTED_EXTENSIONS:
            raise UnsupportedFileError(
                f"Unsupported file format: {extension}. "
                f"Supported formats: {', '.join(SUPPORTED_EXTENSIONS)}"
            )

        # Check file size. stat() can still fail after the exists() check
        # (file removed, permission problem), so report that as a
        # ValidationError instead of leaking a raw OSError.
        try:
            size_bytes = file_path.stat().st_size
        except OSError as e:
            raise ValidationError(f"Cannot read file: {e}") from e

        file_size_mb = size_bytes / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            raise FileSizeError(
                f"File too large: {file_size_mb:.1f}MB. "
                f"Maximum allowed size: {MAX_FILE_SIZE_MB}MB"
            )

        # Check if file is readable by trying to read the first KB
        try:
            with open(file_path, 'rb') as f:
                f.read(1024)
        except OSError as e:
            raise ValidationError(f"Cannot read file: {e}") from e

    @staticmethod
    def validate_language(language: str) -> str:
        """
        Validate and normalize language input

        Surrounding whitespace is ignored. An exact match against the keys
        (names) or values (codes) of LANGUAGES is tried first; if none is
        found, the comparison is repeated ignoring case.

        Args:
            language: Language name or code

        Returns:
            Normalized language name (a key of LANGUAGES)

        Raises:
            ValidationError: If language is empty or not supported
        """
        if not language or not language.strip():
            raise ValidationError("Language cannot be empty")

        candidate = language.strip()

        # Exact language name
        if candidate in LANGUAGES:
            return candidate

        # Exact language code
        for name, code in LANGUAGES.items():
            if code == candidate:
                return name

        # Case-insensitive fallback on either the name or the code
        folded = candidate.casefold()
        for name, code in LANGUAGES.items():
            if folded in (str(name).casefold(), str(code).casefold()):
                return name

        raise ValidationError(
            f"Unsupported language: {language}. "
            f"Supported languages: {', '.join(LANGUAGES.keys())}"
        )

    @staticmethod
    def validate_api_key(api_key: str, provider: str) -> None:
        """
        Validate API key format

        Only the shape of the key is checked (prefix and minimum length);
        no request is made to the provider.

        Args:
            api_key: API key to validate (surrounding whitespace is ignored)
            provider: API provider name ("ChatGPT" or "DeepSeek")

        Raises:
            ValidationError: If API key is empty or malformed, or the
                provider is unknown
        """
        if not api_key or not api_key.strip():
            raise ValidationError("API key cannot be empty")

        api_key = api_key.strip()

        if provider == "ChatGPT":
            if not api_key.startswith('sk-'):
                raise ValidationError("OpenAI API key must start with 'sk-'")
            if len(api_key) < 20:
                raise ValidationError("OpenAI API key appears too short")
        elif provider == "DeepSeek":
            if len(api_key) < 10:
                raise ValidationError("DeepSeek API key appears too short")
        else:
            raise ValidationError(f"Unknown provider: {provider}")

    @staticmethod
    def validate_translation_params(
        source_lang: str,
        target_lang: str,
        api_provider: str,
        api_key: str
    ) -> tuple[str, str]:
        """
        Validate all translation parameters

        Args:
            source_lang: Source language
            target_lang: Target language
            api_provider: API provider name
            api_key: API key

        Returns:
            Tuple of normalized (source_lang, target_lang)

        Raises:
            ValidationError: If any parameter is invalid, or both languages
                normalize to the same name
        """
        # Validate languages
        norm_source = FileValidator.validate_language(source_lang)
        norm_target = FileValidator.validate_language(target_lang)
        if norm_source == norm_target:
            raise ValidationError("Source and target languages cannot be the same")

        # Validate API provider
        if api_provider not in FileValidator.SUPPORTED_PROVIDERS:
            raise ValidationError(f"Unsupported API provider: {api_provider}")

        # Validate API key
        FileValidator.validate_api_key(api_key, api_provider)

        return norm_source, norm_target

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """
        Sanitize filename for safe file operations

        Replaces each of ``<>:"/\\|?*`` and every ASCII control character
        with "_", then strips leading/trailing spaces and dots.

        Args:
            filename: Original filename (None is treated as empty)

        Returns:
            Sanitized filename, or "translated_document" if nothing is left
        """
        # Replace unsafe characters in a single pass
        cleaned = (filename or "").translate(FileValidator._FILENAME_TABLE)

        # Remove leading/trailing spaces and dots
        cleaned = cleaned.strip(' .')

        # Ensure filename is not empty
        return cleaned or FileValidator._DEFAULT_FILENAME