# BabelSlide_2.0/utils/validator.py
# Uploaded by Marek4321 ("Upload 14 files", revision 1df1e0b, verified)
import os
from pathlib import Path
from typing import List, Optional
from core.exceptions import ValidationError, UnsupportedFileError, FileSizeError
from utils.constants import MAX_FILE_SIZE_MB, SUPPORTED_EXTENSIONS, LANGUAGES
class FileValidator:
    """Validator for file uploads and processing parameters.

    All methods are static; the class only serves as a namespace. Validation
    failures are reported by raising ``ValidationError`` (or one of the more
    specific ``UnsupportedFileError`` / ``FileSizeError``).
    """

    # Providers accepted by validate_api_key() and validate_translation_params().
    _SUPPORTED_PROVIDERS = ("ChatGPT", "DeepSeek")

    # Maps every character that is unsafe in a filename to '_': the reserved
    # punctuation plus the ASCII control characters 0x00-0x1F (NUL, newline,
    # tab, ...), which are rejected by Windows and, for NUL, by Python itself.
    _UNSAFE_FILENAME_TABLE = str.maketrans(
        dict.fromkeys('<>:"/\\|?*' + ''.join(map(chr, range(32))), '_')
    )

    @staticmethod
    def validate_file(file_path: Path) -> None:
        """
        Validate an uploaded file.

        Checks, in order: the path exists, it is a regular file, its
        (lower-cased) extension is in ``SUPPORTED_EXTENSIONS``, its size does
        not exceed ``MAX_FILE_SIZE_MB``, and its first kilobyte can be read.

        Args:
            file_path: Path to the file to validate. A plain string path is
                accepted as well and converted to ``Path``.

        Raises:
            ValidationError: If the file is missing, is not a file, or cannot
                be read
            UnsupportedFileError: If file format is not supported
            FileSizeError: If file is too large
        """
        # Callers may hand over a plain string path; normalize it once.
        file_path = Path(file_path)

        if not file_path.exists():
            raise ValidationError(f"File does not exist: {file_path}")
        if not file_path.is_file():
            raise ValidationError(f"Path is not a file: {file_path}")

        # Check file extension (case-insensitively)
        extension = file_path.suffix.lower()
        if extension not in SUPPORTED_EXTENSIONS:
            raise UnsupportedFileError(
                f"Unsupported file format: {extension}. "
                f"Supported formats: {', '.join(SUPPORTED_EXTENSIONS)}"
            )

        # Check file size
        file_size_mb = file_path.stat().st_size / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            raise FileSizeError(
                f"File too large: {file_size_mb:.1f}MB. "
                f"Maximum allowed size: {MAX_FILE_SIZE_MB}MB"
            )

        # Check if file is readable by reading the first KB
        try:
            with open(file_path, 'rb') as f:
                f.read(1024)
        except OSError as e:
            # Chain the original error so the root cause stays in the traceback.
            raise ValidationError(f"Cannot read file: {e}") from e

    @staticmethod
    def validate_language(language: str) -> str:
        """
        Validate and normalize language input.

        ``LANGUAGES`` is iterated as a mapping of language name -> language
        code. An exact name match is tried first, then an exact code match,
        and finally a case-insensitive match on either, so previously accepted
        inputs keep resolving to the same name.

        Args:
            language: Language name or code; surrounding whitespace is ignored

        Returns:
            Normalized language name (a key of ``LANGUAGES``)

        Raises:
            ValidationError: If language is empty or not supported
        """
        # Strip like validate_api_key() does, so " English " is accepted too.
        if isinstance(language, str):
            language = language.strip()
        if not language:
            raise ValidationError("Language cannot be empty")

        # Check if it's a valid language name
        if language in LANGUAGES:
            return language

        # Check if it's a valid language code
        for name, code in LANGUAGES.items():
            if code == language:
                return name

        # Fall back to a case-insensitive comparison of names and codes
        if isinstance(language, str):
            folded = language.casefold()
            for name, code in LANGUAGES.items():
                if folded in (str(name).casefold(), str(code).casefold()):
                    return name

        raise ValidationError(
            f"Unsupported language: {language}. "
            f"Supported languages: {', '.join(LANGUAGES.keys())}"
        )

    @staticmethod
    def validate_api_key(api_key: str, provider: str) -> None:
        """
        Validate API key format.

        Only the shape of the key is checked (prefix and minimum length after
        stripping whitespace); no request is made to the provider.

        Args:
            api_key: API key to validate
            provider: API provider name ("ChatGPT" or "DeepSeek")

        Raises:
            ValidationError: If API key is empty or malformed, or the provider
                is unknown
        """
        if not api_key or not api_key.strip():
            raise ValidationError("API key cannot be empty")

        api_key = api_key.strip()

        if provider == "ChatGPT":
            if not api_key.startswith('sk-'):
                raise ValidationError("OpenAI API key must start with 'sk-'")
            if len(api_key) < 20:
                raise ValidationError("OpenAI API key appears too short")
        elif provider == "DeepSeek":
            if len(api_key) < 10:
                raise ValidationError("DeepSeek API key appears too short")
        else:
            raise ValidationError(f"Unknown provider: {provider}")

    @staticmethod
    def validate_translation_params(
        source_lang: str,
        target_lang: str,
        api_provider: str,
        api_key: str
    ) -> tuple[str, str]:
        """
        Validate all translation parameters.

        Args:
            source_lang: Source language
            target_lang: Target language
            api_provider: API provider name
            api_key: API key

        Returns:
            Tuple of normalized (source_lang, target_lang)

        Raises:
            ValidationError: If any parameter is invalid, or both languages
                normalize to the same name
        """
        # Validate languages
        norm_source = FileValidator.validate_language(source_lang)
        norm_target = FileValidator.validate_language(target_lang)
        if norm_source == norm_target:
            raise ValidationError("Source and target languages cannot be the same")

        # Validate API provider
        if api_provider not in FileValidator._SUPPORTED_PROVIDERS:
            raise ValidationError(f"Unsupported API provider: {api_provider}")

        # Validate API key
        FileValidator.validate_api_key(api_key, api_provider)

        return norm_source, norm_target

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """
        Sanitize filename for safe file operations.

        Replaces the characters ``< > : " / \\ | ? *`` and all ASCII control
        characters (0x00-0x1F) with ``_``, then strips leading/trailing spaces
        and dots. If nothing is left, ``"translated_document"`` is returned.

        Args:
            filename: Original filename

        Returns:
            Sanitized filename
        """
        # One pass over the string replaces every unsafe character.
        filename = filename.translate(FileValidator._UNSAFE_FILENAME_TABLE)

        # Remove leading/trailing spaces and dots
        filename = filename.strip(' .')

        # Ensure filename is not empty
        if not filename:
            filename = "translated_document"

        return filename