""" Input Validation Agent Validates and normalizes input data (file or text) into a standard format. """ from typing import Dict, Any, Optional import mimetypes from pathlib import Path from core.agent_base import Agent from core.errors import ValidationError from config.settings import settings class InputValidatorAgent(Agent): """ Validates input type, size, format and normalizes to internal format. """ def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]: """ Validate and normalize input. Expected input_data: { "type": "file" | "text", "content": bytes | str, "filename": str (optional, for files), } Returns: { "content": bytes, "content_type": str, "size": int, "filename": str | None, "validation_status": "valid" } """ input_type = input_data.get("type") content = input_data.get("content") if not input_type or not content: raise ValidationError("Missing required fields: 'type' and 'content'") if input_type == "file": return self._validate_file(content, input_data.get("filename")) elif input_type == "text": return self._validate_text(content) else: raise ValidationError(f"Invalid input type: {input_type}") def _validate_file(self, content: bytes, filename: Optional[str]) -> Dict[str, Any]: """Validate file input.""" if not isinstance(content, bytes): raise ValidationError("File content must be bytes") if len(content) == 0: raise ValidationError("File is empty") size = len(content) max_size = settings.get_max_file_size_bytes() if size > max_size: raise ValidationError( f"File size {size} bytes exceeds maximum {max_size} bytes" ) # Detect content type content_type = "application/octet-stream" if filename: guessed_type, _ = mimetypes.guess_type(filename) if guessed_type: content_type = guessed_type return { "content": content, "content_type": content_type, "size": size, "filename": filename, "validation_status": "valid" } def _validate_text(self, content: str) -> Dict[str, Any]: """Validate text input.""" if not isinstance(content, str): raise ValidationError("Text content must be string") if not content.strip(): raise ValidationError("Text is empty") # Convert to bytes for consistent handling content_bytes = content.encode('utf-8') size = len(content_bytes) max_size = settings.get_max_file_size_bytes() if size > max_size: raise ValidationError( f"Text size {size} bytes exceeds maximum {max_size} bytes" ) return { "content": content_bytes, "content_type": "text/plain", "size": size, "filename": None, "validation_status": "valid" }