|
|
""" |
|
|
Input Validation Agent |
|
|
Validates and normalizes input data (file or text) into a standard format. |
|
|
""" |
|
|
|
|
|
from typing import Dict, Any, Optional |
|
|
import mimetypes |
|
|
from pathlib import Path |
|
|
|
|
|
from core.agent_base import Agent |
|
|
from core.errors import ValidationError |
|
|
from config.settings import settings |
|
|
|
|
|
|
|
|
class InputValidatorAgent(Agent):
    """Validate raw input and normalize it into the internal format.

    Accepts either a file payload (bytes plus an optional filename) or a
    text payload (str). Both are returned as a dictionary holding the
    content as bytes together with its content type, size in bytes and
    filename.
    """

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and normalize input.

        Expected input_data:
            {
                "type": "file" | "text",
                "content": bytes | str,
                "filename": str (optional, for files),
            }

        Returns:
            {
                "content": bytes,
                "content_type": str,
                "size": int,
                "filename": str | None,
                "validation_status": "valid"
            }

        Raises:
            ValidationError: If "type" is missing or empty, "content" is
                missing, "type" is neither "file" nor "text", or the
                content is rejected by the matching validator.
        """
        input_type = input_data.get("type")
        content = input_data.get("content")

        # Only an absent content counts as "missing". Empty bytes/str are
        # passed on so the validators can report the more specific
        # "File is empty" / "Text is empty" errors.
        if not input_type or content is None:
            raise ValidationError("Missing required fields: 'type' and 'content'")

        if input_type == "file":
            return self._validate_file(content, input_data.get("filename"))
        if input_type == "text":
            return self._validate_text(content)
        raise ValidationError(f"Invalid input type: {input_type}")

    def _check_size_limit(self, size: int, label: str) -> None:
        """Raise if ``size`` exceeds ``settings.get_max_file_size_bytes()``.

        Args:
            size: Payload size in bytes.
            label: Prefix used in the error message ("File" or "Text").

        Raises:
            ValidationError: If ``size`` is greater than the configured limit.
        """
        max_size = settings.get_max_file_size_bytes()
        if size > max_size:
            raise ValidationError(
                f"{label} size {size} bytes exceeds maximum {max_size} bytes"
            )

    def _validate_file(self, content: bytes, filename: Optional[str]) -> Dict[str, Any]:
        """Validate file input.

        Args:
            content: Raw file bytes.
            filename: Optional file name, used only to guess the content type.

        Returns:
            The normalized result dictionary. "content_type" is guessed from
            the filename and falls back to "application/octet-stream".

        Raises:
            ValidationError: If ``content`` is not bytes, is empty, or is
                larger than the configured maximum size.
        """
        if not isinstance(content, bytes):
            raise ValidationError("File content must be bytes")

        size = len(content)
        if size == 0:
            raise ValidationError("File is empty")

        self._check_size_limit(size, "File")

        # Default for unknown types; overridden when the filename's
        # extension maps to a known MIME type.
        content_type = "application/octet-stream"
        if filename:
            guessed_type, _ = mimetypes.guess_type(filename)
            if guessed_type:
                content_type = guessed_type

        return {
            "content": content,
            "content_type": content_type,
            "size": size,
            "filename": filename,
            "validation_status": "valid",
        }

    def _validate_text(self, content: str) -> Dict[str, Any]:
        """Validate text input.

        Args:
            content: Text to validate.

        Returns:
            The normalized result dictionary with the text encoded as UTF-8
            bytes, "content_type" set to "text/plain" and "filename" None.

        Raises:
            ValidationError: If ``content`` is not a str, contains only
                whitespace, or its UTF-8 encoding is larger than the
                configured maximum size.
        """
        if not isinstance(content, str):
            raise ValidationError("Text content must be string")

        if not content.strip():
            raise ValidationError("Text is empty")

        # The limit applies to the encoded byte length, not the number of
        # characters.
        content_bytes = content.encode('utf-8')
        size = len(content_bytes)

        self._check_size_limit(size, "Text")

        return {
            "content": content_bytes,
            "content_type": "text/plain",
            "size": size,
            "filename": None,
            "validation_status": "valid",
        }