Proofly / agents /input_validator.py
Dipan04's picture
Initial clean commit for Hugging Face Space
2c41dce
"""
Input Validation Agent
Validates and normalizes input data (file or text) into a standard format.
"""
from typing import Dict, Any, Optional
import mimetypes
from pathlib import Path
from core.agent_base import Agent
from core.errors import ValidationError
from config.settings import settings
class InputValidatorAgent(Agent):
    """
    Validate raw input (file bytes or text) and normalize it to one format.

    Every successful validation yields a dict with the payload as bytes,
    a MIME content type, the payload size in bytes, the optional filename
    and a ``validation_status`` of ``"valid"``. Every rejection is reported
    by raising ``ValidationError``.
    """

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate and normalize input.

        Expected input_data:
            {
                "type": "file" | "text",
                "content": bytes | str,
                "filename": str (optional, for files),
            }

        Returns:
            {
                "content": bytes,
                "content_type": str,
                "size": int,
                "filename": str | None,
                "validation_status": "valid"
            }

        Raises:
            ValidationError: If "type" or "content" is missing, if the type
                is neither "file" nor "text", or if the type-specific
                validation rejects the content.
        """
        input_type = input_data.get("type")
        content = input_data.get("content")

        # Only an absent value counts as "missing". An empty payload (b"" or
        # "") is passed on so the type-specific validator can report the more
        # precise "File is empty" / "Text is empty" error.
        if not input_type or content is None:
            raise ValidationError("Missing required fields: 'type' and 'content'")

        if input_type == "file":
            return self._validate_file(content, input_data.get("filename"))
        if input_type == "text":
            return self._validate_text(content)
        raise ValidationError(f"Invalid input type: {input_type}")

    def _enforce_size_limit(self, payload: bytes, label: str) -> int:
        """
        Return the size of *payload* in bytes, rejecting oversized payloads.

        *label* is the noun used at the start of the error message
        ("File" or "Text").

        Raises:
            ValidationError: If the payload is larger than the limit returned
                by ``settings.get_max_file_size_bytes()``.
        """
        size = len(payload)
        max_size = settings.get_max_file_size_bytes()
        if size > max_size:
            raise ValidationError(
                f"{label} size {size} bytes exceeds maximum {max_size} bytes"
            )
        return size

    def _validate_file(self, content: bytes, filename: Optional[str]) -> Dict[str, Any]:
        """
        Validate file input.

        The content type is guessed from *filename* via ``mimetypes``; when
        no filename is given or nothing can be guessed it falls back to
        "application/octet-stream".

        Raises:
            ValidationError: If *content* is not bytes, is empty, or exceeds
                the configured maximum size.
        """
        if not isinstance(content, bytes):
            raise ValidationError("File content must be bytes")
        if not content:
            raise ValidationError("File is empty")

        size = self._enforce_size_limit(content, "File")

        # Detect content type from the filename only; the bytes themselves
        # are not inspected.
        content_type = "application/octet-stream"
        if filename:
            guessed_type, _ = mimetypes.guess_type(filename)
            if guessed_type:
                content_type = guessed_type

        return {
            "content": content,
            "content_type": content_type,
            "size": size,
            "filename": filename,
            "validation_status": "valid",
        }

    def _validate_text(self, content: str) -> Dict[str, Any]:
        """
        Validate text input and convert it to UTF-8 bytes.

        The size limit is applied to the encoded byte length, not to the
        number of characters.

        Raises:
            ValidationError: If *content* is not a string, is empty or
                whitespace-only, cannot be encoded as UTF-8, or exceeds the
                configured maximum size once encoded.
        """
        if not isinstance(content, str):
            raise ValidationError("Text content must be string")
        if not content.strip():
            raise ValidationError("Text is empty")

        # Convert to bytes for consistent handling. A str can hold lone
        # surrogates that UTF-8 cannot encode; report that as a validation
        # failure instead of letting UnicodeEncodeError escape.
        try:
            content_bytes = content.encode("utf-8")
        except UnicodeEncodeError as err:
            raise ValidationError("Text cannot be encoded as UTF-8") from err

        size = self._enforce_size_limit(content_bytes, "Text")

        return {
            "content": content_bytes,
            "content_type": "text/plain",
            "size": size,
            "filename": None,
            "validation_status": "valid",
        }