File size: 3,413 Bytes
2c41dce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Input Validation Agent
Validates and normalizes input data (file or text) into a standard format.
"""

from typing import Dict, Any, Optional
import mimetypes
from pathlib import Path

from core.agent_base import Agent
from core.errors import ValidationError
from config.settings import settings


class InputValidatorAgent(Agent):
    """
    Validates input type, size, format and normalizes to internal format.

    Accepts either raw file bytes or a text string and returns a uniform
    dictionary whose ``content`` is always ``bytes``.
    """

    def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate and normalize input.

        Expected input_data:
            {
                "type": "file" | "text",
                "content": bytes | str,
                "filename": str (optional, for files),
            }

        Returns:
            {
                "content": bytes,
                "content_type": str,
                "size": int,
                "filename": str | None,
                "validation_status": "valid"
            }

        Raises:
            ValidationError: if "type" is missing/empty, "content" is absent,
                "type" is not "file" or "text", or the type-specific
                validation fails.
        """
        input_type = input_data.get("type")
        content = input_data.get("content")

        # Only an absent (None) content counts as "missing". Empty-but-present
        # content (b"" / "") is passed on so the type-specific validators can
        # report the more precise "File is empty" / "Text is empty" errors.
        if not input_type or content is None:
            raise ValidationError("Missing required fields: 'type' and 'content'")

        if input_type == "file":
            return self._validate_file(content, input_data.get("filename"))
        elif input_type == "text":
            return self._validate_text(content)
        else:
            raise ValidationError(f"Invalid input type: {input_type}")

    def _validate_file(self, content: bytes, filename: Optional[str]) -> Dict[str, Any]:
        """
        Validate file input.

        Args:
            content: Raw file bytes.
            filename: Optional file name, used only to guess the MIME type.

        Returns:
            The normalized result dictionary described in ``execute``.

        Raises:
            ValidationError: if content is not ``bytes``, is empty, or is
                larger than ``settings.get_max_file_size_bytes()``.
        """
        if not isinstance(content, bytes):
            raise ValidationError("File content must be bytes")

        size = len(content)
        if size == 0:
            raise ValidationError("File is empty")

        max_size = settings.get_max_file_size_bytes()
        if size > max_size:
            raise ValidationError(
                f"File size {size} bytes exceeds maximum {max_size} bytes"
            )

        # Detect content type from the file name; fall back to a generic
        # binary type when there is no name or the guess yields nothing.
        content_type = "application/octet-stream"
        if filename:
            guessed_type, _ = mimetypes.guess_type(filename)
            if guessed_type:
                content_type = guessed_type

        return {
            "content": content,
            "content_type": content_type,
            "size": size,
            "filename": filename,
            "validation_status": "valid"
        }

    def _validate_text(self, content: str) -> Dict[str, Any]:
        """
        Validate text input.

        Args:
            content: Text to validate.

        Returns:
            The normalized result dictionary described in ``execute``, with
            the text encoded as UTF-8, content type "text/plain" and no
            filename.

        Raises:
            ValidationError: if content is not ``str``, is empty or
                whitespace-only, or its UTF-8 encoding is larger than
                ``settings.get_max_file_size_bytes()``.
        """
        if not isinstance(content, str):
            raise ValidationError("Text content must be string")

        if not content.strip():
            raise ValidationError("Text is empty")

        # Convert to bytes for consistent handling; the size limit applies to
        # the encoded byte length, not the character count.
        content_bytes = content.encode('utf-8')
        size = len(content_bytes)

        # Text shares the same size limit setting as files.
        max_size = settings.get_max_file_size_bytes()
        if size > max_size:
            raise ValidationError(
                f"Text size {size} bytes exceeds maximum {max_size} bytes"
            )

        return {
            "content": content_bytes,
            "content_type": "text/plain",
            "size": size,
            "filename": None,
            "validation_status": "valid"
        }