File size: 12,333 Bytes
d1e5882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
"""
Tool Metadata and Latency Prediction System

Provides:
1. Per-tool latency predictions (expected latency ranges)
2. Tool output schemas (strict JSON type definitions)
3. Context-aware routing hints
"""

from typing import Dict, Any, Optional, List
from dataclasses import dataclass
from enum import Enum


class ToolType(str, Enum):
    """Closed set of tool identifiers used by this module.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (for example ``ToolType.RAG == "rag"``).
    """

    # Retrieval-augmented generation search
    RAG = "rag"
    # Web search
    WEB = "web"
    # Admin rule checking
    ADMIN = "admin"
    # LLM generation
    LLM = "llm"


@dataclass
class ToolLatencyMetadata:
    """Latency metadata for a tool.

    All ``*_ms`` fields are durations in milliseconds. ``min_ms`` and
    ``max_ms`` bound every value returned by :meth:`estimate_latency`.
    """
    # Tool identifier; "rag" and "web" enable context-aware adjustments.
    tool_name: str
    # Lower bound applied to every estimate.
    min_ms: int
    # Upper bound applied to every estimate.
    max_ms: int
    # Baseline estimate used when no adjustment applies.
    avg_ms: int
    # Human-readable summary of what the tool does.
    description: str

    def estimate_latency(self, context: Optional[Dict[str, Any]] = None) -> int:
        """
        Estimate expected latency based on context.

        Args:
            context: Optional hints for this tool. Recognised keys:
                ``query_length`` (for "rag": above 100 scales the average
                by 1.2, below 20 scales it by 0.8) and ``query_complexity``
                (for "web": "high" scales by 1.5, "low" scales by 0.7).
                Missing keys leave the average unchanged.

        Returns:
            Estimated latency in milliseconds, clamped to
            ``[min_ms, max_ms]``.
        """
        # Base estimate is average
        estimate = self.avg_ms

        # Context-aware adjustments
        if context:
            # RAG: Higher latency for longer queries
            if self.tool_name == "rag":
                query_length = context.get("query_length")
                # Only scale when a length was actually supplied; a context
                # that merely lacks the key must not be treated as a very
                # short query.
                if query_length is not None:
                    if query_length > 100:
                        estimate = int(self.avg_ms * 1.2)
                    elif query_length < 20:
                        estimate = int(self.avg_ms * 0.8)

            # Web: Higher latency for complex queries
            elif self.tool_name == "web":
                query_complexity = context.get("query_complexity", "medium")
                if query_complexity == "high":
                    estimate = int(self.avg_ms * 1.5)
                elif query_complexity == "low":
                    estimate = int(self.avg_ms * 0.7)

        # Keep the adjusted value inside the declared latency range.
        return min(max(estimate, self.min_ms), self.max_ms)


@dataclass
class ToolOutputSchema:
    """Describes the shape of one tool's output.

    Bundles a JSON-Schema-style definition with a human-readable
    description and a sample payload.
    """
    # Name of the tool this schema belongs to.
    tool_name: str
    # Schema definition as a nested dictionary.
    schema: Dict[str, Any]
    # Short explanation of what the output represents.
    description: str
    # Sample output payload.
    example: Dict[str, Any]


# Latency profiles for the known tools, keyed by tool name.
# Each record is registered under its own ``tool_name`` so the dictionary key
# and the record cannot drift apart.
TOOL_LATENCY_METADATA: Dict[str, ToolLatencyMetadata] = {
    profile.tool_name: profile
    for profile in (
        ToolLatencyMetadata(
            tool_name="rag",
            min_ms=60,
            max_ms=120,
            avg_ms=90,
            description="RAG search with vector similarity and re-ranking",
        ),
        ToolLatencyMetadata(
            tool_name="web",
            min_ms=400,
            max_ms=1800,
            avg_ms=800,
            description="Web search via Google Custom Search API",
        ),
        ToolLatencyMetadata(
            tool_name="admin",
            min_ms=5,
            max_ms=20,
            avg_ms=10,
            description="Admin rule checking and violation logging",
        ),
        ToolLatencyMetadata(
            tool_name="llm",
            min_ms=500,
            max_ms=5000,
            avg_ms=2000,
            description="LLM generation and reasoning",
        ),
    )
}


# Tool output schemas (JSON Schema format)
# Maps each tool name to a ToolOutputSchema holding the schema dictionary, a
# short description, and an example payload. Every schema describes a
# top-level object and lists "latency_ms" as an optional integer property.
TOOL_OUTPUT_SCHEMAS: Dict[str, ToolOutputSchema] = {
    # RAG: a list of text chunks with similarity scores, plus the query and
    # tenant they were retrieved for.
    "rag": ToolOutputSchema(
        tool_name="rag",
        schema={
            "type": "object",
            "required": ["results", "query", "tenant_id"],
            "properties": {
                "results": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "required": ["text", "similarity"],
                        "properties": {
                            "text": {"type": "string"},
                            # Similarity is bounded to the range [0, 1].
                            "similarity": {"type": "number", "minimum": 0, "maximum": 1},
                            "metadata": {"type": "object"},
                            "doc_id": {"type": "string"}
                        }
                    }
                },
                "query": {"type": "string"},
                "tenant_id": {"type": "string"},
                # Optional summary fields.
                "hits_count": {"type": "integer"},
                "avg_score": {"type": "number"},
                "top_score": {"type": "number"},
                "latency_ms": {"type": "integer"}
            }
        },
        description="RAG search results with similarity scores",
        example={
            "results": [
                {
                    "text": "Document chunk text...",
                    "similarity": 0.85,
                    "metadata": {"title": "API Docs", "source_type": "pdf"},
                    "doc_id": "doc123"
                }
            ],
            "query": "user query",
            "tenant_id": "tenant1",
            "hits_count": 3,
            "avg_score": 0.75,
            "top_score": 0.85,
            "latency_ms": 90
        }
    ),
    # Web: a list of search hits (title, snippet, link) plus the query.
    "web": ToolOutputSchema(
        tool_name="web",
        schema={
            "type": "object",
            "required": ["results", "query"],
            "properties": {
                "results": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "required": ["title", "snippet", "link"],
                        "properties": {
                            "title": {"type": "string"},
                            "snippet": {"type": "string"},
                            "link": {"type": "string"},
                            "displayLink": {"type": "string"}
                        }
                    }
                },
                "query": {"type": "string"},
                "total_results": {"type": "integer"},
                "latency_ms": {"type": "integer"}
            }
        },
        description="Web search results from Google Custom Search",
        example={
            "results": [
                {
                    "title": "Search Result Title",
                    "snippet": "Result snippet text...",
                    "link": "https://example.com",
                    "displayLink": "example.com"
                }
            ],
            "query": "search query",
            "total_results": 10,
            "latency_ms": 800
        }
    ),
    # Admin: a list of rule violations plus a flag for whether checking ran.
    "admin": ToolOutputSchema(
        tool_name="admin",
        schema={
            "type": "object",
            "required": ["violations", "checked"],
            "properties": {
                "violations": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "required": ["rule_id", "severity", "matched_text"],
                        "properties": {
                            "rule_id": {"type": "string"},
                            "rule_pattern": {"type": "string"},
                            # Severity is restricted to these four levels.
                            "severity": {"type": "string", "enum": ["low", "medium", "high", "critical"]},
                            "matched_text": {"type": "string"},
                            # Confidence is bounded to the range [0, 1].
                            "confidence": {"type": "number", "minimum": 0, "maximum": 1},
                            "message_preview": {"type": "string"}
                        }
                    }
                },
                "checked": {"type": "boolean"},
                "rules_count": {"type": "integer"},
                "latency_ms": {"type": "integer"}
            }
        },
        description="Admin rule violations and safety checks",
        example={
            "violations": [
                {
                    "rule_id": "rule1",
                    "rule_pattern": ".*password.*",
                    "severity": "high",
                    "matched_text": "password",
                    "confidence": 0.95,
                    "message_preview": "User asked for password"
                }
            ],
            "checked": True,
            "rules_count": 5,
            "latency_ms": 10
        }
    ),
    # LLM: generated text plus token usage; model details are optional.
    "llm": ToolOutputSchema(
        tool_name="llm",
        schema={
            "type": "object",
            "required": ["text", "tokens_used"],
            "properties": {
                "text": {"type": "string"},
                "tokens_used": {"type": "integer"},
                "latency_ms": {"type": "integer"},
                "model": {"type": "string"},
                "temperature": {"type": "number"}
            }
        },
        description="LLM-generated response",
        example={
            "text": "Generated response text...",
            "tokens_used": 150,
            "latency_ms": 2000,
            "model": "llama3.1:latest",
            "temperature": 0.0
        }
    )
}


def get_tool_latency_estimate(tool_name: str, context: Optional[Dict[str, Any]] = None) -> int:
    """
    Look up a tool's latency profile and return its estimate.

    Args:
        tool_name: Key into TOOL_LATENCY_METADATA (rag, web, admin, llm)
        context: Optional hints forwarded to the profile's estimator

    Returns:
        Estimated latency in milliseconds; 1000 when the tool is not
        registered
    """
    profile = TOOL_LATENCY_METADATA.get(tool_name)
    if profile:
        return profile.estimate_latency(context)

    # No profile registered: fall back to a fixed default estimate.
    return 1000


def get_tool_schema(tool_name: str) -> Optional[ToolOutputSchema]:
    """
    Look up the registered output schema for a tool.

    Args:
        tool_name: Key into TOOL_OUTPUT_SCHEMAS

    Returns:
        The matching ToolOutputSchema, or None when the tool has no
        registered schema
    """
    return TOOL_OUTPUT_SCHEMAS.get(tool_name, None)


def validate_tool_output(tool_name: str, output: Dict[str, Any]) -> tuple[bool, Optional[str]]:
    """
    Validate tool output against its schema.

    This is a shallow check: it verifies that every top-level required field
    is present and that top-level fields with a declared "type" hold a value
    of that type. Nested items, enums and numeric bounds are not checked
    (full JSON Schema validation would require the jsonschema library).

    Args:
        tool_name: Name of the tool whose schema should be applied
        output: The tool's output payload

    Returns:
        (is_valid, error_message) - error_message is None when valid.
        Tools without a registered schema are always reported as valid.
    """
    schema_obj = get_tool_schema(tool_name)
    if not schema_obj:
        return True, None  # Unknown tool, skip validation

    # Every check below assumes a mapping; reject anything else up front
    # instead of raising TypeError or reporting a misleading missing field.
    if not isinstance(output, dict):
        return False, f"Output must be object, got {type(output).__name__}"

    schema = schema_obj.schema

    for field in schema.get("required", []):
        if field not in output:
            return False, f"Missing required field: {field}"

    # JSON Schema type name -> accepted Python types.
    python_types = {
        "array": (list,),
        "object": (dict,),
        "string": (str,),
        "integer": (int,),
        "number": (int, float),
        "boolean": (bool,),
    }
    # bool is a subclass of int in Python, but JSON booleans are not numbers.
    numeric_types = ("integer", "number")

    # Type checking for top-level fields
    properties = schema.get("properties", {})
    for field, value in output.items():
        if field not in properties:
            continue
        expected_type = properties[field].get("type")
        # Skip missing, non-string or unrecognised type declarations.
        if not isinstance(expected_type, str):
            continue
        accepted = python_types.get(expected_type)
        if accepted is None:
            continue
        is_bool_as_number = expected_type in numeric_types and isinstance(value, bool)
        if is_bool_as_number or not isinstance(value, accepted):
            return False, f"Field '{field}' must be {expected_type}, got {type(value).__name__}"

    return True, None


def estimate_path_latency(tool_sequence: List[str], context: Optional[Dict[str, Any]] = None) -> int:
    """
    Sum the estimated latencies of tools run one after another.

    Args:
        tool_sequence: Tool names in execution order
        context: Optional mapping from tool name to that tool's context

    Returns:
        Total estimated latency in milliseconds (0 for an empty sequence)
    """
    # A missing or empty context means every tool gets an empty context.
    per_tool_context = context if context else {}
    return sum(
        get_tool_latency_estimate(name, per_tool_context.get(name, {}))
        for name in tool_sequence
    )


def get_fastest_path(
    required_tools: List[str],
    context: Optional[Dict[str, Any]] = None
) -> List[str]:
    """
    Order the required tools from lowest to highest estimated latency.

    Tools are currently executed sequentially; this could later be extended
    to suggest parallel execution for independent tools.

    Args:
        required_tools: List of required tool names
        context: Optional mapping from tool name to that tool's context

    Returns:
        The tool names sorted by estimated latency, fastest first. Tools
        with equal estimates keep their original relative order.
    """
    # A missing or empty context means every tool gets an empty context.
    per_tool_context = context if context else {}

    def estimated_latency(name: str) -> int:
        return get_tool_latency_estimate(name, per_tool_context.get(name, {}))

    # sorted() is stable and evaluates the key once per element.
    return sorted(required_tools, key=estimated_latency)