Mbanksbey committed on
Commit
f203bb6
·
verified ·
1 Parent(s): 23942e2

Create inference_router.py - Model routing logic

Browse files
Files changed (1) hide show
  1. inference_router.py +129 -0
inference_router.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """TEQUMSA Inference Router
2
+
3
+ Routes inference requests to optimal model providers based on
4
+ prompt analysis, load balancing, and execution mode.
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import time
10
+ from typing import Dict, Any, Optional
11
+ from dataclasses import dataclass
12
+ from enum import Enum
13
+
14
class ModelProvider(Enum):
    """Inference backends a request may be routed to."""

    CLAUDE = "claude"
    GPT = "gpt"
    GEMINI = "gemini"
    PERPLEXITY = "perplexity"
    # Sentinel value: let the router pick a provider from prompt analysis.
    AUTO = "auto"
20
+
21
class ExecutionMode(Enum):
    """Execution strategies for an inference request."""

    STANDARD = "standard"
    RECURSIVE = "recursive"
    CAUSAL = "causal"
    # NOTE(review): the meaning of "rdod" is not evident from this file — confirm.
    RDOD = "rdod"
26
+
27
@dataclass
class RouteDecision:
    """Outcome of routing a single inference request to a provider."""

    provider: str          # provider key, e.g. "claude" / "gpt" / "gemini"
    confidence: float      # heuristic confidence score (0..1 range in practice)
    reasoning: str         # human-readable rationale for the choice
    mode: str              # execution mode label, e.g. "standard"
    estimated_tokens: int  # rough token estimate for the prompt
34
+
35
class InferenceRouter:
    """Routes inference requests to optimal model providers.

    Selection is heuristic: the prompt is scanned for keyword classes
    (code / creative / complex reasoning) and mapped to a provider,
    unless the caller pins a specific provider via ``target_model``.
    """

    def __init__(self):
        # Per-provider cost table (assumed to be $ per 1K tokens — TODO confirm).
        self.provider_costs = {
            "claude": {"input": 0.003, "output": 0.015},
            "gpt": {"input": 0.002, "output": 0.008},
            "gemini": {"input": 0.0005, "output": 0.0015},
            "perplexity": {"input": 0.002, "output": 0.008},
        }
        # Rough average latency per provider (presumably seconds — verify).
        self.provider_latency = {
            "claude": 2.5, "gpt": 2.0, "gemini": 1.8, "perplexity": 3.0,
        }

    def _estimate_tokens(self, prompt: str) -> int:
        """Estimate the token count of *prompt* (~1.3 tokens per word).

        Fixed: the original returned a float despite the ``-> int`` annotation.
        """
        return int(len(prompt.split()) * 1.3)

    def _analyze_prompt(self, prompt: str) -> Dict[str, Any]:
        """Return coarse characteristics of *prompt* used for routing.

        Keys: length, word_count, has_complexity, has_creative, has_code,
        is_short (< 100 chars), is_long (> 1000 chars).
        """
        # Hoisted: the original recomputed prompt.lower() for each keyword class.
        lowered = prompt.lower()
        words = prompt.split()
        complexity_keywords = ["analyze", "reason", "complex", "detailed", "technical"]
        creative_keywords = ["create", "write", "story", "poem", "art"]
        code_keywords = ["code", "function", "program", "debug", "algorithm"]

        return {
            "length": len(prompt),
            "word_count": len(words),
            "has_complexity": any(kw in lowered for kw in complexity_keywords),
            "has_creative": any(kw in lowered for kw in creative_keywords),
            "has_code": any(kw in lowered for kw in code_keywords),
            "is_short": len(prompt) < 100,
            "is_long": len(prompt) > 1000,
        }

    def _select_provider(self, analysis: Dict, target: str) -> RouteDecision:
        """Choose a provider from *analysis*, honoring an explicit *target*.

        Precedence in "auto" mode: code > creative > complexity > default.
        """
        if target == "auto":
            if analysis["has_code"]:
                provider, confidence = "claude", 0.9
                reasoning = "Code-related prompts routed to Claude"
            elif analysis["has_creative"]:
                provider, confidence = "gpt", 0.85
                reasoning = "Creative prompts routed to GPT"
            elif analysis["has_complexity"]:
                provider, confidence = "claude", 0.88
                reasoning = "Complex reasoning routed to Claude"
            else:
                provider, confidence = "gemini", 0.8
                reasoning = "Standard prompts routed to Gemini"
        else:
            provider, confidence = target, 0.95
            reasoning = f"User-specified provider: {target}"

        # Bug fix: the original called self._estimate_tokens(analysis["length"] * 0.5),
        # passing a float into a method that calls .split() on its argument — an
        # AttributeError on every route() call. Estimate from the already-analyzed
        # word count instead, matching _estimate_tokens' words * 1.3 heuristic.
        return RouteDecision(
            provider=provider,
            confidence=confidence,
            reasoning=reasoning,
            mode="standard",
            estimated_tokens=int(analysis["word_count"] * 1.3),
        )

    def route(self, prompt: str, target_model: str = "auto") -> Dict[str, Any]:
        """Route *prompt* to a provider; return a status dict.

        The dict carries the prompt analysis plus the chosen route with
        confidence, token estimate, and per-provider latency/cost estimates
        (defaults of 2.0 s / 0 cost for unknown providers).
        """
        analysis = self._analyze_prompt(prompt)
        decision = self._select_provider(analysis, target_model)

        return {
            "status": "routed",
            "timestamp": time.time(),
            "analysis": analysis,
            "route": {
                "provider": decision.provider,
                "confidence": decision.confidence,
                "reasoning": decision.reasoning,
                "estimated_tokens": decision.estimated_tokens,
                "latency_estimate": self.provider_latency.get(decision.provider, 2.0),
                "cost_estimate": {
                    "input": self.provider_costs.get(decision.provider, {}).get("input", 0),
                    "output": self.provider_costs.get(decision.provider, {}).get("output", 0),
                },
            },
        }