# TEQUMSA-Inference-Node / inference_router.py
# Author: Mbanksbey
# Create inference_router.py - Model routing logic (commit f203bb6, verified)
"""TEQUMSA Inference Router
Routes inference requests to optimal model providers based on
prompt analysis, load balancing, and execution mode.
"""
import os
import json
import time
from typing import Dict, Any, Optional
from dataclasses import dataclass
from enum import Enum
class ModelProvider(Enum):
    """Model providers a request can be routed to.

    ``AUTO`` means the router picks a provider heuristically; the other
    values name a concrete backend. String values match the keys used in
    ``InferenceRouter.provider_costs`` / ``provider_latency``.
    """
    CLAUDE = "claude"
    GPT = "gpt"
    GEMINI = "gemini"
    PERPLEXITY = "perplexity"
    AUTO = "auto"  # let InferenceRouter choose based on prompt analysis
class ExecutionMode(Enum):
    """Execution modes for an inference request.

    NOTE(review): only ``STANDARD`` is referenced in this file (as the
    literal "standard" in RouteDecision.mode); the semantics of the other
    modes are defined elsewhere — confirm against the calling code.
    """
    STANDARD = "standard"
    RECURSIVE = "recursive"
    CAUSAL = "causal"
    RDOD = "rdod"
@dataclass
class RouteDecision:
    """Outcome of a routing decision (see InferenceRouter._select_provider)."""
    provider: str          # provider key, e.g. "claude" / "gpt" / "gemini"
    confidence: float      # heuristic confidence in the choice, 0.0-1.0
    reasoning: str         # human-readable explanation of the choice
    mode: str              # execution mode name (always "standard" in this file)
    estimated_tokens: int  # rough token estimate for the prompt
class InferenceRouter:
    """Routes inference requests to optimal model providers.

    Routing is purely heuristic: in "auto" mode a keyword scan of the
    prompt picks a provider (code -> Claude, creative -> GPT,
    complex reasoning -> Claude, otherwise Gemini); an explicit target
    is honored as-is. Cost/latency figures are static estimates, not
    live data.
    """

    # ~1.3 tokens per whitespace-separated word — rough English heuristic.
    _TOKENS_PER_WORD = 1.3

    def __init__(self):
        # Per-provider cost estimates (USD per 1K tokens, input/output).
        self.provider_costs = {
            "claude": {"input": 0.003, "output": 0.015},
            "gpt": {"input": 0.002, "output": 0.008},
            "gemini": {"input": 0.0005, "output": 0.0015},
            "perplexity": {"input": 0.002, "output": 0.008},
        }
        # Per-provider latency estimates, in seconds.
        self.provider_latency = {
            "claude": 2.5, "gpt": 2.0, "gemini": 1.8, "perplexity": 3.0,
        }

    def _estimate_tokens(self, prompt: str) -> int:
        """Estimate token count from prompt text.

        Fix: the original returned ``len(...) * 1.3`` — a float — despite
        the ``int`` annotation; the result is now truncated to int.
        """
        return int(len(prompt.split()) * self._TOKENS_PER_WORD)

    def _analyze_prompt(self, prompt: str) -> Dict[str, Any]:
        """Analyze prompt characteristics used by _select_provider.

        Returns a dict with length/word-count stats and three keyword
        flags (complexity / creative / code).
        """
        lowered = prompt.lower()  # hoisted: the original lowercased 3x
        complexity_keywords = ("analyze", "reason", "complex", "detailed", "technical")
        creative_keywords = ("create", "write", "story", "poem", "art")
        code_keywords = ("code", "function", "program", "debug", "algorithm")
        length = len(prompt)
        return {
            "length": length,
            "word_count": len(prompt.split()),
            "has_complexity": any(kw in lowered for kw in complexity_keywords),
            "has_creative": any(kw in lowered for kw in creative_keywords),
            "has_code": any(kw in lowered for kw in code_keywords),
            "is_short": length < 100,
            "is_long": length > 1000,
        }

    def _select_provider(self, analysis: Dict, target: str) -> RouteDecision:
        """Select a provider from the analysis produced by _analyze_prompt.

        ``target`` is either a provider key or "auto". Priority in auto
        mode: code > creative > complexity > default (gemini).
        """
        if target == "auto":
            if analysis["has_code"]:
                provider, confidence = "claude", 0.9
                reasoning = "Code-related prompts routed to Claude"
            elif analysis["has_creative"]:
                provider, confidence = "gpt", 0.85
                reasoning = "Creative prompts routed to GPT"
            elif analysis["has_complexity"]:
                provider, confidence = "claude", 0.88
                reasoning = "Complex reasoning routed to Claude"
            else:
                provider, confidence = "gemini", 0.8
                reasoning = "Standard prompts routed to Gemini"
        else:
            provider, confidence = target, 0.95
            reasoning = f"User-specified provider: {target}"
        # Fix: the original called _estimate_tokens(analysis["length"] * 0.5),
        # passing a float into a method that does prompt.split() — every
        # route() call raised AttributeError. Estimate from word_count instead.
        return RouteDecision(
            provider=provider,
            confidence=confidence,
            reasoning=reasoning,
            mode="standard",
            estimated_tokens=int(analysis["word_count"] * self._TOKENS_PER_WORD),
        )

    def route(self, prompt: str, target_model: str = "auto") -> Dict[str, Any]:
        """Route a prompt to an optimal model provider.

        Returns a dict with the analysis plus the chosen route (provider,
        confidence, reasoning, token/latency/cost estimates). Unknown
        providers fall back to a 2.0s latency and zero cost estimates.
        """
        analysis = self._analyze_prompt(prompt)
        decision = self._select_provider(analysis, target_model)
        costs = self.provider_costs.get(decision.provider, {})
        return {
            "status": "routed",
            "timestamp": time.time(),
            "analysis": analysis,
            "route": {
                "provider": decision.provider,
                "confidence": decision.confidence,
                "reasoning": decision.reasoning,
                "estimated_tokens": decision.estimated_tokens,
                "latency_estimate": self.provider_latency.get(decision.provider, 2.0),
                "cost_estimate": {
                    "input": costs.get("input", 0),
                    "output": costs.get("output", 0),
                },
            },
        }