Spaces:
Running on Zero
Running on Zero
Create inference_router.py - Model routing logic
Browse files- inference_router.py +129 -0
inference_router.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""TEQUMSA Inference Router
|
| 2 |
+
|
| 3 |
+
Routes inference requests to optimal model providers based on
|
| 4 |
+
prompt analysis, load balancing, and execution mode.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
from typing import Dict, Any, Optional
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from enum import Enum
|
| 13 |
+
|
| 14 |
+
class ModelProvider(Enum):
    """Model providers a request can be routed to.

    Values are the lowercase provider keys used throughout the router
    (cost/latency tables, route decisions).
    """

    CLAUDE = "claude"
    GPT = "gpt"
    GEMINI = "gemini"
    PERPLEXITY = "perplexity"
    # Sentinel: defer the provider choice to the router's heuristics.
    AUTO = "auto"
| 20 |
+
|
| 21 |
+
class ExecutionMode(Enum):
    """Execution modes a routed request may run under.

    NOTE(review): in the code visible here only the "standard" value is
    ever produced (RouteDecision.mode is hard-coded to "standard");
    the other modes are presumably consumed elsewhere — confirm.
    """

    STANDARD = "standard"
    RECURSIVE = "recursive"
    CAUSAL = "causal"
    RDOD = "rdod"
| 26 |
+
|
| 27 |
+
@dataclass
class RouteDecision:
    """Outcome of provider selection for a single prompt."""

    provider: str          # chosen provider key, e.g. "claude"
    confidence: float      # heuristic confidence score for the choice
    reasoning: str         # human-readable explanation of the routing
    mode: str              # execution mode label ("standard" in this code)
    estimated_tokens: int  # rough token estimate for the request
| 34 |
+
|
| 35 |
+
class InferenceRouter:
    """Routes inference requests to optimal model providers.

    Selection is keyword-driven: code-ish prompts go to Claude, creative
    prompts to GPT, complexity-flagged prompts to Claude, everything else
    to Gemini; an explicit (non-"auto") target always wins.
    """

    def __init__(self):
        # Per-provider cost table. NOTE(review): units are presumably
        # USD per 1K tokens — confirm against the billing source.
        self.provider_costs = {
            "claude": {"input": 0.003, "output": 0.015},
            "gpt": {"input": 0.002, "output": 0.008},
            "gemini": {"input": 0.0005, "output": 0.0015},
            "perplexity": {"input": 0.002, "output": 0.008},
        }
        # Rough per-provider latency estimates, in seconds.
        self.provider_latency = {
            "claude": 2.5, "gpt": 2.0, "gemini": 1.8, "perplexity": 3.0,
        }

    def _estimate_tokens(self, prompt: str) -> int:
        """Estimate the token count of *prompt* (~1.3 tokens per word).

        BUG FIX: the original returned ``len(...) * 1.3`` — a float —
        despite the ``int`` annotation; now truncated to int.
        """
        return int(len(prompt.split()) * 1.3)

    def _analyze_prompt(self, prompt: str) -> Dict[str, Any]:
        """Analyze prompt characteristics used by provider selection.

        Returns a dict with raw size metrics plus boolean keyword flags
        (complexity / creative / code) consumed by ``_select_provider``.
        """
        length = len(prompt)
        words = prompt.split()
        complexity_keywords = ["analyze", "reason", "complex", "detailed", "technical"]
        creative_keywords = ["create", "write", "story", "poem", "art"]
        code_keywords = ["code", "function", "program", "debug", "algorithm"]

        # Hoist the lowercase conversion: the original recomputed
        # prompt.lower() once per keyword category.
        lowered = prompt.lower()
        has_complexity = any(kw in lowered for kw in complexity_keywords)
        has_creative = any(kw in lowered for kw in creative_keywords)
        has_code = any(kw in lowered for kw in code_keywords)

        return {
            "length": length,
            "word_count": len(words),
            "has_complexity": has_complexity,
            "has_creative": has_creative,
            "has_code": has_code,
            "is_short": length < 100,
            "is_long": length > 1000,
        }

    def _select_provider(self, analysis: Dict, target: str) -> RouteDecision:
        """Select the optimal provider for an analyzed prompt.

        An explicit *target* other than "auto" is honored verbatim;
        otherwise the keyword flags decide, with code > creative >
        complexity precedence.
        """
        if target == "auto":
            if analysis["has_code"]:
                provider = "claude"
                confidence = 0.9
                reasoning = "Code-related prompts routed to Claude"
            elif analysis["has_creative"]:
                provider = "gpt"
                confidence = 0.85
                reasoning = "Creative prompts routed to GPT"
            elif analysis["has_complexity"]:
                provider = "claude"
                confidence = 0.88
                reasoning = "Complex reasoning routed to Claude"
            else:
                provider = "gemini"
                confidence = 0.8
                reasoning = "Standard prompts routed to Gemini"
        else:
            provider = target
            confidence = 0.95
            reasoning = f"User-specified provider: {target}"

        return RouteDecision(
            provider=provider,
            confidence=confidence,
            reasoning=reasoning,
            mode="standard",
            # BUG FIX: the original called
            # self._estimate_tokens(analysis["length"] * 0.5), passing a
            # float where _estimate_tokens expects a str — float.split()
            # raises AttributeError on every call. Estimate from the
            # already-computed word count using the same ~1.3
            # tokens-per-word heuristic instead.
            estimated_tokens=int(analysis["word_count"] * 1.3),
        )

    def route(self, prompt: str, target_model: str = "auto") -> Dict[str, Any]:
        """Route *prompt* to the optimal provider.

        Returns a JSON-serializable dict with the prompt analysis and the
        chosen route (provider, confidence, reasoning, token/latency/cost
        estimates). Unknown providers fall back to a 2.0 s latency and
        zero-cost estimates via ``dict.get``.
        """
        analysis = self._analyze_prompt(prompt)
        decision = self._select_provider(analysis, target_model)

        return {
            "status": "routed",
            "timestamp": time.time(),
            "analysis": analysis,
            "route": {
                "provider": decision.provider,
                "confidence": decision.confidence,
                "reasoning": decision.reasoning,
                "estimated_tokens": decision.estimated_tokens,
                "latency_estimate": self.provider_latency.get(decision.provider, 2.0),
                "cost_estimate": {
                    "input": self.provider_costs.get(decision.provider, {}).get("input", 0),
                    "output": self.provider_costs.get(decision.provider, {}).get("output", 0),
                },
            },
        }