# Source: Hugging Face Spaces (running on ZeroGPU)
| """TEQUMSA Inference Router | |
| Routes inference requests to optimal model providers based on | |
| prompt analysis, load balancing, and execution mode. | |
| """ | |
| import os | |
| import json | |
| import time | |
| from typing import Dict, Any, Optional | |
| from dataclasses import dataclass | |
| from enum import Enum | |
| class ModelProvider(Enum): | |
| CLAUDE = "claude" | |
| GPT = "gpt" | |
| GEMINI = "gemini" | |
| PERPLEXITY = "perplexity" | |
| AUTO = "auto" | |
| class ExecutionMode(Enum): | |
| STANDARD = "standard" | |
| RECURSIVE = "recursive" | |
| CAUSAL = "causal" | |
| RDOD = "rdod" | |
| class RouteDecision: | |
| provider: str | |
| confidence: float | |
| reasoning: str | |
| mode: str | |
| estimated_tokens: int | |
| class InferenceRouter: | |
| """Routes inference requests to optimal model providers.""" | |
| def __init__(self): | |
| self.provider_costs = { | |
| "claude": {"input": 0.003, "output": 0.015}, | |
| "gpt": {"input": 0.002, "output": 0.008}, | |
| "gemini": {"input": 0.0005, "output": 0.0015}, | |
| "perplexity": {"input": 0.002, "output": 0.008} | |
| } | |
| self.provider_latency = { | |
| "claude": 2.5, "gpt": 2.0, "gemini": 1.8, "perplexity": 3.0 | |
| } | |
| def _estimate_tokens(self, prompt: str) -> int: | |
| """Estimate token count from prompt text.""" | |
| return len(prompt.split()) * 1.3 | |
| def _analyze_prompt(self, prompt: str) -> Dict[str, Any]: | |
| """Analyze prompt characteristics.""" | |
| length = len(prompt) | |
| words = prompt.split() | |
| complexity_keywords = ["analyze", "reason", "complex", "detailed", "technical"] | |
| creative_keywords = ["create", "write", "story", "poem", "art"] | |
| code_keywords = ["code", "function", "program", "debug", "algorithm"] | |
| has_complexity = any(kw in prompt.lower() for kw in complexity_keywords) | |
| has_creative = any(kw in prompt.lower() for kw in creative_keywords) | |
| has_code = any(kw in prompt.lower() for kw in code_keywords) | |
| return { | |
| "length": length, | |
| "word_count": len(words), | |
| "has_complexity": has_complexity, | |
| "has_creative": has_creative, | |
| "has_code": has_code, | |
| "is_short": length < 100, | |
| "is_long": length > 1000 | |
| } | |
| def _select_provider(self, analysis: Dict, target: str) -> RouteDecision: | |
| """Select optimal provider based on analysis.""" | |
| if target == "auto": | |
| if analysis["has_code"]: | |
| provider = "claude" | |
| confidence = 0.9 | |
| reasoning = "Code-related prompts routed to Claude" | |
| elif analysis["has_creative"]: | |
| provider = "gpt" | |
| confidence = 0.85 | |
| reasoning = "Creative prompts routed to GPT" | |
| elif analysis["has_complexity"]: | |
| provider = "claude" | |
| confidence = 0.88 | |
| reasoning = "Complex reasoning routed to Claude" | |
| else: | |
| provider = "gemini" | |
| confidence = 0.8 | |
| reasoning = "Standard prompts routed to Gemini" | |
| else: | |
| provider = target | |
| confidence = 0.95 | |
| reasoning = f"User-specified provider: {target}" | |
| return RouteDecision( | |
| provider=provider, | |
| confidence=confidence, | |
| reasoning=reasoning, | |
| mode="standard", | |
| estimated_tokens=int(self._estimate_tokens( | |
| analysis["length"] * 0.5 | |
| )) | |
| ) | |
| def route(self, prompt: str, target_model: str = "auto") -> Dict[str, Any]: | |
| """Route a prompt to optimal model provider.""" | |
| analysis = self._analyze_prompt(prompt) | |
| decision = self._select_provider(analysis, target_model) | |
| return { | |
| "status": "routed", | |
| "timestamp": time.time(), | |
| "analysis": analysis, | |
| "route": { | |
| "provider": decision.provider, | |
| "confidence": decision.confidence, | |
| "reasoning": decision.reasoning, | |
| "estimated_tokens": decision.estimated_tokens, | |
| "latency_estimate": self.provider_latency.get(decision.provider, 2.0), | |
| "cost_estimate": { | |
| "input": self.provider_costs.get(decision.provider, {}).get("input", 0), | |
| "output": self.provider_costs.get(decision.provider, {}).get("output", 0) | |
| } | |
| } | |
| } |