# Source: Hugging Face Spaces (running on ZeroGPU)
| """TEQUMSA Inference Router | |
| Routes inference requests to optimal model providers based on | |
| prompt analysis, load balancing, and execution mode. | |
| """ | |
| import os | |
| import json | |
| import time | |
| from typing import Dict, Any, Optional | |
| from dataclasses import dataclass | |
| from enum import Enum | |
| class ModelProvider(Enum): | |
| CLAUDE = "claude" | |
| GPT = "gpt" | |
| GEMINI = "gemini" | |
| PERPLEXITY = "perplexity" | |
| AUTO = "auto" | |
| class ExecutionMode(Enum): | |
| STANDARD = "standard" | |
| RECURSIVE = "recursive" | |
| CAUSAL = "causal" | |
| RDOD = "rdod" | |
| class RouteDecision: | |
| provider: str | |
| confidence: float | |
| reasoning: str | |
| mode: str | |
| estimated_tokens: int | |
| class InferenceRouter: | |
| """Routes inference requests to optimal model providers.""" | |
| def __init__(self): | |
| self.provider_costs = { | |
| "claude": {"input": 0.003, "output": 0.015}, | |
| "gpt": {"input": 0.002, "output": 0.008}, | |
| "gemini": {"input": 0.0005, "output": 0.0015}, | |
| "perplexity": {"input": 0.002, "output": 0.008} | |
| } | |
| self.provider_latency = { | |
| "claude": 2.5, "gpt": 2.0, "gemini": 1.8, "perplexity": 3.0 | |
| } | |
| def _estimate_tokens(self, prompt: str) -> int: | |
| """Estimate token count from prompt text.""" | |
| return len(prompt.split()) * 1.3 | |
| def _analyze_prompt(self, prompt: str) -> Dict[str, Any]: | |
| """Analyze prompt characteristics.""" | |
| length = len(prompt) | |
| words = prompt.split() | |
| complexity_keywords = ["analyze", "reason", "complex", "detailed", "technical"] | |
| creative_keywords = ["create", "write", "story", "poem", "art"] | |
| code_keywords = ["code", "function", "program", "debug", "algorithm"] | |
| has_complexity = any(kw in prompt.lower() for kw in complexity_keywords) | |
| has_creative = any(kw in prompt.lower() for kw in creative_keywords) | |
| has_code = any(kw in prompt.lower() for kw in code_keywords) | |
| return { | |
| "length": length, | |
| "word_count": len(words), | |
| "has_complexity": has_complexity, | |
| "has_creative": has_creative, | |
| "has_code": has_code, | |
| "is_short": length < 100, | |
| "is_long": length > 1000 | |
| } | |
| def _select_provider(self, analysis: Dict, target: str) -> RouteDecision: | |
| """Select optimal provider based on analysis.""" | |
| if target == "auto": | |
| if analysis["has_code"]: | |
| provider = "claude" | |
| confidence = 0.9 | |
| reasoning = "Code-related prompts routed to Claude" | |
| elif analysis["has_creative"]: | |
| provider = "gpt" | |
| confidence = 0.85 | |
| reasoning = "Creative prompts routed to GPT" | |
| elif analysis["has_complexity"]: | |
| provider = "claude" | |
| confidence = 0.88 | |
| reasoning = "Complex reasoning routed to Claude" | |
| else: | |
| provider = "gemini" | |
| confidence = 0.8 | |
| reasoning = "Standard prompts routed to Gemini" | |
| else: | |
| provider = target | |
| confidence = 0.95 | |
| reasoning = f"User-specified provider: {target}" | |
| return RouteDecision( | |
| provider=provider, | |
| confidence=confidence, | |
| reasoning=reasoning, | |
| mode="standard", | |
| estimated_tokens=int(self._estimate_tokens( | |
| analysis["length"] * 0.5 | |
| )) | |
| ) | |
| def route(self, prompt: str, target_model: str = "auto") -> Dict[str, Any]: | |
| """Route a prompt to optimal model provider.""" | |
| analysis = self._analyze_prompt(prompt) | |
| decision = self._select_provider(analysis, target_model) | |
| return { | |
| "status": "routed", | |
| "timestamp": time.time(), | |
| "analysis": analysis, | |
| "route": { | |
| "provider": decision.provider, | |
| "confidence": decision.confidence, | |
| "reasoning": decision.reasoning, | |
| "estimated_tokens": decision.estimated_tokens, | |
| "latency_estimate": self.provider_latency.get(decision.provider, 2.0), | |
| "cost_estimate": { | |
| "input": self.provider_costs.get(decision.provider, {}).get("input", 0), | |
| "output": self.provider_costs.get(decision.provider, {}).get("output", 0) | |
| } | |
| } | |
| } |