Spaces:

Yash030
/

claude-code-proxy

Running

Yash030 committed 20 days ago

Commit

4974012

1 Parent(s): 8238c16

Add smart task-aware routing (Phase 1)

- core/model_capabilities.py: Model capability registry
- core/task_detector.py: Detect task requirements from requests
- core/chain_engine.py: Multi-model pipeline engine (placeholder)
- api/model_router.py: Add resolve_with_task_awareness method

Now the 'auto' model can detect coding/reasoning tasks and route to the best model.

Files changed (4) hide show

api/model_router.py +76 -0
core/chain_engine.py +156 -0
core/model_capabilities.py +185 -0
core/task_detector.py +193 -0

api/model_router.py CHANGED Viewed

@@ -9,6 +9,12 @@ from loguru import logger
 from config.provider_ids import SUPPORTED_PROVIDER_IDS
 from config.settings import Settings
 from core.session_tracker import SessionTracker
 from .gateway_model_ids import decode_gateway_model_id
 from .models.anthropic import MessagesRequest, TokenCountRequest
@@ -278,3 +284,73 @@ class ModelRouter:
             update={"model": resolved.provider_model}, deep=True
         )
         return RoutedTokenCountRequest(request=routed, resolved=resolved)

 from config.provider_ids import SUPPORTED_PROVIDER_IDS
 from config.settings import Settings
 from core.session_tracker import SessionTracker
+from core.model_capabilities import (
+    get_model_capabilities,
+    find_best_model_for_task,
+    find_models_with_capability,
+)
+from core.task_detector import TaskDetector
 from .gateway_model_ids import decode_gateway_model_id
 from .models.anthropic import MessagesRequest, TokenCountRequest
             update={"model": resolved.provider_model}, deep=True
         )
         return RoutedTokenCountRequest(request=routed, resolved=resolved)
+    def resolve_with_task_awareness(
+        self,
+        claude_model_name: str,
+        messages: list,
+    ) -> ResolvedModel:
+        """Resolve a model, preferring a candidate whose capabilities fit the task.
+        Names other than 'auto' are resolved exactly as ``resolve`` does. For
+        'auto', ``TaskDetector`` inspects ``messages``; when its confidence is at
+        least 0.7 and it asks for coding, reasoning or vision, the
+        highest-priority capable candidate is returned. Otherwise the first
+        entry from ``resolve_candidates`` is used.
+        """
+        if not self._is_auto(claude_model_name):
+            return self.resolve(claude_model_name)
+        reqs = TaskDetector().detect_requirements(messages)
+        logger.info(
+            "Task-aware routing: detected requirements={} confidence={:.2f}",
+            reqs.required_capabilities,
+            reqs.confidence,
+        )
+        pool = self.resolve_candidates(claude_model_name)
+        if not pool:
+            # No candidates to choose between: defer to plain resolution.
+            return self.resolve(claude_model_name)
+        # Only the specialised capabilities steer routing; general text does not.
+        flags = (
+            ("coding", reqs.requires_coding),
+            ("reasoning", reqs.requires_reasoning),
+            ("vision", reqs.requires_vision),
+        )
+        needed = {cap_name for cap_name, wanted in flags if wanted}
+        if reqs.confidence < 0.7 or not needed:
+            logger.debug("Task-aware routing: low confidence, using load-based selection")
+            return pool[0]
+        best = find_best_model_for_task(
+            needed, [entry.provider_model_ref for entry in pool]
+        )
+        if best:
+            # Map the registry entry back to the first candidate carrying that ref.
+            chosen = next(
+                (entry for entry in pool if entry.provider_model_ref == best.model_ref),
+                None,
+            )
+            if chosen is not None:
+                logger.info(
+                    "Task-aware routing: selected {} for capabilities={}",
+                    best.model_ref,
+                    needed,
+                )
+                return chosen
+        # No capable model among the candidates: take the first one
+        # (presumably ordered by load; confirm against resolve_candidates).
+        return pool[0]
+    def get_routing_hint(self, messages: list) -> str:
+        """Return the task detector's priority hint for ``messages``.
+        The hint is whatever ``TaskDetector.get_priority_hint`` yields for the
+        requirements detected in the messages.
+        """
+        task_detector = TaskDetector()
+        return task_detector.get_priority_hint(
+            task_detector.detect_requirements(messages)
+        )

core/chain_engine.py ADDED Viewed

	@@ -0,0 +1,156 @@

+"""Model chaining engine for multi-stage AI pipelines."""
+from __future__ import annotations
+import asyncio
+from collections.abc import AsyncIterator
+from dataclasses import dataclass
+from typing import Any, Callable
+from loguru import logger
+@dataclass(frozen=True, slots=True)
+class ChainStage:
+    """A single stage in a model chain.
+    Immutable record naming the model that runs the stage and describing
+    what the stage is for.
+    """
+    # Model reference; ChainEngine uses the text before the first "/" as the
+    # provider id when looking up a provider.
+    model_ref: str  # e.g., "zen/minimax-m2.5-free"
+    # Short identifier for the stage.
+    stage_name: str  # e.g., "vision_analysis", "code_generation"
+    # Human-readable summary of what the stage does.
+    description: str
+@dataclass(frozen=True, slots=True)
+class ChainResult:
+    """Result from executing a chain stage.
+    NOTE(review): nothing in the code shown in this commit constructs this
+    type yet; field meanings below are inferred from their names.
+    """
+    stage: ChainStage  # the stage this result belongs to
+    output: str  # presumably the text produced by the stage
+    success: bool
+    # presumably an error description when success is False (TODO confirm)
+    error: str | None = None
+# Chain templates for common multi-capability tasks
+# Keys are template names looked up by ChainEngine.get_chain_for_requirements;
+# values list the stages in execution order.
+CHAIN_TEMPLATES: dict[str, list[ChainStage]] = {
+    # NOTE(review): the "image_analysis" model below is registered in
+    # core/model_capabilities.py without vision=True; confirm it can actually
+    # accept images before relying on this template.
+    "vision_to_text": [
+        ChainStage(
+            model_ref="nvidia_nim/stepfun-ai/step-3.5-flash",
+            stage_name="image_analysis",
+            description="Analyze image content",
+        ),
+        ChainStage(
+            model_ref="zen/minimax-m2.5-free",
+            stage_name="response_generation",
+            description="Generate final response",
+        ),
+    ],
+    # Plan with one model, then generate the output with another.
+    "reasoning_to_generation": [
+        ChainStage(
+            model_ref="nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct",
+            stage_name="analysis",
+            description="Analyze and plan",
+        ),
+        ChainStage(
+            model_ref="zen/minimax-m2.5-free",
+            stage_name="generation",
+            description="Generate output",
+        ),
+    ],
+}
+class ChainEngine:
+    """Execute multi-model pipelines for complex requests.
+    Phase 1 placeholder: chains can be selected, but only the first stage of
+    a chain is actually executed.
+    """
+    def __init__(self, provider_getter: Callable[[str], Any]):
+        """Store the provider lookup.
+        Args:
+            provider_getter: Called with a provider id (the part of a model
+                ref before the first "/"); must return an object exposing
+                ``stream_response(messages, system_prompt, metadata)``.
+        """
+        self._provider_getter = provider_getter
+    async def execute_simple_chain(
+        self,
+        stages: list[ChainStage],
+        initial_messages: list[Any],
+        system_prompt: str | None = None,
+    ) -> AsyncIterator[str]:
+        """Stream the response for a chain of models.
+        Only ``stages[0]`` is executed for now; any later stages are skipped
+        (with a warning) until full chaining is implemented.
+        Args:
+            stages: List of chain stages to execute
+            initial_messages: Initial user messages
+            system_prompt: Optional system prompt
+        Yields:
+            Events from the first stage's provider, passed through unchanged
+        """
+        if not stages:
+            return
+        logger.info("ChainEngine: executing {} stages", len(stages))
+        first_stage = stages[0]
+        if len(stages) > 1:
+            # Make the placeholder behaviour visible instead of silently
+            # dropping the remaining stages.
+            logger.warning(
+                "ChainEngine: chaining not implemented yet, skipping {} later stage(s)",
+                len(stages) - 1,
+            )
+        # The provider id is everything before the first "/" of the model ref.
+        provider_id = first_stage.model_ref.partition("/")[0]
+        provider = self._provider_getter(provider_id)
+        logger.info(
+            "ChainEngine: using model {} for chain",
+            first_stage.model_ref,
+        )
+        async for event in provider.stream_response(
+            initial_messages, system_prompt, {}
+        ):
+            yield event
+    def get_chain_for_requirements(
+        self,
+        required_capabilities: set[str],
+        available_models: list[str],
+    ) -> list[ChainStage] | None:
+        """Determine the appropriate chain based on required capabilities.
+        Args:
+            required_capabilities: Set of capabilities needed
+            available_models: Available model references; a template is only
+                returned when every one of its stages uses a model listed here
+        Returns:
+            A fresh list of chain stages, or None if a single model is
+            sufficient, no template fits, or a template model is unavailable
+        """
+        # If only one capability needed, no chain needed
+        if len(required_capabilities) <= 1:
+            return None
+        wants_vision = "vision" in required_capabilities
+        wants_coding = "coding" in required_capabilities
+        wants_reasoning = "reasoning" in required_capabilities
+        if wants_vision and (wants_coding or wants_reasoning):
+            template_name = "vision_to_text"
+        elif wants_reasoning and wants_coding:
+            template_name = "reasoning_to_generation"
+        else:
+            # Default: no chain for now
+            return None
+        stages = CHAIN_TEMPLATES.get(template_name)
+        if not stages:
+            return None
+        usable = set(available_models)
+        if any(stage.model_ref not in usable for stage in stages):
+            # Never hand back a chain that points at a model the caller
+            # cannot reach.
+            return None
+        # Copy so callers cannot mutate the shared module-level template.
+        return list(stages)
+def _text_from_stream_event(event: Any) -> str:
+    """Return the text carried by one streamed event, or "" if it has none.
+    Assumes Anthropic-style streaming (TODO confirm against the providers'
+    ``stream_response``): an event is either an already-decoded dict or an
+    SSE string whose ``data:`` lines hold JSON, and text arrives in
+    ``content_block_delta`` payloads whose delta has type ``text_delta``.
+    """
+    import json  # local import: this block cannot edit the module import list
+    if isinstance(event, dict):
+        payloads = [event]
+    elif isinstance(event, str):
+        payloads = []
+        for line in event.splitlines():
+            if not line.startswith("data:"):
+                continue
+            try:
+                decoded = json.loads(line[len("data:"):].strip())
+            except ValueError:
+                # Not JSON (e.g. a keep-alive marker): nothing to collect.
+                continue
+            if isinstance(decoded, dict):
+                payloads.append(decoded)
+    else:
+        return ""
+    pieces = []
+    for payload in payloads:
+        if payload.get("type") != "content_block_delta":
+            continue
+        delta = payload.get("delta")
+        if isinstance(delta, dict) and delta.get("type") == "text_delta":
+            text = delta.get("text")
+            if isinstance(text, str):
+                pieces.append(text)
+    return "".join(pieces)
+async def execute_model_for_stage(
+    provider: Any,
+    messages: list[Any],
+    system: str | None,
+    metadata: dict[str, Any],
+) -> str:
+    """Execute a single model stage and return its output.
+    Args:
+        provider: Object exposing ``stream_response(messages, system, metadata)``
+            as an async iterator of stream events
+        messages: Messages to send to the model
+        system: Optional system prompt
+        metadata: Extra metadata forwarded to the provider
+    Returns:
+        The concatenated text deltas of the streamed response ("" when the
+        stream carried no text)
+    Raises:
+        Exception: Whatever the provider raises, re-raised after logging
+    """
+    output_parts: list[str] = []
+    try:
+        async for event in provider.stream_response(messages, system, metadata):
+            text = _text_from_stream_event(event)
+            if text:
+                output_parts.append(text)
+    except Exception as e:
+        logger.error("Chain stage failed: {}", e)
+        raise
+    return "".join(output_parts)

core/model_capabilities.py ADDED Viewed

	@@ -0,0 +1,185 @@

+"""Model capability registry for intelligent routing."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+@dataclass(frozen=True, slots=True)
+class ModelCapabilities:
+    """Capabilities of a specific model for routing decisions.
+    Immutable record. The helper functions in this module read capabilities
+    by attribute name (via getattr), so a capability name passed to them must
+    match one of the field names below.
+    """
+    provider_id: str
+    model_id: str
+    model_ref: str  # provider/model format
+    vision: bool = False  # Can process images
+    coding: bool = False  # Good at code generation/analysis
+    reasoning: bool = False  # Strong reasoning/thinking
+    general_text: bool = True  # General text generation
+    multimodal_input: bool = False  # Can handle multiple input types
+    multimodal_output: bool = False  # Can produce multiple output types
+    # max_tokens and speed are not read by any function in this module.
+    max_tokens: int = 4096
+    speed: str = "medium"  # "fast", "medium", "slow"
+    # Used as the descending sort key when several models qualify.
+    priority: int = 100  # Higher = preferred for its capabilities
+# Registry of all available models and their capabilities
+# This can be extended with actual model discovery later
+# Keys are model refs and match each entry's model_ref field.
+# NOTE(review): no entry sets vision=True, so a lookup that requires "vision"
+# finds no model in this registry; confirm whether a vision model is missing.
+# NOTE(review): every entry sets coding, reasoning and general_text, so for
+# those capabilities selection is decided by priority alone.
+MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
+    # Zen/minimax models
+    "zen/minimax-m2.5-free": ModelCapabilities(
+        provider_id="zen",
+        model_id="minimax-m2.5-free",
+        model_ref="zen/minimax-m2.5-free",
+        coding=True,
+        reasoning=True,
+        general_text=True,
+        max_tokens=32000,
+        speed="fast",
+        priority=80,
+    ),
+    # NVIDIA NIM models
+    "nvidia_nim/stepfun-ai/step-3.5-flash": ModelCapabilities(
+        provider_id="nvidia_nim",
+        model_id="step-3.5-flash",
+        model_ref="nvidia_nim/stepfun-ai/step-3.5-flash",
+        coding=True,
+        reasoning=True,
+        general_text=True,
+        max_tokens=32000,
+        speed="fast",
+        priority=70,
+    ),
+    "nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct": ModelCapabilities(
+        provider_id="nvidia_nim",
+        model_id="qwen3-coder-480b-a35b-instruct",
+        model_ref="nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct",
+        coding=True,
+        reasoning=True,
+        general_text=True,
+        max_tokens=32000,
+        speed="slow",
+        priority=90,
+    ),
+    "nvidia_nim/mistralai/mistral-large-3-675b-instruct-2512": ModelCapabilities(
+        provider_id="nvidia_nim",
+        model_id="mistral-large-3-675b-instruct-2512",
+        model_ref="nvidia_nim/mistralai/mistral-large-3-675b-instruct-2512",
+        coding=True,
+        reasoning=True,
+        general_text=True,
+        max_tokens=32000,
+        speed="slow",
+        priority=85,
+    ),
+    "nvidia_nim/abacusai/dracarys-llama-3.1-70b-instruct": ModelCapabilities(
+        provider_id="nvidia_nim",
+        model_id="dracarys-llama-3.1-70b-instruct",
+        model_ref="nvidia_nim/abacusai/dracarys-llama-3.1-70b-instruct",
+        coding=True,
+        reasoning=True,
+        general_text=True,
+        max_tokens=32000,
+        speed="medium",
+        priority=75,
+    ),
+    "nvidia_nim/z-ai/glm4.7": ModelCapabilities(
+        provider_id="nvidia_nim",
+        model_id="glm4.7",
+        model_ref="nvidia_nim/z-ai/glm4.7",
+        coding=True,
+        reasoning=True,
+        general_text=True,
+        max_tokens=32000,
+        speed="medium",
+        priority=70,
+    ),
+    "nvidia_nim/bytedance/seed-oss-36b-instruct": ModelCapabilities(
+        provider_id="nvidia_nim",
+        model_id="seed-oss-36b-instruct",
+        model_ref="nvidia_nim/bytedance/seed-oss-36b-instruct",
+        coding=True,
+        reasoning=True,
+        general_text=True,
+        max_tokens=32000,
+        speed="medium",
+        priority=65,
+    ),
+    "nvidia_nim/mistralai/mistral-nemotron": ModelCapabilities(
+        provider_id="nvidia_nim",
+        model_id="mistral-nemotron",
+        model_ref="nvidia_nim/mistralai/mistral-nemotron",
+        coding=True,
+        reasoning=True,
+        general_text=True,
+        max_tokens=32000,
+        speed="medium",
+        priority=60,
+    ),
+}
+def get_model_capabilities(model_ref: str) -> ModelCapabilities | None:
+    """Look up the registry entry for ``model_ref``; None when it is not registered."""
+    if model_ref in MODEL_CAPABILITIES:
+        return MODEL_CAPABILITIES[model_ref]
+    return None
+def find_models_with_capability(capability: str) -> list[ModelCapabilities]:
+    """Return every registered model whose ``capability`` attribute is truthy.
+    The result is ordered from highest to lowest priority; models with equal
+    priority keep their registry order.
+    """
+    matching = [
+        entry
+        for entry in MODEL_CAPABILITIES.values()
+        if getattr(entry, capability, False)
+    ]
+    return sorted(matching, key=lambda entry: entry.priority, reverse=True)
+def find_best_model_for_task(
+    required_capabilities: set[str],
+    available_models: Sequence[str] | None = None,
+) -> ModelCapabilities | None:
+    """Find the best model matching required capabilities.
+    Args:
+        required_capabilities: Set of capability names needed (e.g., {"coding", "vision"})
+        available_models: Optional list of model refs to filter by. None means
+            "consider the whole registry"; an empty sequence means "no models
+            are available" and therefore yields None. Refs that are not in the
+            registry are ignored.
+    Returns:
+        The highest-priority ModelCapabilities that has every required
+        capability (first one wins on a priority tie), or None
+    """
+    # Distinguish "no filter" (None) from "nothing available" (empty): the
+    # latter must not fall back to the full registry.
+    if available_models is None:
+        pool = list(MODEL_CAPABILITIES.values())
+    else:
+        pool = [
+            MODEL_CAPABILITIES[ref]
+            for ref in available_models
+            if ref in MODEL_CAPABILITIES
+        ]
+    capable = [
+        entry
+        for entry in pool
+        if all(getattr(entry, cap, False) for cap in required_capabilities)
+    ]
+    if not capable:
+        return None
+    # max() returns the first maximal item, matching a stable descending sort.
+    return max(capable, key=lambda entry: entry.priority)
+def get_capability_match_score(
+    model_caps: ModelCapabilities,
+    required: set[str],
+) -> tuple[int, int]:
+    """Score how well a model covers the required capabilities.
+    Returns (number of required capabilities the model has, model priority),
+    suitable as a sort key.
+    """
+    satisfied = [cap for cap in required if getattr(model_caps, cap, False)]
+    return len(satisfied), model_caps.priority

core/task_detector.py ADDED Viewed

	@@ -0,0 +1,193 @@

+"""Task detection - analyze requests to determine required capabilities."""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from typing import Any
+from loguru import logger
+from core.anthropic.content import get_block_attr
+# Keywords that indicate specific task types
+# All matching is done against lower-cased text, so entries must be lower case.
+# Entries that are not a single \w+ word (those ending in a space, and "c++")
+# can never hit the whole-word set intersection in TaskDetector._detect_coding;
+# they only match through its substring pass.
+CODING_KEYWORDS = {
+    "python", "javascript", "typescript", "java", "c++", "cpp", "golang",
+    "rust", "ruby", "php", "swift", "kotlin", "sql", "html", "css", "react",
+    "vue", "angular", "node", "django", "flask", "fastapi", "spring",
+    "function", "class", "method", "api", "endpoint", "database", "query",
+    "algorithm", "debug", "error", "fix", "implement", "create", "write",
+    "code", "programming", "script", "module", "import", "export",
+    "def ", "const ", "let ", "var ", "function ", "async ", "await ",
+}
+# A single occurrence of any of these, even inside a longer word, marks the
+# request as needing reasoning (see TaskDetector._detect_reasoning).
+REASONING_KEYWORDS = {
+    "analyze", "analysis", "reason", "why", "how", "explain", "compare",
+    "contrast", "evaluate", "assess", "conclude", "deduce", "infer",
+    "logic", "proof", "theorem", "hypothesis", "synthesize", "strategy",
+    "think", "solve", "derive", "calculate", "compute", "math", "equation",
+    "formula", "solution", "optimal", "best", "improve", "optimize",
+    "design", "architecture", "system", "plan", "decision", "recommend",
+}
+# NOTE(review): not referenced by TaskDetector in this commit; vision is
+# detected only from "image" content blocks.
+VISION_KEYWORDS = {
+    "image", "picture", "photo", "screenshot", "diagram", "chart", "graph",
+    "visual", "see", "look at", "describe what", "what's in", "identify",
+    "recognize", "detect", "object", "scene", "face", "text in image",
+}
+@dataclass(frozen=True, slots=True)
+class TaskRequirements:
+    """Immutable summary of the capabilities a request was detected to need."""
+    requires_vision: bool = False
+    requires_coding: bool = False
+    requires_reasoning: bool = False
+    requires_general_text: bool = True
+    confidence: float = 0.0  # 0-1 confidence in detection
+    @property
+    def required_capabilities(self) -> set[str]:
+        """Names of the capabilities whose ``requires_*`` flag is set."""
+        flag_by_name = {
+            "vision": self.requires_vision,
+            "coding": self.requires_coding,
+            "reasoning": self.requires_reasoning,
+            "general_text": self.requires_general_text,
+        }
+        return {name for name, enabled in flag_by_name.items() if enabled}
+class TaskDetector:
+    """Analyze request messages to detect required capabilities."""
+    # Literal fragments that suggest source code is present in the text.
+    _CODE_PATTERNS = (
+        "def ", "function ", "class ", "import ", "const ", "let ", "var ", "()", "=>",
+    )
+    def detect_requirements(self, messages: list[Any]) -> TaskRequirements:
+        """Analyze messages and return required capabilities.
+        Args:
+            messages: Messages as dicts with a "content" key or objects with a
+                ``content`` attribute; anything else is skipped. Content may be
+                a string or a list of blocks, of which only "image" and "text"
+                blocks are inspected.
+        Returns:
+            TaskRequirements with the vision/coding/reasoning flags, general
+            text always required, and a confidence estimate.
+        """
+        has_vision = False
+        text_parts: list[str] = []
+        for msg in messages:
+            # Handle both dict and object message formats
+            if isinstance(msg, dict):
+                content = msg.get("content")
+            elif hasattr(msg, "content"):
+                content = msg.content
+            else:
+                continue
+            if isinstance(content, str):
+                text_parts.append(content.lower())
+            elif isinstance(content, list):
+                for block in content:
+                    b_type = get_block_attr(block, "type") or ""
+                    if b_type == "image":
+                        # An image block is the only vision signal.
+                        has_vision = True
+                        logger.debug("TaskDetector: Found image in message")
+                    elif b_type == "text":
+                        text = get_block_attr(block, "text", "") or ""
+                        text_parts.append(text.lower())
+        # Every part is followed by one space so words from different
+        # messages/blocks never run together.
+        total_text = "".join(f"{part} " for part in text_parts)
+        # Keyword analysis only makes sense when there is some text.
+        has_coding = bool(total_text) and self._detect_coding(total_text)
+        has_reasoning = bool(total_text) and self._detect_reasoning(total_text)
+        confidence = self._calculate_confidence(
+            has_vision, has_coding, has_reasoning, total_text
+        )
+        # General text is always required, whatever else was detected.
+        result = TaskRequirements(
+            requires_vision=has_vision,
+            requires_coding=has_coding,
+            requires_reasoning=has_reasoning,
+            requires_general_text=True,
+            confidence=confidence,
+        )
+        logger.info(
+            "TaskDetector: detected caps={} confidence={:.2f}",
+            result.required_capabilities,
+            confidence,
+        )
+        return result
+    def _detect_coding(self, text: str) -> bool:
+        """Detect if request requires coding capabilities.
+        True when at least two coding keywords appear as whole words, or when
+        some keyword appears as a substring and either a different keyword
+        is still present once the first is removed or a code-like pattern
+        occurs in the text.
+        """
+        words = set(re.findall(r'\b\w+\b', text))
+        if len(words & CODING_KEYWORDS) >= 2:
+            return True
+        present = [keyword for keyword in CODING_KEYWORDS if keyword in text]
+        if not present:
+            return False
+        # The pattern check does not depend on which keyword matched, so it
+        # is evaluated once instead of once per keyword.
+        if any(pat in text for pat in self._CODE_PATTERNS):
+            return True
+        for keyword in present:
+            # Remove this keyword so overlapping text cannot count twice.
+            remaining = text.replace(keyword, "")
+            if any(
+                other in remaining
+                for other in CODING_KEYWORDS
+                if other != keyword
+            ):
+                return True
+        return False
+    def _detect_reasoning(self, text: str) -> bool:
+        """Detect if request requires reasoning capabilities.
+        True when any reasoning keyword occurs anywhere in the text. A
+        whole-word hit is always also a substring hit, so one substring pass
+        covers both checks.
+        """
+        return any(keyword in text for keyword in REASONING_KEYWORDS)
+    def _calculate_confidence(
+        self,
+        has_vision: bool,
+        has_coding: bool,
+        has_reasoning: bool,
+        text: str,
+    ) -> float:
+        """Calculate confidence in the detection.
+        Returns 0.95 when an image was found; 0.7, 0.8 or 0.85 for
+        coding/reasoning depending on word count (<= 50, > 50, > 100); and
+        0.5 when nothing specific was detected.
+        """
+        if has_vision:
+            return 0.95  # Image detection is reliable
+        if not (has_coding or has_reasoning):
+            return 0.5  # Default confidence for general text
+        # More text = more confident
+        word_count = len(text.split())
+        if word_count > 100:
+            return 0.85
+        if word_count > 50:
+            return 0.8
+        return 0.7
+    def get_priority_hint(self, requirements: TaskRequirements) -> str:
+        """Get a hint for model priority based on requirements.
+        Returns "vision", "coding" or "reasoning" for the first flag set in
+        that order, otherwise "balanced".
+        """
+        if requirements.requires_vision:
+            return "vision"
+        if requirements.requires_coding:
+            return "coding"
+        if requirements.requires_reasoning:
+            return "reasoning"
+        return "balanced"