Spaces:

Yash030
/

claude-code-proxy

Running

File size: 7,533 Bytes

"""Task detection - analyze requests to determine required capabilities."""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Any

from loguru import logger

from core.anthropic.content import get_block_attr


# Keywords that indicate specific task types
# Vocabulary whose presence suggests a programming task. Entries that end in a
# space (e.g. "def ") carry that space as part of the literal.
CODING_KEYWORDS = {
    # Languages and markup
    "python", "javascript", "typescript", "java", "c++", "cpp", "golang",
    "rust", "ruby", "php", "swift", "kotlin", "sql", "html", "css",
    # Frameworks and runtimes
    "react", "vue", "angular", "node", "django", "flask", "fastapi", "spring",
    # Programming concepts
    "function", "class", "method", "api", "endpoint", "database", "query",
    "algorithm",
    # Task verbs and general terms
    "debug", "error", "fix", "implement", "create", "write", "code",
    "programming", "script", "module", "import", "export",
    # Syntax tokens (the trailing space is significant)
    "def ", "const ", "let ", "var ", "function ", "async ", "await ",
}

# Vocabulary whose presence suggests an analytical / reasoning-heavy task.
REASONING_KEYWORDS = {
    # Analysis and explanation
    "analyze", "analysis", "reason", "why", "how", "explain", "compare",
    "contrast", "evaluate", "assess",
    # Logic and inference
    "conclude", "deduce", "infer", "logic", "proof", "theorem", "hypothesis",
    "synthesize", "strategy", "think",
    # Math and problem solving
    "solve", "derive", "calculate", "compute", "math", "equation", "formula",
    "solution",
    # Optimization, design and planning
    "optimal", "best", "improve", "optimize", "design", "architecture",
    "system", "plan", "decision", "recommend",
}

# Vocabulary related to images and visual inspection. Some entries are
# multi-word phrases, so they can never equal a single word token.
VISION_KEYWORDS = {
    # Kinds of visual media
    "image", "picture", "photo", "screenshot", "diagram", "chart", "graph",
    "visual",
    # Viewing phrases
    "see", "look at", "describe what", "what's in", "text in image",
    # Recognition vocabulary
    "identify", "recognize", "detect", "object", "scene", "face",
}


@dataclass(frozen=True, slots=True)
class TaskRequirements:
    """Detected requirements for a request."""

    requires_vision: bool = False
    requires_coding: bool = False
    requires_reasoning: bool = False
    requires_general_text: bool = True
    confidence: float = 0.0  # 0-1 confidence in detection

    @property
    def required_capabilities(self) -> set[str]:
        caps = set()
        if self.requires_vision:
            caps.add("vision")
        if self.requires_coding:
            caps.add("coding")
        if self.requires_reasoning:
            caps.add("reasoning")
        if self.requires_general_text:
            caps.add("general_text")
        return caps


class TaskDetector:
    """Analyze request messages to detect required capabilities.

    Detection is heuristic: an ``image`` content block signals vision, and
    keyword hits in the lower-cased message text signal coding and/or
    reasoning. General text is always reported as required.
    """

    # Syntax fragments that are not keywords themselves but count as one extra
    # coding signal when at least one coding keyword is also present.
    _SYNTAX_MARKERS = ("()", "=>")

    def detect_requirements(self, messages: list[Any]) -> TaskRequirements:
        """Analyze messages and return the capabilities they appear to need.

        Args:
            messages: Request messages, each either a dict with a ``"content"``
                key or an object with a ``content`` attribute. Content may be a
                plain string or a list of content blocks; anything else is
                ignored. ``None`` or an empty list yields the defaults.

        Returns:
            A ``TaskRequirements`` with the detected flags and a confidence
            score. ``requires_general_text`` is always ``True``.
        """
        text, has_vision = self._collect_text_and_vision(messages)

        # Both detectors return False for empty text, so no guard is needed.
        has_coding = self._detect_coding(text)
        has_reasoning = self._detect_reasoning(text)

        confidence = self._calculate_confidence(
            has_vision, has_coding, has_reasoning, text
        )

        # General text is the baseline capability and is always required,
        # whether or not a more specific capability was detected.
        result = TaskRequirements(
            requires_vision=has_vision,
            requires_coding=has_coding,
            requires_reasoning=has_reasoning,
            requires_general_text=True,
            confidence=confidence,
        )

        logger.info(
            "TaskDetector: detected caps={} confidence={:.2f}",
            result.required_capabilities,
            confidence,
        )

        return result

    def _collect_text_and_vision(self, messages: list[Any]) -> tuple[str, bool]:
        """Gather the lower-cased text of all messages and note image blocks.

        Returns:
            ``(text, has_vision)`` where ``text`` is every string content and
            every ``text`` block joined by single spaces and lower-cased, and
            ``has_vision`` is ``True`` if any block has type ``"image"``.
        """
        parts: list[str] = []
        has_vision = False

        for msg in messages or ():
            # Handle both dict and object message formats.
            if isinstance(msg, dict):
                content = msg.get("content")
            else:
                content = getattr(msg, "content", None)

            if isinstance(content, str):
                parts.append(content)
            elif isinstance(content, list):
                for block in content:
                    block_type = get_block_attr(block, "type") or ""
                    if block_type == "image":
                        has_vision = True
                        logger.debug("TaskDetector: Found image in message")
                    elif block_type == "text":
                        block_text = get_block_attr(block, "text", "") or ""
                        # Skip malformed blocks whose "text" is not a string.
                        if isinstance(block_text, str):
                            parts.append(block_text)

        return " ".join(parts).lower(), has_vision

    @staticmethod
    def _matched_keywords(text: str, keywords: set[str]) -> set[str]:
        """Return the distinct keywords that occur in ``text`` at a word start.

        A keyword only matches when it is not preceded by a word character, so
        inflected forms still count ("functions" -> "function") while hits
        buried inside another word do not ("trust" does not match "rust").
        Matching is case-sensitive; callers pass lower-cased text.

        Longer keywords are tried first and matches never overlap, so a span
        of text is attributed to a single keyword ("javascript" is not also
        counted as "java"). Results are whitespace-stripped so variants such as
        "function" and "function " collapse into one signal.

        Keywords are expected to start with a word character.
        """
        candidates = sorted((kw for kw in keywords if kw), key=len, reverse=True)
        if not text or not candidates:
            return set()

        # The `re` module caches compiled patterns, so rebuilding the same
        # pattern string on every call does not recompile it.
        pattern = (
            r"(?<!\w)(?:" + "|".join(re.escape(kw) for kw in candidates) + ")"
        )
        return {match.group().strip() for match in re.finditer(pattern, text)}

    def _detect_coding(self, text: str) -> bool:
        """Detect if request requires coding capabilities.

        Requires two independent signals: either two distinct coding keywords,
        or one coding keyword plus a syntax marker (``()`` or ``=>``). A single
        keyword never satisfies the threshold on its own.
        """
        hits = self._matched_keywords(text, CODING_KEYWORDS)
        if len(hits) >= 2:
            return True
        return bool(hits) and any(
            marker in text for marker in self._SYNTAX_MARKERS
        )

    def _detect_reasoning(self, text: str) -> bool:
        """Detect if request requires reasoning capabilities.

        A single reasoning keyword occurring at a word start is sufficient.
        """
        return bool(self._matched_keywords(text, REASONING_KEYWORDS))

    def _calculate_confidence(
        self,
        has_vision: bool,
        has_coding: bool,
        has_reasoning: bool,
        text: str,
    ) -> float:
        """Calculate confidence in the detection.

        Returns 0.95 when an image was found, 0.5 when nothing specific was
        detected, and otherwise 0.7 / 0.8 / 0.85 depending on whether the text
        has at most 50, more than 50, or more than 100 whitespace-separated
        words.
        """
        if has_vision:
            return 0.95  # Image detection is reliable
        if not (has_coding or has_reasoning):
            return 0.5  # Default confidence for general text

        # More text = more confident
        word_count = len(text.split())
        if word_count > 100:
            return 0.85
        if word_count > 50:
            return 0.8
        return 0.7

    def get_priority_hint(self, requirements: TaskRequirements) -> str:
        """Get a hint for model priority based on requirements.

        Vision takes precedence over coding, which takes precedence over
        reasoning; "balanced" is returned when none of the three is set.
        """
        if requirements.requires_vision:
            return "vision"
        if requirements.requires_coding:
            return "coding"
        if requirements.requires_reasoning:
            return "reasoning"
        return "balanced"