claude-code-proxy / core /task_detector.py
Yash030's picture
Implement image support in proxy with vision-aware routing
574e4e7
"""Task detection - analyze requests to determine required capabilities."""
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Any
from loguru import logger
from core.anthropic.content import get_block_attr
# Keyword tables used by the text heuristics in this module.
#
# CODING_KEYWORDS is consulted by TaskDetector._detect_coding, both as whole
# words and as raw substrings of the lower-cased request text. Entries that
# contain punctuation or a trailing space ("c++", "def ", ...) cannot be
# produced by the word tokenizer, so they only ever match as substrings.
CODING_KEYWORDS = {
    # Programming / markup languages
    "python", "javascript", "typescript", "java", "c++", "cpp", "golang",
    "rust", "ruby", "php", "swift", "kotlin", "sql", "html", "css",
    # Frameworks and runtimes
    "react", "vue", "angular", "node", "django", "flask", "fastapi", "spring",
    # Software concepts
    "function", "class", "method", "api", "endpoint", "database", "query",
    "algorithm",
    # Development verbs and nouns
    "debug", "error", "fix", "implement", "create", "write", "code",
    "programming", "script", "module", "import", "export",
    # Source-code fragments (the trailing space is significant)
    "def ", "const ", "let ", "var ", "function ", "async ", "await ",
}
# Words whose presence in the lower-cased request text makes
# TaskDetector._detect_reasoning report that reasoning is required.
REASONING_KEYWORDS = {
    # Analysis and explanation
    "analyze", "analysis", "reason", "why", "how", "explain", "compare",
    "contrast", "evaluate", "assess",
    # Formal inference
    "conclude", "deduce", "infer", "logic", "proof", "theorem", "hypothesis",
    "synthesize", "strategy", "think",
    # Problem solving and mathematics
    "solve", "derive", "calculate", "compute", "math", "equation", "formula",
    "solution",
    # Optimisation, design and planning
    "optimal", "best", "improve", "optimize", "design", "architecture",
    "system", "plan", "decision", "recommend",
}
# Words and phrases associated with image-related requests.
# NOTE(review): nothing in the visible part of this module reads this set;
# TaskDetector flags vision only when a message carries an "image" block.
# Confirm whether another module imports it.
VISION_KEYWORDS = {
    # Kinds of visual material
    "image", "picture", "photo", "screenshot", "diagram", "chart", "graph",
    "visual",
    # Viewing phrases
    "see", "look at", "describe what", "what's in",
    # Recognition tasks
    "identify", "recognize", "detect",
    # Things to be recognised
    "object", "scene", "face", "text in image",
}
@dataclass(frozen=True, slots=True)
class TaskRequirements:
"""Detected requirements for a request."""
requires_vision: bool = False
requires_coding: bool = False
requires_reasoning: bool = False
requires_general_text: bool = True
confidence: float = 0.0 # 0-1 confidence in detection
@property
def required_capabilities(self) -> set[str]:
caps = set()
if self.requires_vision:
caps.add("vision")
if self.requires_coding:
caps.add("coding")
if self.requires_reasoning:
caps.add("reasoning")
if self.requires_general_text:
caps.add("general_text")
return caps
class TaskDetector:
    """Analyze request messages to detect required capabilities.

    Vision is detected structurally (an ``"image"`` content block); coding and
    reasoning are detected with keyword heuristics over the lower-cased text
    of all messages.

    NOTE(review): the keyword heuristics match raw substrings, so short
    keywords also fire inside longer words (e.g. "how" inside "show", or
    "java" + "script" inside a lone "javascript"). That behaviour is kept
    as-is here; tighten it deliberately if routing proves too eager.
    """

    # Source-code fragments whose mere presence marks text as coding-related.
    _CODE_PATTERNS = (
        "def ",
        "function ",
        "class ",
        "import ",
        "const ",
        "let ",
        "var ",
        "()",
        "=>",
    )

    def detect_requirements(self, messages: list[Any]) -> TaskRequirements:
        """Analyze messages and return required capabilities.

        Args:
            messages: Request messages, each either a dict with a ``"content"``
                key or an object with a ``content`` attribute. Content may be
                a plain string or a list of content blocks. Anything else is
                ignored; ``None`` is treated as an empty list.

        Returns:
            A ``TaskRequirements`` whose ``requires_general_text`` is always
            ``True`` and whose other flags reflect what was detected.
        """
        has_vision, total_text = self._scan_messages(messages)

        has_coding = False
        has_reasoning = False
        # Keyword analysis only makes sense when some text was collected.
        if total_text:
            has_coding = self._detect_coding(total_text)
            has_reasoning = self._detect_reasoning(total_text)

        confidence = self._calculate_confidence(
            has_vision, has_coding, has_reasoning, total_text
        )

        # General text is always required, so it doubles as the default when
        # nothing more specific was detected.
        result = TaskRequirements(
            requires_vision=has_vision,
            requires_coding=has_coding,
            requires_reasoning=has_reasoning,
            requires_general_text=True,
            confidence=confidence,
        )
        logger.info(
            "TaskDetector: detected caps={} confidence={:.2f}",
            result.required_capabilities,
            confidence,
        )
        return result

    def _scan_messages(self, messages: list[Any]) -> tuple[bool, str]:
        """Return ``(has_image, text)`` gathered from *messages*.

        ``text`` is the lower-cased content of every string message and every
        ``"text"`` block, each followed by a single space.
        """
        has_image = False
        pieces: list[str] = []

        for msg in messages or ():
            # Handle both dict and object message formats.
            if isinstance(msg, dict):
                content = msg.get("content")
            elif hasattr(msg, "content"):
                content = msg.content
            else:
                continue

            if isinstance(content, str):
                pieces.append(content.lower())
            elif isinstance(content, list):
                for block in content:
                    # get_block_attr is a project helper; presumably it reads
                    # a field from dict- or object-style blocks - confirm.
                    block_type = get_block_attr(block, "type") or ""
                    if block_type == "image":
                        has_image = True
                        logger.debug("TaskDetector: Found image in message")
                    elif block_type == "text":
                        text = get_block_attr(block, "text", "") or ""
                        # Skip malformed payloads instead of failing on .lower().
                        if isinstance(text, str):
                            pieces.append(text.lower())

        # Join once at the end rather than growing a string inside the loops.
        return has_image, "".join(f"{piece} " for piece in pieces)

    def _detect_coding(self, text: str) -> bool:
        """Detect if request requires coding capabilities.

        *text* is expected to be lower-cased already. It counts as coding when
        any of these holds:

        * it contains one of the source-code fragments in ``_CODE_PATTERNS``;
        * at least two distinct whole words are in ``CODING_KEYWORDS``;
        * one keyword occurs as a substring and, after deleting every
          occurrence of it, a different keyword still occurs as a substring.
        """
        if any(pattern in text for pattern in self._CODE_PATTERNS):
            return True

        words = set(re.findall(r"\b\w+\b", text))
        if len(words & CODING_KEYWORDS) >= 2:
            return True

        # Substring pass: also reaches keywords the tokenizer cannot produce
        # (such as "c++" or "def ").
        present = [keyword for keyword in CODING_KEYWORDS if keyword in text]
        for keyword in present:
            remaining = text.replace(keyword, "")
            # Scan the full keyword set: deleting `keyword` can splice the
            # surrounding text into a keyword that was not there before.
            if any(
                other != keyword and other in remaining
                for other in CODING_KEYWORDS
            ):
                return True
        return False

    def _detect_reasoning(self, text: str) -> bool:
        """Detect if request requires reasoning capabilities.

        Returns ``True`` when any entry of ``REASONING_KEYWORDS`` occurs as a
        substring of the (already lower-cased) *text*. A whole-word match is
        necessarily also a substring match, so no separate word pass is needed.
        """
        return any(keyword in text for keyword in REASONING_KEYWORDS)

    def _calculate_confidence(
        self,
        has_vision: bool,
        has_coding: bool,
        has_reasoning: bool,
        text: str,
    ) -> float:
        """Calculate confidence in the detection.

        Returns 0.95 when an image was found, 0.5 when nothing specific was
        detected, and otherwise 0.7, 0.8 or 0.85 for texts of at most 50, more
        than 50, or more than 100 whitespace-separated words respectively.
        """
        if has_vision:
            return 0.95  # Image detection is reliable
        if not (has_coding or has_reasoning):
            return 0.5  # Default confidence for general text

        # More text = more confident
        word_count = len(text.split())
        if word_count > 100:
            return 0.85
        if word_count > 50:
            return 0.8
        return 0.7

    def get_priority_hint(self, requirements: TaskRequirements) -> str:
        """Get a hint for model priority based on requirements.

        Precedence is vision, then coding, then reasoning; ``"balanced"`` is
        returned when none of those flags is set.
        """
        if requirements.requires_vision:
            return "vision"
        if requirements.requires_coding:
            return "coding"
        if requirements.requires_reasoning:
            return "reasoning"
        return "balanced"