Spaces:

Yash030
/

claude-code-proxy

Running

App Files Files Community

claude-code-proxy / core /task_detector.py

Yash030

Implement image support in proxy with vision-aware routing

574e4e7 2 days ago

raw

history blame contribute delete

7.53 kB

	"""Task detection - analyze requests to determine required capabilities."""

	from __future__ import annotations

	import re
	from dataclasses import dataclass
	from typing import Any

	from loguru import logger

	from core.anthropic.content import get_block_attr


	# Keywords that indicate specific task types
	# Coding vocabulary, grouped by theme. Entries that end with a space are
	# source-code fragments (e.g. ``def ``) rather than plain words.
	CODING_KEYWORDS = {
	    # Languages and markup
	    "python", "javascript", "typescript", "java", "c++", "cpp", "golang",
	    "rust", "ruby", "php", "swift", "kotlin", "sql", "html", "css",
	    # Frameworks and runtimes
	    "react", "vue", "angular", "node", "django", "flask", "fastapi", "spring",
	    # Programming concepts
	    "function", "class", "method", "api", "endpoint", "database", "query",
	    "algorithm",
	    # Actions and general terms
	    "debug", "error", "fix", "implement", "create", "write", "code",
	    "programming", "script", "module", "import", "export",
	    # Syntax fragments (note the trailing space)
	    "def ", "const ", "let ", "var ", "function ", "async ", "await ",
	}

	# Reasoning vocabulary, grouped by theme.
	REASONING_KEYWORDS = {
	    # Analysis and explanation
	    "analyze", "analysis", "reason", "why", "how", "explain", "compare",
	    "contrast", "evaluate", "assess",
	    # Logic and inference
	    "conclude", "deduce", "infer", "logic", "proof", "theorem", "hypothesis",
	    "synthesize", "strategy", "think",
	    # Math and problem solving
	    "solve", "derive", "calculate", "compute", "math", "equation", "formula",
	    "solution",
	    # Optimization, design and planning
	    "optimal", "best", "improve", "optimize", "design", "architecture",
	    "system", "plan", "decision", "recommend",
	}

	# Vision vocabulary, grouped by theme. Several entries are multi-word phrases.
	VISION_KEYWORDS = {
	    # Kinds of visual media
	    "image", "picture", "photo", "screenshot", "diagram", "chart", "graph",
	    "visual",
	    # Viewing phrases
	    "see", "look at", "describe what", "what's in",
	    # Recognition tasks and subjects
	    "identify", "recognize", "detect", "object", "scene", "face",
	    "text in image",
	}


	@dataclass(frozen=True, slots=True)
	class TaskRequirements:
	"""Detected requirements for a request."""

	requires_vision: bool = False
	requires_coding: bool = False
	requires_reasoning: bool = False
	requires_general_text: bool = True
	confidence: float = 0.0 # 0-1 confidence in detection

	@property
	def required_capabilities(self) -> set[str]:
	caps = set()
	if self.requires_vision:
	caps.add("vision")
	if self.requires_coding:
	caps.add("coding")
	if self.requires_reasoning:
	caps.add("reasoning")
	if self.requires_general_text:
	caps.add("general_text")
	return caps


	class TaskDetector:
	    """Analyze request messages to detect required capabilities.

	    Detection is heuristic. An ``image`` content block flags vision; keyword
	    and syntax-fragment matching on the lower-cased message text flags coding
	    and reasoning.

	    Args:
	        coding_keywords: Optional replacement for the module-level
	            ``CODING_KEYWORDS``. Entries are matched against lower-cased
	            text, so they should be lower-case.
	        reasoning_keywords: Optional replacement for the module-level
	            ``REASONING_KEYWORDS``. Same lower-case expectation.
	    """

	    # Source-code fragments that mark a request as coding on their own.
	    _CODE_PATTERNS = (
	        "def ",
	        "function ",
	        "class ",
	        "import ",
	        "const ",
	        "let ",
	        "var ",
	        "()",
	        "=>",
	    )

	    # Tokenizer for the whole-word keyword check.
	    _WORD_RE = re.compile(r"\b\w+\b")

	    # Placeholder written over matched keywords; it cannot occur in a keyword,
	    # so masking a match never splices neighbouring text into a new keyword.
	    _MASK = "\x00"

	    def __init__(
	        self,
	        *,
	        coding_keywords: set[str] | frozenset[str] | None = None,
	        reasoning_keywords: set[str] | frozenset[str] | None = None,
	    ) -> None:
	        coding = CODING_KEYWORDS if coding_keywords is None else coding_keywords
	        reasoning = (
	            REASONING_KEYWORDS if reasoning_keywords is None else reasoning_keywords
	        )
	        # An empty string is a substring of every text, so it must not count.
	        self._coding_keywords = frozenset(kw for kw in coding if kw)
	        self._reasoning_keywords = frozenset(kw for kw in reasoning if kw)
	        # Longest first, so "javascript" is consumed before "java" / "script".
	        self._coding_by_length = sorted(
	            self._coding_keywords, key=lambda kw: (-len(kw), kw)
	        )

	    def detect_requirements(self, messages: list[Any]) -> TaskRequirements:
	        """Analyze messages and return required capabilities.

	        Args:
	            messages: Messages given as dicts with a ``"content"`` key or as
	                objects with a ``content`` attribute; anything else is
	                skipped. Content may be a string or a list of blocks.
	                ``None`` is treated as an empty list.

	        Returns:
	            The detected requirements. ``requires_general_text`` is always
	            reported as True.
	        """
	        has_vision = False
	        has_coding = False
	        has_reasoning = False
	        text_parts: list[str] = []

	        for msg in messages or ():
	            # Handle both dict and object message formats
	            if isinstance(msg, dict):
	                content = msg.get("content")
	            elif hasattr(msg, "content"):
	                content = msg.content
	            else:
	                continue

	            if isinstance(content, str):
	                text_parts.append(content.lower())
	            elif isinstance(content, list):
	                for block in content:
	                    b_type = get_block_attr(block, "type") or ""
	                    if b_type == "image":
	                        has_vision = True
	                        logger.debug("TaskDetector: Found image in message")
	                    elif b_type == "text":
	                        text = get_block_attr(block, "text", "") or ""
	                        text_parts.append(text.lower())

	        # Every part keeps a trailing space so that fragments such as "def "
	        # still match when they end a message.
	        total_text = "".join(f"{part} " for part in text_parts)

	        if total_text:
	            has_coding = self._detect_coding(total_text)
	            has_reasoning = self._detect_reasoning(total_text)

	        confidence = self._calculate_confidence(
	            has_vision, has_coding, has_reasoning, total_text
	        )

	        result = TaskRequirements(
	            requires_vision=has_vision,
	            requires_coding=has_coding,
	            requires_reasoning=has_reasoning,
	            requires_general_text=True,
	            confidence=confidence,
	        )

	        logger.info(
	            "TaskDetector: detected caps={} confidence={:.2f}",
	            result.required_capabilities,
	            confidence,
	        )

	        return result

	    def _detect_coding(self, text: str) -> bool:
	        """Detect if request requires coding capabilities.

	        Returns True when ``text`` (expected lower-case) contains a code
	        fragment from ``_CODE_PATTERNS``, or at least two distinct coding
	        keywords. A single mention never counts twice: overlapping keywords
	        are resolved longest-first, so "javascript" is one hit, not
	        "java" plus "script".
	        """
	        if any(pattern in text for pattern in self._CODE_PATTERNS):
	            return True

	        # Cheap path: two different keywords present as whole words.
	        words = set(self._WORD_RE.findall(text))
	        if len(words & self._coding_keywords) >= 2:
	            return True

	        # Substring path: also catches inflections ("apis") and entries the
	        # tokenizer cannot produce ("c++", "await ").
	        # NOTE(review): substring matching still fires mid-word ("fix" in
	        # "prefix"); kept because the original matching was substring-based.
	        remaining = text
	        hits = 0
	        for keyword in self._coding_by_length:
	            if keyword not in remaining:
	                continue
	            hits += 1
	            if hits >= 2:
	                return True
	            remaining = remaining.replace(keyword, self._MASK)
	        return False

	    def _detect_reasoning(self, text: str) -> bool:
	        """Detect if request requires reasoning capabilities.

	        Returns True when any reasoning keyword occurs in ``text`` as a
	        substring. A separate whole-word pass is unnecessary because every
	        whole-word match is also a substring match.
	        """
	        # NOTE(review): this also fires mid-word ("how" in "show"); kept
	        # unchanged for backward compatibility.
	        return any(keyword in text for keyword in self._reasoning_keywords)

	    def _calculate_confidence(
	        self,
	        has_vision: bool,
	        has_coding: bool,
	        has_reasoning: bool,
	        text: str,
	    ) -> float:
	        """Calculate confidence in the detection.

	        Returns 0.95 when an image was found, 0.5 when nothing was detected,
	        and otherwise 0.7 / 0.8 / 0.85 as the whitespace-separated word count
	        of ``text`` exceeds 0 / 50 / 100.
	        """
	        if has_vision:
	            return 0.95  # Image detection is reliable
	        if not (has_coding or has_reasoning):
	            return 0.5  # Default confidence for general text

	        # More text = more confident
	        word_count = len(text.split())
	        if word_count > 100:
	            return 0.85
	        if word_count > 50:
	            return 0.8
	        return 0.7

	    def get_priority_hint(self, requirements: TaskRequirements) -> str:
	        """Get a hint for model priority based on requirements.

	        Precedence is vision, then coding, then reasoning; ``"balanced"`` is
	        returned when none of the three is required.
	        """
	        if requirements.requires_vision:
	            return "vision"
	        if requirements.requires_coding:
	            return "coding"
	        if requirements.requires_reasoning:
	            return "reasoning"
	        return "balanced"