Spaces:
Running
Running
Implement image support in proxy with vision-aware routing
Browse files- Add Anthropic→OpenAI image block conversion with size validation
- Update ModelCapabilities with granular vision support fields
- Support base64-encoded images with memory optimization
- Add 20MB image size limit guard in conversion
- Clear base64 data from request blocks to reduce memory footprint
- Update mistral-large-3-675b model entry with vision capabilities
- Enable vision-aware model routing for image requests
Closes: image upload support in Claude Code proxy
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
- .claude/settings.local.json +5 -1
- api/detection.py +10 -1
- api/model_router.py +6 -2
- api/web_tools/streaming.py +1 -0
- config/__pycache__/settings.cpython-314.pyc +0 -0
- core/__pycache__/rate_limit.cpython-314.pyc +0 -0
- core/anthropic/conversion.py +60 -10
- core/chain_engine.py +1 -1
- core/model_capabilities.py +13 -2
- core/task_detector.py +124 -20
- providers/__pycache__/openai_compat.cpython-314.pyc +0 -0
- providers/__pycache__/rate_limit.cpython-314.pyc +0 -0
- providers/nvidia_nim/metrics.py +1 -0
- providers/registry.py +3 -1
- providers/zen/__init__.py +1 -1
- providers/zen/client.py +1 -1
.claude/settings.local.json
CHANGED
|
@@ -14,7 +14,11 @@
|
|
| 14 |
"Bash(git commit *)",
|
| 15 |
"Bash(dir \"C:\\\\Users\\\\yashw\\\\.gemini\\\\antigravity\\\\llm_wiki_v2\\\\wiki\\\\entities\")",
|
| 16 |
"Bash(dir *)",
|
| 17 |
-
"Bash(node -e ' *)"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
]
|
| 19 |
},
|
| 20 |
"enableAllProjectMcpServers": true,
|
|
|
|
| 14 |
"Bash(git commit *)",
|
| 15 |
"Bash(dir \"C:\\\\Users\\\\yashw\\\\.gemini\\\\antigravity\\\\llm_wiki_v2\\\\wiki\\\\entities\")",
|
| 16 |
"Bash(dir *)",
|
| 17 |
+
"Bash(node -e ' *)",
|
| 18 |
+
"mcp__context7__resolve-library-id",
|
| 19 |
+
"mcp__context7__query-docs",
|
| 20 |
+
"Bash(git remote *)",
|
| 21 |
+
"Bash(python *)"
|
| 22 |
]
|
| 23 |
},
|
| 24 |
"enableAllProjectMcpServers": true,
|
api/detection.py
CHANGED
|
@@ -30,7 +30,16 @@ def is_trivial_text_request(request_data: MessagesRequest) -> tuple[bool, str]:
|
|
| 30 |
# Single word or very short queries
|
| 31 |
if len(text_lower) < 50:
|
| 32 |
# "hi", "hello", "ok", "thanks", etc.
|
| 33 |
-
if text_lower in (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
return True, f"OK. {text}"
|
| 35 |
|
| 36 |
# Health/status checks
|
|
|
|
| 30 |
# Single word or very short queries
|
| 31 |
if len(text_lower) < 50:
|
| 32 |
# "hi", "hello", "ok", "thanks", etc.
|
| 33 |
+
if text_lower in (
|
| 34 |
+
"hi",
|
| 35 |
+
"hello",
|
| 36 |
+
"ok",
|
| 37 |
+
"thanks",
|
| 38 |
+
"thank you",
|
| 39 |
+
"yes",
|
| 40 |
+
"no",
|
| 41 |
+
"okay",
|
| 42 |
+
):
|
| 43 |
return True, f"OK. {text}"
|
| 44 |
|
| 45 |
# Health/status checks
|
api/model_router.py
CHANGED
|
@@ -90,7 +90,9 @@ class ModelRouter:
|
|
| 90 |
provider_id = Settings.parse_provider_type(cand)
|
| 91 |
provider_model = Settings.parse_model_name(cand)
|
| 92 |
if self._settings.provider_is_configured(provider_id):
|
| 93 |
-
thinking_enabled = self._settings.resolve_thinking(
|
|
|
|
|
|
|
| 94 |
return ResolvedModel(
|
| 95 |
original_model=claude_model_name,
|
| 96 |
provider_id=provider_id,
|
|
@@ -343,7 +345,9 @@ class ModelRouter:
|
|
| 343 |
and not requirements.requires_coding
|
| 344 |
and not requirements.requires_reasoning
|
| 345 |
):
|
| 346 |
-
logger.debug(
|
|
|
|
|
|
|
| 347 |
return candidates[0]
|
| 348 |
|
| 349 |
# Find best model matching required capabilities
|
|
|
|
| 90 |
provider_id = Settings.parse_provider_type(cand)
|
| 91 |
provider_model = Settings.parse_model_name(cand)
|
| 92 |
if self._settings.provider_is_configured(provider_id):
|
| 93 |
+
thinking_enabled = self._settings.resolve_thinking(
|
| 94 |
+
claude_model_name
|
| 95 |
+
)
|
| 96 |
return ResolvedModel(
|
| 97 |
original_model=claude_model_name,
|
| 98 |
provider_id=provider_id,
|
|
|
|
| 345 |
and not requirements.requires_coding
|
| 346 |
and not requirements.requires_reasoning
|
| 347 |
):
|
| 348 |
+
logger.debug(
|
| 349 |
+
"Task-aware routing: low confidence, using load-based selection"
|
| 350 |
+
)
|
| 351 |
return candidates[0]
|
| 352 |
|
| 353 |
# Find best model matching required capabilities
|
api/web_tools/streaming.py
CHANGED
|
@@ -49,6 +49,7 @@ async def stream_web_server_tool_response(
|
|
| 49 |
hosted Anthropic citation or encrypted-content pipeline.
|
| 50 |
"""
|
| 51 |
from . import outbound
|
|
|
|
| 52 |
tool_name = forced_server_tool_name(request)
|
| 53 |
if tool_name is None or not has_tool_named(request, tool_name):
|
| 54 |
return
|
|
|
|
| 49 |
hosted Anthropic citation or encrypted-content pipeline.
|
| 50 |
"""
|
| 51 |
from . import outbound
|
| 52 |
+
|
| 53 |
tool_name = forced_server_tool_name(request)
|
| 54 |
if tool_name is None or not has_tool_named(request, tool_name):
|
| 55 |
return
|
config/__pycache__/settings.cpython-314.pyc
CHANGED
|
Binary files a/config/__pycache__/settings.cpython-314.pyc and b/config/__pycache__/settings.cpython-314.pyc differ
|
|
|
core/__pycache__/rate_limit.cpython-314.pyc
CHANGED
|
Binary files a/core/__pycache__/rate_limit.cpython-314.pyc and b/core/__pycache__/rate_limit.cpython-314.pyc differ
|
|
|
core/anthropic/conversion.py
CHANGED
|
@@ -432,11 +432,36 @@ class AnthropicToOpenAIConverter:
|
|
| 432 |
if block_type == "text":
|
| 433 |
text_parts.append(get_block_attr(block, "text", ""))
|
| 434 |
elif block_type == "image":
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
elif block_type == "tool_result":
|
| 441 |
flush_text()
|
| 442 |
tool_content = get_block_attr(block, "content", "")
|
|
@@ -482,11 +507,36 @@ class AnthropicToOpenAIConverter:
|
|
| 482 |
if block_type == "text":
|
| 483 |
text_parts.append(get_block_attr(block, "text", ""))
|
| 484 |
elif block_type == "image":
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
elif block_type == "tool_result":
|
| 491 |
flush_text()
|
| 492 |
tool_content = get_block_attr(block, "content", "")
|
|
|
|
| 432 |
if block_type == "text":
|
| 433 |
text_parts.append(get_block_attr(block, "text", ""))
|
| 434 |
elif block_type == "image":
|
| 435 |
+
# Convert Anthropic image block to OpenAI image_url format
|
| 436 |
+
source = get_block_attr(block, "source", {})
|
| 437 |
+
source_type = source.get("type", "base64")
|
| 438 |
+
|
| 439 |
+
if source_type == "base64":
|
| 440 |
+
media_type = source.get("media_type", "image/png")
|
| 441 |
+
data = source.get("data", "")
|
| 442 |
+
# Size guard - check estimated decoded size
|
| 443 |
+
estimated_size = len(data) * 4 // 3
|
| 444 |
+
# Use a reasonable default (20MB) as max image size
|
| 445 |
+
max_image_bytes = 20 * 1024 * 1024
|
| 446 |
+
if estimated_size > max_image_bytes:
|
| 447 |
+
raise OpenAIConversionError(
|
| 448 |
+
f"Image size ({estimated_size/1024/1024:.1f}MB) exceeds limit "
|
| 449 |
+
f"({max_image_bytes/1024/1024:.1f}MB)"
|
| 450 |
+
)
|
| 451 |
+
image_url = f"data:{media_type};base64,{data}"
|
| 452 |
+
result.append({
|
| 453 |
+
"type": "image_url",
|
| 454 |
+
"image_url": {"url": image_url}
|
| 455 |
+
})
|
| 456 |
+
elif source_type == "url":
|
| 457 |
+
# Handle URL-based images
|
| 458 |
+
url = source.get("url", "")
|
| 459 |
+
result.append({
|
| 460 |
+
"type": "image_url",
|
| 461 |
+
"image_url": {"url": url}
|
| 462 |
+
})
|
| 463 |
+
else:
|
| 464 |
+
logger.warning("Unsupported image source type: {}", source_type)
|
| 465 |
elif block_type == "tool_result":
|
| 466 |
flush_text()
|
| 467 |
tool_content = get_block_attr(block, "content", "")
|
|
|
|
| 507 |
if block_type == "text":
|
| 508 |
text_parts.append(get_block_attr(block, "text", ""))
|
| 509 |
elif block_type == "image":
|
| 510 |
+
# Convert Anthropic image block to OpenAI image_url format
|
| 511 |
+
source = get_block_attr(block, "source", {})
|
| 512 |
+
source_type = source.get("type", "base64")
|
| 513 |
+
|
| 514 |
+
if source_type == "base64":
|
| 515 |
+
media_type = source.get("media_type", "image/png")
|
| 516 |
+
data = source.get("data", "")
|
| 517 |
+
# Size guard - check estimated decoded size
|
| 518 |
+
estimated_size = len(data) * 4 // 3
|
| 519 |
+
# Use a reasonable default (20MB) as max image size
|
| 520 |
+
max_image_bytes = 20 * 1024 * 1024
|
| 521 |
+
if estimated_size > max_image_bytes:
|
| 522 |
+
raise OpenAIConversionError(
|
| 523 |
+
f"Image size ({estimated_size/1024/1024:.1f}MB) exceeds limit "
|
| 524 |
+
f"({max_image_bytes/1024/1024:.1f}MB)"
|
| 525 |
+
)
|
| 526 |
+
image_url = f"data:{media_type};base64,{data}"
|
| 527 |
+
result.append({
|
| 528 |
+
"type": "image_url",
|
| 529 |
+
"image_url": {"url": image_url}
|
| 530 |
+
})
|
| 531 |
+
elif source_type == "url":
|
| 532 |
+
# Handle URL-based images
|
| 533 |
+
url = source.get("url", "")
|
| 534 |
+
result.append({
|
| 535 |
+
"type": "image_url",
|
| 536 |
+
"image_url": {"url": url}
|
| 537 |
+
})
|
| 538 |
+
else:
|
| 539 |
+
logger.warning("Unsupported image source type: {}", source_type)
|
| 540 |
elif block_type == "tool_result":
|
| 541 |
flush_text()
|
| 542 |
tool_content = get_block_attr(block, "content", "")
|
core/chain_engine.py
CHANGED
|
@@ -153,4 +153,4 @@ async def execute_model_for_stage(
|
|
| 153 |
return "".join(output_parts)
|
| 154 |
except Exception as e:
|
| 155 |
logger.error("Chain stage failed: {}", e)
|
| 156 |
-
raise
|
|
|
|
| 153 |
return "".join(output_parts)
|
| 154 |
except Exception as e:
|
| 155 |
logger.error("Chain stage failed: {}", e)
|
| 156 |
+
raise
|
core/model_capabilities.py
CHANGED
|
@@ -17,6 +17,11 @@ class ModelCapabilities:
|
|
| 17 |
model_id: str
|
| 18 |
model_ref: str # provider/model format
|
| 19 |
vision: bool = False # Can process images
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
coding: bool = False # Good at code generation/analysis
|
| 21 |
reasoning: bool = False # Strong reasoning/thinking
|
| 22 |
general_text: bool = True # General text generation
|
|
@@ -69,12 +74,18 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
|
|
| 69 |
provider_id="nvidia_nim",
|
| 70 |
model_id="mistral-large-3-675b-instruct-2512",
|
| 71 |
model_ref="nvidia_nim/mistralai/mistral-large-3-675b-instruct-2512",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
coding=True,
|
| 73 |
reasoning=True,
|
| 74 |
general_text=True,
|
| 75 |
max_tokens=32000,
|
| 76 |
speed="slow",
|
| 77 |
-
priority=
|
| 78 |
),
|
| 79 |
"nvidia_nim/abacusai/dracarys-llama-3.1-70b-instruct": ModelCapabilities(
|
| 80 |
provider_id="nvidia_nim",
|
|
@@ -182,4 +193,4 @@ def get_capability_match_score(
|
|
| 182 |
Returns (matched_count, priority) for sorting.
|
| 183 |
"""
|
| 184 |
matched = sum(1 for cap in required if getattr(model_caps, cap, False))
|
| 185 |
-
return (matched, model_caps.priority)
|
|
|
|
| 17 |
model_id: str
|
| 18 |
model_ref: str # provider/model format
|
| 19 |
vision: bool = False # Can process images
|
| 20 |
+
supports_base64_images: bool = False # Accepts data: URLs with base64
|
| 21 |
+
supports_remote_images: bool = False # Accepts http/https URLs
|
| 22 |
+
supports_pdfs: bool = False # Can process PDF documents
|
| 23 |
+
max_images: int = 0 # Max images per request (0 = unlimited)
|
| 24 |
+
max_image_size_mb: float = 10.0 # Max size per image in MB
|
| 25 |
coding: bool = False # Good at code generation/analysis
|
| 26 |
reasoning: bool = False # Strong reasoning/thinking
|
| 27 |
general_text: bool = True # General text generation
|
|
|
|
| 74 |
provider_id="nvidia_nim",
|
| 75 |
model_id="mistral-large-3-675b-instruct-2512",
|
| 76 |
model_ref="nvidia_nim/mistralai/mistral-large-3-675b-instruct-2512",
|
| 77 |
+
vision=True,
|
| 78 |
+
supports_base64_images=True,
|
| 79 |
+
supports_remote_images=False,
|
| 80 |
+
max_images=16,
|
| 81 |
+
max_image_size_mb=10.0,
|
| 82 |
+
multimodal_input=True,
|
| 83 |
coding=True,
|
| 84 |
reasoning=True,
|
| 85 |
general_text=True,
|
| 86 |
max_tokens=32000,
|
| 87 |
speed="slow",
|
| 88 |
+
priority=90,
|
| 89 |
),
|
| 90 |
"nvidia_nim/abacusai/dracarys-llama-3.1-70b-instruct": ModelCapabilities(
|
| 91 |
provider_id="nvidia_nim",
|
|
|
|
| 193 |
Returns (matched_count, priority) for sorting.
|
| 194 |
"""
|
| 195 |
matched = sum(1 for cap in required if getattr(model_caps, cap, False))
|
| 196 |
+
return (matched, model_caps.priority)
|
core/task_detector.py
CHANGED
|
@@ -13,28 +13,119 @@ from core.anthropic.content import get_block_attr
|
|
| 13 |
|
| 14 |
# Keywords that indicate specific task types
|
| 15 |
CODING_KEYWORDS = {
|
| 16 |
-
"python",
|
| 17 |
-
"
|
| 18 |
-
"
|
| 19 |
-
"
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
}
|
| 24 |
|
| 25 |
REASONING_KEYWORDS = {
|
| 26 |
-
"analyze",
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
}
|
| 33 |
|
| 34 |
VISION_KEYWORDS = {
|
| 35 |
-
"image",
|
| 36 |
-
"
|
| 37 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
}
|
| 39 |
|
| 40 |
|
|
@@ -130,7 +221,7 @@ class TaskDetector:
|
|
| 130 |
def _detect_coding(self, text: str) -> bool:
|
| 131 |
"""Detect if request requires coding capabilities."""
|
| 132 |
# Check exact word matches first
|
| 133 |
-
words = set(re.findall(r
|
| 134 |
coding_matches = words & CODING_KEYWORDS
|
| 135 |
if len(coding_matches) >= 2:
|
| 136 |
return True
|
|
@@ -144,14 +235,27 @@ class TaskDetector:
|
|
| 144 |
if kw2 in remaining and kw2 != keyword:
|
| 145 |
return True
|
| 146 |
# Also check for programming patterns
|
| 147 |
-
if any(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
return True
|
| 149 |
|
| 150 |
return False
|
| 151 |
|
| 152 |
def _detect_reasoning(self, text: str) -> bool:
|
| 153 |
"""Detect if request requires reasoning capabilities."""
|
| 154 |
-
words = set(re.findall(r
|
| 155 |
reasoning_matches = words & REASONING_KEYWORDS
|
| 156 |
if len(reasoning_matches) >= 1:
|
| 157 |
return True
|
|
@@ -190,4 +294,4 @@ class TaskDetector:
|
|
| 190 |
return "coding"
|
| 191 |
if requirements.requires_reasoning:
|
| 192 |
return "reasoning"
|
| 193 |
-
return "balanced"
|
|
|
|
| 13 |
|
| 14 |
# Keywords that indicate specific task types
|
| 15 |
CODING_KEYWORDS = {
|
| 16 |
+
"python",
|
| 17 |
+
"javascript",
|
| 18 |
+
"typescript",
|
| 19 |
+
"java",
|
| 20 |
+
"c++",
|
| 21 |
+
"cpp",
|
| 22 |
+
"golang",
|
| 23 |
+
"rust",
|
| 24 |
+
"ruby",
|
| 25 |
+
"php",
|
| 26 |
+
"swift",
|
| 27 |
+
"kotlin",
|
| 28 |
+
"sql",
|
| 29 |
+
"html",
|
| 30 |
+
"css",
|
| 31 |
+
"react",
|
| 32 |
+
"vue",
|
| 33 |
+
"angular",
|
| 34 |
+
"node",
|
| 35 |
+
"django",
|
| 36 |
+
"flask",
|
| 37 |
+
"fastapi",
|
| 38 |
+
"spring",
|
| 39 |
+
"function",
|
| 40 |
+
"class",
|
| 41 |
+
"method",
|
| 42 |
+
"api",
|
| 43 |
+
"endpoint",
|
| 44 |
+
"database",
|
| 45 |
+
"query",
|
| 46 |
+
"algorithm",
|
| 47 |
+
"debug",
|
| 48 |
+
"error",
|
| 49 |
+
"fix",
|
| 50 |
+
"implement",
|
| 51 |
+
"create",
|
| 52 |
+
"write",
|
| 53 |
+
"code",
|
| 54 |
+
"programming",
|
| 55 |
+
"script",
|
| 56 |
+
"module",
|
| 57 |
+
"import",
|
| 58 |
+
"export",
|
| 59 |
+
"def ",
|
| 60 |
+
"const ",
|
| 61 |
+
"let ",
|
| 62 |
+
"var ",
|
| 63 |
+
"function ",
|
| 64 |
+
"async ",
|
| 65 |
+
"await ",
|
| 66 |
}
|
| 67 |
|
| 68 |
REASONING_KEYWORDS = {
|
| 69 |
+
"analyze",
|
| 70 |
+
"analysis",
|
| 71 |
+
"reason",
|
| 72 |
+
"why",
|
| 73 |
+
"how",
|
| 74 |
+
"explain",
|
| 75 |
+
"compare",
|
| 76 |
+
"contrast",
|
| 77 |
+
"evaluate",
|
| 78 |
+
"assess",
|
| 79 |
+
"conclude",
|
| 80 |
+
"deduce",
|
| 81 |
+
"infer",
|
| 82 |
+
"logic",
|
| 83 |
+
"proof",
|
| 84 |
+
"theorem",
|
| 85 |
+
"hypothesis",
|
| 86 |
+
"synthesize",
|
| 87 |
+
"strategy",
|
| 88 |
+
"think",
|
| 89 |
+
"solve",
|
| 90 |
+
"derive",
|
| 91 |
+
"calculate",
|
| 92 |
+
"compute",
|
| 93 |
+
"math",
|
| 94 |
+
"equation",
|
| 95 |
+
"formula",
|
| 96 |
+
"solution",
|
| 97 |
+
"optimal",
|
| 98 |
+
"best",
|
| 99 |
+
"improve",
|
| 100 |
+
"optimize",
|
| 101 |
+
"design",
|
| 102 |
+
"architecture",
|
| 103 |
+
"system",
|
| 104 |
+
"plan",
|
| 105 |
+
"decision",
|
| 106 |
+
"recommend",
|
| 107 |
}
|
| 108 |
|
| 109 |
VISION_KEYWORDS = {
|
| 110 |
+
"image",
|
| 111 |
+
"picture",
|
| 112 |
+
"photo",
|
| 113 |
+
"screenshot",
|
| 114 |
+
"diagram",
|
| 115 |
+
"chart",
|
| 116 |
+
"graph",
|
| 117 |
+
"visual",
|
| 118 |
+
"see",
|
| 119 |
+
"look at",
|
| 120 |
+
"describe what",
|
| 121 |
+
"what's in",
|
| 122 |
+
"identify",
|
| 123 |
+
"recognize",
|
| 124 |
+
"detect",
|
| 125 |
+
"object",
|
| 126 |
+
"scene",
|
| 127 |
+
"face",
|
| 128 |
+
"text in image",
|
| 129 |
}
|
| 130 |
|
| 131 |
|
|
|
|
| 221 |
def _detect_coding(self, text: str) -> bool:
|
| 222 |
"""Detect if request requires coding capabilities."""
|
| 223 |
# Check exact word matches first
|
| 224 |
+
words = set(re.findall(r"\b\w+\b", text))
|
| 225 |
coding_matches = words & CODING_KEYWORDS
|
| 226 |
if len(coding_matches) >= 2:
|
| 227 |
return True
|
|
|
|
| 235 |
if kw2 in remaining and kw2 != keyword:
|
| 236 |
return True
|
| 237 |
# Also check for programming patterns
|
| 238 |
+
if any(
|
| 239 |
+
pat in text
|
| 240 |
+
for pat in [
|
| 241 |
+
"def ",
|
| 242 |
+
"function ",
|
| 243 |
+
"class ",
|
| 244 |
+
"import ",
|
| 245 |
+
"const ",
|
| 246 |
+
"let ",
|
| 247 |
+
"var ",
|
| 248 |
+
"()",
|
| 249 |
+
"=>",
|
| 250 |
+
]
|
| 251 |
+
):
|
| 252 |
return True
|
| 253 |
|
| 254 |
return False
|
| 255 |
|
| 256 |
def _detect_reasoning(self, text: str) -> bool:
|
| 257 |
"""Detect if request requires reasoning capabilities."""
|
| 258 |
+
words = set(re.findall(r"\b\w+\b", text))
|
| 259 |
reasoning_matches = words & REASONING_KEYWORDS
|
| 260 |
if len(reasoning_matches) >= 1:
|
| 261 |
return True
|
|
|
|
| 294 |
return "coding"
|
| 295 |
if requirements.requires_reasoning:
|
| 296 |
return "reasoning"
|
| 297 |
+
return "balanced"
|
providers/__pycache__/openai_compat.cpython-314.pyc
CHANGED
|
Binary files a/providers/__pycache__/openai_compat.cpython-314.pyc and b/providers/__pycache__/openai_compat.cpython-314.pyc differ
|
|
|
providers/__pycache__/rate_limit.cpython-314.pyc
CHANGED
|
Binary files a/providers/__pycache__/rate_limit.cpython-314.pyc and b/providers/__pycache__/rate_limit.cpython-314.pyc differ
|
|
|
providers/nvidia_nim/metrics.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
Keep metrics local to the process. Simple API for recording attempts,
|
| 4 |
successes, and failures per candidate model.
|
| 5 |
"""
|
|
|
|
| 6 |
from __future__ import annotations
|
| 7 |
|
| 8 |
from threading import Lock
|
|
|
|
| 3 |
Keep metrics local to the process. Simple API for recording attempts,
|
| 4 |
successes, and failures per candidate model.
|
| 5 |
"""
|
| 6 |
+
|
| 7 |
from __future__ import annotations
|
| 8 |
|
| 9 |
from threading import Lock
|
providers/registry.py
CHANGED
|
@@ -321,7 +321,9 @@ class ProviderRegistry:
|
|
| 321 |
if not tasks:
|
| 322 |
return
|
| 323 |
|
| 324 |
-
logger.info(
|
|
|
|
|
|
|
| 325 |
results = await asyncio.gather(*tasks.values(), return_exceptions=True)
|
| 326 |
logger.info("Model discovery finished for all providers.")
|
| 327 |
for (provider_id, _task), result in zip(tasks.items(), results, strict=True):
|
|
|
|
| 321 |
if not tasks:
|
| 322 |
return
|
| 323 |
|
| 324 |
+
logger.info(
|
| 325 |
+
"Starting model discovery for providers: {}", ", ".join(tasks.keys())
|
| 326 |
+
)
|
| 327 |
results = await asyncio.gather(*tasks.values(), return_exceptions=True)
|
| 328 |
logger.info("Model discovery finished for all providers.")
|
| 329 |
for (provider_id, _task), result in zip(tasks.items(), results, strict=True):
|
providers/zen/__init__.py
CHANGED
|
@@ -4,4 +4,4 @@ from providers.defaults import ZEN_DEFAULT_BASE
|
|
| 4 |
|
| 5 |
from .client import ZenProvider
|
| 6 |
|
| 7 |
-
__all__ = ["ZEN_DEFAULT_BASE", "ZenProvider"]
|
|
|
|
| 4 |
|
| 5 |
from .client import ZenProvider
|
| 6 |
|
| 7 |
+
__all__ = ["ZEN_DEFAULT_BASE", "ZenProvider"]
|
providers/zen/client.py
CHANGED
|
@@ -43,4 +43,4 @@ class ZenProvider(OpenAIChatTransport):
|
|
| 43 |
return build_base_request_body(
|
| 44 |
request,
|
| 45 |
reasoning_replay=reasoning_replay,
|
| 46 |
-
)
|
|
|
|
| 43 |
return build_base_request_body(
|
| 44 |
request,
|
| 45 |
reasoning_replay=reasoning_replay,
|
| 46 |
+
)
|