Spaces:
Running
Image agent fixes: sizing, error handling, result nudge, registry filtering
Browse files- Add max-height: 400px to all image CSS rules and inline styles
- Filter globalFigureRegistry to only include images/figures referenced
in sub-agent <result> content (prevents command center from rendering
images the sub-agent didn't explicitly include)
- Extract shared nudge_for_result() utility in agents.py, replacing
duplicated nudge code in agent.py, code.py, and image.py
- Return actual error messages from execute_generate_image/execute_edit_image
(tuple return) so the LLM can adapt its strategy on failure
- Show real error messages in frontend tool cells instead of generic
"Failed to process image"
- Resize large input images to 1024px max before sending to HF
image_to_image API (FLUX.1-Kontext-dev expects ~1024px inputs)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- backend/agent.py +26 -36
- backend/agents.py +54 -4
- backend/code.py +25 -32
- backend/image.py +27 -15
- backend/tools.py +57 -14
- frontend/script.js +63 -9
- frontend/style.css +47 -0
|
@@ -13,7 +13,9 @@ from typing import List, Dict, Optional
|
|
| 13 |
from .tools import (
|
| 14 |
web_search, read_url,
|
| 15 |
execute_web_search, execute_read_url,
|
|
|
|
| 16 |
)
|
|
|
|
| 17 |
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
|
|
@@ -95,7 +97,8 @@ def stream_agent_execution(
|
|
| 95 |
messages: List[Dict],
|
| 96 |
serper_key: str,
|
| 97 |
extra_params: Optional[Dict] = None,
|
| 98 |
-
abort_event=None
|
|
|
|
| 99 |
):
|
| 100 |
"""
|
| 101 |
Run the agent tool-calling loop.
|
|
@@ -229,18 +232,30 @@ def stream_agent_execution(
|
|
| 229 |
# Execute tool
|
| 230 |
result = execute_tool(func_name, args, serper_key)
|
| 231 |
|
| 232 |
-
# Build tool response
|
| 233 |
-
if result.get("image"):
|
| 234 |
-
#
|
|
|
|
| 235 |
tool_response_content = [
|
| 236 |
{"type": "text", "text": result["content"]},
|
| 237 |
-
{"type": "image_url", "image_url": {"url": f"data:image/
|
| 238 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
else:
|
| 240 |
tool_response_content = result["content"]
|
| 241 |
|
| 242 |
-
tool_response_str = tool_response_content if isinstance(tool_response_content, str) else json.dumps(tool_response_content)
|
| 243 |
-
|
| 244 |
# Add to message history
|
| 245 |
messages.append({
|
| 246 |
"role": "assistant",
|
|
@@ -250,7 +265,7 @@ def stream_agent_execution(
|
|
| 250 |
messages.append({
|
| 251 |
"role": "tool",
|
| 252 |
"tool_call_id": tool_call.id,
|
| 253 |
-
"content":
|
| 254 |
})
|
| 255 |
|
| 256 |
# Signal tool result to frontend (include response for history)
|
|
@@ -259,7 +274,7 @@ def stream_agent_execution(
|
|
| 259 |
"tool": func_name,
|
| 260 |
"tool_call_id": tool_call.id,
|
| 261 |
"result": result.get("display", {}),
|
| 262 |
-
"response":
|
| 263 |
}
|
| 264 |
if result.get("image"):
|
| 265 |
tool_result_event["image"] = result["image"]
|
|
@@ -281,32 +296,7 @@ def stream_agent_execution(
|
|
| 281 |
|
| 282 |
# If agent finished without a <result>, nudge it for one
|
| 283 |
if not has_result:
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
"content": "Please provide your final answer now. Wrap it in <result> tags."
|
| 287 |
-
})
|
| 288 |
-
try:
|
| 289 |
-
call_params = {
|
| 290 |
-
"messages": messages,
|
| 291 |
-
"model": model,
|
| 292 |
-
}
|
| 293 |
-
if extra_params:
|
| 294 |
-
call_params["extra_body"] = extra_params
|
| 295 |
-
response = client.chat.completions.create(**call_params)
|
| 296 |
-
nudge_content = response.choices[0].message.content or ""
|
| 297 |
-
result_match = re.search(r'<result>(.*?)</result>', nudge_content, re.DOTALL | re.IGNORECASE)
|
| 298 |
-
if result_match:
|
| 299 |
-
result_content = result_match.group(1).strip()
|
| 300 |
-
thinking = re.sub(r'<result>.*?</result>', '', nudge_content, flags=re.DOTALL | re.IGNORECASE).strip()
|
| 301 |
-
if thinking:
|
| 302 |
-
yield {"type": "content", "content": thinking}
|
| 303 |
-
yield {"type": "result_preview", "content": result_content}
|
| 304 |
-
yield {"type": "result", "content": result_content}
|
| 305 |
-
elif nudge_content.strip():
|
| 306 |
-
# No result tags but got content — use it as the result
|
| 307 |
-
yield {"type": "result_preview", "content": nudge_content.strip()}
|
| 308 |
-
yield {"type": "result", "content": nudge_content.strip()}
|
| 309 |
-
except Exception as e:
|
| 310 |
-
logger.warning(f"Result nudge failed: {e}")
|
| 311 |
|
| 312 |
yield {"type": "done"}
|
|
|
|
| 13 |
from .tools import (
|
| 14 |
web_search, read_url,
|
| 15 |
execute_web_search, execute_read_url,
|
| 16 |
+
extract_and_download_images,
|
| 17 |
)
|
| 18 |
+
from .image import resize_image_for_vlm
|
| 19 |
|
| 20 |
logger = logging.getLogger(__name__)
|
| 21 |
|
|
|
|
| 97 |
messages: List[Dict],
|
| 98 |
serper_key: str,
|
| 99 |
extra_params: Optional[Dict] = None,
|
| 100 |
+
abort_event=None,
|
| 101 |
+
multimodal: bool = False
|
| 102 |
):
|
| 103 |
"""
|
| 104 |
Run the agent tool-calling loop.
|
|
|
|
| 232 |
# Execute tool
|
| 233 |
result = execute_tool(func_name, args, serper_key)
|
| 234 |
|
| 235 |
+
# Build tool response content for LLM
|
| 236 |
+
if result.get("image") and multimodal:
|
| 237 |
+
# Send screenshot as multimodal content so VLM can see it
|
| 238 |
+
vlm_image = resize_image_for_vlm(result["image"])
|
| 239 |
tool_response_content = [
|
| 240 |
{"type": "text", "text": result["content"]},
|
| 241 |
+
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{vlm_image}"}}
|
| 242 |
]
|
| 243 |
+
elif func_name == "read_url" and multimodal:
|
| 244 |
+
# Extract and include page images so VLM can see them
|
| 245 |
+
page_images = extract_and_download_images(result["content"])
|
| 246 |
+
if page_images:
|
| 247 |
+
tool_response_content = [{"type": "text", "text": result["content"]}]
|
| 248 |
+
for img_b64 in page_images:
|
| 249 |
+
vlm_img = resize_image_for_vlm(img_b64)
|
| 250 |
+
tool_response_content.append({
|
| 251 |
+
"type": "image_url",
|
| 252 |
+
"image_url": {"url": f"data:image/jpeg;base64,{vlm_img}"}
|
| 253 |
+
})
|
| 254 |
+
else:
|
| 255 |
+
tool_response_content = result["content"]
|
| 256 |
else:
|
| 257 |
tool_response_content = result["content"]
|
| 258 |
|
|
|
|
|
|
|
| 259 |
# Add to message history
|
| 260 |
messages.append({
|
| 261 |
"role": "assistant",
|
|
|
|
| 265 |
messages.append({
|
| 266 |
"role": "tool",
|
| 267 |
"tool_call_id": tool_call.id,
|
| 268 |
+
"content": tool_response_content
|
| 269 |
})
|
| 270 |
|
| 271 |
# Signal tool result to frontend (include response for history)
|
|
|
|
| 274 |
"tool": func_name,
|
| 275 |
"tool_call_id": tool_call.id,
|
| 276 |
"result": result.get("display", {}),
|
| 277 |
+
"response": result.get("content", ""),
|
| 278 |
}
|
| 279 |
if result.get("image"):
|
| 280 |
tool_result_event["image"] = result["image"]
|
|
|
|
| 296 |
|
| 297 |
# If agent finished without a <result>, nudge it for one
|
| 298 |
if not has_result:
|
| 299 |
+
from .agents import nudge_for_result
|
| 300 |
+
yield from nudge_for_result(client, model, messages, extra_params=extra_params)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
yield {"type": "done"}
|
|
@@ -82,9 +82,11 @@ AGENT_REGISTRY = {
|
|
| 82 |
"(this preserves context and the Jupyter kernel for code agents).\n\n"
|
| 83 |
"## Presenting Results\n\n"
|
| 84 |
"Sub-agent results may be collapsed in the UI. When presenting results to the user, "
|
| 85 |
-
"always include the key findings in YOUR response text — don't just say \"see the agent result\".
|
| 86 |
-
"
|
| 87 |
-
"
|
|
|
|
|
|
|
| 88 |
"## Handling Aborted Agents\n\n"
|
| 89 |
"If an agent's result is 'Generation aborted by user.', the user deliberately stopped it. "
|
| 90 |
"Do NOT automatically re-launch the same task. Instead, briefly acknowledge the abort and "
|
|
@@ -313,7 +315,8 @@ AGENT_REGISTRY = {
|
|
| 313 |
"Returns an image reference (e.g., 'image_1') that you can see.\n"
|
| 314 |
"- **edit_image(prompt, source)**: Edit or transform an existing image. "
|
| 315 |
"The source can be a URL, a local file path, or an image reference from a previous tool call (e.g., 'image_1').\n"
|
| 316 |
-
"- **read_image(source)**: Load
|
|
|
|
| 317 |
"Returns an image reference that you can see and use with edit_image.\n\n"
|
| 318 |
"## Strategy\n\n"
|
| 319 |
"1. If the user provides an image URL or file path, use read_image first to load it\n"
|
|
@@ -385,6 +388,53 @@ def get_system_prompt(agent_key: str) -> str:
|
|
| 385 |
return prompt
|
| 386 |
|
| 387 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
def get_tools() -> list:
|
| 389 |
"""Get tool definitions for the command center."""
|
| 390 |
return [
|
|
|
|
| 82 |
"(this preserves context and the Jupyter kernel for code agents).\n\n"
|
| 83 |
"## Presenting Results\n\n"
|
| 84 |
"Sub-agent results may be collapsed in the UI. When presenting results to the user, "
|
| 85 |
+
"always include the key findings in YOUR response text — don't just say \"see the agent result\".\n\n"
|
| 86 |
+
"**Embedding images/figures from sub-agents:** If a sub-agent result contains image or figure "
|
| 87 |
+
"references like <image_1> or <figure_1>, you can embed them directly in your response using "
|
| 88 |
+
"the same tags (e.g., <image_1>, <figure_2>). The UI will resolve these to the actual images. "
|
| 89 |
+
"Always embed the relevant images when discussing visual results.\n\n"
|
| 90 |
"## Handling Aborted Agents\n\n"
|
| 91 |
"If an agent's result is 'Generation aborted by user.', the user deliberately stopped it. "
|
| 92 |
"Do NOT automatically re-launch the same task. Instead, briefly acknowledge the abort and "
|
|
|
|
| 315 |
"Returns an image reference (e.g., 'image_1') that you can see.\n"
|
| 316 |
"- **edit_image(prompt, source)**: Edit or transform an existing image. "
|
| 317 |
"The source can be a URL, a local file path, or an image reference from a previous tool call (e.g., 'image_1').\n"
|
| 318 |
+
"- **read_image(source)**: Load a raster image (PNG, JPEG, GIF, WebP, BMP) from a URL or local file path. "
|
| 319 |
+
"SVG is NOT supported — if given an SVG URL, tell the user and ask for a raster format instead. "
|
| 320 |
"Returns an image reference that you can see and use with edit_image.\n\n"
|
| 321 |
"## Strategy\n\n"
|
| 322 |
"1. If the user provides an image URL or file path, use read_image first to load it\n"
|
|
|
|
| 388 |
return prompt
|
| 389 |
|
| 390 |
|
| 391 |
+
def nudge_for_result(client, model, messages, extra_params=None, extra_result_data=None):
|
| 392 |
+
"""Nudge an agent that finished without <result> tags to produce one.
|
| 393 |
+
|
| 394 |
+
This is a generator that yields SSE events (content, result_preview, result).
|
| 395 |
+
Call it after an agent's tool loop when no <result> was found.
|
| 396 |
+
|
| 397 |
+
Args:
|
| 398 |
+
client: OpenAI-compatible client
|
| 399 |
+
model: Model name
|
| 400 |
+
messages: Full message history (will be mutated — nudge message appended)
|
| 401 |
+
extra_params: Optional extra_body params for the LLM call
|
| 402 |
+
extra_result_data: Optional dict of extra fields to include in result events
|
| 403 |
+
(e.g. {"figures": {...}} or {"images": {...}})
|
| 404 |
+
"""
|
| 405 |
+
import re
|
| 406 |
+
import logging
|
| 407 |
+
_logger = logging.getLogger(__name__)
|
| 408 |
+
|
| 409 |
+
messages.append({
|
| 410 |
+
"role": "user",
|
| 411 |
+
"content": "Please provide your final answer now. Wrap it in <result> tags."
|
| 412 |
+
})
|
| 413 |
+
try:
|
| 414 |
+
call_params = {"messages": messages, "model": model}
|
| 415 |
+
if extra_params:
|
| 416 |
+
call_params["extra_body"] = extra_params
|
| 417 |
+
response = client.chat.completions.create(**call_params)
|
| 418 |
+
nudge_content = response.choices[0].message.content or ""
|
| 419 |
+
result_match = re.search(r'<result>(.*?)</result>', nudge_content, re.DOTALL | re.IGNORECASE)
|
| 420 |
+
|
| 421 |
+
extra = extra_result_data or {}
|
| 422 |
+
|
| 423 |
+
if result_match:
|
| 424 |
+
result_content = result_match.group(1).strip()
|
| 425 |
+
thinking = re.sub(r'<result>.*?</result>', '', nudge_content, flags=re.DOTALL | re.IGNORECASE).strip()
|
| 426 |
+
if thinking:
|
| 427 |
+
yield {"type": "content", "content": thinking}
|
| 428 |
+
yield {"type": "result_preview", "content": result_content, **extra}
|
| 429 |
+
yield {"type": "result", "content": result_content, **extra}
|
| 430 |
+
elif nudge_content.strip():
|
| 431 |
+
# No result tags but got content — use it as the result
|
| 432 |
+
yield {"type": "result_preview", "content": nudge_content.strip(), **extra}
|
| 433 |
+
yield {"type": "result", "content": nudge_content.strip(), **extra}
|
| 434 |
+
except Exception as e:
|
| 435 |
+
_logger.warning(f"Result nudge failed: {e}")
|
| 436 |
+
|
| 437 |
+
|
| 438 |
def get_tools() -> list:
|
| 439 |
"""Get tool definitions for the command center."""
|
| 440 |
return [
|
|
@@ -9,6 +9,7 @@ from typing import List, Dict, Optional
|
|
| 9 |
from e2b_code_interpreter import Sandbox
|
| 10 |
|
| 11 |
from .tools import execute_code, upload_files, download_files
|
|
|
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
|
@@ -204,7 +205,7 @@ def download_files_from_sandbox(sbx: Sandbox, files: List[Dict], files_root: str
|
|
| 204 |
return "\n".join(results)
|
| 205 |
|
| 206 |
|
| 207 |
-
def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox, files_root: str = None, extra_params: Optional[Dict] = None, abort_event=None):
|
| 208 |
"""
|
| 209 |
Stream code execution results
|
| 210 |
|
|
@@ -429,11 +430,27 @@ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox
|
|
| 429 |
}]
|
| 430 |
})
|
| 431 |
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
"
|
| 435 |
-
|
| 436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
|
| 438 |
elif tool_call.function.name == "upload_files":
|
| 439 |
# Parse arguments
|
|
@@ -571,32 +588,8 @@ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox
|
|
| 571 |
|
| 572 |
# If agent finished without a <result>, nudge it for one
|
| 573 |
if not has_result:
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
"content": "Please provide your final answer now. Wrap it in <result> tags."
|
| 577 |
-
})
|
| 578 |
-
try:
|
| 579 |
-
call_params = {
|
| 580 |
-
"messages": messages,
|
| 581 |
-
"model": model,
|
| 582 |
-
}
|
| 583 |
-
if extra_params:
|
| 584 |
-
call_params["extra_body"] = extra_params
|
| 585 |
-
response = client.chat.completions.create(**call_params)
|
| 586 |
-
nudge_content = response.choices[0].message.content or ""
|
| 587 |
-
result_match = re.search(r'<result>(.*?)</result>', nudge_content, re.DOTALL | re.IGNORECASE)
|
| 588 |
-
if result_match:
|
| 589 |
-
result_content = result_match.group(1).strip()
|
| 590 |
-
thinking = re.sub(r'<result>.*?</result>', '', nudge_content, flags=re.DOTALL | re.IGNORECASE).strip()
|
| 591 |
-
if thinking:
|
| 592 |
-
yield {"type": "content", "content": thinking}
|
| 593 |
-
yield {"type": "result_preview", "content": result_content, "figures": figure_data}
|
| 594 |
-
yield {"type": "result", "content": result_content, "figures": figure_data}
|
| 595 |
-
elif nudge_content.strip():
|
| 596 |
-
yield {"type": "result_preview", "content": nudge_content.strip(), "figures": figure_data}
|
| 597 |
-
yield {"type": "result", "content": nudge_content.strip(), "figures": figure_data}
|
| 598 |
-
except Exception as e:
|
| 599 |
-
logger.warning(f"Result nudge failed: {e}")
|
| 600 |
|
| 601 |
# Send done signal
|
| 602 |
yield {"type": "done"}
|
|
|
|
| 9 |
from e2b_code_interpreter import Sandbox
|
| 10 |
|
| 11 |
from .tools import execute_code, upload_files, download_files
|
| 12 |
+
from .image import resize_image_for_vlm
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
|
|
|
| 205 |
return "\n".join(results)
|
| 206 |
|
| 207 |
|
| 208 |
+
def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox, files_root: str = None, extra_params: Optional[Dict] = None, abort_event=None, multimodal: bool = False):
|
| 209 |
"""
|
| 210 |
Stream code execution results
|
| 211 |
|
|
|
|
| 430 |
}]
|
| 431 |
})
|
| 432 |
|
| 433 |
+
# Build tool response — include figures if multimodal
|
| 434 |
+
if multimodal and images:
|
| 435 |
+
tool_content = [{"type": "text", "text": output}]
|
| 436 |
+
for img in images:
|
| 437 |
+
if img["type"] in ("png", "jpeg"):
|
| 438 |
+
vlm_img = resize_image_for_vlm(img["data"])
|
| 439 |
+
tool_content.append({
|
| 440 |
+
"type": "image_url",
|
| 441 |
+
"image_url": {"url": f"data:image/jpeg;base64,{vlm_img}"}
|
| 442 |
+
})
|
| 443 |
+
messages.append({
|
| 444 |
+
"role": "tool",
|
| 445 |
+
"tool_call_id": tool_call.id,
|
| 446 |
+
"content": tool_content
|
| 447 |
+
})
|
| 448 |
+
else:
|
| 449 |
+
messages.append({
|
| 450 |
+
"role": "tool",
|
| 451 |
+
"tool_call_id": tool_call.id,
|
| 452 |
+
"content": output
|
| 453 |
+
})
|
| 454 |
|
| 455 |
elif tool_call.function.name == "upload_files":
|
| 456 |
# Parse arguments
|
|
|
|
| 588 |
|
| 589 |
# If agent finished without a <result>, nudge it for one
|
| 590 |
if not has_result:
|
| 591 |
+
from .agents import nudge_for_result
|
| 592 |
+
yield from nudge_for_result(client, model, messages, extra_params=extra_params, extra_result_data={"figures": figure_data})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
|
| 594 |
# Send done signal
|
| 595 |
yield {"type": "done"}
|
|
@@ -97,7 +97,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
|
|
| 97 |
if tool_name == "generate_image":
|
| 98 |
prompt = args.get("prompt", "")
|
| 99 |
model = args.get("model") or default_gen_model or "black-forest-labs/FLUX.1-schnell"
|
| 100 |
-
base64_png = execute_generate_image(prompt, hf_token, model)
|
| 101 |
|
| 102 |
if base64_png:
|
| 103 |
image_counter += 1
|
|
@@ -112,7 +112,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
|
|
| 112 |
}
|
| 113 |
else:
|
| 114 |
return {
|
| 115 |
-
"content": f"Failed to generate image
|
| 116 |
"display": {"type": "generate_error", "prompt": prompt},
|
| 117 |
"image_counter": image_counter,
|
| 118 |
}
|
|
@@ -138,7 +138,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
|
|
| 138 |
"image_counter": image_counter,
|
| 139 |
}
|
| 140 |
|
| 141 |
-
base64_png = execute_edit_image(prompt, source_bytes, hf_token, model)
|
| 142 |
|
| 143 |
if base64_png:
|
| 144 |
image_counter += 1
|
|
@@ -153,7 +153,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
|
|
| 153 |
}
|
| 154 |
else:
|
| 155 |
return {
|
| 156 |
-
"content": f"Failed to edit image
|
| 157 |
"display": {"type": "edit_error", "source": source},
|
| 158 |
"image_counter": image_counter,
|
| 159 |
}
|
|
@@ -174,8 +174,14 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
|
|
| 174 |
"image_counter": image_counter,
|
| 175 |
}
|
| 176 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
return {
|
| 178 |
-
"content":
|
| 179 |
"display": {"type": "read_image_error", "url": source},
|
| 180 |
"image_counter": image_counter,
|
| 181 |
}
|
|
@@ -196,7 +202,8 @@ def stream_image_execution(
|
|
| 196 |
image_edit_model: Optional[str] = None,
|
| 197 |
extra_params: Optional[Dict] = None,
|
| 198 |
abort_event=None,
|
| 199 |
-
files_root: str = None
|
|
|
|
| 200 |
):
|
| 201 |
"""
|
| 202 |
Run the image agent tool-calling loop.
|
|
@@ -334,9 +341,8 @@ def stream_image_execution(
|
|
| 334 |
result = execute_tool(func_name, args, hf_token, image_store, image_counter, default_gen_model=image_gen_model, default_edit_model=image_edit_model, files_root=files_root)
|
| 335 |
image_counter = result.get("image_counter", image_counter)
|
| 336 |
|
| 337 |
-
# Build tool response
|
| 338 |
-
if result.get("image"):
|
| 339 |
-
# Resize image for VLM context to avoid token overflow
|
| 340 |
vlm_image = resize_image_for_vlm(result["image"])
|
| 341 |
tool_response_content = [
|
| 342 |
{"type": "text", "text": result["content"]},
|
|
@@ -385,12 +391,18 @@ def stream_image_execution(
|
|
| 385 |
if not done:
|
| 386 |
yield {"type": "generating"}
|
| 387 |
|
| 388 |
-
#
|
| 389 |
if not result_sent and image_store:
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
|
| 396 |
yield {"type": "done"}
|
|
|
|
| 97 |
if tool_name == "generate_image":
|
| 98 |
prompt = args.get("prompt", "")
|
| 99 |
model = args.get("model") or default_gen_model or "black-forest-labs/FLUX.1-schnell"
|
| 100 |
+
base64_png, error = execute_generate_image(prompt, hf_token, model)
|
| 101 |
|
| 102 |
if base64_png:
|
| 103 |
image_counter += 1
|
|
|
|
| 112 |
}
|
| 113 |
else:
|
| 114 |
return {
|
| 115 |
+
"content": f"Failed to generate image: {error}",
|
| 116 |
"display": {"type": "generate_error", "prompt": prompt},
|
| 117 |
"image_counter": image_counter,
|
| 118 |
}
|
|
|
|
| 138 |
"image_counter": image_counter,
|
| 139 |
}
|
| 140 |
|
| 141 |
+
base64_png, error = execute_edit_image(prompt, source_bytes, hf_token, model)
|
| 142 |
|
| 143 |
if base64_png:
|
| 144 |
image_counter += 1
|
|
|
|
| 153 |
}
|
| 154 |
else:
|
| 155 |
return {
|
| 156 |
+
"content": f"Failed to edit image: {error}",
|
| 157 |
"display": {"type": "edit_error", "source": source},
|
| 158 |
"image_counter": image_counter,
|
| 159 |
}
|
|
|
|
| 174 |
"image_counter": image_counter,
|
| 175 |
}
|
| 176 |
else:
|
| 177 |
+
# Provide more specific error for SVG files
|
| 178 |
+
is_svg = source.lower().endswith(".svg") or "/svg" in source.lower()
|
| 179 |
+
if is_svg:
|
| 180 |
+
error_msg = f"Failed to load image from '{source}'. SVG format is not supported — only raster formats (PNG, JPEG, GIF, WebP, BMP) are accepted. Ask the user for a raster version of the image."
|
| 181 |
+
else:
|
| 182 |
+
error_msg = f"Failed to load image from '{source}'. Check that the path or URL is correct and that it is a raster image (PNG, JPEG, GIF, WebP, BMP)."
|
| 183 |
return {
|
| 184 |
+
"content": error_msg,
|
| 185 |
"display": {"type": "read_image_error", "url": source},
|
| 186 |
"image_counter": image_counter,
|
| 187 |
}
|
|
|
|
| 202 |
image_edit_model: Optional[str] = None,
|
| 203 |
extra_params: Optional[Dict] = None,
|
| 204 |
abort_event=None,
|
| 205 |
+
files_root: str = None,
|
| 206 |
+
multimodal: bool = False
|
| 207 |
):
|
| 208 |
"""
|
| 209 |
Run the image agent tool-calling loop.
|
|
|
|
| 341 |
result = execute_tool(func_name, args, hf_token, image_store, image_counter, default_gen_model=image_gen_model, default_edit_model=image_edit_model, files_root=files_root)
|
| 342 |
image_counter = result.get("image_counter", image_counter)
|
| 343 |
|
| 344 |
+
# Build tool response content for LLM
|
| 345 |
+
if result.get("image") and multimodal:
|
|
|
|
| 346 |
vlm_image = resize_image_for_vlm(result["image"])
|
| 347 |
tool_response_content = [
|
| 348 |
{"type": "text", "text": result["content"]},
|
|
|
|
| 391 |
if not done:
|
| 392 |
yield {"type": "generating"}
|
| 393 |
|
| 394 |
+
# If agent finished without a <result>, nudge it for one
|
| 395 |
if not result_sent and image_store:
|
| 396 |
+
from .agents import nudge_for_result
|
| 397 |
+
nudge_produced_result = False
|
| 398 |
+
for event in nudge_for_result(client, model, messages, extra_params=extra_params, extra_result_data={"images": image_store}):
|
| 399 |
+
yield event
|
| 400 |
+
if event.get("type") == "result":
|
| 401 |
+
nudge_produced_result = True
|
| 402 |
+
|
| 403 |
+
# Final fallback: synthesize a result with all images
|
| 404 |
+
if not nudge_produced_result:
|
| 405 |
+
fallback_parts = [f"<{name}>" for name in image_store]
|
| 406 |
+
yield {"type": "result", "content": "\n\n".join(fallback_parts), "images": image_store}
|
| 407 |
|
| 408 |
yield {"type": "done"}
|
|
@@ -419,7 +419,7 @@ read_image = {
|
|
| 419 |
"type": "function",
|
| 420 |
"function": {
|
| 421 |
"name": "read_image",
|
| 422 |
-
"description": "Load
|
| 423 |
"parameters": {
|
| 424 |
"type": "object",
|
| 425 |
"properties": {
|
|
@@ -441,48 +441,56 @@ read_image_url = read_image
|
|
| 441 |
# Image tool execution functions
|
| 442 |
# ============================================================
|
| 443 |
|
| 444 |
-
def execute_generate_image(prompt: str, hf_token: str, model: str = "black-forest-labs/FLUX.1-schnell") ->
|
| 445 |
-
"""Text-to-image via HF InferenceClient. Returns
|
| 446 |
try:
|
| 447 |
from huggingface_hub import InferenceClient
|
| 448 |
except ImportError:
|
| 449 |
-
|
| 450 |
-
return None
|
| 451 |
|
| 452 |
try:
|
| 453 |
client = InferenceClient(token=hf_token)
|
| 454 |
image = client.text_to_image(prompt, model=model)
|
| 455 |
buffer = io.BytesIO()
|
| 456 |
image.save(buffer, format="PNG")
|
| 457 |
-
return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
| 458 |
except Exception as e:
|
| 459 |
logger.error(f"Generate image error: {e}")
|
| 460 |
-
return None
|
| 461 |
|
| 462 |
|
| 463 |
-
def execute_edit_image(prompt: str, source_image_bytes: bytes, hf_token: str, model: str = "black-forest-labs/FLUX.1-Kontext-dev") ->
|
| 464 |
-
"""Image-to-image via HF InferenceClient.
|
| 465 |
try:
|
| 466 |
from huggingface_hub import InferenceClient
|
| 467 |
from PIL import Image
|
| 468 |
except ImportError:
|
| 469 |
-
|
| 470 |
-
return None
|
| 471 |
|
| 472 |
try:
|
| 473 |
client = InferenceClient(token=hf_token)
|
| 474 |
input_image = Image.open(io.BytesIO(source_image_bytes))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
result = client.image_to_image(input_image, prompt=prompt, model=model)
|
| 476 |
buffer = io.BytesIO()
|
| 477 |
result.save(buffer, format="PNG")
|
| 478 |
-
return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
| 479 |
except Exception as e:
|
| 480 |
logger.error(f"Edit image error: {e}")
|
| 481 |
-
return None
|
| 482 |
|
| 483 |
|
| 484 |
def execute_read_image(source: str, files_root: str = None) -> Optional[str]:
|
| 485 |
-
"""Load image from URL or local file path, return base64 string or None on error.
|
|
|
|
|
|
|
|
|
|
| 486 |
import os
|
| 487 |
|
| 488 |
# Check if it's a URL
|
|
@@ -523,6 +531,41 @@ def execute_read_image(source: str, files_root: str = None) -> Optional[str]:
|
|
| 523 |
return None
|
| 524 |
|
| 525 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
# Keep old name as alias
|
| 527 |
def execute_read_image_url(url: str) -> Optional[str]:
|
| 528 |
return execute_read_image(url)
|
|
|
|
| 419 |
"type": "function",
|
| 420 |
"function": {
|
| 421 |
"name": "read_image",
|
| 422 |
+
"description": "Load a raster image (PNG, JPEG, GIF, WebP, BMP) from a URL or local file path. SVG is NOT supported. Returns an image reference name (e.g., 'image_1') that you can see and use with edit_image.",
|
| 423 |
"parameters": {
|
| 424 |
"type": "object",
|
| 425 |
"properties": {
|
|
|
|
| 441 |
# Image tool execution functions
|
| 442 |
# ============================================================
|
| 443 |
|
| 444 |
+
def execute_generate_image(prompt: str, hf_token: str, model: str = "black-forest-labs/FLUX.1-schnell") -> tuple:
|
| 445 |
+
"""Text-to-image via HF InferenceClient. Returns (base64_png, None) on success or (None, error_str) on failure."""
|
| 446 |
try:
|
| 447 |
from huggingface_hub import InferenceClient
|
| 448 |
except ImportError:
|
| 449 |
+
return None, "huggingface_hub not installed"
|
|
|
|
| 450 |
|
| 451 |
try:
|
| 452 |
client = InferenceClient(token=hf_token)
|
| 453 |
image = client.text_to_image(prompt, model=model)
|
| 454 |
buffer = io.BytesIO()
|
| 455 |
image.save(buffer, format="PNG")
|
| 456 |
+
return base64.b64encode(buffer.getvalue()).decode("utf-8"), None
|
| 457 |
except Exception as e:
|
| 458 |
logger.error(f"Generate image error: {e}")
|
| 459 |
+
return None, str(e)
|
| 460 |
|
| 461 |
|
| 462 |
+
def execute_edit_image(prompt: str, source_image_bytes: bytes, hf_token: str, model: str = "black-forest-labs/FLUX.1-Kontext-dev") -> tuple:
|
| 463 |
+
"""Image-to-image via HF InferenceClient. Returns (base64_png, None) on success or (None, error_str) on failure."""
|
| 464 |
try:
|
| 465 |
from huggingface_hub import InferenceClient
|
| 466 |
from PIL import Image
|
| 467 |
except ImportError:
|
| 468 |
+
return None, "huggingface_hub or Pillow not installed"
|
|
|
|
| 469 |
|
| 470 |
try:
|
| 471 |
client = InferenceClient(token=hf_token)
|
| 472 |
input_image = Image.open(io.BytesIO(source_image_bytes))
|
| 473 |
+
|
| 474 |
+
# Resize large images to avoid API failures (most models expect ~1024px)
|
| 475 |
+
MAX_EDIT_DIM = 1024
|
| 476 |
+
if max(input_image.size) > MAX_EDIT_DIM:
|
| 477 |
+
input_image.thumbnail((MAX_EDIT_DIM, MAX_EDIT_DIM), Image.LANCZOS)
|
| 478 |
+
logger.info(f"Resized input image to {input_image.size} for editing")
|
| 479 |
+
|
| 480 |
result = client.image_to_image(input_image, prompt=prompt, model=model)
|
| 481 |
buffer = io.BytesIO()
|
| 482 |
result.save(buffer, format="PNG")
|
| 483 |
+
return base64.b64encode(buffer.getvalue()).decode("utf-8"), None
|
| 484 |
except Exception as e:
|
| 485 |
logger.error(f"Edit image error: {e}")
|
| 486 |
+
return None, str(e)
|
| 487 |
|
| 488 |
|
| 489 |
def execute_read_image(source: str, files_root: str = None) -> Optional[str]:
|
| 490 |
+
"""Load image from URL or local file path, return base64 string or None on error.
|
| 491 |
+
|
| 492 |
+
Supported formats: PNG, JPEG, GIF, WebP, BMP. SVG is NOT supported.
|
| 493 |
+
"""
|
| 494 |
import os
|
| 495 |
|
| 496 |
# Check if it's a URL
|
|
|
|
| 531 |
return None
|
| 532 |
|
| 533 |
|
| 534 |
+
def extract_and_download_images(markdown: str, max_images: int = 5) -> List[str]:
|
| 535 |
+
"""Extract image URLs from markdown and download them as base64 strings.
|
| 536 |
+
|
| 537 |
+
Returns list of base64-encoded image strings (PNG/JPEG).
|
| 538 |
+
Skips SVGs, data URIs, and failed downloads.
|
| 539 |
+
"""
|
| 540 |
+
import re as _re
|
| 541 |
+
img_pattern = _re.compile(r'!\[[^\]]*\]\(([^)]+)\)')
|
| 542 |
+
urls = img_pattern.findall(markdown)
|
| 543 |
+
|
| 544 |
+
results = []
|
| 545 |
+
for url in urls:
|
| 546 |
+
if len(results) >= max_images:
|
| 547 |
+
break
|
| 548 |
+
if url.startswith("data:") or url.endswith(".svg"):
|
| 549 |
+
continue
|
| 550 |
+
try:
|
| 551 |
+
resp = httpx.get(
|
| 552 |
+
url,
|
| 553 |
+
follow_redirects=True,
|
| 554 |
+
timeout=10,
|
| 555 |
+
headers={"User-Agent": _USER_AGENT}
|
| 556 |
+
)
|
| 557 |
+
if resp.status_code != 200:
|
| 558 |
+
continue
|
| 559 |
+
ct = resp.headers.get("content-type", "")
|
| 560 |
+
if not ct.startswith("image/"):
|
| 561 |
+
continue
|
| 562 |
+
results.append(base64.b64encode(resp.content).decode("utf-8"))
|
| 563 |
+
except Exception:
|
| 564 |
+
continue
|
| 565 |
+
|
| 566 |
+
return results
|
| 567 |
+
|
| 568 |
+
|
| 569 |
# Keep old name as alias
|
| 570 |
def execute_read_image_url(url: str) -> Optional[str]:
|
| 571 |
return execute_read_image(url)
|
|
@@ -93,6 +93,10 @@ const actionWidgets = {};
|
|
| 93 |
// Track tool call IDs for result updates (maps tabId -> tool_call_id)
|
| 94 |
const toolCallIds = {};
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
// Track agents by task_id for reuse (maps task_id -> tabId)
|
| 97 |
const taskIdToTabId = {};
|
| 98 |
|
|
@@ -128,6 +132,7 @@ function resetLocalState() {
|
|
| 128 |
// Clear object maps
|
| 129 |
Object.keys(actionWidgets).forEach(k => delete actionWidgets[k]);
|
| 130 |
Object.keys(toolCallIds).forEach(k => delete toolCallIds[k]);
|
|
|
|
| 131 |
Object.keys(taskIdToTabId).forEach(k => delete taskIdToTabId[k]);
|
| 132 |
researchQueryTabIds = {};
|
| 133 |
showAllTurns = true;
|
|
@@ -2021,6 +2026,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
|
|
| 2021 |
token: modelConfig.token || null,
|
| 2022 |
model: modelConfig.model,
|
| 2023 |
extra_params: modelConfig.extraParams || null,
|
|
|
|
| 2024 |
e2b_key: currentSettings.e2bKey || null,
|
| 2025 |
serper_key: currentSettings.serperKey || null,
|
| 2026 |
hf_token: currentSettings.hfToken || null,
|
|
@@ -2084,7 +2090,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
|
|
| 2084 |
currentMessageEl = createAssistantMessage(chatContainer);
|
| 2085 |
}
|
| 2086 |
fullResponse += data.content;
|
| 2087 |
-
appendToMessage(currentMessageEl, parseMarkdown(fullResponse));
|
| 2088 |
scrollChatToBottom(chatContainer);
|
| 2089 |
|
| 2090 |
} else if (data.type === 'code') {
|
|
@@ -2128,6 +2134,22 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
|
|
| 2128 |
// Still generating - no action needed
|
| 2129 |
|
| 2130 |
} else if (data.type === 'result') {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2131 |
// Agent result - update command center widget
|
| 2132 |
updateActionWidgetWithResult(tabId, data.content, data.figures, data.images);
|
| 2133 |
|
|
@@ -2175,7 +2197,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
|
|
| 2175 |
for (const [placeholderId, figureData] of Object.entries(figurePlaceholders)) {
|
| 2176 |
let imageHtml = '';
|
| 2177 |
if (figureData.type === 'png' || figureData.type === 'jpeg') {
|
| 2178 |
-
imageHtml = `<img src="data:image/${figureData.type};base64,${figureData.data}" style="max-width: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
|
| 2179 |
} else if (figureData.type === 'svg') {
|
| 2180 |
imageHtml = `<div style="margin: 12px 0;">${atob(figureData.data)}</div>`;
|
| 2181 |
}
|
|
@@ -2371,7 +2393,8 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
|
|
| 2371 |
const imgName = data.image_name || 'image';
|
| 2372 |
outputHtml = `<img src="data:image/png;base64,${data.image}" alt="${escapeHtml(imgName)}" class="generated-img" />`;
|
| 2373 |
} else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url' || data.tool === 'read_image') && !data.image) {
|
| 2374 |
-
|
|
|
|
| 2375 |
}
|
| 2376 |
|
| 2377 |
if (outputHtml && lastToolCell) {
|
|
@@ -2389,7 +2412,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
|
|
| 2389 |
currentMessageEl = createAssistantMessage(chatContainer);
|
| 2390 |
}
|
| 2391 |
fullResponse += data.content;
|
| 2392 |
-
appendToMessage(currentMessageEl, parseMarkdown(fullResponse));
|
| 2393 |
scrollChatToBottom(chatContainer);
|
| 2394 |
|
| 2395 |
} else if (data.type === 'launch') {
|
|
@@ -2521,6 +2544,16 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
|
|
| 2521 |
errorDiv.innerHTML = `<div class="message-content" style="color: #c62828;">Error: ${escapeHtml(data.content)}</div>`;
|
| 2522 |
chatContainer.appendChild(errorDiv);
|
| 2523 |
scrollChatToBottom(chatContainer);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2524 |
}
|
| 2525 |
}
|
| 2526 |
}
|
|
@@ -2865,7 +2898,7 @@ async function updateActionWidgetWithResult(tabId, resultContent, figures, image
|
|
| 2865 |
for (const [placeholderId, figureData] of Object.entries(figurePlaceholders)) {
|
| 2866 |
let imageHtml = '';
|
| 2867 |
if (figureData.type === 'png' || figureData.type === 'jpeg') {
|
| 2868 |
-
imageHtml = `<img src="data:image/${figureData.type};base64,${figureData.data}" style="max-width: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
|
| 2869 |
} else if (figureData.type === 'svg') {
|
| 2870 |
imageHtml = `<div style="margin: 12px 0;">${atob(figureData.data)}</div>`;
|
| 2871 |
}
|
|
@@ -3115,6 +3148,22 @@ if (typeof marked !== 'undefined') {
|
|
| 3115 |
});
|
| 3116 |
}
|
| 3117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3118 |
function parseMarkdown(text) {
|
| 3119 |
// Use marked library for proper markdown parsing
|
| 3120 |
let html;
|
|
@@ -4128,7 +4177,7 @@ function deleteProvider(providerId) {
|
|
| 4128 |
// Show add/edit model dialog
|
| 4129 |
function showModelDialog(modelId = null) {
|
| 4130 |
const isEdit = !!modelId;
|
| 4131 |
-
const model = isEdit ? settings.models[modelId] : { name: '', providerId: '', modelId: '', extraParams: null };
|
| 4132 |
|
| 4133 |
const dialog = document.getElementById('model-dialog');
|
| 4134 |
const title = document.getElementById('model-dialog-title');
|
|
@@ -4136,11 +4185,13 @@ function showModelDialog(modelId = null) {
|
|
| 4136 |
const providerSelect = document.getElementById('model-provider');
|
| 4137 |
const modelIdInput = document.getElementById('model-model-id');
|
| 4138 |
const extraParamsInput = document.getElementById('model-extra-params');
|
|
|
|
| 4139 |
|
| 4140 |
title.textContent = isEdit ? 'Edit Model' : 'Add Model';
|
| 4141 |
nameInput.value = model.name;
|
| 4142 |
modelIdInput.value = model.modelId;
|
| 4143 |
extraParamsInput.value = model.extraParams ? JSON.stringify(model.extraParams, null, 2) : '';
|
|
|
|
| 4144 |
|
| 4145 |
// Populate provider dropdown
|
| 4146 |
providerSelect.innerHTML = '<option value="">-- Select Provider --</option>';
|
|
@@ -4187,7 +4238,8 @@ function saveModelFromDialog() {
|
|
| 4187 |
}
|
| 4188 |
}
|
| 4189 |
|
| 4190 |
-
|
|
|
|
| 4191 |
hideModelDialog();
|
| 4192 |
renderModelsList();
|
| 4193 |
populateModelDropdowns();
|
|
@@ -4492,7 +4544,8 @@ function resolveModelConfig(agentType) {
|
|
| 4492 |
endpoint: provider.endpoint,
|
| 4493 |
token: provider.token,
|
| 4494 |
model: model.modelId,
|
| 4495 |
-
extraParams: model.extraParams || null
|
|
|
|
| 4496 |
};
|
| 4497 |
}
|
| 4498 |
|
|
@@ -4510,7 +4563,8 @@ function getDefaultModelConfig() {
|
|
| 4510 |
endpoint: provider.endpoint,
|
| 4511 |
token: provider.token,
|
| 4512 |
model: model.modelId,
|
| 4513 |
-
extraParams: model.extraParams || null
|
|
|
|
| 4514 |
};
|
| 4515 |
}
|
| 4516 |
|
|
|
|
| 93 |
// Track tool call IDs for result updates (maps tabId -> tool_call_id)
|
| 94 |
const toolCallIds = {};
|
| 95 |
|
| 96 |
+
// Global figure/image registry populated by sub-agents for cross-agent reference resolution
|
| 97 |
+
// Maps "figure_1" -> {type, data} and "image_1" -> {type: "png", data: base64}
|
| 98 |
+
const globalFigureRegistry = {};
|
| 99 |
+
|
| 100 |
// Track agents by task_id for reuse (maps task_id -> tabId)
|
| 101 |
const taskIdToTabId = {};
|
| 102 |
|
|
|
|
| 132 |
// Clear object maps
|
| 133 |
Object.keys(actionWidgets).forEach(k => delete actionWidgets[k]);
|
| 134 |
Object.keys(toolCallIds).forEach(k => delete toolCallIds[k]);
|
| 135 |
+
Object.keys(globalFigureRegistry).forEach(k => delete globalFigureRegistry[k]);
|
| 136 |
Object.keys(taskIdToTabId).forEach(k => delete taskIdToTabId[k]);
|
| 137 |
researchQueryTabIds = {};
|
| 138 |
showAllTurns = true;
|
|
|
|
| 2026 |
token: modelConfig.token || null,
|
| 2027 |
model: modelConfig.model,
|
| 2028 |
extra_params: modelConfig.extraParams || null,
|
| 2029 |
+
multimodal: modelConfig.multimodal || false,
|
| 2030 |
e2b_key: currentSettings.e2bKey || null,
|
| 2031 |
serper_key: currentSettings.serperKey || null,
|
| 2032 |
hf_token: currentSettings.hfToken || null,
|
|
|
|
| 2090 |
currentMessageEl = createAssistantMessage(chatContainer);
|
| 2091 |
}
|
| 2092 |
fullResponse += data.content;
|
| 2093 |
+
appendToMessage(currentMessageEl, resolveGlobalFigureRefs(parseMarkdown(fullResponse)));
|
| 2094 |
scrollChatToBottom(chatContainer);
|
| 2095 |
|
| 2096 |
} else if (data.type === 'code') {
|
|
|
|
| 2134 |
// Still generating - no action needed
|
| 2135 |
|
| 2136 |
} else if (data.type === 'result') {
|
| 2137 |
+
// Populate global figure/image registry only for items referenced in result content
|
| 2138 |
+
const resultText = data.content || '';
|
| 2139 |
+
if (data.figures) {
|
| 2140 |
+
for (const [name, figData] of Object.entries(data.figures)) {
|
| 2141 |
+
if (new RegExp(`</?${name}>`, 'i').test(resultText)) {
|
| 2142 |
+
globalFigureRegistry[name] = figData;
|
| 2143 |
+
}
|
| 2144 |
+
}
|
| 2145 |
+
}
|
| 2146 |
+
if (data.images) {
|
| 2147 |
+
for (const [name, imgBase64] of Object.entries(data.images)) {
|
| 2148 |
+
if (new RegExp(`</?${name}>`, 'i').test(resultText)) {
|
| 2149 |
+
globalFigureRegistry[name] = { type: 'png', data: imgBase64 };
|
| 2150 |
+
}
|
| 2151 |
+
}
|
| 2152 |
+
}
|
| 2153 |
// Agent result - update command center widget
|
| 2154 |
updateActionWidgetWithResult(tabId, data.content, data.figures, data.images);
|
| 2155 |
|
|
|
|
| 2197 |
for (const [placeholderId, figureData] of Object.entries(figurePlaceholders)) {
|
| 2198 |
let imageHtml = '';
|
| 2199 |
if (figureData.type === 'png' || figureData.type === 'jpeg') {
|
| 2200 |
+
imageHtml = `<img src="data:image/${figureData.type};base64,${figureData.data}" style="max-width: 400px; max-height: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
|
| 2201 |
} else if (figureData.type === 'svg') {
|
| 2202 |
imageHtml = `<div style="margin: 12px 0;">${atob(figureData.data)}</div>`;
|
| 2203 |
}
|
|
|
|
| 2393 |
const imgName = data.image_name || 'image';
|
| 2394 |
outputHtml = `<img src="data:image/png;base64,${data.image}" alt="${escapeHtml(imgName)}" class="generated-img" />`;
|
| 2395 |
} else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url' || data.tool === 'read_image') && !data.image) {
|
| 2396 |
+
const errMsg = data.response || 'Failed to process image';
|
| 2397 |
+
outputHtml = `<div class="tool-cell-read-summary">${escapeHtml(errMsg)}</div>`;
|
| 2398 |
}
|
| 2399 |
|
| 2400 |
if (outputHtml && lastToolCell) {
|
|
|
|
| 2412 |
currentMessageEl = createAssistantMessage(chatContainer);
|
| 2413 |
}
|
| 2414 |
fullResponse += data.content;
|
| 2415 |
+
appendToMessage(currentMessageEl, resolveGlobalFigureRefs(parseMarkdown(fullResponse)));
|
| 2416 |
scrollChatToBottom(chatContainer);
|
| 2417 |
|
| 2418 |
} else if (data.type === 'launch') {
|
|
|
|
| 2544 |
errorDiv.innerHTML = `<div class="message-content" style="color: #c62828;">Error: ${escapeHtml(data.content)}</div>`;
|
| 2545 |
chatContainer.appendChild(errorDiv);
|
| 2546 |
scrollChatToBottom(chatContainer);
|
| 2547 |
+
|
| 2548 |
+
// Propagate error to parent action widget
|
| 2549 |
+
updateActionWidgetWithResult(tabId, `Error: ${data.content}`, {}, {});
|
| 2550 |
+
const errorWidget = actionWidgets[tabId];
|
| 2551 |
+
if (errorWidget) {
|
| 2552 |
+
const doneIndicator = errorWidget.querySelector('.done-indicator');
|
| 2553 |
+
if (doneIndicator) {
|
| 2554 |
+
doneIndicator.classList.add('errored');
|
| 2555 |
+
}
|
| 2556 |
+
}
|
| 2557 |
}
|
| 2558 |
}
|
| 2559 |
}
|
|
|
|
| 2898 |
for (const [placeholderId, figureData] of Object.entries(figurePlaceholders)) {
|
| 2899 |
let imageHtml = '';
|
| 2900 |
if (figureData.type === 'png' || figureData.type === 'jpeg') {
|
| 2901 |
+
imageHtml = `<img src="data:image/${figureData.type};base64,${figureData.data}" style="max-width: 400px; max-height: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
|
| 2902 |
} else if (figureData.type === 'svg') {
|
| 2903 |
imageHtml = `<div style="margin: 12px 0;">${atob(figureData.data)}</div>`;
|
| 2904 |
}
|
|
|
|
| 3148 |
});
|
| 3149 |
}
|
| 3150 |
|
| 3151 |
+
// Resolve <figure_N> and <image_N> references using the global registry.
// An opening (or bare) tag is swapped for the registered figure markup; the
// closing tag of a resolved ref is dropped so a paired form such as
// <figure_1>...</figure_1> renders the image once instead of twice.
function resolveGlobalFigureRefs(html) {
    return html.replace(/<(\/?)(figure_\d+|image_\d+)>/gi, (match, slash, name) => {
        const data = globalFigureRegistry[name];
        if (!data) return match; // Leave unresolved refs as-is
        if (slash) return ''; // Closing tag of a resolved ref: emit nothing
        if (data.type === 'png' || data.type === 'jpeg') {
            return `<img src="data:image/${data.type};base64,${data.data}" style="max-width: 400px; max-height: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
        } else if (data.type === 'svg') {
            return `<div style="margin: 12px 0;">${atob(data.data)}</div>`;
        }
        return match;
    });
}
|
| 3166 |
+
|
| 3167 |
function parseMarkdown(text) {
|
| 3168 |
// Use marked library for proper markdown parsing
|
| 3169 |
let html;
|
|
|
|
| 4177 |
// Show add/edit model dialog
|
| 4178 |
function showModelDialog(modelId = null) {
|
| 4179 |
const isEdit = !!modelId;
|
| 4180 |
+
const model = isEdit ? settings.models[modelId] : { name: '', providerId: '', modelId: '', extraParams: null, multimodal: false };
|
| 4181 |
|
| 4182 |
const dialog = document.getElementById('model-dialog');
|
| 4183 |
const title = document.getElementById('model-dialog-title');
|
|
|
|
| 4185 |
const providerSelect = document.getElementById('model-provider');
|
| 4186 |
const modelIdInput = document.getElementById('model-model-id');
|
| 4187 |
const extraParamsInput = document.getElementById('model-extra-params');
|
| 4188 |
+
const multimodalCheckbox = document.getElementById('model-multimodal');
|
| 4189 |
|
| 4190 |
title.textContent = isEdit ? 'Edit Model' : 'Add Model';
|
| 4191 |
nameInput.value = model.name;
|
| 4192 |
modelIdInput.value = model.modelId;
|
| 4193 |
extraParamsInput.value = model.extraParams ? JSON.stringify(model.extraParams, null, 2) : '';
|
| 4194 |
+
multimodalCheckbox.checked = !!model.multimodal;
|
| 4195 |
|
| 4196 |
// Populate provider dropdown
|
| 4197 |
providerSelect.innerHTML = '<option value="">-- Select Provider --</option>';
|
|
|
|
| 4238 |
}
|
| 4239 |
}
|
| 4240 |
|
| 4241 |
+
const multimodal = document.getElementById('model-multimodal').checked;
|
| 4242 |
+
settings.models[modelId] = { name, providerId, modelId: apiModelId, extraParams, multimodal };
|
| 4243 |
hideModelDialog();
|
| 4244 |
renderModelsList();
|
| 4245 |
populateModelDropdowns();
|
|
|
|
| 4544 |
endpoint: provider.endpoint,
|
| 4545 |
token: provider.token,
|
| 4546 |
model: model.modelId,
|
| 4547 |
+
extraParams: model.extraParams || null,
|
| 4548 |
+
multimodal: !!model.multimodal
|
| 4549 |
};
|
| 4550 |
}
|
| 4551 |
|
|
|
|
| 4563 |
endpoint: provider.endpoint,
|
| 4564 |
token: provider.token,
|
| 4565 |
model: model.modelId,
|
| 4566 |
+
extraParams: model.extraParams || null,
|
| 4567 |
+
multimodal: !!model.multimodal
|
| 4568 |
};
|
| 4569 |
}
|
| 4570 |
|
|
@@ -1129,6 +1129,20 @@ body {
|
|
| 1129 |
margin: 16px 0;
|
| 1130 |
}
|
| 1131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1132 |
/* LaTeX / KaTeX */
|
| 1133 |
.message-content .katex-display {
|
| 1134 |
margin: 12px 0;
|
|
@@ -1298,6 +1312,7 @@ pre code [class*="token"] {
|
|
| 1298 |
|
| 1299 |
.code-cell-image img {
|
| 1300 |
max-width: 400px;
|
|
|
|
| 1301 |
height: auto;
|
| 1302 |
border-radius: 4px;
|
| 1303 |
cursor: pointer;
|
|
@@ -1798,6 +1813,22 @@ pre code [class*="token"] {
|
|
| 1798 |
color: var(--bg-primary);
|
| 1799 |
}
|
| 1800 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1801 |
.action-widget-body {
|
| 1802 |
padding: 12px;
|
| 1803 |
background: var(--bg-tertiary);
|
|
@@ -1834,6 +1865,7 @@ pre code [class*="token"] {
|
|
| 1834 |
.action-widget .section-content img,
|
| 1835 |
.action-widget img {
|
| 1836 |
max-width: 400px !important;
|
|
|
|
| 1837 |
width: auto !important;
|
| 1838 |
height: auto !important;
|
| 1839 |
margin: 8px 0;
|
|
@@ -1896,6 +1928,7 @@ pre code [class*="token"] {
|
|
| 1896 |
|
| 1897 |
.action-widget-result img {
|
| 1898 |
max-width: 400px;
|
|
|
|
| 1899 |
height: auto;
|
| 1900 |
margin: 8px 0;
|
| 1901 |
border-radius: 3px;
|
|
@@ -2598,6 +2631,7 @@ pre code [class*="token"] {
|
|
| 2598 |
|
| 2599 |
.result-content img {
|
| 2600 |
max-width: 400px;
|
|
|
|
| 2601 |
height: auto;
|
| 2602 |
margin: 8px 0;
|
| 2603 |
border-radius: 3px;
|
|
@@ -3192,6 +3226,19 @@ pre code [class*="token"] {
|
|
| 3192 |
font-style: italic;
|
| 3193 |
}
|
| 3194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3195 |
.settings-dialog-actions {
|
| 3196 |
display: flex;
|
| 3197 |
gap: 6px;
|
|
|
|
| 1129 |
margin: 16px 0;
|
| 1130 |
}
|
| 1131 |
|
| 1132 |
+
/* Inline images in chat messages: capped at 400x400 (aspect preserved),
   styled as clickable — presumably wired to the image modal; confirm against
   the JS that injects these <img> tags. */
.message-content img {
    max-width: 400px;
    max-height: 400px;
    height: auto;
    margin: 8px 0;
    border-radius: 3px;
    cursor: pointer;
    transition: opacity 0.2s;
}

/* Subtle hover feedback to signal the image is interactive. */
.message-content img:hover {
    opacity: 0.85;
}
|
| 1145 |
+
|
| 1146 |
/* LaTeX / KaTeX */
|
| 1147 |
.message-content .katex-display {
|
| 1148 |
margin: 12px 0;
|
|
|
|
| 1312 |
|
| 1313 |
.code-cell-image img {
|
| 1314 |
max-width: 400px;
|
| 1315 |
+
max-height: 400px;
|
| 1316 |
height: auto;
|
| 1317 |
border-radius: 4px;
|
| 1318 |
cursor: pointer;
|
|
|
|
| 1813 |
color: var(--bg-primary);
|
| 1814 |
}
|
| 1815 |
|
| 1816 |
+
/* Errored action widget - red background with exclamation */
.action-widget .done-indicator.errored {
    background: #c62828;
}
/* Replace the default indicator glyph with a bold "!" by resetting the
   base ::before box styling (width/height/border/transform) and drawing
   the character via content. */
.action-widget .done-indicator.errored::before {
    content: '!';
    width: auto;
    height: auto;
    border: none;
    transform: none;
    font-size: 11px;
    font-weight: bold;
    line-height: 1;
    color: white;
}
|
| 1831 |
+
|
| 1832 |
.action-widget-body {
|
| 1833 |
padding: 12px;
|
| 1834 |
background: var(--bg-tertiary);
|
|
|
|
| 1865 |
.action-widget .section-content img,
|
| 1866 |
.action-widget img {
|
| 1867 |
max-width: 400px !important;
|
| 1868 |
+
max-height: 400px !important;
|
| 1869 |
width: auto !important;
|
| 1870 |
height: auto !important;
|
| 1871 |
margin: 8px 0;
|
|
|
|
| 1928 |
|
| 1929 |
.action-widget-result img {
|
| 1930 |
max-width: 400px;
|
| 1931 |
+
max-height: 400px;
|
| 1932 |
height: auto;
|
| 1933 |
margin: 8px 0;
|
| 1934 |
border-radius: 3px;
|
|
|
|
| 2631 |
|
| 2632 |
.result-content img {
|
| 2633 |
max-width: 400px;
|
| 2634 |
+
max-height: 400px;
|
| 2635 |
height: auto;
|
| 2636 |
margin: 8px 0;
|
| 2637 |
border-radius: 3px;
|
|
|
|
| 3226 |
font-style: italic;
|
| 3227 |
}
|
| 3228 |
|
| 3229 |
+
/* Checkbox + label row in settings dialogs (used by the model dialog's
   multimodal toggle); flex keeps the box and text vertically centered. */
.dialog-checkbox-label {
    font-size: 11px;
    color: var(--text-secondary);
    display: flex;
    align-items: center;
    gap: 6px;
    cursor: pointer;
}

/* Strip the checkbox's default margin so the flex gap controls spacing. */
.dialog-checkbox-label input[type="checkbox"] {
    margin: 0;
}
|
| 3241 |
+
|
| 3242 |
.settings-dialog-actions {
|
| 3243 |
display: flex;
|
| 3244 |
gap: 6px;
|