lvwerra HF Staff Claude Opus 4.6 committed on
Commit
583c5ee
·
1 Parent(s): d86459e

Image agent fixes: sizing, error handling, result nudge, registry filtering

Browse files

- Add max-height: 400px to all image CSS rules and inline styles
- Filter globalFigureRegistry to only include images/figures referenced
in sub-agent <result> content (prevents command center from rendering
images the sub-agent didn't explicitly include)
- Extract shared nudge_for_result() utility in agents.py, replacing
duplicated nudge code in agent.py, code.py, and image.py
- Return actual error messages from execute_generate_image/execute_edit_image
(tuple return) so the LLM can adapt its strategy on failure
- Show real error messages in frontend tool cells instead of generic
"Failed to process image"
- Resize large input images to 1024px max before sending to HF
image_to_image API (FLUX.1-Kontext-dev expects ~1024px inputs)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

backend/agent.py CHANGED
@@ -13,7 +13,9 @@ from typing import List, Dict, Optional
13
  from .tools import (
14
  web_search, read_url,
15
  execute_web_search, execute_read_url,
 
16
  )
 
17
 
18
  logger = logging.getLogger(__name__)
19
 
@@ -95,7 +97,8 @@ def stream_agent_execution(
95
  messages: List[Dict],
96
  serper_key: str,
97
  extra_params: Optional[Dict] = None,
98
- abort_event=None
 
99
  ):
100
  """
101
  Run the agent tool-calling loop.
@@ -229,18 +232,30 @@ def stream_agent_execution(
229
  # Execute tool
230
  result = execute_tool(func_name, args, serper_key)
231
 
232
- # Build tool response message for LLM
233
- if result.get("image"):
234
- # For screenshots, send image as vision content so LLM can see it
 
235
  tool_response_content = [
236
  {"type": "text", "text": result["content"]},
237
- {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{result['image']}"}}
238
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  else:
240
  tool_response_content = result["content"]
241
 
242
- tool_response_str = tool_response_content if isinstance(tool_response_content, str) else json.dumps(tool_response_content)
243
-
244
  # Add to message history
245
  messages.append({
246
  "role": "assistant",
@@ -250,7 +265,7 @@ def stream_agent_execution(
250
  messages.append({
251
  "role": "tool",
252
  "tool_call_id": tool_call.id,
253
- "content": tool_response_str
254
  })
255
 
256
  # Signal tool result to frontend (include response for history)
@@ -259,7 +274,7 @@ def stream_agent_execution(
259
  "tool": func_name,
260
  "tool_call_id": tool_call.id,
261
  "result": result.get("display", {}),
262
- "response": tool_response_str,
263
  }
264
  if result.get("image"):
265
  tool_result_event["image"] = result["image"]
@@ -281,32 +296,7 @@ def stream_agent_execution(
281
 
282
  # If agent finished without a <result>, nudge it for one
283
  if not has_result:
284
- messages.append({
285
- "role": "user",
286
- "content": "Please provide your final answer now. Wrap it in <result> tags."
287
- })
288
- try:
289
- call_params = {
290
- "messages": messages,
291
- "model": model,
292
- }
293
- if extra_params:
294
- call_params["extra_body"] = extra_params
295
- response = client.chat.completions.create(**call_params)
296
- nudge_content = response.choices[0].message.content or ""
297
- result_match = re.search(r'<result>(.*?)</result>', nudge_content, re.DOTALL | re.IGNORECASE)
298
- if result_match:
299
- result_content = result_match.group(1).strip()
300
- thinking = re.sub(r'<result>.*?</result>', '', nudge_content, flags=re.DOTALL | re.IGNORECASE).strip()
301
- if thinking:
302
- yield {"type": "content", "content": thinking}
303
- yield {"type": "result_preview", "content": result_content}
304
- yield {"type": "result", "content": result_content}
305
- elif nudge_content.strip():
306
- # No result tags but got content — use it as the result
307
- yield {"type": "result_preview", "content": nudge_content.strip()}
308
- yield {"type": "result", "content": nudge_content.strip()}
309
- except Exception as e:
310
- logger.warning(f"Result nudge failed: {e}")
311
 
312
  yield {"type": "done"}
 
13
  from .tools import (
14
  web_search, read_url,
15
  execute_web_search, execute_read_url,
16
+ extract_and_download_images,
17
  )
18
+ from .image import resize_image_for_vlm
19
 
20
  logger = logging.getLogger(__name__)
21
 
 
97
  messages: List[Dict],
98
  serper_key: str,
99
  extra_params: Optional[Dict] = None,
100
+ abort_event=None,
101
+ multimodal: bool = False
102
  ):
103
  """
104
  Run the agent tool-calling loop.
 
232
  # Execute tool
233
  result = execute_tool(func_name, args, serper_key)
234
 
235
+ # Build tool response content for LLM
236
+ if result.get("image") and multimodal:
237
+ # Send screenshot as multimodal content so VLM can see it
238
+ vlm_image = resize_image_for_vlm(result["image"])
239
  tool_response_content = [
240
  {"type": "text", "text": result["content"]},
241
+ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{vlm_image}"}}
242
  ]
243
+ elif func_name == "read_url" and multimodal:
244
+ # Extract and include page images so VLM can see them
245
+ page_images = extract_and_download_images(result["content"])
246
+ if page_images:
247
+ tool_response_content = [{"type": "text", "text": result["content"]}]
248
+ for img_b64 in page_images:
249
+ vlm_img = resize_image_for_vlm(img_b64)
250
+ tool_response_content.append({
251
+ "type": "image_url",
252
+ "image_url": {"url": f"data:image/jpeg;base64,{vlm_img}"}
253
+ })
254
+ else:
255
+ tool_response_content = result["content"]
256
  else:
257
  tool_response_content = result["content"]
258
 
 
 
259
  # Add to message history
260
  messages.append({
261
  "role": "assistant",
 
265
  messages.append({
266
  "role": "tool",
267
  "tool_call_id": tool_call.id,
268
+ "content": tool_response_content
269
  })
270
 
271
  # Signal tool result to frontend (include response for history)
 
274
  "tool": func_name,
275
  "tool_call_id": tool_call.id,
276
  "result": result.get("display", {}),
277
+ "response": result.get("content", ""),
278
  }
279
  if result.get("image"):
280
  tool_result_event["image"] = result["image"]
 
296
 
297
  # If agent finished without a <result>, nudge it for one
298
  if not has_result:
299
+ from .agents import nudge_for_result
300
+ yield from nudge_for_result(client, model, messages, extra_params=extra_params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
 
302
  yield {"type": "done"}
backend/agents.py CHANGED
@@ -82,9 +82,11 @@ AGENT_REGISTRY = {
82
  "(this preserves context and the Jupyter kernel for code agents).\n\n"
83
  "## Presenting Results\n\n"
84
  "Sub-agent results may be collapsed in the UI. When presenting results to the user, "
85
- "always include the key findings in YOUR response text — don't just say \"see the agent result\". "
86
- "If the agent produced figures, charts, or images, describe what they show and mention "
87
- "that the visualization is available in the agent result widget.\n\n"
 
 
88
  "## Handling Aborted Agents\n\n"
89
  "If an agent's result is 'Generation aborted by user.', the user deliberately stopped it. "
90
  "Do NOT automatically re-launch the same task. Instead, briefly acknowledge the abort and "
@@ -313,7 +315,8 @@ AGENT_REGISTRY = {
313
  "Returns an image reference (e.g., 'image_1') that you can see.\n"
314
  "- **edit_image(prompt, source)**: Edit or transform an existing image. "
315
  "The source can be a URL, a local file path, or an image reference from a previous tool call (e.g., 'image_1').\n"
316
- "- **read_image(source)**: Load an image from a URL or local file path. "
 
317
  "Returns an image reference that you can see and use with edit_image.\n\n"
318
  "## Strategy\n\n"
319
  "1. If the user provides an image URL or file path, use read_image first to load it\n"
@@ -385,6 +388,53 @@ def get_system_prompt(agent_key: str) -> str:
385
  return prompt
386
 
387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  def get_tools() -> list:
389
  """Get tool definitions for the command center."""
390
  return [
 
82
  "(this preserves context and the Jupyter kernel for code agents).\n\n"
83
  "## Presenting Results\n\n"
84
  "Sub-agent results may be collapsed in the UI. When presenting results to the user, "
85
+ "always include the key findings in YOUR response text — don't just say \"see the agent result\".\n\n"
86
+ "**Embedding images/figures from sub-agents:** If a sub-agent result contains image or figure "
87
+ "references like <image_1> or <figure_1>, you can embed them directly in your response using "
88
+ "the same tags (e.g., <image_1>, <figure_2>). The UI will resolve these to the actual images. "
89
+ "Always embed the relevant images when discussing visual results.\n\n"
90
  "## Handling Aborted Agents\n\n"
91
  "If an agent's result is 'Generation aborted by user.', the user deliberately stopped it. "
92
  "Do NOT automatically re-launch the same task. Instead, briefly acknowledge the abort and "
 
315
  "Returns an image reference (e.g., 'image_1') that you can see.\n"
316
  "- **edit_image(prompt, source)**: Edit or transform an existing image. "
317
  "The source can be a URL, a local file path, or an image reference from a previous tool call (e.g., 'image_1').\n"
318
+ "- **read_image(source)**: Load a raster image (PNG, JPEG, GIF, WebP, BMP) from a URL or local file path. "
319
+ "SVG is NOT supported — if given an SVG URL, tell the user and ask for a raster format instead. "
320
  "Returns an image reference that you can see and use with edit_image.\n\n"
321
  "## Strategy\n\n"
322
  "1. If the user provides an image URL or file path, use read_image first to load it\n"
 
388
  return prompt
389
 
390
 
391
+ def nudge_for_result(client, model, messages, extra_params=None, extra_result_data=None):
392
+ """Nudge an agent that finished without <result> tags to produce one.
393
+
394
+ This is a generator that yields SSE events (content, result_preview, result).
395
+ Call it after an agent's tool loop when no <result> was found.
396
+
397
+ Args:
398
+ client: OpenAI-compatible client
399
+ model: Model name
400
+ messages: Full message history (will be mutated — nudge message appended)
401
+ extra_params: Optional extra_body params for the LLM call
402
+ extra_result_data: Optional dict of extra fields to include in result events
403
+ (e.g. {"figures": {...}} or {"images": {...}})
404
+ """
405
+ import re
406
+ import logging
407
+ _logger = logging.getLogger(__name__)
408
+
409
+ messages.append({
410
+ "role": "user",
411
+ "content": "Please provide your final answer now. Wrap it in <result> tags."
412
+ })
413
+ try:
414
+ call_params = {"messages": messages, "model": model}
415
+ if extra_params:
416
+ call_params["extra_body"] = extra_params
417
+ response = client.chat.completions.create(**call_params)
418
+ nudge_content = response.choices[0].message.content or ""
419
+ result_match = re.search(r'<result>(.*?)</result>', nudge_content, re.DOTALL | re.IGNORECASE)
420
+
421
+ extra = extra_result_data or {}
422
+
423
+ if result_match:
424
+ result_content = result_match.group(1).strip()
425
+ thinking = re.sub(r'<result>.*?</result>', '', nudge_content, flags=re.DOTALL | re.IGNORECASE).strip()
426
+ if thinking:
427
+ yield {"type": "content", "content": thinking}
428
+ yield {"type": "result_preview", "content": result_content, **extra}
429
+ yield {"type": "result", "content": result_content, **extra}
430
+ elif nudge_content.strip():
431
+ # No result tags but got content — use it as the result
432
+ yield {"type": "result_preview", "content": nudge_content.strip(), **extra}
433
+ yield {"type": "result", "content": nudge_content.strip(), **extra}
434
+ except Exception as e:
435
+ _logger.warning(f"Result nudge failed: {e}")
436
+
437
+
438
  def get_tools() -> list:
439
  """Get tool definitions for the command center."""
440
  return [
backend/code.py CHANGED
@@ -9,6 +9,7 @@ from typing import List, Dict, Optional
9
  from e2b_code_interpreter import Sandbox
10
 
11
  from .tools import execute_code, upload_files, download_files
 
12
 
13
  logger = logging.getLogger(__name__)
14
 
@@ -204,7 +205,7 @@ def download_files_from_sandbox(sbx: Sandbox, files: List[Dict], files_root: str
204
  return "\n".join(results)
205
 
206
 
207
- def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox, files_root: str = None, extra_params: Optional[Dict] = None, abort_event=None):
208
  """
209
  Stream code execution results
210
 
@@ -429,11 +430,27 @@ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox
429
  }]
430
  })
431
 
432
- messages.append({
433
- "role": "tool",
434
- "tool_call_id": tool_call.id,
435
- "content": output
436
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
 
438
  elif tool_call.function.name == "upload_files":
439
  # Parse arguments
@@ -571,32 +588,8 @@ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox
571
 
572
  # If agent finished without a <result>, nudge it for one
573
  if not has_result:
574
- messages.append({
575
- "role": "user",
576
- "content": "Please provide your final answer now. Wrap it in <result> tags."
577
- })
578
- try:
579
- call_params = {
580
- "messages": messages,
581
- "model": model,
582
- }
583
- if extra_params:
584
- call_params["extra_body"] = extra_params
585
- response = client.chat.completions.create(**call_params)
586
- nudge_content = response.choices[0].message.content or ""
587
- result_match = re.search(r'<result>(.*?)</result>', nudge_content, re.DOTALL | re.IGNORECASE)
588
- if result_match:
589
- result_content = result_match.group(1).strip()
590
- thinking = re.sub(r'<result>.*?</result>', '', nudge_content, flags=re.DOTALL | re.IGNORECASE).strip()
591
- if thinking:
592
- yield {"type": "content", "content": thinking}
593
- yield {"type": "result_preview", "content": result_content, "figures": figure_data}
594
- yield {"type": "result", "content": result_content, "figures": figure_data}
595
- elif nudge_content.strip():
596
- yield {"type": "result_preview", "content": nudge_content.strip(), "figures": figure_data}
597
- yield {"type": "result", "content": nudge_content.strip(), "figures": figure_data}
598
- except Exception as e:
599
- logger.warning(f"Result nudge failed: {e}")
600
 
601
  # Send done signal
602
  yield {"type": "done"}
 
9
  from e2b_code_interpreter import Sandbox
10
 
11
  from .tools import execute_code, upload_files, download_files
12
+ from .image import resize_image_for_vlm
13
 
14
  logger = logging.getLogger(__name__)
15
 
 
205
  return "\n".join(results)
206
 
207
 
208
+ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox, files_root: str = None, extra_params: Optional[Dict] = None, abort_event=None, multimodal: bool = False):
209
  """
210
  Stream code execution results
211
 
 
430
  }]
431
  })
432
 
433
+ # Build tool response — include figures if multimodal
434
+ if multimodal and images:
435
+ tool_content = [{"type": "text", "text": output}]
436
+ for img in images:
437
+ if img["type"] in ("png", "jpeg"):
438
+ vlm_img = resize_image_for_vlm(img["data"])
439
+ tool_content.append({
440
+ "type": "image_url",
441
+ "image_url": {"url": f"data:image/jpeg;base64,{vlm_img}"}
442
+ })
443
+ messages.append({
444
+ "role": "tool",
445
+ "tool_call_id": tool_call.id,
446
+ "content": tool_content
447
+ })
448
+ else:
449
+ messages.append({
450
+ "role": "tool",
451
+ "tool_call_id": tool_call.id,
452
+ "content": output
453
+ })
454
 
455
  elif tool_call.function.name == "upload_files":
456
  # Parse arguments
 
588
 
589
  # If agent finished without a <result>, nudge it for one
590
  if not has_result:
591
+ from .agents import nudge_for_result
592
+ yield from nudge_for_result(client, model, messages, extra_params=extra_params, extra_result_data={"figures": figure_data})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
 
594
  # Send done signal
595
  yield {"type": "done"}
backend/image.py CHANGED
@@ -97,7 +97,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
97
  if tool_name == "generate_image":
98
  prompt = args.get("prompt", "")
99
  model = args.get("model") or default_gen_model or "black-forest-labs/FLUX.1-schnell"
100
- base64_png = execute_generate_image(prompt, hf_token, model)
101
 
102
  if base64_png:
103
  image_counter += 1
@@ -112,7 +112,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
112
  }
113
  else:
114
  return {
115
- "content": f"Failed to generate image. The model may be unavailable or the prompt may be invalid.",
116
  "display": {"type": "generate_error", "prompt": prompt},
117
  "image_counter": image_counter,
118
  }
@@ -138,7 +138,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
138
  "image_counter": image_counter,
139
  }
140
 
141
- base64_png = execute_edit_image(prompt, source_bytes, hf_token, model)
142
 
143
  if base64_png:
144
  image_counter += 1
@@ -153,7 +153,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
153
  }
154
  else:
155
  return {
156
- "content": f"Failed to edit image. The model may be unavailable or the request may be invalid.",
157
  "display": {"type": "edit_error", "source": source},
158
  "image_counter": image_counter,
159
  }
@@ -174,8 +174,14 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
174
  "image_counter": image_counter,
175
  }
176
  else:
 
 
 
 
 
 
177
  return {
178
- "content": f"Failed to load image from '{source}'. Check that the path or URL is correct.",
179
  "display": {"type": "read_image_error", "url": source},
180
  "image_counter": image_counter,
181
  }
@@ -196,7 +202,8 @@ def stream_image_execution(
196
  image_edit_model: Optional[str] = None,
197
  extra_params: Optional[Dict] = None,
198
  abort_event=None,
199
- files_root: str = None
 
200
  ):
201
  """
202
  Run the image agent tool-calling loop.
@@ -334,9 +341,8 @@ def stream_image_execution(
334
  result = execute_tool(func_name, args, hf_token, image_store, image_counter, default_gen_model=image_gen_model, default_edit_model=image_edit_model, files_root=files_root)
335
  image_counter = result.get("image_counter", image_counter)
336
 
337
- # Build tool response message for LLM
338
- if result.get("image"):
339
- # Resize image for VLM context to avoid token overflow
340
  vlm_image = resize_image_for_vlm(result["image"])
341
  tool_response_content = [
342
  {"type": "text", "text": result["content"]},
@@ -385,12 +391,18 @@ def stream_image_execution(
385
  if not done:
386
  yield {"type": "generating"}
387
 
388
- # Fallback: if VLM never produced a <result> tag, synthesize one with all images
389
  if not result_sent and image_store:
390
- fallback_parts = []
391
- for name in image_store:
392
- fallback_parts.append(f"<{name}>")
393
- fallback_content = "\n\n".join(fallback_parts)
394
- yield {"type": "result", "content": fallback_content, "images": image_store}
 
 
 
 
 
 
395
 
396
  yield {"type": "done"}
 
97
  if tool_name == "generate_image":
98
  prompt = args.get("prompt", "")
99
  model = args.get("model") or default_gen_model or "black-forest-labs/FLUX.1-schnell"
100
+ base64_png, error = execute_generate_image(prompt, hf_token, model)
101
 
102
  if base64_png:
103
  image_counter += 1
 
112
  }
113
  else:
114
  return {
115
+ "content": f"Failed to generate image: {error}",
116
  "display": {"type": "generate_error", "prompt": prompt},
117
  "image_counter": image_counter,
118
  }
 
138
  "image_counter": image_counter,
139
  }
140
 
141
+ base64_png, error = execute_edit_image(prompt, source_bytes, hf_token, model)
142
 
143
  if base64_png:
144
  image_counter += 1
 
153
  }
154
  else:
155
  return {
156
+ "content": f"Failed to edit image: {error}",
157
  "display": {"type": "edit_error", "source": source},
158
  "image_counter": image_counter,
159
  }
 
174
  "image_counter": image_counter,
175
  }
176
  else:
177
+ # Provide more specific error for SVG files
178
+ is_svg = source.lower().endswith(".svg") or "/svg" in source.lower()
179
+ if is_svg:
180
+ error_msg = f"Failed to load image from '{source}'. SVG format is not supported — only raster formats (PNG, JPEG, GIF, WebP, BMP) are accepted. Ask the user for a raster version of the image."
181
+ else:
182
+ error_msg = f"Failed to load image from '{source}'. Check that the path or URL is correct and that it is a raster image (PNG, JPEG, GIF, WebP, BMP)."
183
  return {
184
+ "content": error_msg,
185
  "display": {"type": "read_image_error", "url": source},
186
  "image_counter": image_counter,
187
  }
 
202
  image_edit_model: Optional[str] = None,
203
  extra_params: Optional[Dict] = None,
204
  abort_event=None,
205
+ files_root: str = None,
206
+ multimodal: bool = False
207
  ):
208
  """
209
  Run the image agent tool-calling loop.
 
341
  result = execute_tool(func_name, args, hf_token, image_store, image_counter, default_gen_model=image_gen_model, default_edit_model=image_edit_model, files_root=files_root)
342
  image_counter = result.get("image_counter", image_counter)
343
 
344
+ # Build tool response content for LLM
345
+ if result.get("image") and multimodal:
 
346
  vlm_image = resize_image_for_vlm(result["image"])
347
  tool_response_content = [
348
  {"type": "text", "text": result["content"]},
 
391
  if not done:
392
  yield {"type": "generating"}
393
 
394
+ # If agent finished without a <result>, nudge it for one
395
  if not result_sent and image_store:
396
+ from .agents import nudge_for_result
397
+ nudge_produced_result = False
398
+ for event in nudge_for_result(client, model, messages, extra_params=extra_params, extra_result_data={"images": image_store}):
399
+ yield event
400
+ if event.get("type") == "result":
401
+ nudge_produced_result = True
402
+
403
+ # Final fallback: synthesize a result with all images
404
+ if not nudge_produced_result:
405
+ fallback_parts = [f"<{name}>" for name in image_store]
406
+ yield {"type": "result", "content": "\n\n".join(fallback_parts), "images": image_store}
407
 
408
  yield {"type": "done"}
backend/tools.py CHANGED
@@ -419,7 +419,7 @@ read_image = {
419
  "type": "function",
420
  "function": {
421
  "name": "read_image",
422
- "description": "Load an image from a URL or local file path. Returns an image reference name (e.g., 'image_1') that you can see and use with edit_image.",
423
  "parameters": {
424
  "type": "object",
425
  "properties": {
@@ -441,48 +441,56 @@ read_image_url = read_image
441
  # Image tool execution functions
442
  # ============================================================
443
 
444
- def execute_generate_image(prompt: str, hf_token: str, model: str = "black-forest-labs/FLUX.1-schnell") -> Optional[str]:
445
- """Text-to-image via HF InferenceClient. Returns base64 PNG or None on error."""
446
  try:
447
  from huggingface_hub import InferenceClient
448
  except ImportError:
449
- logger.error("huggingface_hub not installed")
450
- return None
451
 
452
  try:
453
  client = InferenceClient(token=hf_token)
454
  image = client.text_to_image(prompt, model=model)
455
  buffer = io.BytesIO()
456
  image.save(buffer, format="PNG")
457
- return base64.b64encode(buffer.getvalue()).decode("utf-8")
458
  except Exception as e:
459
  logger.error(f"Generate image error: {e}")
460
- return None
461
 
462
 
463
- def execute_edit_image(prompt: str, source_image_bytes: bytes, hf_token: str, model: str = "black-forest-labs/FLUX.1-Kontext-dev") -> Optional[str]:
464
- """Image-to-image via HF InferenceClient. source_image_bytes is raw image data. Returns base64 PNG or None."""
465
  try:
466
  from huggingface_hub import InferenceClient
467
  from PIL import Image
468
  except ImportError:
469
- logger.error("huggingface_hub or Pillow not installed")
470
- return None
471
 
472
  try:
473
  client = InferenceClient(token=hf_token)
474
  input_image = Image.open(io.BytesIO(source_image_bytes))
 
 
 
 
 
 
 
475
  result = client.image_to_image(input_image, prompt=prompt, model=model)
476
  buffer = io.BytesIO()
477
  result.save(buffer, format="PNG")
478
- return base64.b64encode(buffer.getvalue()).decode("utf-8")
479
  except Exception as e:
480
  logger.error(f"Edit image error: {e}")
481
- return None
482
 
483
 
484
  def execute_read_image(source: str, files_root: str = None) -> Optional[str]:
485
- """Load image from URL or local file path, return base64 string or None on error."""
 
 
 
486
  import os
487
 
488
  # Check if it's a URL
@@ -523,6 +531,41 @@ def execute_read_image(source: str, files_root: str = None) -> Optional[str]:
523
  return None
524
 
525
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  # Keep old name as alias
527
  def execute_read_image_url(url: str) -> Optional[str]:
528
  return execute_read_image(url)
 
419
  "type": "function",
420
  "function": {
421
  "name": "read_image",
422
+ "description": "Load a raster image (PNG, JPEG, GIF, WebP, BMP) from a URL or local file path. SVG is NOT supported. Returns an image reference name (e.g., 'image_1') that you can see and use with edit_image.",
423
  "parameters": {
424
  "type": "object",
425
  "properties": {
 
441
  # Image tool execution functions
442
  # ============================================================
443
 
444
+ def execute_generate_image(prompt: str, hf_token: str, model: str = "black-forest-labs/FLUX.1-schnell") -> tuple:
445
+ """Text-to-image via HF InferenceClient. Returns (base64_png, None) on success or (None, error_str) on failure."""
446
  try:
447
  from huggingface_hub import InferenceClient
448
  except ImportError:
449
+ return None, "huggingface_hub not installed"
 
450
 
451
  try:
452
  client = InferenceClient(token=hf_token)
453
  image = client.text_to_image(prompt, model=model)
454
  buffer = io.BytesIO()
455
  image.save(buffer, format="PNG")
456
+ return base64.b64encode(buffer.getvalue()).decode("utf-8"), None
457
  except Exception as e:
458
  logger.error(f"Generate image error: {e}")
459
+ return None, str(e)
460
 
461
 
462
+ def execute_edit_image(prompt: str, source_image_bytes: bytes, hf_token: str, model: str = "black-forest-labs/FLUX.1-Kontext-dev") -> tuple:
463
+ """Image-to-image via HF InferenceClient. Returns (base64_png, None) on success or (None, error_str) on failure."""
464
  try:
465
  from huggingface_hub import InferenceClient
466
  from PIL import Image
467
  except ImportError:
468
+ return None, "huggingface_hub or Pillow not installed"
 
469
 
470
  try:
471
  client = InferenceClient(token=hf_token)
472
  input_image = Image.open(io.BytesIO(source_image_bytes))
473
+
474
+ # Resize large images to avoid API failures (most models expect ~1024px)
475
+ MAX_EDIT_DIM = 1024
476
+ if max(input_image.size) > MAX_EDIT_DIM:
477
+ input_image.thumbnail((MAX_EDIT_DIM, MAX_EDIT_DIM), Image.LANCZOS)
478
+ logger.info(f"Resized input image to {input_image.size} for editing")
479
+
480
  result = client.image_to_image(input_image, prompt=prompt, model=model)
481
  buffer = io.BytesIO()
482
  result.save(buffer, format="PNG")
483
+ return base64.b64encode(buffer.getvalue()).decode("utf-8"), None
484
  except Exception as e:
485
  logger.error(f"Edit image error: {e}")
486
+ return None, str(e)
487
 
488
 
489
  def execute_read_image(source: str, files_root: str = None) -> Optional[str]:
490
+ """Load image from URL or local file path, return base64 string or None on error.
491
+
492
+ Supported formats: PNG, JPEG, GIF, WebP, BMP. SVG is NOT supported.
493
+ """
494
  import os
495
 
496
  # Check if it's a URL
 
531
  return None
532
 
533
 
534
+ def extract_and_download_images(markdown: str, max_images: int = 5) -> List[str]:
535
+ """Extract image URLs from markdown and download them as base64 strings.
536
+
537
+ Returns list of base64-encoded image strings (PNG/JPEG).
538
+ Skips SVGs, data URIs, and failed downloads.
539
+ """
540
+ import re as _re
541
+ img_pattern = _re.compile(r'!\[[^\]]*\]\(([^)]+)\)')
542
+ urls = img_pattern.findall(markdown)
543
+
544
+ results = []
545
+ for url in urls:
546
+ if len(results) >= max_images:
547
+ break
548
+ if url.startswith("data:") or url.endswith(".svg"):
549
+ continue
550
+ try:
551
+ resp = httpx.get(
552
+ url,
553
+ follow_redirects=True,
554
+ timeout=10,
555
+ headers={"User-Agent": _USER_AGENT}
556
+ )
557
+ if resp.status_code != 200:
558
+ continue
559
+ ct = resp.headers.get("content-type", "")
560
+ if not ct.startswith("image/"):
561
+ continue
562
+ results.append(base64.b64encode(resp.content).decode("utf-8"))
563
+ except Exception:
564
+ continue
565
+
566
+ return results
567
+
568
+
569
  # Keep old name as alias
570
  def execute_read_image_url(url: str) -> Optional[str]:
571
  return execute_read_image(url)
frontend/script.js CHANGED
@@ -93,6 +93,10 @@ const actionWidgets = {};
93
  // Track tool call IDs for result updates (maps tabId -> tool_call_id)
94
  const toolCallIds = {};
95
 
 
 
 
 
96
  // Track agents by task_id for reuse (maps task_id -> tabId)
97
  const taskIdToTabId = {};
98
 
@@ -128,6 +132,7 @@ function resetLocalState() {
128
  // Clear object maps
129
  Object.keys(actionWidgets).forEach(k => delete actionWidgets[k]);
130
  Object.keys(toolCallIds).forEach(k => delete toolCallIds[k]);
 
131
  Object.keys(taskIdToTabId).forEach(k => delete taskIdToTabId[k]);
132
  researchQueryTabIds = {};
133
  showAllTurns = true;
@@ -2021,6 +2026,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2021
  token: modelConfig.token || null,
2022
  model: modelConfig.model,
2023
  extra_params: modelConfig.extraParams || null,
 
2024
  e2b_key: currentSettings.e2bKey || null,
2025
  serper_key: currentSettings.serperKey || null,
2026
  hf_token: currentSettings.hfToken || null,
@@ -2084,7 +2090,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2084
  currentMessageEl = createAssistantMessage(chatContainer);
2085
  }
2086
  fullResponse += data.content;
2087
- appendToMessage(currentMessageEl, parseMarkdown(fullResponse));
2088
  scrollChatToBottom(chatContainer);
2089
 
2090
  } else if (data.type === 'code') {
@@ -2128,6 +2134,22 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2128
  // Still generating - no action needed
2129
 
2130
  } else if (data.type === 'result') {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2131
  // Agent result - update command center widget
2132
  updateActionWidgetWithResult(tabId, data.content, data.figures, data.images);
2133
 
@@ -2175,7 +2197,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2175
  for (const [placeholderId, figureData] of Object.entries(figurePlaceholders)) {
2176
  let imageHtml = '';
2177
  if (figureData.type === 'png' || figureData.type === 'jpeg') {
2178
- imageHtml = `<img src="data:image/${figureData.type};base64,${figureData.data}" style="max-width: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
2179
  } else if (figureData.type === 'svg') {
2180
  imageHtml = `<div style="margin: 12px 0;">${atob(figureData.data)}</div>`;
2181
  }
@@ -2371,7 +2393,8 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2371
  const imgName = data.image_name || 'image';
2372
  outputHtml = `<img src="data:image/png;base64,${data.image}" alt="${escapeHtml(imgName)}" class="generated-img" />`;
2373
  } else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url' || data.tool === 'read_image') && !data.image) {
2374
- outputHtml = `<div class="tool-cell-read-summary">Failed to process image</div>`;
 
2375
  }
2376
 
2377
  if (outputHtml && lastToolCell) {
@@ -2389,7 +2412,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2389
  currentMessageEl = createAssistantMessage(chatContainer);
2390
  }
2391
  fullResponse += data.content;
2392
- appendToMessage(currentMessageEl, parseMarkdown(fullResponse));
2393
  scrollChatToBottom(chatContainer);
2394
 
2395
  } else if (data.type === 'launch') {
@@ -2521,6 +2544,16 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2521
  errorDiv.innerHTML = `<div class="message-content" style="color: #c62828;">Error: ${escapeHtml(data.content)}</div>`;
2522
  chatContainer.appendChild(errorDiv);
2523
  scrollChatToBottom(chatContainer);
 
 
 
 
 
 
 
 
 
 
2524
  }
2525
  }
2526
  }
@@ -2865,7 +2898,7 @@ async function updateActionWidgetWithResult(tabId, resultContent, figures, image
2865
  for (const [placeholderId, figureData] of Object.entries(figurePlaceholders)) {
2866
  let imageHtml = '';
2867
  if (figureData.type === 'png' || figureData.type === 'jpeg') {
2868
- imageHtml = `<img src="data:image/${figureData.type};base64,${figureData.data}" style="max-width: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
2869
  } else if (figureData.type === 'svg') {
2870
  imageHtml = `<div style="margin: 12px 0;">${atob(figureData.data)}</div>`;
2871
  }
@@ -3115,6 +3148,22 @@ if (typeof marked !== 'undefined') {
3115
  });
3116
  }
3117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3118
  function parseMarkdown(text) {
3119
  // Use marked library for proper markdown parsing
3120
  let html;
@@ -4128,7 +4177,7 @@ function deleteProvider(providerId) {
4128
  // Show add/edit model dialog
4129
  function showModelDialog(modelId = null) {
4130
  const isEdit = !!modelId;
4131
- const model = isEdit ? settings.models[modelId] : { name: '', providerId: '', modelId: '', extraParams: null };
4132
 
4133
  const dialog = document.getElementById('model-dialog');
4134
  const title = document.getElementById('model-dialog-title');
@@ -4136,11 +4185,13 @@ function showModelDialog(modelId = null) {
4136
  const providerSelect = document.getElementById('model-provider');
4137
  const modelIdInput = document.getElementById('model-model-id');
4138
  const extraParamsInput = document.getElementById('model-extra-params');
 
4139
 
4140
  title.textContent = isEdit ? 'Edit Model' : 'Add Model';
4141
  nameInput.value = model.name;
4142
  modelIdInput.value = model.modelId;
4143
  extraParamsInput.value = model.extraParams ? JSON.stringify(model.extraParams, null, 2) : '';
 
4144
 
4145
  // Populate provider dropdown
4146
  providerSelect.innerHTML = '<option value="">-- Select Provider --</option>';
@@ -4187,7 +4238,8 @@ function saveModelFromDialog() {
4187
  }
4188
  }
4189
 
4190
- settings.models[modelId] = { name, providerId, modelId: apiModelId, extraParams };
 
4191
  hideModelDialog();
4192
  renderModelsList();
4193
  populateModelDropdowns();
@@ -4492,7 +4544,8 @@ function resolveModelConfig(agentType) {
4492
  endpoint: provider.endpoint,
4493
  token: provider.token,
4494
  model: model.modelId,
4495
- extraParams: model.extraParams || null
 
4496
  };
4497
  }
4498
 
@@ -4510,7 +4563,8 @@ function getDefaultModelConfig() {
4510
  endpoint: provider.endpoint,
4511
  token: provider.token,
4512
  model: model.modelId,
4513
- extraParams: model.extraParams || null
 
4514
  };
4515
  }
4516
 
 
93
  // Track tool call IDs for result updates (maps tabId -> tool_call_id)
94
  const toolCallIds = {};
95
 
96
+ // Global figure/image registry populated by sub-agents for cross-agent reference resolution
97
+ // Maps "figure_1" -> {type, data} and "image_1" -> {type: "png", data: base64}
98
+ const globalFigureRegistry = {};
99
+
100
  // Track agents by task_id for reuse (maps task_id -> tabId)
101
  const taskIdToTabId = {};
102
 
 
132
  // Clear object maps
133
  Object.keys(actionWidgets).forEach(k => delete actionWidgets[k]);
134
  Object.keys(toolCallIds).forEach(k => delete toolCallIds[k]);
135
+ Object.keys(globalFigureRegistry).forEach(k => delete globalFigureRegistry[k]);
136
  Object.keys(taskIdToTabId).forEach(k => delete taskIdToTabId[k]);
137
  researchQueryTabIds = {};
138
  showAllTurns = true;
 
2026
  token: modelConfig.token || null,
2027
  model: modelConfig.model,
2028
  extra_params: modelConfig.extraParams || null,
2029
+ multimodal: modelConfig.multimodal || false,
2030
  e2b_key: currentSettings.e2bKey || null,
2031
  serper_key: currentSettings.serperKey || null,
2032
  hf_token: currentSettings.hfToken || null,
 
2090
  currentMessageEl = createAssistantMessage(chatContainer);
2091
  }
2092
  fullResponse += data.content;
2093
+ appendToMessage(currentMessageEl, resolveGlobalFigureRefs(parseMarkdown(fullResponse)));
2094
  scrollChatToBottom(chatContainer);
2095
 
2096
  } else if (data.type === 'code') {
 
2134
  // Still generating - no action needed
2135
 
2136
  } else if (data.type === 'result') {
2137
+ // Populate global figure/image registry only for items referenced in result content
2138
+ const resultText = data.content || '';
2139
+ if (data.figures) {
2140
+ for (const [name, figData] of Object.entries(data.figures)) {
2141
+ if (new RegExp(`</?${name}>`, 'i').test(resultText)) {
2142
+ globalFigureRegistry[name] = figData;
2143
+ }
2144
+ }
2145
+ }
2146
+ if (data.images) {
2147
+ for (const [name, imgBase64] of Object.entries(data.images)) {
2148
+ if (new RegExp(`</?${name}>`, 'i').test(resultText)) {
2149
+ globalFigureRegistry[name] = { type: 'png', data: imgBase64 };
2150
+ }
2151
+ }
2152
+ }
2153
  // Agent result - update command center widget
2154
  updateActionWidgetWithResult(tabId, data.content, data.figures, data.images);
2155
 
 
2197
  for (const [placeholderId, figureData] of Object.entries(figurePlaceholders)) {
2198
  let imageHtml = '';
2199
  if (figureData.type === 'png' || figureData.type === 'jpeg') {
2200
+ imageHtml = `<img src="data:image/${figureData.type};base64,${figureData.data}" style="max-width: 400px; max-height: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
2201
  } else if (figureData.type === 'svg') {
2202
  imageHtml = `<div style="margin: 12px 0;">${atob(figureData.data)}</div>`;
2203
  }
 
2393
  const imgName = data.image_name || 'image';
2394
  outputHtml = `<img src="data:image/png;base64,${data.image}" alt="${escapeHtml(imgName)}" class="generated-img" />`;
2395
  } else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url' || data.tool === 'read_image') && !data.image) {
2396
+ const errMsg = data.response || 'Failed to process image';
2397
+ outputHtml = `<div class="tool-cell-read-summary">${escapeHtml(errMsg)}</div>`;
2398
  }
2399
 
2400
  if (outputHtml && lastToolCell) {
 
2412
  currentMessageEl = createAssistantMessage(chatContainer);
2413
  }
2414
  fullResponse += data.content;
2415
+ appendToMessage(currentMessageEl, resolveGlobalFigureRefs(parseMarkdown(fullResponse)));
2416
  scrollChatToBottom(chatContainer);
2417
 
2418
  } else if (data.type === 'launch') {
 
2544
  errorDiv.innerHTML = `<div class="message-content" style="color: #c62828;">Error: ${escapeHtml(data.content)}</div>`;
2545
  chatContainer.appendChild(errorDiv);
2546
  scrollChatToBottom(chatContainer);
2547
+
2548
+ // Propagate error to parent action widget
2549
+ updateActionWidgetWithResult(tabId, `Error: ${data.content}`, {}, {});
2550
+ const errorWidget = actionWidgets[tabId];
2551
+ if (errorWidget) {
2552
+ const doneIndicator = errorWidget.querySelector('.done-indicator');
2553
+ if (doneIndicator) {
2554
+ doneIndicator.classList.add('errored');
2555
+ }
2556
+ }
2557
  }
2558
  }
2559
  }
 
2898
  for (const [placeholderId, figureData] of Object.entries(figurePlaceholders)) {
2899
  let imageHtml = '';
2900
  if (figureData.type === 'png' || figureData.type === 'jpeg') {
2901
+ imageHtml = `<img src="data:image/${figureData.type};base64,${figureData.data}" style="max-width: 400px; max-height: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
2902
  } else if (figureData.type === 'svg') {
2903
  imageHtml = `<div style="margin: 12px 0;">${atob(figureData.data)}</div>`;
2904
  }
 
3148
  });
3149
  }
3150
 
3151
+ // Resolve <figure_N> and <image_N> references using the global registry
3152
+ function resolveGlobalFigureRefs(html) {
3153
+ return html.replace(/<\/?(figure_\d+|image_\d+)>/gi, (match) => {
3154
+ // Extract the name (strip < > and /)
3155
+ const name = match.replace(/[<>/]/g, '');
3156
+ const data = globalFigureRegistry[name];
3157
+ if (!data) return match; // Leave unresolved refs as-is
3158
+ if (data.type === 'png' || data.type === 'jpeg') {
3159
+ return `<img src="data:image/${data.type};base64,${data.data}" style="max-width: 400px; max-height: 400px; height: auto; border-radius: 4px; margin: 12px 0; display: block;" onclick="openImageModal(this.src)">`;
3160
+ } else if (data.type === 'svg') {
3161
+ return `<div style="margin: 12px 0;">${atob(data.data)}</div>`;
3162
+ }
3163
+ return match;
3164
+ });
3165
+ }
3166
+
3167
  function parseMarkdown(text) {
3168
  // Use marked library for proper markdown parsing
3169
  let html;
 
4177
  // Show add/edit model dialog
4178
  function showModelDialog(modelId = null) {
4179
  const isEdit = !!modelId;
4180
+ const model = isEdit ? settings.models[modelId] : { name: '', providerId: '', modelId: '', extraParams: null, multimodal: false };
4181
 
4182
  const dialog = document.getElementById('model-dialog');
4183
  const title = document.getElementById('model-dialog-title');
 
4185
  const providerSelect = document.getElementById('model-provider');
4186
  const modelIdInput = document.getElementById('model-model-id');
4187
  const extraParamsInput = document.getElementById('model-extra-params');
4188
+ const multimodalCheckbox = document.getElementById('model-multimodal');
4189
 
4190
  title.textContent = isEdit ? 'Edit Model' : 'Add Model';
4191
  nameInput.value = model.name;
4192
  modelIdInput.value = model.modelId;
4193
  extraParamsInput.value = model.extraParams ? JSON.stringify(model.extraParams, null, 2) : '';
4194
+ multimodalCheckbox.checked = !!model.multimodal;
4195
 
4196
  // Populate provider dropdown
4197
  providerSelect.innerHTML = '<option value="">-- Select Provider --</option>';
 
4238
  }
4239
  }
4240
 
4241
+ const multimodal = document.getElementById('model-multimodal').checked;
4242
+ settings.models[modelId] = { name, providerId, modelId: apiModelId, extraParams, multimodal };
4243
  hideModelDialog();
4244
  renderModelsList();
4245
  populateModelDropdowns();
 
4544
  endpoint: provider.endpoint,
4545
  token: provider.token,
4546
  model: model.modelId,
4547
+ extraParams: model.extraParams || null,
4548
+ multimodal: !!model.multimodal
4549
  };
4550
  }
4551
 
 
4563
  endpoint: provider.endpoint,
4564
  token: provider.token,
4565
  model: model.modelId,
4566
+ extraParams: model.extraParams || null,
4567
+ multimodal: !!model.multimodal
4568
  };
4569
  }
4570
 
frontend/style.css CHANGED
@@ -1129,6 +1129,20 @@ body {
1129
  margin: 16px 0;
1130
  }
1131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1132
  /* LaTeX / KaTeX */
1133
  .message-content .katex-display {
1134
  margin: 12px 0;
@@ -1298,6 +1312,7 @@ pre code [class*="token"] {
1298
 
1299
  .code-cell-image img {
1300
  max-width: 400px;
 
1301
  height: auto;
1302
  border-radius: 4px;
1303
  cursor: pointer;
@@ -1798,6 +1813,22 @@ pre code [class*="token"] {
1798
  color: var(--bg-primary);
1799
  }
1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1801
  .action-widget-body {
1802
  padding: 12px;
1803
  background: var(--bg-tertiary);
@@ -1834,6 +1865,7 @@ pre code [class*="token"] {
1834
  .action-widget .section-content img,
1835
  .action-widget img {
1836
  max-width: 400px !important;
 
1837
  width: auto !important;
1838
  height: auto !important;
1839
  margin: 8px 0;
@@ -1896,6 +1928,7 @@ pre code [class*="token"] {
1896
 
1897
  .action-widget-result img {
1898
  max-width: 400px;
 
1899
  height: auto;
1900
  margin: 8px 0;
1901
  border-radius: 3px;
@@ -2598,6 +2631,7 @@ pre code [class*="token"] {
2598
 
2599
  .result-content img {
2600
  max-width: 400px;
 
2601
  height: auto;
2602
  margin: 8px 0;
2603
  border-radius: 3px;
@@ -3192,6 +3226,19 @@ pre code [class*="token"] {
3192
  font-style: italic;
3193
  }
3194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3195
  .settings-dialog-actions {
3196
  display: flex;
3197
  gap: 6px;
 
1129
  margin: 16px 0;
1130
  }
1131
 
1132
+ .message-content img {
1133
+ max-width: 400px;
1134
+ max-height: 400px;
1135
+ height: auto;
1136
+ margin: 8px 0;
1137
+ border-radius: 3px;
1138
+ cursor: pointer;
1139
+ transition: opacity 0.2s;
1140
+ }
1141
+
1142
+ .message-content img:hover {
1143
+ opacity: 0.85;
1144
+ }
1145
+
1146
  /* LaTeX / KaTeX */
1147
  .message-content .katex-display {
1148
  margin: 12px 0;
 
1312
 
1313
  .code-cell-image img {
1314
  max-width: 400px;
1315
+ max-height: 400px;
1316
  height: auto;
1317
  border-radius: 4px;
1318
  cursor: pointer;
 
1813
  color: var(--bg-primary);
1814
  }
1815
 
1816
+ /* Errored action widget - red background with exclamation */
1817
+ .action-widget .done-indicator.errored {
1818
+ background: #c62828;
1819
+ }
1820
+ .action-widget .done-indicator.errored::before {
1821
+ content: '!';
1822
+ width: auto;
1823
+ height: auto;
1824
+ border: none;
1825
+ transform: none;
1826
+ font-size: 11px;
1827
+ font-weight: bold;
1828
+ line-height: 1;
1829
+ color: white;
1830
+ }
1831
+
1832
  .action-widget-body {
1833
  padding: 12px;
1834
  background: var(--bg-tertiary);
 
1865
  .action-widget .section-content img,
1866
  .action-widget img {
1867
  max-width: 400px !important;
1868
+ max-height: 400px !important;
1869
  width: auto !important;
1870
  height: auto !important;
1871
  margin: 8px 0;
 
1928
 
1929
  .action-widget-result img {
1930
  max-width: 400px;
1931
+ max-height: 400px;
1932
  height: auto;
1933
  margin: 8px 0;
1934
  border-radius: 3px;
 
2631
 
2632
  .result-content img {
2633
  max-width: 400px;
2634
+ max-height: 400px;
2635
  height: auto;
2636
  margin: 8px 0;
2637
  border-radius: 3px;
 
3226
  font-style: italic;
3227
  }
3228
 
3229
+ .dialog-checkbox-label {
3230
+ font-size: 11px;
3231
+ color: var(--text-secondary);
3232
+ display: flex;
3233
+ align-items: center;
3234
+ gap: 6px;
3235
+ cursor: pointer;
3236
+ }
3237
+
3238
+ .dialog-checkbox-label input[type="checkbox"] {
3239
+ margin: 0;
3240
+ }
3241
+
3242
  .settings-dialog-actions {
3243
  display: flex;
3244
  gap: 6px;