lvwerra HF Staff Claude Opus 4.6 committed on
Commit
4c24c65
·
1 Parent(s): f508f01

Unify figure store globally, enable cross-agent figure references

Browse files

- Replace per-tab IMAGE_STORES with single global FIGURE_STORE shared by code and image agents
- Unify naming: image_N β†’ figure_T{tab}_N across all agents
- Store values as {type, data} dicts consistently
- Restore figure registry from workspace on session reload
- Remove separate 'images' handling in frontend (unified under 'figures')
- Add cross-agent figure reference instructions to command center prompt
- Fix double-wrapping bug in image agent nudge path

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

backend/agents.py CHANGED
@@ -53,7 +53,10 @@ AGENT_REGISTRY = {
53
  "- **Code agent**: data analysis, code execution, visualizations, debugging\n"
54
  "- **Research agent**: ONLY deep multi-source analysis, comparisons, reports\n"
55
  "- **Image agent**: generating or editing images (ONLY when the user explicitly asks to generate/create an image β€” never for finding/showing existing photos)\n\n"
56
- "When delegating, provide a clear objective, scope boundaries, and expected output format.\n\n"
 
 
 
57
  "## Task Decomposition β€” ALWAYS parallelize\n\n"
58
  "**RULE: When a request mentions multiple distinct entities or topics, "
59
  "launch a separate agent for each.** Never combine multiple lookups into one agent.\n\n"
@@ -67,8 +70,8 @@ AGENT_REGISTRY = {
67
  "- Do NOT save/create files unless the user explicitly requests it.\n"
68
  "- Reuse task_id when a follow-up relates to an existing agent (preserves context and kernel).\n"
69
  "- Include key findings in YOUR response β€” don't just say \"see the agent result\".\n"
70
- "- **ALWAYS embed figures/images** from sub-agents in your response using their reference tags "
71
- "(e.g., <figure_1>, <image_1>). Sub-agent results are collapsed β€” if you don't embed the figure, the user won't see it.\n"
72
  "- If an agent was aborted by the user, acknowledge it and ask how to proceed β€” don't re-launch."
73
  ),
74
  "tool": None,
@@ -142,17 +145,18 @@ AGENT_REGISTRY = {
142
  "**Code runs in a remote sandbox, NOT locally.** "
143
  "Use upload_files before processing user files, download_files to send results back.\n\n"
144
  "## Guidelines\n\n"
145
- "- **Figures**: Call plt.show() β€” figures are auto-captured as figure_1, figure_2, etc. "
 
146
  "NEVER use both plt.savefig() and plt.show() (creates duplicates). "
147
- "To display a figure, embed it in your result text as <figure_1> β€” do NOT use show_html with an <img> tag.\n"
148
  "- **Files**: Do NOT save/download unless explicitly requested. Never overwrite without permission.\n"
149
  "- Execute code incrementally and reflect on output between steps.\n\n"
150
  "## CRITICAL: You MUST provide a <result> tag\n\n"
151
  "Keep results SHORT (1-2 sentences). The user can see code and output above.\n"
152
- "Use <figure_1> (self-closing) to embed figures.\n\n"
153
  "<result>\n"
154
  "Here's the sine function plot:\n\n"
155
- "<figure_1>\n"
156
  "</result>\n"
157
  ),
158
  "tool": {
@@ -233,12 +237,12 @@ AGENT_REGISTRY = {
233
  "system_prompt": (
234
  "You are a creative AI assistant with image tools.\n\n"
235
  "## Tools\n\n"
236
- "- **generate_image(prompt)**: Generate from text. Returns image reference (e.g., 'image_1').\n"
237
  "- **edit_image(prompt, source)**: Edit/transform an image. Source: URL, file path, or reference.\n"
238
  "- **read_image(source)**: Load a raster image (PNG, JPEG, GIF, WebP, BMP). "
239
- "SVG NOT supported. Returns image reference.\n"
240
  "- **save_image(source, filename)**: Save an image to the workspace as PNG. "
241
- "Source: reference (e.g., 'image_1') or URL.\n\n"
242
  "## Strategy\n\n"
243
  "1. If user provides a URL/file, use read_image first to load it\n"
244
  "2. Use generate_image ONLY when explicitly asked to generate/create an image β€” "
@@ -246,10 +250,11 @@ AGENT_REGISTRY = {
246
  "3. Use edit_image to transform existing ones\n"
247
  "4. Write detailed prompts. Describe what you see and iterate if needed.\n\n"
248
  "## CRITICAL: You MUST provide a <result> tag\n\n"
249
- "Use <image_1> (self-closing) to embed images in your result.\n\n"
 
250
  "<result>\n"
251
  "Here's the comic version of your image:\n\n"
252
- "<image_2>\n"
253
  "</result>\n"
254
  ),
255
  "tool": {
 
53
  "- **Code agent**: data analysis, code execution, visualizations, debugging\n"
54
  "- **Research agent**: ONLY deep multi-source analysis, comparisons, reports\n"
55
  "- **Image agent**: generating or editing images (ONLY when the user explicitly asks to generate/create an image β€” never for finding/showing existing photos)\n\n"
56
+ "When delegating, provide a clear objective, scope boundaries, and expected output format.\n"
57
+ "**Figures are shared across agents.** If a previous agent produced a figure (e.g., figure_T3_1), "
58
+ "you can pass its reference to another agent β€” for example, ask the image agent to edit figure_T3_1 "
59
+ "or the code agent to process it. Just include the reference name in the task description.\n\n"
60
  "## Task Decomposition β€” ALWAYS parallelize\n\n"
61
  "**RULE: When a request mentions multiple distinct entities or topics, "
62
  "launch a separate agent for each.** Never combine multiple lookups into one agent.\n\n"
 
70
  "- Do NOT save/create files unless the user explicitly requests it.\n"
71
  "- Reuse task_id when a follow-up relates to an existing agent (preserves context and kernel).\n"
72
  "- Include key findings in YOUR response β€” don't just say \"see the agent result\".\n"
73
+ "- **ALWAYS embed figures** from sub-agents in your response using the exact reference tags from the agent result "
74
+ "(e.g., <figure_T3_1>). Sub-agent results are collapsed β€” if you don't embed the figure, the user won't see it.\n"
75
  "- If an agent was aborted by the user, acknowledge it and ask how to proceed β€” don't re-launch."
76
  ),
77
  "tool": None,
 
145
  "**Code runs in a remote sandbox, NOT locally.** "
146
  "Use upload_files before processing user files, download_files to send results back.\n\n"
147
  "## Guidelines\n\n"
148
+ "- **Figures**: Call plt.show() β€” figures are auto-captured with names like figure_T4_1, figure_T4_2, etc. "
149
+ "The exact names appear in the execution output. "
150
  "NEVER use both plt.savefig() and plt.show() (creates duplicates). "
151
+ "To display a figure, embed the exact figure name from the output in your result text β€” do NOT use show_html with an <img> tag.\n"
152
  "- **Files**: Do NOT save/download unless explicitly requested. Never overwrite without permission.\n"
153
  "- Execute code incrementally and reflect on output between steps.\n\n"
154
  "## CRITICAL: You MUST provide a <result> tag\n\n"
155
  "Keep results SHORT (1-2 sentences). The user can see code and output above.\n"
156
+ "Use the exact figure name from the execution output (e.g., <figure_T4_1>) to embed figures.\n\n"
157
  "<result>\n"
158
  "Here's the sine function plot:\n\n"
159
+ "<figure_T4_1>\n"
160
  "</result>\n"
161
  ),
162
  "tool": {
 
237
  "system_prompt": (
238
  "You are a creative AI assistant with image tools.\n\n"
239
  "## Tools\n\n"
240
+ "- **generate_image(prompt)**: Generate from text. Returns figure reference (e.g., 'figure_T4_1').\n"
241
  "- **edit_image(prompt, source)**: Edit/transform an image. Source: URL, file path, or reference.\n"
242
  "- **read_image(source)**: Load a raster image (PNG, JPEG, GIF, WebP, BMP). "
243
+ "SVG NOT supported. Returns figure reference.\n"
244
  "- **save_image(source, filename)**: Save an image to the workspace as PNG. "
245
+ "Source: reference (e.g., 'figure_T4_1') or URL.\n\n"
246
  "## Strategy\n\n"
247
  "1. If user provides a URL/file, use read_image first to load it\n"
248
  "2. Use generate_image ONLY when explicitly asked to generate/create an image β€” "
 
250
  "3. Use edit_image to transform existing ones\n"
251
  "4. Write detailed prompts. Describe what you see and iterate if needed.\n\n"
252
  "## CRITICAL: You MUST provide a <result> tag\n\n"
253
+ "Use the exact figure reference from tool output to embed figures in your result.\n"
254
+ "Figure references are self-closing tags like <figure_T4_2> β€” do NOT add a closing </figure_T4_2> tag.\n\n"
255
  "<result>\n"
256
  "Here's the comic version of your image:\n\n"
257
+ "<figure_T4_2>\n"
258
  "</result>\n"
259
  ),
260
  "tool": {
backend/code.py CHANGED
@@ -174,7 +174,7 @@ def download_files_from_sandbox(sbx: Sandbox, files: List[Dict], files_root: str
174
  return "\n".join(results)
175
 
176
 
177
- def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox, files_root: str = None, extra_params: Optional[Dict] = None, abort_event=None, multimodal: bool = False, tab_id: str = "0"):
178
  """
179
  Stream code execution results
180
 
@@ -195,7 +195,10 @@ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox
195
  done = False
196
  figure_counter = 0 # Track figure numbers
197
  figure_prefix = f"figure_T{tab_id}_"
198
- figure_data = {} # Store figure data by name for result rendering
 
 
 
199
  has_result = False
200
  debug_call_number = 0
201
 
 
174
  return "\n".join(results)
175
 
176
 
177
+ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox, files_root: str = None, extra_params: Optional[Dict] = None, abort_event=None, multimodal: bool = False, tab_id: str = "0", figure_store: Optional[Dict[str, dict]] = None):
178
  """
179
  Stream code execution results
180
 
 
195
  done = False
196
  figure_counter = 0 # Track figure numbers
197
  figure_prefix = f"figure_T{tab_id}_"
198
+ # Use shared global store if provided, otherwise create local one
199
+ if figure_store is None:
200
+ figure_store = {}
201
+ figure_data = figure_store # Alias for clarity in this function
202
  has_result = False
203
  debug_call_number = 0
204
 
backend/image.py CHANGED
@@ -4,8 +4,8 @@ Image agent backend β€” multimodal agent with HuggingFace image generation tools
4
  Uses the same tool-calling loop pattern as agent.py:
5
  LLM call β†’ parse tool_calls β†’ execute β†’ update history β†’ repeat
6
 
7
- Key difference: maintains an image store (Dict[str, str]) mapping names like
8
- "image_1" to base64 data, so the VLM can reference images across tool calls
9
  without passing huge base64 strings in arguments.
10
  """
11
  import base64
@@ -61,7 +61,7 @@ def resize_image_for_vlm(base64_png: str) -> str:
61
  MAX_TURNS = 20
62
 
63
 
64
- def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, image_counter: int, default_gen_model: str = None, default_edit_model: str = None, files_root: str = None, image_prefix: str = "image_") -> dict:
65
  """
66
  Execute a tool by name and return result dict.
67
 
@@ -81,7 +81,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
81
  if base64_png:
82
  image_counter += 1
83
  name = f"{image_prefix}{image_counter}"
84
- image_store[name] = base64_png
85
  return {
86
  "content": f"Image generated successfully as '{name}'. The image is attached.",
87
  "image": base64_png,
@@ -104,7 +104,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
104
  # Resolve source: image store reference, URL, or local path
105
  source_bytes = None
106
  if source in image_store:
107
- source_bytes = base64.b64decode(image_store[source])
108
  else:
109
  source_base64 = execute_read_image(source, files_root=files_root)
110
  if source_base64:
@@ -112,7 +112,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
112
 
113
  if source_bytes is None:
114
  return {
115
- "content": f"Could not resolve image source '{source}'. Use a URL or a reference from a previous tool call (e.g., 'image_1').",
116
  "display": {"type": "edit_error", "source": source},
117
  "image_counter": image_counter,
118
  }
@@ -122,7 +122,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
122
  if base64_png:
123
  image_counter += 1
124
  name = f"{image_prefix}{image_counter}"
125
- image_store[name] = base64_png
126
  return {
127
  "content": f"Image edited successfully as '{name}'. The image is attached.",
128
  "image": base64_png,
@@ -148,7 +148,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
148
  # Resolve source from image store or URL
149
  image_data = None
150
  if source in image_store:
151
- image_data = base64.b64decode(image_store[source])
152
  else:
153
  source_base64 = execute_read_image(source, files_root=files_root)
154
  if source_base64:
@@ -156,7 +156,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
156
 
157
  if image_data is None:
158
  return {
159
- "content": f"Could not resolve image source '{source}'. Use a reference (e.g., 'image_1') or a URL.",
160
  "display": {"type": "save_error", "source": source},
161
  "image_counter": image_counter,
162
  }
@@ -187,7 +187,7 @@ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, i
187
  if base64_png:
188
  image_counter += 1
189
  name = f"{image_prefix}{image_counter}"
190
- image_store[name] = base64_png
191
  return {
192
  "content": f"Image loaded successfully as '{name}'. The image is attached.",
193
  "image": base64_png,
@@ -227,7 +227,7 @@ def stream_image_execution(
227
  files_root: str = None,
228
  multimodal: bool = False,
229
  tab_id: str = "0",
230
- image_store: Optional[Dict[str, str]] = None,
231
  image_counter: int = 0,
232
  ):
233
  """
@@ -239,7 +239,7 @@ def stream_image_execution(
239
  - tool_start: { tool, args }
240
  - tool_result: { tool, result, image? }
241
  - result_preview: { content }
242
- - result: { content, images? }
243
  - generating: {}
244
  - retry: { attempt, max_attempts, delay, message }
245
  - error: { content }
@@ -249,7 +249,7 @@ def stream_image_execution(
249
 
250
  turns = 0
251
  done = False
252
- image_prefix = f"image_T{tab_id}_"
253
 
254
  # Use provided persistent store, or create a local one as fallback
255
  if image_store is None:
@@ -302,8 +302,8 @@ def stream_image_execution(
302
 
303
  # Send result preview
304
  if result_content:
305
- # Include image store so frontend can resolve <image_N> references
306
- yield {"type": "result_preview", "content": result_content, "images": image_store}
307
 
308
  # --- Handle tool calls ---
309
  if tool_calls:
@@ -386,7 +386,8 @@ def stream_image_execution(
386
 
387
  # Send result if found
388
  if result_content:
389
- yield {"type": "result", "content": result_content, "images": image_store}
 
390
  result_sent = True
391
 
392
  # Signal between-turn processing
@@ -397,14 +398,16 @@ def stream_image_execution(
397
  if not result_sent:
398
  from .agents import nudge_for_result
399
  nudge_produced_result = False
400
- for event in nudge_for_result(client, model, messages, extra_params=extra_params, extra_result_data={"images": image_store}, call_number=debug_call_number):
 
401
  yield event
402
  if event.get("type") == "result":
403
  nudge_produced_result = True
404
 
405
- # Final fallback: synthesize a result with all images
406
  if not nudge_produced_result:
407
  fallback_parts = [f"<{name}>" for name in image_store]
408
- yield {"type": "result", "content": "\n\n".join(fallback_parts), "images": image_store}
 
409
 
410
  yield {"type": "done"}
 
4
  Uses the same tool-calling loop pattern as agent.py:
5
  LLM call → parse tool_calls → execute → update history → repeat
6
 
7
+ Key difference: maintains a figure store (Dict[str, dict]) mapping names like
8
+ "figure_T1_1" to {"type", "data"} dicts holding base64 data, so the VLM can reference images across tool calls
9
  without passing huge base64 strings in arguments.
10
  """
11
  import base64
 
61
  MAX_TURNS = 20
62
 
63
 
64
+ def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, image_counter: int, default_gen_model: str = None, default_edit_model: str = None, files_root: str = None, image_prefix: str = "figure_") -> dict:
65
  """
66
  Execute a tool by name and return result dict.
67
 
 
81
  if base64_png:
82
  image_counter += 1
83
  name = f"{image_prefix}{image_counter}"
84
+ image_store[name] = {"type": "png", "data": base64_png}
85
  return {
86
  "content": f"Image generated successfully as '{name}'. The image is attached.",
87
  "image": base64_png,
 
104
  # Resolve source: image store reference, URL, or local path
105
  source_bytes = None
106
  if source in image_store:
107
+ source_bytes = base64.b64decode(image_store[source]["data"])
108
  else:
109
  source_base64 = execute_read_image(source, files_root=files_root)
110
  if source_base64:
 
112
 
113
  if source_bytes is None:
114
  return {
115
+ "content": f"Could not resolve image source '{source}'. Use a URL or a reference from a previous tool call (e.g., 'figure_T1_1').",
116
  "display": {"type": "edit_error", "source": source},
117
  "image_counter": image_counter,
118
  }
 
122
  if base64_png:
123
  image_counter += 1
124
  name = f"{image_prefix}{image_counter}"
125
+ image_store[name] = {"type": "png", "data": base64_png}
126
  return {
127
  "content": f"Image edited successfully as '{name}'. The image is attached.",
128
  "image": base64_png,
 
148
  # Resolve source from image store or URL
149
  image_data = None
150
  if source in image_store:
151
+ image_data = base64.b64decode(image_store[source]["data"])
152
  else:
153
  source_base64 = execute_read_image(source, files_root=files_root)
154
  if source_base64:
 
156
 
157
  if image_data is None:
158
  return {
159
+ "content": f"Could not resolve image source '{source}'. Use a reference (e.g., 'figure_T1_1') or a URL.",
160
  "display": {"type": "save_error", "source": source},
161
  "image_counter": image_counter,
162
  }
 
187
  if base64_png:
188
  image_counter += 1
189
  name = f"{image_prefix}{image_counter}"
190
+ image_store[name] = {"type": "png", "data": base64_png}
191
  return {
192
  "content": f"Image loaded successfully as '{name}'. The image is attached.",
193
  "image": base64_png,
 
227
  files_root: str = None,
228
  multimodal: bool = False,
229
  tab_id: str = "0",
230
+ image_store: Optional[Dict[str, dict]] = None,
231
  image_counter: int = 0,
232
  ):
233
  """
 
239
  - tool_start: { tool, args }
240
  - tool_result: { tool, result, image? }
241
  - result_preview: { content }
242
+ - result: { content, figures? }
243
  - generating: {}
244
  - retry: { attempt, max_attempts, delay, message }
245
  - error: { content }
 
249
 
250
  turns = 0
251
  done = False
252
+ image_prefix = f"figure_T{tab_id}_"
253
 
254
  # Use provided persistent store, or create a local one as fallback
255
  if image_store is None:
 
302
 
303
  # Send result preview
304
  if result_content:
305
+ figures = dict(image_store)
306
+ yield {"type": "result_preview", "content": result_content, "figures": figures}
307
 
308
  # --- Handle tool calls ---
309
  if tool_calls:
 
386
 
387
  # Send result if found
388
  if result_content:
389
+ figures = dict(image_store)
390
+ yield {"type": "result", "content": result_content, "figures": figures}
391
  result_sent = True
392
 
393
  # Signal between-turn processing
 
398
  if not result_sent:
399
  from .agents import nudge_for_result
400
  nudge_produced_result = False
401
+ figures = dict(image_store)
402
+ for event in nudge_for_result(client, model, messages, extra_params=extra_params, extra_result_data={"figures": figures}, call_number=debug_call_number):
403
  yield event
404
  if event.get("type") == "result":
405
  nudge_produced_result = True
406
 
407
+ # Final fallback: synthesize a result with all figures
408
  if not nudge_produced_result:
409
  fallback_parts = [f"<{name}>" for name in image_store]
410
+ figures = dict(image_store)
411
+ yield {"type": "result", "content": "\n\n".join(fallback_parts), "figures": figures}
412
 
413
  yield {"type": "done"}
backend/main.py CHANGED
@@ -174,10 +174,13 @@ SANDBOX_TIMEOUT = 300
174
  # Structure: {tab_id: [messages...]}
175
  CONVERSATION_HISTORY: Dict[str, List[Dict]] = {}
176
 
177
- # Image stores per tab (persistent across requests so re-entry works without multimodal)
178
- # Structure: {tab_id: {image_name: base64_png, ...}}
179
- IMAGE_STORES: Dict[str, Dict[str, str]] = {}
180
- IMAGE_COUNTERS: Dict[str, int] = {}
 
 
 
181
 
182
  # Multi-user isolation
183
  MULTI_USER = False
@@ -408,13 +411,28 @@ async def _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, se
408
  system_prompt = get_system_prompt("code", frontend_context)
409
  full_messages = [{"role": "system", "content": system_prompt}] + messages
410
 
 
 
 
 
411
  async for chunk in _stream_sync_generator(
412
  stream_code_execution, client, model, full_messages, sbx,
413
  files_root=files_root or FILES_ROOT, extra_params=extra_params,
414
- abort_event=abort_event, multimodal=multimodal, tab_id=tab_id
 
415
  ):
416
  yield chunk
417
 
 
 
 
 
 
 
 
 
 
 
418
  except Exception as e:
419
  import traceback
420
  error_message = f"Code execution error: {str(e)}\n{traceback.format_exc()}"
@@ -442,7 +460,8 @@ async def _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, se
442
  async for chunk in _stream_sync_generator(
443
  stream_code_execution, client, model, full_messages, sbx,
444
  files_root=files_root or FILES_ROOT, extra_params=extra_params,
445
- abort_event=abort_event, multimodal=multimodal, tab_id=tab_id
 
446
  ):
447
  yield chunk
448
 
@@ -646,11 +665,9 @@ async def _stream_image_agent_inner(messages, endpoint, token, model, hf_token,
646
  yield f"data: {json.dumps({'type': 'error', 'content': 'HuggingFace token required for image generation. Please configure in settings or set HF_TOKEN environment variable.'})}\n\n"
647
  return
648
 
649
- # Get or create persistent image store for this tab
650
- if tab_id not in IMAGE_STORES:
651
- IMAGE_STORES[tab_id] = {}
652
- if tab_id not in IMAGE_COUNTERS:
653
- IMAGE_COUNTERS[tab_id] = 0
654
 
655
  try:
656
  client = OpenAI(base_url=endpoint, api_key=token)
@@ -663,18 +680,20 @@ async def _stream_image_agent_inner(messages, endpoint, token, model, hf_token,
663
  extra_params=extra_params, abort_event=abort_event,
664
  files_root=files_root, multimodal=multimodal,
665
  tab_id=tab_id,
666
- image_store=IMAGE_STORES[tab_id],
667
- image_counter=IMAGE_COUNTERS[tab_id],
668
  ):
669
  yield chunk
670
 
671
- # Derive counter from store keys (each image_T{id}_{N} has a number)
 
672
  max_counter = 0
673
- for name in IMAGE_STORES[tab_id]:
674
- m = re.search(r'_(\d+)$', name)
675
- if m:
676
- max_counter = max(max_counter, int(m.group(1)))
677
- IMAGE_COUNTERS[tab_id] = max_counter
 
678
 
679
  except Exception as e:
680
  import traceback
@@ -1320,14 +1339,15 @@ def select_session(session_name: str, user_id: str = '') -> bool:
1320
  keys_to_remove = [k for k in CONVERSATION_HISTORY if k.startswith(prefix)]
1321
  for k in keys_to_remove:
1322
  del CONVERSATION_HISTORY[k]
1323
- for k in [k for k in IMAGE_STORES if k.startswith(prefix)]:
1324
- del IMAGE_STORES[k]
1325
- for k in [k for k in IMAGE_COUNTERS if k.startswith(prefix)]:
1326
- del IMAGE_COUNTERS[k]
 
1327
  else:
1328
  CONVERSATION_HISTORY.clear()
1329
- IMAGE_STORES.clear()
1330
- IMAGE_COUNTERS.clear()
1331
 
1332
  return True
1333
 
 
174
  # Structure: {tab_id: [messages...]}
175
  CONVERSATION_HISTORY: Dict[str, List[Dict]] = {}
176
 
177
+ # Figure store (persistent across requests so re-entry works without multimodal).
178
+ # Values are {"type": ..., "data": <base64>} dicts.
179
+ # Global figure store: all agents write here so cross-agent references work.
180
+ # Keys are namespaced like "figure_T{tab}_{N}" so there are no collisions.
181
+ FIGURE_STORE: Dict[str, dict] = {}
182
+ # Per-tab counters to track the next figure number for each tab
183
+ FIGURE_COUNTERS: Dict[str, int] = {}
184
 
185
  # Multi-user isolation
186
  MULTI_USER = False
 
411
  system_prompt = get_system_prompt("code", frontend_context)
412
  full_messages = [{"role": "system", "content": system_prompt}] + messages
413
 
414
+ # Ensure per-tab counter exists
415
+ if tab_id not in FIGURE_COUNTERS:
416
+ FIGURE_COUNTERS[tab_id] = 0
417
+
418
  async for chunk in _stream_sync_generator(
419
  stream_code_execution, client, model, full_messages, sbx,
420
  files_root=files_root or FILES_ROOT, extra_params=extra_params,
421
+ abort_event=abort_event, multimodal=multimodal, tab_id=tab_id,
422
+ figure_store=FIGURE_STORE,
423
  ):
424
  yield chunk
425
 
426
+ # Derive counter from store keys for this tab's prefix
427
+ prefix = f"figure_T{tab_id}_"
428
+ max_counter = 0
429
+ for name in FIGURE_STORE:
430
+ if name.startswith(prefix):
431
+ m = re.search(r'_(\d+)$', name)
432
+ if m:
433
+ max_counter = max(max_counter, int(m.group(1)))
434
+ FIGURE_COUNTERS[tab_id] = max_counter
435
+
436
  except Exception as e:
437
  import traceback
438
  error_message = f"Code execution error: {str(e)}\n{traceback.format_exc()}"
 
460
  async for chunk in _stream_sync_generator(
461
  stream_code_execution, client, model, full_messages, sbx,
462
  files_root=files_root or FILES_ROOT, extra_params=extra_params,
463
+ abort_event=abort_event, multimodal=multimodal, tab_id=tab_id,
464
+ figure_store=FIGURE_STORE,
465
  ):
466
  yield chunk
467
 
 
665
  yield f"data: {json.dumps({'type': 'error', 'content': 'HuggingFace token required for image generation. Please configure in settings or set HF_TOKEN environment variable.'})}\n\n"
666
  return
667
 
668
+ # Ensure per-tab counter exists
669
+ if tab_id not in FIGURE_COUNTERS:
670
+ FIGURE_COUNTERS[tab_id] = 0
 
 
671
 
672
  try:
673
  client = OpenAI(base_url=endpoint, api_key=token)
 
680
  extra_params=extra_params, abort_event=abort_event,
681
  files_root=files_root, multimodal=multimodal,
682
  tab_id=tab_id,
683
+ image_store=FIGURE_STORE,
684
+ image_counter=FIGURE_COUNTERS[tab_id],
685
  ):
686
  yield chunk
687
 
688
+ # Derive counter from store keys for this tab's prefix
689
+ prefix = f"figure_T{tab_id}_"
690
  max_counter = 0
691
+ for name in FIGURE_STORE:
692
+ if name.startswith(prefix):
693
+ m = re.search(r'_(\d+)$', name)
694
+ if m:
695
+ max_counter = max(max_counter, int(m.group(1)))
696
+ FIGURE_COUNTERS[tab_id] = max_counter
697
 
698
  except Exception as e:
699
  import traceback
 
1339
  keys_to_remove = [k for k in CONVERSATION_HISTORY if k.startswith(prefix)]
1340
  for k in keys_to_remove:
1341
  del CONVERSATION_HISTORY[k]
1342
+ # Clear figure store entries belonging to this user's tabs
1343
+ for k in [k for k in FIGURE_STORE if k.startswith(f"figure_T{prefix}")]:
1344
+ del FIGURE_STORE[k]
1345
+ for k in [k for k in FIGURE_COUNTERS if k.startswith(prefix)]:
1346
+ del FIGURE_COUNTERS[k]
1347
  else:
1348
  CONVERSATION_HISTORY.clear()
1349
+ FIGURE_STORE.clear()
1350
+ FIGURE_COUNTERS.clear()
1351
 
1352
  return True
1353
 
frontend/streaming.js CHANGED
@@ -173,7 +173,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
173
  // Still generating - no action needed
174
 
175
  } else if (data.type === 'result') {
176
- // References are already globally namespaced by the backend (e.g., figure_T3_1, image_T3_1)
177
  const resultText = data.content || '';
178
 
179
  // Populate global registry
@@ -184,16 +184,9 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
184
  }
185
  }
186
  }
187
- if (data.images) {
188
- for (const [name, imgBase64] of Object.entries(data.images)) {
189
- if (new RegExp(`</?${name}>`, 'i').test(resultText)) {
190
- globalFigureRegistry[name] = { type: 'png', data: imgBase64 };
191
- }
192
- }
193
- }
194
 
195
  // Agent result - update command center widget
196
- updateActionWidgetWithResult(tabId, resultText, data.figures || {}, data.images || {});
197
 
198
  } else if (data.type === 'result_preview') {
199
  // Show result preview
@@ -219,19 +212,6 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
219
  }
220
  }
221
 
222
- // Handle <image_N> references from image agent
223
- if (data.images) {
224
- for (const [imageName, imageBase64] of Object.entries(data.images)) {
225
- const placeholderId = `%%%IMAGE_${imageName}%%%`;
226
- figurePlaceholders[placeholderId] = { type: 'png', data: imageBase64, isGenerated: true };
227
-
228
- const pairedTag = new RegExp(`<${imageName}></${imageName}>`, 'gi');
229
- previewContent = previewContent.replace(pairedTag, `\n\n${placeholderId}\n\n`);
230
- const singleTag = new RegExp(`</?${imageName}>`, 'gi');
231
- previewContent = previewContent.replace(singleTag, `\n\n${placeholderId}\n\n`);
232
- }
233
- }
234
-
235
  // Process markdown
236
  let html = parseMarkdown(previewContent);
237
 
@@ -647,7 +627,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
647
  scrollChatToBottom(chatContainer);
648
 
649
  // Propagate error to parent action widget
650
- updateActionWidgetWithResult(tabId, `Error: ${data.content}`, {}, {});
651
  const errorWidget = actionWidgets[tabId];
652
  if (errorWidget) {
653
  const doneIndicator = errorWidget.querySelector('.done-indicator');
@@ -679,7 +659,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
679
  chatContainer.appendChild(resultDiv);
680
 
681
  // Send abort result to parent action widget (so command center knows it was aborted)
682
- updateActionWidgetWithResult(tabId, abortResultText, {}, {});
683
 
684
  // Override the done indicator to show Γ— instead of βœ“
685
  const widget = actionWidgets[tabId];
@@ -946,7 +926,7 @@ function showActionWidget(chatContainer, action, message, targetTabId, taskId =
946
  actionWidgets[targetTabId] = widget;
947
  }
948
 
949
- async function updateActionWidgetWithResult(tabId, resultContent, figures, images) {
950
  const widget = actionWidgets[tabId];
951
  if (!widget) return;
952
 
@@ -980,19 +960,6 @@ async function updateActionWidgetWithResult(tabId, resultContent, figures, image
980
  }
981
  }
982
 
983
- // Handle <image_N> references from image agent
984
- if (images) {
985
- for (const [imageName, imageBase64] of Object.entries(images)) {
986
- const placeholderId = `%%%IMAGE_${imageName}%%%`;
987
- figurePlaceholders[placeholderId] = { type: 'png', data: imageBase64 };
988
-
989
- const pairedTag = new RegExp(`<${imageName}></${imageName}>`, 'gi');
990
- processedContent = processedContent.replace(pairedTag, `\n\n${placeholderId}\n\n`);
991
- const singleTag = new RegExp(`</?${imageName}>`, 'gi');
992
- processedContent = processedContent.replace(singleTag, `\n\n${placeholderId}\n\n`);
993
- }
994
- }
995
-
996
  // Process markdown
997
  let html = parseMarkdown(processedContent);
998
 
 
173
  // Still generating - no action needed
174
 
175
  } else if (data.type === 'result') {
176
+ // References are globally namespaced by the backend (e.g., figure_T3_1)
177
  const resultText = data.content || '';
178
 
179
  // Populate global registry
 
184
  }
185
  }
186
  }
 
 
 
 
 
 
 
187
 
188
  // Agent result - update command center widget
189
+ updateActionWidgetWithResult(tabId, resultText, data.figures || {});
190
 
191
  } else if (data.type === 'result_preview') {
192
  // Show result preview
 
212
  }
213
  }
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  // Process markdown
216
  let html = parseMarkdown(previewContent);
217
 
 
627
  scrollChatToBottom(chatContainer);
628
 
629
  // Propagate error to parent action widget
630
+ updateActionWidgetWithResult(tabId, `Error: ${data.content}`, {});
631
  const errorWidget = actionWidgets[tabId];
632
  if (errorWidget) {
633
  const doneIndicator = errorWidget.querySelector('.done-indicator');
 
659
  chatContainer.appendChild(resultDiv);
660
 
661
  // Send abort result to parent action widget (so command center knows it was aborted)
662
+ updateActionWidgetWithResult(tabId, abortResultText, {});
663
 
664
  // Override the done indicator to show Γ— instead of βœ“
665
  const widget = actionWidgets[tabId];
 
926
  actionWidgets[targetTabId] = widget;
927
  }
928
 
929
+ async function updateActionWidgetWithResult(tabId, resultContent, figures) {
930
  const widget = actionWidgets[tabId];
931
  if (!widget) return;
932
 
 
960
  }
961
  }
962
 
 
 
 
 
 
 
 
 
 
 
 
 
 
963
  // Process markdown
964
  let html = parseMarkdown(processedContent);
965
 
frontend/workspace.js CHANGED
@@ -48,6 +48,34 @@ function restoreWorkspace(workspace) {
48
  }
49
  }
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  // Switch to the active tab
52
  if (workspace.activeTabId !== undefined) {
53
  switchToTab(workspace.activeTabId);
 
48
  }
49
  }
50
 
51
+ // Restore globalFigureRegistry from saved code-cell images
52
+ // so that <figure_T1_1> tags in results resolve after reload
53
+ for (const tabData of tabs) {
54
+ for (const msg of (tabData.messages || [])) {
55
+ if (msg.type === 'code-cell' && msg.images) {
56
+ for (const img of msg.images) {
57
+ if (img.name && img.src) {
58
+ // Parse data URL: "data:image/png;base64,..." -> {type: "png", data: "..."}
59
+ const m = img.src.match(/^data:image\/(\w+);base64,(.+)$/);
60
+ if (m) {
61
+ globalFigureRegistry[img.name] = { type: m[1], data: m[2] };
62
+ }
63
+ }
64
+ }
65
+ }
66
+ }
67
+ }
68
+
69
+ // Re-resolve any figure refs in rendered HTML now that the registry is populated
70
+ if (Object.keys(globalFigureRegistry).length > 0) {
71
+ document.querySelectorAll('.action-widget-result-section .section-content, .result-preview-content, .result-content').forEach(el => {
72
+ const resolved = resolveGlobalFigureRefs(el.innerHTML);
73
+ if (resolved !== el.innerHTML) {
74
+ el.innerHTML = resolved;
75
+ }
76
+ });
77
+ }
78
+
79
  // Switch to the active tab
80
  if (workspace.activeTabId !== undefined) {
81
  switchToTab(workspace.activeTabId);