lvwerra HF Staff Claude Opus 4.6 committed on
Commit
0d3d041
·
1 Parent(s): 8e4396d

Debug panel: full LLM call parity, show_html tool, drag-drop file upload

Browse files

- Centralize LLM calls into call_llm() generator in agents.py with retries and debug events
- Emit debug_call_input/output SSE events for every LLM turn across all agents
- Frontend accumulates debug history per tab with INPUT/OUTPUT sections
- Replace base64 images with hoverable thumbnail placeholders in debug JSON
- Persist debug history in workspace save/restore
- Add show_html direct tool for command center (iframe srcdoc rendering)
- Add drag & drop file upload to files panel (folder-aware targeting)
- Remove old MESSAGE_HISTORY polling, refresh button; debug updates in real-time
- Reduce debug panel width to 450px

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

backend/agent.py CHANGED
@@ -7,7 +7,6 @@ Uses the same tool-calling loop pattern as code.py:
7
  import json
8
  import logging
9
  import re
10
- import time
11
  from typing import List, Dict, Optional
12
 
13
  from .tools import (
@@ -22,26 +21,6 @@ logger = logging.getLogger(__name__)
22
  TOOLS = [web_search, read_url]
23
 
24
  MAX_TURNS = 20
25
- MAX_RETRIES = 3
26
- RETRY_DELAYS = [2, 5, 10]
27
-
28
-
29
- def parse_llm_error(error: Exception) -> dict:
30
- """Parse LLM error to extract useful message for frontend."""
31
- error_str = str(error)
32
- try:
33
- json_match = re.search(r'\{.*\}', error_str)
34
- if json_match:
35
- error_data = json.loads(json_match.group())
36
- return {
37
- "message": error_data.get("message", error_str),
38
- "retryable": error_data.get("type") == "too_many_requests_error" or "429" in error_str
39
- }
40
- except:
41
- pass
42
-
43
- retryable = any(x in error_str.lower() for x in ["429", "rate limit", "too many requests", "overloaded"])
44
- return {"message": error_str, "retryable": retryable}
45
 
46
 
47
  def execute_tool(tool_name: str, args: dict, serper_key: str) -> dict:
@@ -115,9 +94,12 @@ def stream_agent_execution(
115
  - error: { content }
116
  - done: {}
117
  """
 
 
118
  turns = 0
119
  done = False
120
  has_result = False
 
121
 
122
  while not done and turns < MAX_TURNS:
123
  # Check abort before each turn
@@ -127,47 +109,18 @@ def stream_agent_execution(
127
 
128
  turns += 1
129
 
130
- # --- LLM call with retry ---
131
  response = None
132
- last_error = None
133
-
134
- for attempt in range(MAX_RETRIES):
135
- try:
136
- call_params = {
137
- "messages": messages,
138
- "model": model,
139
- "tools": TOOLS,
140
- "tool_choice": "auto",
141
- }
142
- if extra_params:
143
- call_params["extra_body"] = extra_params
144
- response = client.chat.completions.create(**call_params)
145
- break
146
- except Exception as e:
147
- last_error = e
148
- error_info = parse_llm_error(e)
149
- if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
150
- delay = RETRY_DELAYS[attempt]
151
- yield {
152
- "type": "retry",
153
- "attempt": attempt + 1,
154
- "max_attempts": MAX_RETRIES,
155
- "delay": delay,
156
- "message": error_info["message"],
157
- }
158
- if abort_event:
159
- abort_event.wait(delay)
160
- if abort_event.is_set():
161
- yield {"type": "aborted"}
162
- return
163
- else:
164
- time.sleep(delay)
165
- else:
166
- yield {"type": "error", "content": error_info["message"]}
167
  return
168
 
169
  if response is None:
170
- yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
171
  return
172
 
173
  # --- Parse response ---
 
7
  import json
8
  import logging
9
  import re
 
10
  from typing import List, Dict, Optional
11
 
12
  from .tools import (
 
21
  TOOLS = [web_search, read_url]
22
 
23
  MAX_TURNS = 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  def execute_tool(tool_name: str, args: dict, serper_key: str) -> dict:
 
94
  - error: { content }
95
  - done: {}
96
  """
97
+ from .agents import call_llm
98
+
99
  turns = 0
100
  done = False
101
  has_result = False
102
+ debug_call_number = 0
103
 
104
  while not done and turns < MAX_TURNS:
105
  # Check abort before each turn
 
109
 
110
  turns += 1
111
 
112
+ # LLM call with retries and debug events
113
  response = None
114
+ for event in call_llm(client, model, messages, tools=TOOLS, extra_params=extra_params, abort_event=abort_event, call_number=debug_call_number):
115
+ if "_response" in event:
116
+ response = event["_response"]
117
+ debug_call_number = event["_call_number"]
118
+ else:
119
+ yield event
120
+ if event.get("type") in ("error", "aborted"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  return
122
 
123
  if response is None:
 
124
  return
125
 
126
  # --- Parse response ---
backend/agents.py CHANGED
@@ -388,6 +388,144 @@ def get_system_prompt(agent_key: str) -> str:
388
  return prompt
389
 
390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  def nudge_for_result(client, model, messages, extra_params=None, extra_result_data=None):
392
  """Nudge an agent that finished without <result> tags to produce one.
393
 
 
388
  return prompt
389
 
390
 
391
def serialize_llm_response(response):
    """Convert the first choice of a chat-completion response into a plain dict.

    The result always has "role" ("assistant") and "content" (the message
    text, or "" when the message carries none). When the message has tool
    calls, a "tool_calls" list is added with one
    {"id", "type", "function": {"name", "arguments"}} entry per call.
    Only the first choice of the response is read.
    """
    message = response.choices[0].message

    def _dump_tool_call(call):
        # Flatten one tool-call object into JSON-friendly primitives.
        fn = call.function
        return {
            "id": call.id,
            "type": "function",
            "function": {
                "name": fn.name,
                "arguments": fn.arguments,
            },
        }

    serialized = {"role": "assistant", "content": message.content or ""}
    calls = message.tool_calls
    if calls:
        serialized["tool_calls"] = [_dump_tool_call(call) for call in calls]
    return serialized
409
+
410
+
411
+ MAX_RETRIES = 3
412
+ RETRY_DELAYS = [2, 5, 10]
413
+
414
+
415
def parse_llm_error(error: Exception) -> dict:
    """Parse an LLM client error into a frontend-friendly summary.

    The first ``{...}`` span found in ``str(error)`` is decoded as a
    structured payload. Strict JSON is tried first, then a Python literal,
    because error strings may embed a single-quoted ``repr`` of the body
    (e.g. ``Error code: 429 - {'message': "...", 'type': '...'}``), which
    ``json.loads`` rejects.

    Args:
        error: The exception raised by the LLM client call.

    Returns:
        A dict with:
          - "message": the payload's "message" if one was decoded, otherwise
            the full error string.
          - "type": the payload's "type", or "unknown_error".
          - "retryable": True when the type is "too_many_requests_error" or
            the error text mentions a known transient condition (429, rate
            limit, too many requests, overloaded, high traffic).
    """
    import ast as _ast
    import json as _json
    import re as _re

    error_str = str(error)
    lowered = error_str.lower()
    # One retry rule for both the structured and the plain-text path, so
    # decoding the payload never makes an error *less* retryable than before.
    keyword_retryable = any(
        marker in lowered
        for marker in ["429", "rate limit", "too many requests", "overloaded", "high traffic"]
    )

    error_data = None
    # DOTALL: a pretty-printed payload may span several lines.
    payload_match = _re.search(r'\{.*\}', error_str, _re.DOTALL)
    if payload_match:
        payload = payload_match.group()
        # literal_eval only evaluates literals, so it is safe on untrusted text.
        for loader in (_json.loads, _ast.literal_eval):
            try:
                candidate = loader(payload)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                continue
            if isinstance(candidate, dict):
                error_data = candidate
                break

    if error_data is not None:
        # Some providers wrap the details as {"error": {...}} — unwrap if so.
        nested = error_data.get("error")
        if isinstance(nested, dict):
            error_data = nested
        error_type = error_data.get("type", "unknown_error")
        return {
            "message": error_data.get("message", error_str),
            "type": error_type,
            "retryable": error_type == "too_many_requests_error" or keyword_retryable,
        }

    return {"message": error_str, "type": "unknown_error", "retryable": keyword_retryable}
434
+
435
+
436
def call_llm(client, model, messages, tools=None, extra_params=None, abort_event=None, call_number=0):
    """Run one chat-completion call with retries, emitting debug events.

    Generator that yields dicts:
      - {"type": "debug_call_input", "call_number", "messages"} once, before
        the first attempt. ``messages`` is a deep copy, so later mutation of
        the caller's list does not alter the emitted snapshot.
      - {"type": "debug_call_output", ...} after a successful call (with
        "response") and after every failed attempt (with "error",
        "attempt", "retryable").
      - {"type": "retry", ...} before waiting on a retryable failure.
      - {"type": "error", "content"} when the error is not retryable or the
        attempts are exhausted.
      - {"type": "aborted"} if ``abort_event`` is set during a retry wait.
      - {"_response": response, "_call_number": n} on success. This is the
        hand-off to the caller, not an SSE event.

    The caller should forward every dict to the SSE stream except the one
    containing "_response", and read the incremented counter from its
    "_call_number" field.

    Args:
        client: Object exposing ``chat.completions.create(**params)``.
        model: Model name passed through to the API.
        messages: Chat messages sent as-is on each attempt.
        tools: Optional tool definitions; when truthy, sent with
            ``tool_choice="auto"``.
        extra_params: Optional dict forwarded as ``extra_body``.
        abort_event: Optional object with ``wait(timeout)`` and ``is_set()``
            (presumably a ``threading.Event``), used to cut a retry wait short.
        call_number: Number of calls made so far; this call is numbered
            ``call_number + 1``.
    """
    import copy
    import time

    call_number += 1

    # Emit input before the call
    yield {
        "type": "debug_call_input",
        "call_number": call_number,
        "messages": copy.deepcopy(messages),
    }

    # Request parameters do not change between attempts; build them once.
    call_params = {
        "messages": messages,
        "model": model,
    }
    if tools:
        call_params["tools"] = tools
        call_params["tool_choice"] = "auto"
    if extra_params:
        call_params["extra_body"] = extra_params

    last_error = None

    for attempt in range(MAX_RETRIES):
        try:
            response = client.chat.completions.create(**call_params)
            serialized = serialize_llm_response(response)
        except Exception as e:
            last_error = e
            error_info = parse_llm_error(e)

            # Emit debug output for every failed attempt
            yield {
                "type": "debug_call_output",
                "call_number": call_number,
                "error": error_info["message"],
                "attempt": attempt + 1,
                "retryable": error_info["retryable"],
            }

            if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
                # Reuse the last configured delay if MAX_RETRIES outgrows the
                # delay table, instead of raising IndexError mid-recovery.
                if RETRY_DELAYS:
                    delay = RETRY_DELAYS[min(attempt, len(RETRY_DELAYS) - 1)]
                else:
                    delay = 0
                yield {
                    "type": "retry",
                    "attempt": attempt + 1,
                    "max_attempts": MAX_RETRIES,
                    "delay": delay,
                    "message": error_info["message"],
                    "error_type": error_info.get("type", "unknown_error"),
                }
                if abort_event:
                    # wait() returns early if the event is set while waiting
                    abort_event.wait(delay)
                    if abort_event.is_set():
                        yield {"type": "aborted"}
                        return
                else:
                    time.sleep(delay)
                continue

            # Non-retryable error, or the final attempt failed
            yield {"type": "error", "content": error_info["message"]}
            return

        # Success: the yields sit outside the try so an exception thrown into
        # the generator by its consumer is never mistaken for an LLM failure.
        yield {
            "type": "debug_call_output",
            "call_number": call_number,
            "response": serialized,
        }
        # Yield the actual response object for the caller
        yield {"_response": response, "_call_number": call_number}
        return

    # Only reachable when MAX_RETRIES allows no attempts at all
    yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
527
+
528
+
529
  def nudge_for_result(client, model, messages, extra_params=None, extra_result_data=None):
530
  """Nudge an agent that finished without <result> tags to produce one.
531
 
backend/code.py CHANGED
@@ -16,37 +16,6 @@ logger = logging.getLogger(__name__)
16
  TOOLS = [execute_code, upload_files, download_files]
17
 
18
  MAX_TURNS = 40
19
- MAX_RETRIES = 3 # Maximum retries for LLM calls
20
- RETRY_DELAYS = [2, 5, 10] # Delay in seconds for each retry attempt
21
-
22
-
23
- def parse_llm_error(error: Exception) -> dict:
24
- """Parse LLM error to extract useful message for frontend"""
25
- error_str = str(error)
26
-
27
- # Try to extract JSON error message
28
- try:
29
- json_match = re.search(r'\{.*\}', error_str)
30
- if json_match:
31
- error_data = json.loads(json_match.group())
32
- return {
33
- "message": error_data.get("message", error_str),
34
- "type": error_data.get("type", "unknown_error"),
35
- "code": error_data.get("code", "unknown"),
36
- "retryable": error_data.get("type") == "too_many_requests_error" or "429" in error_str
37
- }
38
- except:
39
- pass
40
-
41
- # Check for common retryable errors
42
- retryable = any(x in error_str.lower() for x in ["429", "rate limit", "too many requests", "overloaded", "high traffic"])
43
-
44
- return {
45
- "message": error_str,
46
- "type": "unknown_error",
47
- "code": "unknown",
48
- "retryable": retryable
49
- }
50
 
51
 
52
  def parse_execution_result(execution, max_output_length=4000):
@@ -220,11 +189,14 @@ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox
220
  files_root: Root directory for file uploads (optional)
221
  extra_params: Extra parameters for API calls (optional)
222
  """
 
 
223
  turns = 0
224
  done = False
225
  figure_counter = 0 # Track figure numbers
226
  figure_data = {} # Store figure data by name for result rendering
227
  has_result = False
 
228
 
229
  while not done and turns < MAX_TURNS:
230
  # Check abort before each turn
@@ -234,53 +206,18 @@ def stream_code_execution(client, model: str, messages: List[Dict], sbx: Sandbox
234
 
235
  turns += 1
236
 
237
- # Retry loop for LLM calls
238
  response = None
239
- last_error = None
240
-
241
- for attempt in range(MAX_RETRIES):
242
- try:
243
- # Call LLM with tools
244
- call_params = {
245
- "messages": messages,
246
- "model": model,
247
- "tools": TOOLS,
248
- "tool_choice": "auto",
249
- }
250
- # Apply any extra params via extra_body (for OpenAI SDK compatibility)
251
- if extra_params:
252
- call_params["extra_body"] = extra_params
253
- response = client.chat.completions.create(**call_params)
254
- break # Success, exit retry loop
255
- except Exception as e:
256
- last_error = e
257
- error_info = parse_llm_error(e)
258
-
259
- if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
260
- delay = RETRY_DELAYS[attempt]
261
- yield {
262
- "type": "retry",
263
- "attempt": attempt + 1,
264
- "max_attempts": MAX_RETRIES,
265
- "delay": delay,
266
- "message": error_info["message"],
267
- "error_type": error_info["type"]
268
- }
269
- import time
270
- if abort_event:
271
- abort_event.wait(delay)
272
- if abort_event.is_set():
273
- yield {"type": "aborted"}
274
- return
275
- else:
276
- time.sleep(delay)
277
- else:
278
- # Final attempt failed or non-retryable error
279
- yield {"type": "error", "content": error_info["message"]}
280
  return
281
 
282
  if response is None:
283
- yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
284
  return
285
 
286
  # Get response
 
16
  TOOLS = [execute_code, upload_files, download_files]
17
 
18
  MAX_TURNS = 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
  def parse_execution_result(execution, max_output_length=4000):
 
189
  files_root: Root directory for file uploads (optional)
190
  extra_params: Extra parameters for API calls (optional)
191
  """
192
+ from .agents import call_llm
193
+
194
  turns = 0
195
  done = False
196
  figure_counter = 0 # Track figure numbers
197
  figure_data = {} # Store figure data by name for result rendering
198
  has_result = False
199
+ debug_call_number = 0
200
 
201
  while not done and turns < MAX_TURNS:
202
  # Check abort before each turn
 
206
 
207
  turns += 1
208
 
209
+ # LLM call with retries and debug events
210
  response = None
211
+ for event in call_llm(client, model, messages, tools=TOOLS, extra_params=extra_params, abort_event=abort_event, call_number=debug_call_number):
212
+ if "_response" in event:
213
+ response = event["_response"]
214
+ debug_call_number = event["_call_number"]
215
+ else:
216
+ yield event
217
+ if event.get("type") in ("error", "aborted"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  return
219
 
220
  if response is None:
 
221
  return
222
 
223
  # Get response
backend/command.py CHANGED
@@ -1,70 +1,37 @@
1
  """
2
- Command center backend - handles tool-based agent launching
3
  """
4
  import json
5
  import logging
6
- import os
7
- import re
8
  from typing import List, Dict
9
 
10
  logger = logging.getLogger(__name__)
11
 
12
  # Tool definitions derived from agent registry
13
  from .agents import get_tools, get_agent_type_map, get_tool_arg
 
14
 
15
- TOOLS = get_tools()
 
 
 
 
16
 
17
  MAX_TURNS = 10 # Limit conversation turns in command center
18
- MAX_RETRIES = 3 # Maximum retries for LLM calls
19
- RETRY_DELAYS = [2, 5, 10] # Delay in seconds for each retry attempt
20
-
21
-
22
- # Set FORCE_RETRY=1 to test retry logic with any error
23
- FORCE_RETRY_FOR_TESTING = os.environ.get("FORCE_RETRY", "0") == "1"
24
- if FORCE_RETRY_FOR_TESTING:
25
- logger.debug("FORCE_RETRY_FOR_TESTING enabled")
26
-
27
-
28
- def parse_llm_error(error: Exception) -> dict:
29
- """Parse LLM error to extract useful message for frontend"""
30
- error_str = str(error)
31
-
32
- # Try to extract JSON error message
33
- try:
34
- # Look for JSON in the error string
35
- json_match = re.search(r'\{.*\}', error_str)
36
- if json_match:
37
- error_data = json.loads(json_match.group())
38
- retryable = error_data.get("type") == "too_many_requests_error" or "429" in error_str
39
- return {
40
- "message": error_data.get("message", error_str),
41
- "type": error_data.get("type", "unknown_error"),
42
- "code": error_data.get("code", "unknown"),
43
- "retryable": retryable or FORCE_RETRY_FOR_TESTING
44
- }
45
- except:
46
- pass
47
-
48
- # Check for common retryable errors
49
- retryable = any(x in error_str.lower() for x in ["429", "rate limit", "too many requests", "overloaded", "high traffic"])
50
-
51
- return {
52
- "message": error_str,
53
- "type": "unknown_error",
54
- "code": "unknown",
55
- "retryable": retryable or FORCE_RETRY_FOR_TESTING
56
- }
57
-
58
-
59
- def stream_command_center(client, model: str, messages: List[Dict], extra_params: dict = None, abort_event=None):
60
  """
61
  Stream command center responses with agent launching capabilities
62
 
63
  Yields:
64
  dict: Updates with type 'thinking', 'launch', 'done', or 'error'
65
  """
 
 
66
  turns = 0
67
  done = False
 
68
 
69
  while not done and turns < MAX_TURNS:
70
  # Check abort before each turn
@@ -74,56 +41,18 @@ def stream_command_center(client, model: str, messages: List[Dict], extra_params
74
 
75
  turns += 1
76
 
77
- # Retry loop for LLM calls
78
  response = None
79
- last_error = None
80
-
81
- for attempt in range(MAX_RETRIES):
82
- try:
83
- # Simulate 429 error for testing if FORCE_RETRY is set
84
- if FORCE_RETRY_FOR_TESTING and attempt < MAX_RETRIES - 1:
85
- raise Exception("Error code: 429 - {'message': \"We're experiencing high traffic right now! Please try again soon.\", 'type': 'too_many_requests_error', 'param': 'queue', 'code': 'queue_exceeded'}")
86
-
87
- # Call LLM with tools
88
- call_params = {
89
- "messages": messages,
90
- "model": model,
91
- "tools": TOOLS,
92
- "tool_choice": "auto",
93
- }
94
- if extra_params:
95
- call_params["extra_body"] = extra_params
96
- response = client.chat.completions.create(**call_params)
97
- break # Success, exit retry loop
98
- except Exception as e:
99
- last_error = e
100
- error_info = parse_llm_error(e)
101
-
102
- if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
103
- delay = RETRY_DELAYS[attempt]
104
- yield {
105
- "type": "retry",
106
- "attempt": attempt + 1,
107
- "max_attempts": MAX_RETRIES,
108
- "delay": delay,
109
- "message": error_info["message"],
110
- "error_type": error_info["type"]
111
- }
112
- import time
113
- if abort_event:
114
- abort_event.wait(delay)
115
- if abort_event.is_set():
116
- yield {"type": "aborted"}
117
- return
118
- else:
119
- time.sleep(delay)
120
- else:
121
- # Final attempt failed or non-retryable error
122
- yield {"type": "error", "content": error_info["message"]}
123
  return
124
 
125
  if response is None:
126
- yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
127
  return
128
 
129
  # Get response
@@ -135,8 +64,9 @@ def stream_command_center(client, model: str, messages: List[Dict], extra_params
135
  if content.strip():
136
  yield {"type": "thinking", "content": content}
137
 
138
- # Handle tool calls (agent launches)
139
  if tool_calls:
 
140
  for tool_call in tool_calls:
141
  # Check abort between tool calls
142
  if abort_event and abort_event.is_set():
@@ -152,11 +82,59 @@ def stream_command_center(client, model: str, messages: List[Dict], extra_params
152
  yield {"type": "error", "content": "Failed to parse tool arguments"}
153
  return
154
 
155
- # Map function names to agent types (derived from registry)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  agent_type_map = get_agent_type_map()
157
  agent_type = agent_type_map.get(function_name)
158
 
159
  if agent_type:
 
160
  # Get the initial message using the registered arg name for this type
161
  initial_message = args.get(get_tool_arg(agent_type)) or args.get("task") or args.get("message")
162
  task_id = args.get("task_id", "")
@@ -193,8 +171,11 @@ def stream_command_center(client, model: str, messages: List[Dict], extra_params
193
  else:
194
  yield {"type": "error", "content": f"Unknown tool: {function_name}"}
195
  return
196
- # All agent launches processed — stop and let agents run
197
- done = True
 
 
 
198
  else:
199
  # No tool calls - conversation complete
200
  messages.append({"role": "assistant", "content": content})
 
1
  """
2
+ Command center backend - handles tool-based agent launching and direct tools
3
  """
4
  import json
5
  import logging
 
 
6
  from typing import List, Dict
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
  # Tool definitions derived from agent registry
11
  from .agents import get_tools, get_agent_type_map, get_tool_arg
12
+ from .tools import show_html as show_html_tool, execute_show_html
13
 
14
+ # Combine agent-launch tools with direct tools
15
+ TOOLS = get_tools() + [show_html_tool]
16
+
17
+ # Direct tools that execute synchronously (not sub-agent launches)
18
+ DIRECT_TOOLS = {"show_html"}
19
 
20
  MAX_TURNS = 10 # Limit conversation turns in command center
21
+
22
+
23
+ def stream_command_center(client, model: str, messages: List[Dict], extra_params: dict = None, abort_event=None, files_root: str = None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  """
25
  Stream command center responses with agent launching capabilities
26
 
27
  Yields:
28
  dict: Updates with type 'thinking', 'launch', 'done', or 'error'
29
  """
30
+ from .agents import call_llm
31
+
32
  turns = 0
33
  done = False
34
+ debug_call_number = 0
35
 
36
  while not done and turns < MAX_TURNS:
37
  # Check abort before each turn
 
41
 
42
  turns += 1
43
 
44
+ # LLM call with retries and debug events
45
  response = None
46
+ for event in call_llm(client, model, messages, tools=TOOLS, extra_params=extra_params, abort_event=abort_event, call_number=debug_call_number):
47
+ if "_response" in event:
48
+ response = event["_response"]
49
+ debug_call_number = event["_call_number"]
50
+ else:
51
+ yield event
52
+ if event.get("type") in ("error", "aborted"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  return
54
 
55
  if response is None:
 
56
  return
57
 
58
  # Get response
 
64
  if content.strip():
65
  yield {"type": "thinking", "content": content}
66
 
67
+ # Handle tool calls (agent launches + direct tools)
68
  if tool_calls:
69
+ has_launches = False
70
  for tool_call in tool_calls:
71
  # Check abort between tool calls
72
  if abort_event and abort_event.is_set():
 
82
  yield {"type": "error", "content": "Failed to parse tool arguments"}
83
  return
84
 
85
+ # --- Direct tools (execute synchronously) ---
86
+ if function_name in DIRECT_TOOLS:
87
+ # Emit tool_start for frontend
88
+ yield {
89
+ "type": "tool_start",
90
+ "tool": function_name,
91
+ "args": args,
92
+ "tool_call_id": tool_call.id,
93
+ "arguments": tool_call.function.arguments,
94
+ "thinking": content,
95
+ }
96
+
97
+ # Execute the tool
98
+ if function_name == "show_html":
99
+ result = execute_show_html(args.get("source", ""), files_root=files_root)
100
+ else:
101
+ result = {"content": f"Unknown direct tool: {function_name}"}
102
+
103
+ # Emit tool_result for frontend
104
+ yield {
105
+ "type": "tool_result",
106
+ "tool": function_name,
107
+ "tool_call_id": tool_call.id,
108
+ "result": result,
109
+ "response": result.get("content", ""),
110
+ }
111
+
112
+ # Add to message history so LLM can continue
113
+ messages.append({
114
+ "role": "assistant",
115
+ "content": content,
116
+ "tool_calls": [{
117
+ "id": tool_call.id,
118
+ "type": "function",
119
+ "function": {
120
+ "name": function_name,
121
+ "arguments": tool_call.function.arguments,
122
+ }
123
+ }]
124
+ })
125
+ messages.append({
126
+ "role": "tool",
127
+ "tool_call_id": tool_call.id,
128
+ "content": result.get("content", ""),
129
+ })
130
+ continue
131
+
132
+ # --- Agent launch tools ---
133
  agent_type_map = get_agent_type_map()
134
  agent_type = agent_type_map.get(function_name)
135
 
136
  if agent_type:
137
+ has_launches = True
138
  # Get the initial message using the registered arg name for this type
139
  initial_message = args.get(get_tool_arg(agent_type)) or args.get("task") or args.get("message")
140
  task_id = args.get("task_id", "")
 
171
  else:
172
  yield {"type": "error", "content": f"Unknown tool: {function_name}"}
173
  return
174
+
175
+ # If any agent launches happened, stop and let agents run
176
+ # If only direct tools, continue the loop so LLM can respond
177
+ if has_launches:
178
+ done = True
179
  else:
180
  # No tool calls - conversation complete
181
  messages.append({"role": "assistant", "content": content})
backend/image.py CHANGED
@@ -12,7 +12,6 @@ import base64
12
  import json
13
  import logging
14
  import re
15
- import time
16
  from typing import List, Dict, Optional
17
 
18
  from .tools import (
@@ -60,26 +59,6 @@ def resize_image_for_vlm(base64_png: str) -> str:
60
  return base64_png
61
 
62
  MAX_TURNS = 20
63
- MAX_RETRIES = 3
64
- RETRY_DELAYS = [2, 5, 10]
65
-
66
-
67
- def parse_llm_error(error: Exception) -> dict:
68
- """Parse LLM error to extract useful message for frontend."""
69
- error_str = str(error)
70
- try:
71
- json_match = re.search(r'\{.*\}', error_str)
72
- if json_match:
73
- error_data = json.loads(json_match.group())
74
- return {
75
- "message": error_data.get("message", error_str),
76
- "retryable": error_data.get("type") == "too_many_requests_error" or "429" in error_str
77
- }
78
- except:
79
- pass
80
-
81
- retryable = any(x in error_str.lower() for x in ["429", "rate limit", "too many requests", "overloaded"])
82
- return {"message": error_str, "retryable": retryable}
83
 
84
 
85
  def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, image_counter: int, default_gen_model: str = None, default_edit_model: str = None, files_root: str = None) -> dict:
@@ -220,11 +199,14 @@ def stream_image_execution(
220
  - error: { content }
221
  - done: {}
222
  """
 
 
223
  turns = 0
224
  done = False
225
  image_store = {}
226
  image_counter = 0
227
  result_sent = False
 
228
 
229
  while not done and turns < MAX_TURNS:
230
  # Check abort before each turn
@@ -234,47 +216,18 @@ def stream_image_execution(
234
 
235
  turns += 1
236
 
237
- # --- LLM call with retry ---
238
  response = None
239
- last_error = None
240
-
241
- for attempt in range(MAX_RETRIES):
242
- try:
243
- call_params = {
244
- "messages": messages,
245
- "model": model,
246
- "tools": TOOLS,
247
- "tool_choice": "auto",
248
- }
249
- if extra_params:
250
- call_params["extra_body"] = extra_params
251
- response = client.chat.completions.create(**call_params)
252
- break
253
- except Exception as e:
254
- last_error = e
255
- error_info = parse_llm_error(e)
256
- if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
257
- delay = RETRY_DELAYS[attempt]
258
- yield {
259
- "type": "retry",
260
- "attempt": attempt + 1,
261
- "max_attempts": MAX_RETRIES,
262
- "delay": delay,
263
- "message": error_info["message"],
264
- }
265
- if abort_event:
266
- abort_event.wait(delay)
267
- if abort_event.is_set():
268
- yield {"type": "aborted"}
269
- return
270
- else:
271
- time.sleep(delay)
272
- else:
273
- yield {"type": "error", "content": error_info["message"]}
274
  return
275
 
276
  if response is None:
277
- yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
278
  return
279
 
280
  # --- Parse response ---
 
12
  import json
13
  import logging
14
  import re
 
15
  from typing import List, Dict, Optional
16
 
17
  from .tools import (
 
59
  return base64_png
60
 
61
  MAX_TURNS = 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
 
64
  def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, image_counter: int, default_gen_model: str = None, default_edit_model: str = None, files_root: str = None) -> dict:
 
199
  - error: { content }
200
  - done: {}
201
  """
202
+ from .agents import call_llm
203
+
204
  turns = 0
205
  done = False
206
  image_store = {}
207
  image_counter = 0
208
  result_sent = False
209
+ debug_call_number = 0
210
 
211
  while not done and turns < MAX_TURNS:
212
  # Check abort before each turn
 
216
 
217
  turns += 1
218
 
219
+ # LLM call with retries and debug events
220
  response = None
221
+ for event in call_llm(client, model, messages, tools=TOOLS, extra_params=extra_params, abort_event=abort_event, call_number=debug_call_number):
222
+ if "_response" in event:
223
+ response = event["_response"]
224
+ debug_call_number = event["_call_number"]
225
+ else:
226
+ yield event
227
+ if event.get("type") in ("error", "aborted"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  return
229
 
230
  if response is None:
 
231
  return
232
 
233
  # --- Parse response ---
backend/main.py CHANGED
@@ -144,10 +144,6 @@ except ImportError:
144
  SANDBOXES: Dict[str, any] = {}
145
  SANDBOX_TIMEOUT = 300
146
 
147
- # Debug: Store message history for debugging per tab
148
- # Structure: {tab_id: [{call_number: int, timestamp: str, messages: List[dict]}]}
149
- MESSAGE_HISTORY: Dict[str, List[Dict]] = {}
150
-
151
  # Conversation history per tab (persistent across requests)
152
  # Structure: {tab_id: [messages...]}
153
  CONVERSATION_HISTORY: Dict[str, List[Dict]] = {}
@@ -282,23 +278,6 @@ app.add_middleware(
282
  # Agent type registry is in agents.py — system prompts, tools, and metadata are all defined there
283
 
284
 
285
- def record_api_call(tab_id: str, messages: List[dict]):
286
- """Record an API call for debugging purposes"""
287
- global MESSAGE_HISTORY
288
-
289
- if tab_id not in MESSAGE_HISTORY:
290
- MESSAGE_HISTORY[tab_id] = []
291
-
292
- call_number = len(MESSAGE_HISTORY[tab_id]) + 1
293
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
294
-
295
- MESSAGE_HISTORY[tab_id].append({
296
- "call_number": call_number,
297
- "timestamp": timestamp,
298
- "messages": messages
299
- })
300
-
301
-
302
  class Message(BaseModel):
303
  role: str
304
  content: str
@@ -318,6 +297,7 @@ class ChatRequest(BaseModel):
318
  token: Optional[str] = None # Optional auth token
319
  model: Optional[str] = "gpt-4" # Model name
320
  extra_params: Optional[Dict] = None # Extra parameters for API calls (e.g., enable_thinking)
 
321
  e2b_key: Optional[str] = None # E2B API key for code execution
322
  serper_key: Optional[str] = None # Serper API key for research
323
  hf_token: Optional[str] = None # HuggingFace token for image generation
@@ -365,17 +345,18 @@ async def stream_code_agent(
365
  parent_agent_id: Optional[str] = None,
366
  frontend_context: Optional[Dict] = None,
367
  extra_params: Optional[Dict] = None,
368
- files_root: str = None
 
369
  ):
370
  """Handle code agent with execution capabilities"""
371
  abort_event = register_agent(tab_id, parent_agent_id)
372
  try:
373
- async for chunk in _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, session_id, tab_id, frontend_context, extra_params, abort_event, files_root):
374
  yield chunk
375
  finally:
376
  unregister_agent(tab_id)
377
 
378
- async def _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, session_id, tab_id, frontend_context, extra_params, abort_event, files_root=None):
379
  if not E2B_AVAILABLE:
380
  yield f"data: {json.dumps({'type': 'error', 'content': 'E2B not available. Install with: pip install e2b-code-interpreter'})}\n\n"
381
  return
@@ -401,8 +382,8 @@ async def _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, se
401
  {"role": "system", "content": system_prompt}
402
  ] + messages
403
 
404
- # Store for debugging
405
- record_api_call(tab_id, full_messages)
406
 
407
  # Stream code execution in a thread to avoid blocking the event loop
408
  loop = asyncio.get_event_loop()
@@ -410,7 +391,7 @@ async def _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, se
410
 
411
  def run_sync_generator():
412
  try:
413
- for update in stream_code_execution(client, model, full_messages, sbx, files_root=files_root or FILES_ROOT, extra_params=extra_params, abort_event=abort_event):
414
  loop.call_soon_threadsafe(queue.put_nowait, update)
415
  finally:
416
  loop.call_soon_threadsafe(queue.put_nowait, None) # Signal completion
@@ -460,7 +441,7 @@ async def _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, se
460
 
461
  def run_retry_generator():
462
  try:
463
- for update in stream_code_execution(client, model, full_messages, sbx, files_root=files_root or FILES_ROOT, extra_params=extra_params, abort_event=abort_event):
464
  loop.call_soon_threadsafe(retry_queue.put_nowait, update)
465
  finally:
466
  loop.call_soon_threadsafe(retry_queue.put_nowait, None)
@@ -533,10 +514,6 @@ async def _stream_research_agent_inner(messages, endpoint, token, model, serper_
533
  # Get system prompt for research (with file tree)
534
  system_prompt = get_system_prompt("research")
535
 
536
- # Store for debugging (simplified version for research)
537
- full_messages = [{"role": "system", "content": system_prompt}] + messages
538
- record_api_call(tab_id, full_messages)
539
-
540
  # Use sub-agent model if provided, otherwise fall back to main model
541
  analysis_model = sub_agent_model if sub_agent_model else model
542
 
@@ -583,17 +560,18 @@ async def stream_command_center_handler(
583
  token: Optional[str],
584
  model: str,
585
  tab_id: str = "0",
586
- extra_params: Optional[Dict] = None
 
587
  ):
588
  """Handle command center with tool-based agent launching"""
589
  abort_event = register_agent(tab_id)
590
  try:
591
- async for chunk in _stream_command_center_inner(messages, endpoint, token, model, tab_id, extra_params, abort_event):
592
  yield chunk
593
  finally:
594
  unregister_agent(tab_id)
595
 
596
- async def _stream_command_center_inner(messages, endpoint, token, model, tab_id, extra_params, abort_event):
597
  if not COMMAND_AVAILABLE:
598
  # Fallback to regular chat if command tools not available
599
  async for chunk in stream_chat_response(messages, endpoint, token, model, "command", tab_id, extra_params):
@@ -611,8 +589,8 @@ async def _stream_command_center_inner(messages, endpoint, token, model, tab_id,
611
 
612
  logger.debug(f"tab_id={tab_id}, messages={len(messages)}, full_messages={len(full_messages)}")
613
 
614
- # Store for debugging
615
- record_api_call(tab_id, full_messages)
616
 
617
  # Stream command center execution in a thread to avoid blocking the event loop
618
  loop = asyncio.get_event_loop()
@@ -620,7 +598,7 @@ async def _stream_command_center_inner(messages, endpoint, token, model, tab_id,
620
 
621
  def run_sync_generator():
622
  try:
623
- for update in stream_command_center(client, model, full_messages, extra_params=extra_params, abort_event=abort_event):
624
  loop.call_soon_threadsafe(queue.put_nowait, update)
625
  finally:
626
  loop.call_soon_threadsafe(queue.put_nowait, None) # Signal completion
@@ -653,17 +631,18 @@ async def stream_web_agent(
653
  serper_key: str,
654
  tab_id: str = "default",
655
  parent_agent_id: Optional[str] = None,
656
- extra_params: Optional[Dict] = None
 
657
  ):
658
  """Handle web agent with tools (search, read, screenshot)"""
659
  abort_event = register_agent(tab_id, parent_agent_id)
660
  try:
661
- async for chunk in _stream_web_agent_inner(messages, endpoint, token, model, serper_key, tab_id, extra_params, abort_event):
662
  yield chunk
663
  finally:
664
  unregister_agent(tab_id)
665
 
666
- async def _stream_web_agent_inner(messages, endpoint, token, model, serper_key, tab_id, extra_params, abort_event):
667
  if not AGENT_AVAILABLE:
668
  async for chunk in stream_chat_response(messages, endpoint, token, model, "agent", tab_id, extra_params):
669
  yield chunk
@@ -675,14 +654,14 @@ async def _stream_web_agent_inner(messages, endpoint, token, model, serper_key,
675
  system_prompt = get_system_prompt("agent")
676
  full_messages = [{"role": "system", "content": system_prompt}] + messages
677
 
678
- record_api_call(tab_id, full_messages)
679
 
680
  loop = asyncio.get_event_loop()
681
  queue = asyncio.Queue()
682
 
683
  def run_sync_generator():
684
  try:
685
- for update in stream_agent_execution(client, model, full_messages, serper_key, extra_params=extra_params, abort_event=abort_event):
686
  loop.call_soon_threadsafe(queue.put_nowait, update)
687
  finally:
688
  loop.call_soon_threadsafe(queue.put_nowait, None)
@@ -715,17 +694,18 @@ async def stream_image_agent(
715
  tab_id: str = "default",
716
  parent_agent_id: Optional[str] = None,
717
  extra_params: Optional[Dict] = None,
718
- files_root: str = None
 
719
  ):
720
  """Handle image agent with HuggingFace image generation tools"""
721
  abort_event = register_agent(tab_id, parent_agent_id)
722
  try:
723
- async for chunk in _stream_image_agent_inner(messages, endpoint, token, model, hf_token, image_gen_model, image_edit_model, tab_id, extra_params, abort_event, files_root):
724
  yield chunk
725
  finally:
726
  unregister_agent(tab_id)
727
 
728
- async def _stream_image_agent_inner(messages, endpoint, token, model, hf_token, image_gen_model, image_edit_model, tab_id, extra_params, abort_event, files_root=None):
729
  if not IMAGE_AVAILABLE:
730
  yield f"data: {json.dumps({'type': 'error', 'content': 'Image agent not available. Install with: pip install huggingface_hub Pillow'})}\n\n"
731
  return
@@ -740,14 +720,14 @@ async def _stream_image_agent_inner(messages, endpoint, token, model, hf_token,
740
  system_prompt = get_system_prompt("image")
741
  full_messages = [{"role": "system", "content": system_prompt}] + messages
742
 
743
- record_api_call(tab_id, full_messages)
744
 
745
  loop = asyncio.get_event_loop()
746
  queue = asyncio.Queue()
747
 
748
  def run_sync_generator():
749
  try:
750
- for update in stream_image_execution(client, model, full_messages, hf_token, image_gen_model=image_gen_model, image_edit_model=image_edit_model, extra_params=extra_params, abort_event=abort_event, files_root=files_root):
751
  loop.call_soon_threadsafe(queue.put_nowait, update)
752
  finally:
753
  loop.call_soon_threadsafe(queue.put_nowait, None)
@@ -789,8 +769,8 @@ async def stream_chat_response(
789
  {"role": "system", "content": system_prompt}
790
  ] + messages
791
 
792
- # Store for debugging
793
- record_api_call(tab_id, full_messages)
794
 
795
  # Handle Hugging Face endpoint with fallback to HF_TOKEN
796
  if not token and "huggingface.co" in endpoint:
@@ -1000,7 +980,8 @@ async def chat_stream(raw_request: Request, request: ChatRequest):
1000
  user_key(user_id, request.parent_agent_id) if request.parent_agent_id else None,
1001
  frontend_context,
1002
  request.extra_params,
1003
- files_root=files_root
 
1004
  ),
1005
  media_type="text/event-stream",
1006
  headers={
@@ -1055,7 +1036,8 @@ async def chat_stream(raw_request: Request, request: ChatRequest):
1055
  uk_tab_id,
1056
  user_key(user_id, request.parent_agent_id) if request.parent_agent_id else None,
1057
  request.extra_params,
1058
- files_root=files_root
 
1059
  ),
1060
  media_type="text/event-stream",
1061
  headers={
@@ -1076,7 +1058,8 @@ async def chat_stream(raw_request: Request, request: ChatRequest):
1076
  serper_key or "",
1077
  uk_tab_id,
1078
  user_key(user_id, request.parent_agent_id) if request.parent_agent_id else None,
1079
- request.extra_params
 
1080
  ),
1081
  media_type="text/event-stream",
1082
  headers={
@@ -1095,7 +1078,8 @@ async def chat_stream(raw_request: Request, request: ChatRequest):
1095
  token,
1096
  request.model or "gpt-4",
1097
  uk_tab_id,
1098
- request.extra_params
 
1099
  ),
1100
  media_type="text/event-stream",
1101
  headers={
@@ -1220,11 +1204,9 @@ async def add_tool_response(raw_request: Request, request: dict):
1220
 
1221
  @app.get("/api/debug/messages/{tab_id}")
1222
  async def get_debug_messages(request: Request, tab_id: str):
1223
- """Get the message history for a specific tab for debugging"""
1224
- user_id = get_user_id(request)
1225
- uk_tab_id = user_key(user_id, tab_id)
1226
- if uk_tab_id in MESSAGE_HISTORY:
1227
- return {"calls": MESSAGE_HISTORY[uk_tab_id]}
1228
  return {"calls": []}
1229
 
1230
 
@@ -1387,13 +1369,11 @@ def select_session(session_name: str, user_id: str = '') -> bool:
1387
  # In multi-user mode, only clear keys belonging to this user
1388
  if MULTI_USER and user_id:
1389
  prefix = f"{user_id}:"
1390
- for d in (CONVERSATION_HISTORY, MESSAGE_HISTORY):
1391
- keys_to_remove = [k for k in d if k.startswith(prefix)]
1392
- for k in keys_to_remove:
1393
- del d[k]
1394
  else:
1395
  CONVERSATION_HISTORY.clear()
1396
- MESSAGE_HISTORY.clear()
1397
 
1398
  return True
1399
 
 
144
  SANDBOXES: Dict[str, any] = {}
145
  SANDBOX_TIMEOUT = 300
146
 
 
 
 
 
147
  # Conversation history per tab (persistent across requests)
148
  # Structure: {tab_id: [messages...]}
149
  CONVERSATION_HISTORY: Dict[str, List[Dict]] = {}
 
278
  # Agent type registry is in agents.py — system prompts, tools, and metadata are all defined there
279
 
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  class Message(BaseModel):
282
  role: str
283
  content: str
 
297
  token: Optional[str] = None # Optional auth token
298
  model: Optional[str] = "gpt-4" # Model name
299
  extra_params: Optional[Dict] = None # Extra parameters for API calls (e.g., enable_thinking)
300
+ multimodal: bool = False # Whether the model supports vision/image input
301
  e2b_key: Optional[str] = None # E2B API key for code execution
302
  serper_key: Optional[str] = None # Serper API key for research
303
  hf_token: Optional[str] = None # HuggingFace token for image generation
 
345
  parent_agent_id: Optional[str] = None,
346
  frontend_context: Optional[Dict] = None,
347
  extra_params: Optional[Dict] = None,
348
+ files_root: str = None,
349
+ multimodal: bool = False
350
  ):
351
  """Handle code agent with execution capabilities"""
352
  abort_event = register_agent(tab_id, parent_agent_id)
353
  try:
354
+ async for chunk in _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, session_id, tab_id, frontend_context, extra_params, abort_event, files_root, multimodal):
355
  yield chunk
356
  finally:
357
  unregister_agent(tab_id)
358
 
359
+ async def _stream_code_agent_inner(messages, endpoint, token, model, e2b_key, session_id, tab_id, frontend_context, extra_params, abort_event, files_root=None, multimodal=False):
360
  if not E2B_AVAILABLE:
361
  yield f"data: {json.dumps({'type': 'error', 'content': 'E2B not available. Install with: pip install e2b-code-interpreter'})}\n\n"
362
  return
 
382
  {"role": "system", "content": system_prompt}
383
  ] + messages
384
 
385
+
386
+
387
 
388
  # Stream code execution in a thread to avoid blocking the event loop
389
  loop = asyncio.get_event_loop()
 
391
 
392
  def run_sync_generator():
393
  try:
394
+ for update in stream_code_execution(client, model, full_messages, sbx, files_root=files_root or FILES_ROOT, extra_params=extra_params, abort_event=abort_event, multimodal=multimodal):
395
  loop.call_soon_threadsafe(queue.put_nowait, update)
396
  finally:
397
  loop.call_soon_threadsafe(queue.put_nowait, None) # Signal completion
 
441
 
442
  def run_retry_generator():
443
  try:
444
+ for update in stream_code_execution(client, model, full_messages, sbx, files_root=files_root or FILES_ROOT, extra_params=extra_params, abort_event=abort_event, multimodal=multimodal):
445
  loop.call_soon_threadsafe(retry_queue.put_nowait, update)
446
  finally:
447
  loop.call_soon_threadsafe(retry_queue.put_nowait, None)
 
514
  # Get system prompt for research (with file tree)
515
  system_prompt = get_system_prompt("research")
516
 
 
 
 
 
517
  # Use sub-agent model if provided, otherwise fall back to main model
518
  analysis_model = sub_agent_model if sub_agent_model else model
519
 
 
560
  token: Optional[str],
561
  model: str,
562
  tab_id: str = "0",
563
+ extra_params: Optional[Dict] = None,
564
+ files_root: str = None,
565
  ):
566
  """Handle command center with tool-based agent launching"""
567
  abort_event = register_agent(tab_id)
568
  try:
569
+ async for chunk in _stream_command_center_inner(messages, endpoint, token, model, tab_id, extra_params, abort_event, files_root=files_root):
570
  yield chunk
571
  finally:
572
  unregister_agent(tab_id)
573
 
574
+ async def _stream_command_center_inner(messages, endpoint, token, model, tab_id, extra_params, abort_event, files_root=None):
575
  if not COMMAND_AVAILABLE:
576
  # Fallback to regular chat if command tools not available
577
  async for chunk in stream_chat_response(messages, endpoint, token, model, "command", tab_id, extra_params):
 
589
 
590
  logger.debug(f"tab_id={tab_id}, messages={len(messages)}, full_messages={len(full_messages)}")
591
 
592
+
593
+
594
 
595
  # Stream command center execution in a thread to avoid blocking the event loop
596
  loop = asyncio.get_event_loop()
 
598
 
599
  def run_sync_generator():
600
  try:
601
+ for update in stream_command_center(client, model, full_messages, extra_params=extra_params, abort_event=abort_event, files_root=files_root or FILES_ROOT):
602
  loop.call_soon_threadsafe(queue.put_nowait, update)
603
  finally:
604
  loop.call_soon_threadsafe(queue.put_nowait, None) # Signal completion
 
631
  serper_key: str,
632
  tab_id: str = "default",
633
  parent_agent_id: Optional[str] = None,
634
+ extra_params: Optional[Dict] = None,
635
+ multimodal: bool = False
636
  ):
637
  """Handle web agent with tools (search, read, screenshot)"""
638
  abort_event = register_agent(tab_id, parent_agent_id)
639
  try:
640
+ async for chunk in _stream_web_agent_inner(messages, endpoint, token, model, serper_key, tab_id, extra_params, abort_event, multimodal):
641
  yield chunk
642
  finally:
643
  unregister_agent(tab_id)
644
 
645
+ async def _stream_web_agent_inner(messages, endpoint, token, model, serper_key, tab_id, extra_params, abort_event, multimodal=False):
646
  if not AGENT_AVAILABLE:
647
  async for chunk in stream_chat_response(messages, endpoint, token, model, "agent", tab_id, extra_params):
648
  yield chunk
 
654
  system_prompt = get_system_prompt("agent")
655
  full_messages = [{"role": "system", "content": system_prompt}] + messages
656
 
657
+
658
 
659
  loop = asyncio.get_event_loop()
660
  queue = asyncio.Queue()
661
 
662
  def run_sync_generator():
663
  try:
664
+ for update in stream_agent_execution(client, model, full_messages, serper_key, extra_params=extra_params, abort_event=abort_event, multimodal=multimodal):
665
  loop.call_soon_threadsafe(queue.put_nowait, update)
666
  finally:
667
  loop.call_soon_threadsafe(queue.put_nowait, None)
 
694
  tab_id: str = "default",
695
  parent_agent_id: Optional[str] = None,
696
  extra_params: Optional[Dict] = None,
697
+ files_root: str = None,
698
+ multimodal: bool = False
699
  ):
700
  """Handle image agent with HuggingFace image generation tools"""
701
  abort_event = register_agent(tab_id, parent_agent_id)
702
  try:
703
+ async for chunk in _stream_image_agent_inner(messages, endpoint, token, model, hf_token, image_gen_model, image_edit_model, tab_id, extra_params, abort_event, files_root, multimodal):
704
  yield chunk
705
  finally:
706
  unregister_agent(tab_id)
707
 
708
+ async def _stream_image_agent_inner(messages, endpoint, token, model, hf_token, image_gen_model, image_edit_model, tab_id, extra_params, abort_event, files_root=None, multimodal=False):
709
  if not IMAGE_AVAILABLE:
710
  yield f"data: {json.dumps({'type': 'error', 'content': 'Image agent not available. Install with: pip install huggingface_hub Pillow'})}\n\n"
711
  return
 
720
  system_prompt = get_system_prompt("image")
721
  full_messages = [{"role": "system", "content": system_prompt}] + messages
722
 
723
+
724
 
725
  loop = asyncio.get_event_loop()
726
  queue = asyncio.Queue()
727
 
728
  def run_sync_generator():
729
  try:
730
+ for update in stream_image_execution(client, model, full_messages, hf_token, image_gen_model=image_gen_model, image_edit_model=image_edit_model, extra_params=extra_params, abort_event=abort_event, files_root=files_root, multimodal=multimodal):
731
  loop.call_soon_threadsafe(queue.put_nowait, update)
732
  finally:
733
  loop.call_soon_threadsafe(queue.put_nowait, None)
 
769
  {"role": "system", "content": system_prompt}
770
  ] + messages
771
 
772
+
773
+
774
 
775
  # Handle Hugging Face endpoint with fallback to HF_TOKEN
776
  if not token and "huggingface.co" in endpoint:
 
980
  user_key(user_id, request.parent_agent_id) if request.parent_agent_id else None,
981
  frontend_context,
982
  request.extra_params,
983
+ files_root=files_root,
984
+ multimodal=request.multimodal
985
  ),
986
  media_type="text/event-stream",
987
  headers={
 
1036
  uk_tab_id,
1037
  user_key(user_id, request.parent_agent_id) if request.parent_agent_id else None,
1038
  request.extra_params,
1039
+ files_root=files_root,
1040
+ multimodal=request.multimodal
1041
  ),
1042
  media_type="text/event-stream",
1043
  headers={
 
1058
  serper_key or "",
1059
  uk_tab_id,
1060
  user_key(user_id, request.parent_agent_id) if request.parent_agent_id else None,
1061
+ request.extra_params,
1062
+ multimodal=request.multimodal
1063
  ),
1064
  media_type="text/event-stream",
1065
  headers={
 
1078
  token,
1079
  request.model or "gpt-4",
1080
  uk_tab_id,
1081
+ request.extra_params,
1082
+ files_root=files_root,
1083
  ),
1084
  media_type="text/event-stream",
1085
  headers={
 
1204
 
1205
  @app.get("/api/debug/messages/{tab_id}")
  async def get_debug_messages(request: Request, tab_id: str):
      """Return an empty debug call list for the given tab.

      Debug data is now streamed via SSE events (debug_call_input/output) and
      stored in the frontend. This endpoint is kept for backward compatibility
      only: ``request`` and ``tab_id`` are accepted but not used, and the
      response is always ``{"calls": []}``.
      """
      return {"calls": []}
1211
 
1212
 
 
1369
  # In multi-user mode, only clear keys belonging to this user
1370
  if MULTI_USER and user_id:
1371
  prefix = f"{user_id}:"
1372
+ keys_to_remove = [k for k in CONVERSATION_HISTORY if k.startswith(prefix)]
1373
+ for k in keys_to_remove:
1374
+ del CONVERSATION_HISTORY[k]
 
1375
  else:
1376
  CONVERSATION_HISTORY.clear()
 
1377
 
1378
  return True
1379
 
backend/tools.py CHANGED
@@ -569,3 +569,60 @@ def extract_and_download_images(markdown: str, max_images: int = 5) -> List[str]
569
  # Keep old name as alias
570
  def execute_read_image_url(url: str) -> Optional[str]:
571
  return execute_read_image(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  # Keep old name as alias
570
  def execute_read_image_url(url: str) -> Optional[str]:
571
  return execute_read_image(url)
572
+
573
+
574
+ # ============================================================
575
+ # HTML display tool (used by command center)
576
+ # ============================================================
577
+
578
# Function-calling tool schema for the `show_html` tool: a single required
# string argument, `source`, which is either a path to an HTML file or a raw
# HTML string. The matching executor is `execute_show_html` below.
# NOTE(review): the layout ("type": "function" wrapping name/description/
# parameters) looks like the OpenAI-style tools format — confirm against the
# client that consumes it.
show_html = {
    "type": "function",
    "function": {
        "name": "show_html",
        "description": "Display HTML content in the chat. Accepts either a file path to an HTML file or a raw HTML string. Use this to show interactive visualizations, maps, charts, or any HTML content produced by a code agent.",
        "parameters": {
            "type": "object",
            "properties": {
                "source": {
                    "type": "string",
                    "description": "Either a file path (e.g., 'workspace/map.html') or a raw HTML string (starting with '<')"
                }
            },
            "required": ["source"]
        }
    }
}
595
+
596
+
597
def execute_show_html(source: str, files_root: str = None) -> dict:
    """Load HTML from a file path or pass a raw HTML string through.

    Args:
        source: Either raw HTML (its first non-whitespace character is '<')
            or a path to an HTML file. Relative paths are resolved against
            ``files_root`` when one is given.
        files_root: Optional base directory for file lookups. When provided,
            the resolved path must stay inside this directory; paths that
            escape it (absolute paths elsewhere, ``..`` segments, symlinks
            pointing outside) are rejected instead of being read.

    Returns:
        dict with:
        - "content": str description for the LLM
        - "html": the HTML content string (or None on error)

    The function never raises for bad input; every failure is reported
    through the returned dict so the tool loop can relay it to the model.
    """
    import os

    # The argument comes from a model tool call, so it may be missing or of
    # the wrong type; report that instead of raising AttributeError.
    if not isinstance(source, str):
        return {
            "content": f"Failed to load HTML from '{source}': source must be a string",
            "html": None,
        }

    if source.strip().startswith("<"):
        return {
            "content": "Rendered inline HTML content.",
            "html": source,
        }

    # File path — resolve relative to files_root
    file_path = source
    if files_root:
        root = os.path.realpath(files_root)
        # os.path.join drops `root` when `source` is absolute, so the
        # containment check below covers absolute and relative paths alike.
        file_path = os.path.realpath(os.path.join(root, source))
        try:
            inside_root = os.path.commonpath([root, file_path]) == root
        except ValueError:
            # Raised e.g. for paths on different drives on Windows.
            inside_root = False
        if not inside_root:
            return {
                "content": f"Failed to load HTML from '{source}': path is outside the files directory",
                "html": None,
            }

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            html_content = f.read()
    except Exception as e:
        # Deliberately broad: any read/decode problem becomes a tool error
        # message rather than crashing the agent loop.
        return {
            "content": f"Failed to load HTML from '{source}': {e}",
            "html": None,
        }

    return {
        "content": f"Rendered HTML from file: {source}",
        "html": html_content,
    }
frontend/index.html CHANGED
@@ -7,7 +7,7 @@
7
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&display=swap" rel="stylesheet">
8
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism.min.css">
9
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
10
- <link rel="stylesheet" href="style.css?v=103">
11
  </head>
12
  <body>
13
  <div class="app-container">
@@ -355,9 +355,6 @@
355
  <button class="debug-close" id="debugClose">×</button>
356
  </div>
357
  <div class="debug-body">
358
- <div class="debug-controls">
359
- <button class="debug-refresh" id="debugRefresh">Refresh</button>
360
- </div>
361
  <pre class="debug-content" id="debugContent">No message history available yet.</pre>
362
  </div>
363
  </div>
@@ -508,6 +505,6 @@
508
  </div>
509
 
510
  <script src="research-ui.js?v=23"></script>
511
- <script src="script.js?v=90"></script>
512
  </body>
513
  </html>
 
7
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&display=swap" rel="stylesheet">
8
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism.min.css">
9
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
10
+ <link rel="stylesheet" href="style.css?v=111">
11
  </head>
12
  <body>
13
  <div class="app-container">
 
355
  <button class="debug-close" id="debugClose">×</button>
356
  </div>
357
  <div class="debug-body">
 
 
 
358
  <pre class="debug-content" id="debugContent">No message history available yet.</pre>
359
  </div>
360
  </div>
 
505
  </div>
506
 
507
  <script src="research-ui.js?v=23"></script>
508
+ <script src="script.js?v=98"></script>
509
  </body>
510
  </html>
frontend/script.js CHANGED
@@ -97,6 +97,10 @@ const toolCallIds = {};
97
  // Maps "figure_1" -> {type, data} and "image_1" -> {type: "png", data: base64}
98
  const globalFigureRegistry = {};
99
 
 
 
 
 
100
  // Track agents by task_id for reuse (maps task_id -> tabId)
101
  const taskIdToTabId = {};
102
 
@@ -133,6 +137,7 @@ function resetLocalState() {
133
  Object.keys(actionWidgets).forEach(k => delete actionWidgets[k]);
134
  Object.keys(toolCallIds).forEach(k => delete toolCallIds[k]);
135
  Object.keys(globalFigureRegistry).forEach(k => delete globalFigureRegistry[k]);
 
136
  Object.keys(taskIdToTabId).forEach(k => delete taskIdToTabId[k]);
137
  researchQueryTabIds = {};
138
  showAllTurns = true;
@@ -2107,7 +2112,7 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2107
  const data = JSON.parse(line.slice(6));
2108
 
2109
  // Hide progress widget on first meaningful response
2110
- if (!progressHidden && data.type !== 'generating' && data.type !== 'retry') {
2111
  hideProgressWidget(chatContainer);
2112
  progressHidden = true;
2113
  }
@@ -2329,7 +2334,8 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2329
  'generate_image': 'GENERATE',
2330
  'edit_image': 'EDIT',
2331
  'read_image_url': 'LOAD IMAGE',
2332
- 'read_image': 'LOAD IMAGE'
 
2333
  };
2334
  const toolDescriptions = {
2335
  'web_search': data.args?.query || '',
@@ -2338,22 +2344,27 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2338
  'generate_image': data.args?.prompt || '',
2339
  'edit_image': `${data.args?.prompt || ''} (from ${data.args?.source || ''})`,
2340
  'read_image_url': data.args?.url || '',
2341
- 'read_image': data.args?.source || ''
 
2342
  };
2343
  const label = toolLabels[data.tool] || data.tool.toUpperCase();
2344
  const description = toolDescriptions[data.tool] || '';
2345
 
2346
  // Store tool call in DOM for history reconstruction
2347
- const toolCallMsg = document.createElement('div');
2348
- toolCallMsg.className = 'message assistant';
2349
- toolCallMsg.style.display = 'none';
 
 
 
 
 
2350
  toolCallMsg.setAttribute('data-tool-call', JSON.stringify({
2351
  tool_call_id: data.tool_call_id,
2352
  function_name: data.tool,
2353
  arguments: data.arguments,
2354
  thinking: data.thinking || ''
2355
  }));
2356
- chatContainer.appendChild(toolCallMsg);
2357
 
2358
  // Create tool-cell box (similar to code-cell)
2359
  const toolCell = document.createElement('div');
@@ -2427,6 +2438,21 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2427
  } else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url' || data.tool === 'read_image') && !data.image) {
2428
  const errMsg = data.response || 'Failed to process image';
2429
  outputHtml = `<div class="tool-cell-read-summary">${escapeHtml(errMsg)}</div>`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2430
  }
2431
 
2432
  if (outputHtml && lastToolCell) {
@@ -2490,6 +2516,29 @@ async function streamChatResponse(messages, chatContainer, agentType, tabId) {
2490
  // Reset current message element so any subsequent thinking starts fresh
2491
  currentMessageEl = null;
2492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2493
  } else if (data.type === 'aborted') {
2494
  // Agent was aborted by user
2495
  hideProgressWidget(chatContainer);
@@ -3264,6 +3313,14 @@ function restoreWorkspace(workspace) {
3264
  }
3265
  }
3266
 
 
 
 
 
 
 
 
 
3267
  // Restore tabs (skip command center as it already exists in HTML)
3268
  const tabs = workspace.tabs || [];
3269
  for (const tabData of tabs) {
@@ -3636,7 +3693,8 @@ function serializeWorkspace() {
3636
  activeTabId: activeTabId,
3637
  agentCounters: agentCounters,
3638
  tabs: [],
3639
- timelineData: serializeTimelineData()
 
3640
  };
3641
 
3642
  // Serialize command center (tab 0)
@@ -4767,7 +4825,6 @@ function openImageModal(src) {
4767
  const debugPanel = document.getElementById('debugPanel');
4768
  const debugBtn = document.getElementById('debugBtn');
4769
  const debugClose = document.getElementById('debugClose');
4770
- const debugRefresh = document.getElementById('debugRefresh');
4771
  const debugContent = document.getElementById('debugContent');
4772
 
4773
  // Toggle debug panel
@@ -4810,57 +4867,77 @@ if (debugClose) {
4810
  });
4811
  }
4812
 
4813
- // Refresh debug messages
4814
- if (debugRefresh) {
4815
- debugRefresh.addEventListener('click', () => {
4816
- loadDebugMessages();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4817
  });
 
4818
  }
4819
 
4820
- // Load debug messages from backend
4821
- async function loadDebugMessages() {
4822
- try {
4823
- debugContent.innerHTML = '<div style="padding: 10px; color: #666;">Loading...</div>';
4824
 
4825
- // Get current active tab ID
4826
- const activeTab = document.querySelector('.tab.active');
4827
- const tabId = activeTab ? activeTab.dataset.tabId : '0';
 
4828
 
4829
- const response = await apiFetch(`/api/debug/messages/${tabId}`);
 
 
 
 
4830
 
4831
- if (!response.ok) {
4832
- throw new Error(`HTTP error! status: ${response.status}`);
4833
- }
4834
 
4835
- const data = await response.json();
4836
-
4837
- if (data.calls && data.calls.length > 0) {
4838
- // Create collapsible UI for each API call
4839
- let html = '';
4840
- data.calls.forEach((call, index) => {
4841
- const isExpanded = index === data.calls.length - 1; // Expand only the last call by default
4842
- html += `<div class="debug-call-item"><div class="debug-call-header" onclick="toggleDebugCall(${index})"><span class="debug-call-arrow" id="arrow-${index}">${isExpanded ? '▼' : '▶'}</span><span class="debug-call-title">LLM Call #${call.call_number}</span><span class="debug-call-time">${call.timestamp}</span></div><pre class="debug-call-content" id="call-${index}" style="display: ${isExpanded ? 'block' : 'none'};">${JSON.stringify(call.messages, null, 2)}</pre></div>`;
4843
- });
4844
- debugContent.innerHTML = html;
4845
  } else {
4846
- debugContent.innerHTML = '<div style="padding: 10px; color: #666;">No message history available yet.<br><br>Send a message in this tab to see the message history here.</div>';
4847
  }
4848
- } catch (error) {
4849
- console.error('Failed to load debug messages:', error);
4850
- debugContent.innerHTML = `<div style="padding: 10px; color: #d32f2f;">Error loading debug messages: ${error.message}</div>`;
4851
- }
4852
  }
4853
 
4854
  // Toggle debug call expansion
4855
  window.toggleDebugCall = function(index) {
4856
  const content = document.getElementById(`call-${index}`);
4857
  const arrow = document.getElementById(`arrow-${index}`);
 
4858
  if (content.style.display === 'none') {
4859
  content.style.display = 'block';
4860
  arrow.textContent = '▼';
 
4861
  } else {
4862
  content.style.display = 'none';
4863
  arrow.textContent = '▶';
 
4864
  }
4865
  }
4866
 
@@ -5266,6 +5343,65 @@ if (showHiddenFiles) {
5266
  });
5267
  }
5268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5269
  // Sessions panel (same pattern as Files/Settings/Debug panels)
5270
  const sessionsPanel = document.getElementById('sessionsPanel');
5271
  const sessionsPanelClose = document.getElementById('sessionsPanelClose');
 
97
  // Maps "figure_1" -> {type, data} and "image_1" -> {type: "png", data: base64}
98
  const globalFigureRegistry = {};
99
 
100
+ // Debug: per-tab LLM call history (populated by SSE debug_call_input/output events)
101
+ // Maps tabId -> [{call_number, timestamp, input, output, error}]
102
+ const debugHistory = {};
103
+
104
  // Track agents by task_id for reuse (maps task_id -> tabId)
105
  const taskIdToTabId = {};
106
 
 
137
  Object.keys(actionWidgets).forEach(k => delete actionWidgets[k]);
138
  Object.keys(toolCallIds).forEach(k => delete toolCallIds[k]);
139
  Object.keys(globalFigureRegistry).forEach(k => delete globalFigureRegistry[k]);
140
+ Object.keys(debugHistory).forEach(k => delete debugHistory[k]);
141
  Object.keys(taskIdToTabId).forEach(k => delete taskIdToTabId[k]);
142
  researchQueryTabIds = {};
143
  showAllTurns = true;
 
2112
  const data = JSON.parse(line.slice(6));
2113
 
2114
  // Hide progress widget on first meaningful response
2115
+ if (!progressHidden && data.type !== 'generating' && data.type !== 'retry' && !data.type.startsWith('debug_')) {
2116
  hideProgressWidget(chatContainer);
2117
  progressHidden = true;
2118
  }
 
2334
  'generate_image': 'GENERATE',
2335
  'edit_image': 'EDIT',
2336
  'read_image_url': 'LOAD IMAGE',
2337
+ 'read_image': 'LOAD IMAGE',
2338
+ 'show_html': 'HTML'
2339
  };
2340
  const toolDescriptions = {
2341
  'web_search': data.args?.query || '',
 
2344
  'generate_image': data.args?.prompt || '',
2345
  'edit_image': `${data.args?.prompt || ''} (from ${data.args?.source || ''})`,
2346
  'read_image_url': data.args?.url || '',
2347
+ 'read_image': data.args?.source || '',
2348
+ 'show_html': data.args?.source?.substring(0, 80) || ''
2349
  };
2350
  const label = toolLabels[data.tool] || data.tool.toUpperCase();
2351
  const description = toolDescriptions[data.tool] || '';
2352
 
2353
  // Store tool call in DOM for history reconstruction
2354
+ // Reuse currentMessageEl (from thinking) if it exists, like launch events do
2355
+ let toolCallMsg = currentMessageEl;
2356
+ if (!toolCallMsg) {
2357
+ toolCallMsg = document.createElement('div');
2358
+ toolCallMsg.className = 'message assistant';
2359
+ toolCallMsg.style.display = 'none';
2360
+ chatContainer.appendChild(toolCallMsg);
2361
+ }
2362
  toolCallMsg.setAttribute('data-tool-call', JSON.stringify({
2363
  tool_call_id: data.tool_call_id,
2364
  function_name: data.tool,
2365
  arguments: data.arguments,
2366
  thinking: data.thinking || ''
2367
  }));
 
2368
 
2369
  // Create tool-cell box (similar to code-cell)
2370
  const toolCell = document.createElement('div');
 
2438
  } else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url' || data.tool === 'read_image') && !data.image) {
2439
  const errMsg = data.response || 'Failed to process image';
2440
  outputHtml = `<div class="tool-cell-read-summary">${escapeHtml(errMsg)}</div>`;
2441
+ } else if (data.tool === 'show_html' && data.result?.html) {
2442
+ // Create iframe programmatically to avoid escaping issues with srcdoc
2443
+ if (lastToolCell) {
2444
+ const outputEl = document.createElement('div');
2445
+ outputEl.className = 'tool-cell-output';
2446
+ const iframe = document.createElement('iframe');
2447
+ iframe.className = 'show-html-iframe';
2448
+ iframe.sandbox = 'allow-scripts allow-same-origin';
2449
+ iframe.srcdoc = data.result.html;
2450
+ outputEl.appendChild(iframe);
2451
+ lastToolCell.appendChild(outputEl);
2452
+ }
2453
+ } else if (data.tool === 'show_html' && !data.result?.html) {
2454
+ const errMsg = data.response || 'Failed to load HTML';
2455
+ outputHtml = `<div class="tool-cell-read-summary">${escapeHtml(errMsg)}</div>`;
2456
  }
2457
 
2458
  if (outputHtml && lastToolCell) {
 
2516
  // Reset current message element so any subsequent thinking starts fresh
2517
  currentMessageEl = null;
2518
 
2519
+ } else if (data.type === 'debug_call_input') {
2520
+ // Debug: LLM call input (before API call)
2521
+ if (!debugHistory[tabId]) debugHistory[tabId] = [];
2522
+ debugHistory[tabId].push({
2523
+ call_number: data.call_number,
2524
+ timestamp: new Date().toLocaleTimeString(),
2525
+ input: data.messages,
2526
+ output: null,
2527
+ error: null
2528
+ });
2529
+ if (document.getElementById('debugPanel')?.classList.contains('active')) loadDebugMessages();
2530
+
2531
+ } else if (data.type === 'debug_call_output') {
2532
+ // Debug: LLM call output (after API call)
2533
+ // Match the last pending call (call_numbers reset per streaming request)
2534
+ const calls = debugHistory[tabId] || [];
2535
+ const call = calls.findLast(c => c.output === null && c.error === null);
2536
+ if (call) {
2537
+ call.output = data.response || null;
2538
+ call.error = data.error || null;
2539
+ }
2540
+ if (document.getElementById('debugPanel')?.classList.contains('active')) loadDebugMessages();
2541
+
2542
  } else if (data.type === 'aborted') {
2543
  // Agent was aborted by user
2544
  hideProgressWidget(chatContainer);
 
3313
  }
3314
  }
3315
 
3316
+ // Restore debug history
3317
+ if (workspace.debugHistory) {
3318
+ Object.keys(debugHistory).forEach(k => delete debugHistory[k]);
3319
+ for (const [tabId, calls] of Object.entries(workspace.debugHistory)) {
3320
+ debugHistory[tabId] = calls;
3321
+ }
3322
+ }
3323
+
3324
  // Restore tabs (skip command center as it already exists in HTML)
3325
  const tabs = workspace.tabs || [];
3326
  for (const tabData of tabs) {
 
3693
  activeTabId: activeTabId,
3694
  agentCounters: agentCounters,
3695
  tabs: [],
3696
+ timelineData: serializeTimelineData(),
3697
+ debugHistory: debugHistory
3698
  };
3699
 
3700
  // Serialize command center (tab 0)
 
4825
  const debugPanel = document.getElementById('debugPanel');
4826
  const debugBtn = document.getElementById('debugBtn');
4827
  const debugClose = document.getElementById('debugClose');
 
4828
  const debugContent = document.getElementById('debugContent');
4829
 
4830
  // Toggle debug panel
 
4867
  });
4868
  }
4869
 
4870
+
4871
+ // Load debug messages from backend
4872
+ function formatDebugJson(obj) {
4873
+ /**
4874
+ * Format an object as HTML-escaped JSON, replacing base64 image data
4875
+ * with clickable placeholders that show a thumbnail on hover.
4876
+ */
4877
+ // Collect base64 images and replace with placeholders before escaping
4878
+ const images = [];
4879
+ const json = JSON.stringify(obj, null, 2);
4880
+ const placeholder = json.replace(
4881
+ /"(data:image\/[^;]+;base64,)([A-Za-z0-9+/=\n]{200,})"/g,
4882
+ (match, prefix, b64) => {
4883
+ const idx = images.length;
4884
+ const sizeKB = (b64.length * 0.75 / 1024).toFixed(1);
4885
+ images.push(prefix + b64);
4886
+ return `"__DEBUG_IMG_${idx}_${sizeKB}KB__"`;
4887
+ }
4888
+ );
4889
+ // Now HTML-escape the JSON (placeholders are safe ASCII)
4890
+ let html = escapeHtml(placeholder);
4891
+ // Replace placeholders with hoverable image thumbnails
4892
+ html = html.replace(/__DEBUG_IMG_(\d+)_([\d.]+KB)__/g, (match, idx, size) => {
4893
+ const src = images[parseInt(idx)];
4894
+ return `<span class="debug-image-placeholder" onmouseenter="this.querySelector('.debug-image-tooltip').style.display='block'" onmouseleave="this.querySelector('.debug-image-tooltip').style.display='none'">[image ${size}]<span class="debug-image-tooltip"><img src="${src}"></span></span>`;
4895
  });
4896
+ return html;
4897
  }
4898
 
4899
+ function loadDebugMessages() {
4900
+ const calls = debugHistory[activeTabId] || [];
 
 
4901
 
4902
+ if (calls.length === 0) {
4903
+ debugContent.innerHTML = '<div style="padding: 10px; color: var(--text-secondary);">No LLM calls recorded yet.<br><br>Send a message in this tab to see the call history here.</div>';
4904
+ return;
4905
+ }
4906
 
4907
+ debugContent.innerHTML = calls.map((call, i) => {
4908
+ const isLast = i === calls.length - 1;
4909
+ const arrow = isLast ? '▼' : '▶';
4910
+ const display = isLast ? 'block' : 'none';
4911
+ const msgCount = call.input ? call.input.length : 0;
4912
 
4913
+ const inputHtml = call.input ? formatDebugJson(call.input) : '<em>No input</em>';
 
 
4914
 
4915
+ let outputHtml;
4916
+ if (call.error) {
4917
+ outputHtml = `<span style="color: #d32f2f;">${escapeHtml(call.error)}</span>`;
4918
+ } else if (call.output) {
4919
+ outputHtml = formatDebugJson(call.output);
 
 
 
 
 
4920
  } else {
4921
+ outputHtml = '<em>Pending...</em>';
4922
  }
4923
+
4924
+ return `<div class="debug-call-item${isLast ? ' expanded' : ''}" id="callitem-${i}"><div class="debug-call-header" onclick="toggleDebugCall(${i})"><span class="debug-call-arrow" id="arrow-${i}">${arrow}</span><span class="debug-call-title">Call #${i + 1}</span><span class="debug-call-time">${call.timestamp}</span></div><div class="debug-call-content" id="call-${i}" style="display: ${display};"><div class="debug-section-label">INPUT (${msgCount} messages)</div><pre>${inputHtml}</pre><div class="debug-section-label">OUTPUT</div><pre>${outputHtml}</pre></div></div>`;
4925
+ }).join('');
 
4926
  }
4927
 
4928
  // Toggle debug call expansion
4929
  window.toggleDebugCall = function(index) {
4930
  const content = document.getElementById(`call-${index}`);
4931
  const arrow = document.getElementById(`arrow-${index}`);
4932
+ const item = document.getElementById(`callitem-${index}`);
4933
  if (content.style.display === 'none') {
4934
  content.style.display = 'block';
4935
  arrow.textContent = '▼';
4936
+ item.classList.add('expanded');
4937
  } else {
4938
  content.style.display = 'none';
4939
  arrow.textContent = '▶';
4940
+ item.classList.remove('expanded');
4941
  }
4942
  }
4943
 
 
5343
  });
5344
  }
5345
 
5346
+ // Drag & drop upload on files panel
5347
+ if (fileTree) {
5348
+ let dragOverFolder = null;
5349
+
5350
+ fileTree.addEventListener('dragover', (e) => {
5351
+ // Only handle external file drops (not internal path drags)
5352
+ if (!e.dataTransfer.types.includes('Files')) return;
5353
+ e.preventDefault();
5354
+ e.dataTransfer.dropEffect = 'copy';
5355
+
5356
+ // Find folder under cursor
5357
+ const folderItem = e.target.closest('.file-tree-item.folder');
5358
+ if (folderItem) {
5359
+ if (dragOverFolder !== folderItem) {
5360
+ if (dragOverFolder) dragOverFolder.classList.remove('drag-over');
5361
+ fileTree.classList.remove('drag-over-root');
5362
+ folderItem.classList.add('drag-over');
5363
+ dragOverFolder = folderItem;
5364
+ }
5365
+ } else {
5366
+ if (dragOverFolder) { dragOverFolder.classList.remove('drag-over'); dragOverFolder = null; }
5367
+ fileTree.classList.add('drag-over-root');
5368
+ }
5369
+ });
5370
+
5371
+ fileTree.addEventListener('dragleave', (e) => {
5372
+ // Only clear when leaving the fileTree entirely
5373
+ if (!fileTree.contains(e.relatedTarget)) {
5374
+ if (dragOverFolder) { dragOverFolder.classList.remove('drag-over'); dragOverFolder = null; }
5375
+ fileTree.classList.remove('drag-over-root');
5376
+ }
5377
+ });
5378
+
5379
+ fileTree.addEventListener('drop', async (e) => {
5380
+ if (!e.dataTransfer.files.length) return;
5381
+ e.preventDefault();
5382
+
5383
+ // Determine target folder
5384
+ const folderItem = e.target.closest('.file-tree-item.folder');
5385
+ const folder = folderItem ? folderItem.dataset.path : '';
5386
+
5387
+ // Clear highlights
5388
+ if (dragOverFolder) { dragOverFolder.classList.remove('drag-over'); dragOverFolder = null; }
5389
+ fileTree.classList.remove('drag-over-root');
5390
+
5391
+ // Upload all files
5392
+ for (const file of e.dataTransfer.files) {
5393
+ const formData = new FormData();
5394
+ formData.append('file', file);
5395
+ try {
5396
+ await apiFetch(`/api/files/upload?folder=${encodeURIComponent(folder)}`, { method: 'POST', body: formData });
5397
+ } catch (err) {
5398
+ console.error('Upload failed:', err);
5399
+ }
5400
+ }
5401
+ loadFileTree();
5402
+ });
5403
+ }
5404
+
5405
  // Sessions panel (same pattern as Files/Settings/Debug panels)
5406
  const sessionsPanel = document.getElementById('sessionsPanel');
5407
  const sessionsPanelClose = document.getElementById('sessionsPanelClose');
frontend/style.css CHANGED
@@ -2764,8 +2764,8 @@ pre code [class*="token"] {
2764
  .debug-panel {
2765
  position: fixed;
2766
  top: 25px;
2767
- right: -600px;
2768
- width: 600px;
2769
  height: calc(100vh - 25px);
2770
  background: var(--bg-card);
2771
  border-left: 1px solid var(--border-primary);
@@ -2783,8 +2783,8 @@ pre code [class*="token"] {
2783
  padding: 6px 12px;
2784
  border-bottom: 1px solid var(--border-primary);
2785
  display: flex;
2786
- justify-content: space-between;
2787
  align-items: center;
 
2788
  background: var(--theme-accent);
2789
  }
2790
 
@@ -2795,6 +2795,7 @@ pre code [class*="token"] {
2795
  color: white;
2796
  text-transform: uppercase;
2797
  letter-spacing: 0.5px;
 
2798
  }
2799
 
2800
  .debug-close {
@@ -2824,31 +2825,6 @@ pre code [class*="token"] {
2824
  flex-direction: column;
2825
  }
2826
 
2827
- .debug-controls {
2828
- padding: 6px 12px;
2829
- border-bottom: 1px solid var(--border-primary);
2830
- background: var(--bg-tertiary);
2831
- }
2832
-
2833
- .debug-refresh {
2834
- padding: 4px 10px;
2835
- background: var(--theme-accent);
2836
- color: white;
2837
- border: none;
2838
- border-radius: 3px;
2839
- font-family: 'JetBrains Mono', monospace;
2840
- font-size: 10px;
2841
- font-weight: 500;
2842
- cursor: pointer;
2843
- text-transform: uppercase;
2844
- letter-spacing: 0.3px;
2845
- transition: opacity 0.15s;
2846
- }
2847
-
2848
- .debug-refresh:hover {
2849
- opacity: 0.9;
2850
- }
2851
-
2852
  .debug-content {
2853
  flex: 1;
2854
  padding: 0;
@@ -2862,16 +2838,19 @@ pre code [class*="token"] {
2862
 
2863
  .debug-call-item {
2864
  border-bottom: 1px solid var(--border-primary);
2865
- background: var(--bg-card);
 
 
 
2866
  }
2867
 
2868
  .debug-call-header {
2869
- padding: 6px 12px;
2870
- background: var(--bg-card);
2871
  cursor: pointer;
2872
  display: flex;
2873
  align-items: center;
2874
  gap: 8px;
 
2875
  transition: background 0.15s;
2876
  user-select: none;
2877
  }
@@ -2900,16 +2879,60 @@ pre code [class*="token"] {
2900
  }
2901
 
2902
  .debug-call-content {
2903
- margin: 0;
2904
  padding: 8px 12px;
2905
- background: var(--bg-tertiary);
2906
- border-top: 1px solid var(--border-primary);
 
 
 
 
2907
  white-space: pre-wrap;
2908
  word-wrap: break-word;
2909
- overflow-x: auto;
2910
  font-size: 10px;
2911
  }
2912
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2913
  /* Debug button uses same styling as settings/files buttons */
2914
 
2915
  /* Settings Panel (side panel like debug) */
@@ -3541,6 +3564,15 @@ pre code [class*="token"] {
3541
  border-radius: 3px;
3542
  }
3543
 
 
 
 
 
 
 
 
 
 
3544
  /* Inline code file path links */
3545
  .file-path-link {
3546
  text-decoration: none;
@@ -4125,6 +4157,14 @@ pre code [class*="token"] {
4125
  font-size: 12px;
4126
  }
4127
 
 
 
 
 
 
 
 
 
4128
  .search-results-content {
4129
  display: flex;
4130
  flex-direction: column;
 
2764
  .debug-panel {
2765
  position: fixed;
2766
  top: 25px;
2767
+ right: -450px;
2768
+ width: 450px;
2769
  height: calc(100vh - 25px);
2770
  background: var(--bg-card);
2771
  border-left: 1px solid var(--border-primary);
 
2783
  padding: 6px 12px;
2784
  border-bottom: 1px solid var(--border-primary);
2785
  display: flex;
 
2786
  align-items: center;
2787
+ gap: 8px;
2788
  background: var(--theme-accent);
2789
  }
2790
 
 
2795
  color: white;
2796
  text-transform: uppercase;
2797
  letter-spacing: 0.5px;
2798
+ flex: 1;
2799
  }
2800
 
2801
  .debug-close {
 
2825
  flex-direction: column;
2826
  }
2827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2828
  .debug-content {
2829
  flex: 1;
2830
  padding: 0;
 
2838
 
2839
  .debug-call-item {
2840
  border-bottom: 1px solid var(--border-primary);
2841
+ }
2842
+
2843
+ .debug-call-item.expanded {
2844
+ border-bottom: none;
2845
  }
2846
 
2847
  .debug-call-header {
2848
+ padding: 2px 10px;
 
2849
  cursor: pointer;
2850
  display: flex;
2851
  align-items: center;
2852
  gap: 8px;
2853
+ line-height: 1.5;
2854
  transition: background 0.15s;
2855
  user-select: none;
2856
  }
 
2879
  }
2880
 
2881
  .debug-call-content {
 
2882
  padding: 8px 12px;
2883
+ overflow-x: auto;
2884
+ font-size: 10px;
2885
+ }
2886
+
2887
+ .debug-call-content pre {
2888
+ margin: 0;
2889
  white-space: pre-wrap;
2890
  word-wrap: break-word;
 
2891
  font-size: 10px;
2892
  }
2893
 
2894
+ .debug-section-label {
2895
+ font-size: 10px;
2896
+ font-weight: 600;
2897
+ text-transform: uppercase;
2898
+ color: var(--text-muted);
2899
+ margin: 8px 0 4px 0;
2900
+ letter-spacing: 0.5px;
2901
+ }
2902
+
2903
+ .debug-section-label:first-child {
2904
+ margin-top: 0;
2905
+ }
2906
+
2907
+ .debug-image-placeholder {
2908
+ background: var(--theme-bg);
2909
+ color: var(--theme-accent);
2910
+ padding: 1px 4px;
2911
+ border-radius: 3px;
2912
+ cursor: pointer;
2913
+ position: relative;
2914
+ display: inline;
2915
+ }
2916
+
2917
+ .debug-image-tooltip {
2918
+ display: none;
2919
+ position: absolute;
2920
+ bottom: 100%;
2921
+ left: 0;
2922
+ z-index: 1001;
2923
+ padding: 4px;
2924
+ background: var(--bg-card);
2925
+ border: 1px solid var(--border-primary);
2926
+ border-radius: 4px;
2927
+ box-shadow: 0 2px 8px rgba(0,0,0,0.15);
2928
+ }
2929
+
2930
+ .debug-image-tooltip img {
2931
+ max-width: 200px;
2932
+ max-height: 200px;
2933
+ display: block;
2934
+ }
2935
+
2936
  /* Debug button uses same styling as settings/files buttons */
2937
 
2938
  /* Settings Panel (side panel like debug) */
 
3564
  border-radius: 3px;
3565
  }
3566
 
3567
+ .file-tree-item.folder.drag-over > .file-tree-line {
3568
+ background: rgba(var(--theme-accent-rgb), 0.15);
3569
+ border-radius: 3px;
3570
+ }
3571
+
3572
+ #fileTree.drag-over-root {
3573
+ background: rgba(var(--theme-accent-rgb), 0.06);
3574
+ }
3575
+
3576
  /* Inline code file path links */
3577
  .file-path-link {
3578
  text-decoration: none;
 
4157
  font-size: 12px;
4158
  }
4159
 
4160
+ .tool-cell-output .show-html-iframe {
4161
+ width: 100%;
4162
+ height: 400px;
4163
+ border: 1px solid var(--border-primary);
4164
+ border-radius: 4px;
4165
+ background: #fff;
4166
+ }
4167
+
4168
  .search-results-content {
4169
  display: flex;
4170
  flex-direction: column;