lvwerra HF Staff Claude Opus 4.6 committed on
Commit
2a5ead4
·
1 Parent(s): d22e6fd

Add IMAGE agent, web tools for AGENT, and fix conversation history

Browse files

- New IMAGE notebook type with HuggingFace image generation/editing tools
(generate_image, edit_image, read_image_url via FLUX.1 models)
- New backend/image.py streaming handler with image store and VLM context
resize (512px JPEG thumbnails to avoid token overflow)
- New backend/tools.py centralizing all tool definitions and execution functions
- New backend/agent.py with web tools (web_search, read_url, screenshot_url)
- Image model settings use dropdowns populated from configured models
- Settings panel shows active settings.json file path
- Fix conversation history: sub-notebook results now update the tool response
DOM element so follow-up questions have full context

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

backend/agent.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent notebook backend - autonomous agent with web tools (search, read, screenshot).
3
+
4
+ Uses the same tool-calling loop pattern as code.py:
5
+ LLM call → parse tool_calls → execute → update history → repeat
6
+ """
7
+ import json
8
+ import logging
9
+ import re
10
+ import time
11
+ from typing import List, Dict, Optional
12
+
13
+ from tools import (
14
+ web_search, read_url, screenshot_url,
15
+ execute_web_search, execute_read_url, execute_screenshot_url,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
# Tool definitions imported from tools.py; passed to the LLM as the available tool set.
TOOLS = [web_search, read_url, screenshot_url]

MAX_TURNS = 20             # hard cap on LLM round-trips per agent run
MAX_RETRIES = 3            # max attempts per LLM call
RETRY_DELAYS = [2, 5, 10]  # backoff in seconds for retry attempts 1..MAX_RETRIES-1
25
+
26
+
27
def parse_llm_error(error: Exception) -> dict:
    """Parse an LLM API error into a frontend-friendly dict.

    Args:
        error: Exception raised by the LLM client.

    Returns:
        dict with:
        - "message": human-readable message, extracted from an embedded JSON
          payload when one is present, otherwise ``str(error)``.
        - "retryable": True when the error looks like a rate-limit/overload
          condition worth retrying.
    """
    error_str = str(error)
    try:
        # Many providers embed a JSON error payload inside the exception text.
        json_match = re.search(r'\{.*\}', error_str)
        if json_match:
            error_data = json.loads(json_match.group())
            return {
                "message": error_data.get("message", error_str),
                "retryable": error_data.get("type") == "too_many_requests_error" or "429" in error_str
            }
    # Narrowed from a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt; only JSON-extraction failures should fall through.
    except (json.JSONDecodeError, TypeError, AttributeError):
        pass

    # Heuristic fallback: treat common rate-limit/overload markers as retryable.
    retryable = any(x in error_str.lower() for x in ["429", "rate limit", "too many requests", "overloaded"])
    return {"message": error_str, "retryable": retryable}
43
+
44
+
45
def execute_tool(tool_name: str, args: dict, serper_key: str) -> dict:
    """Dispatch a single tool call and package its outcome for LLM + frontend.

    Args:
        tool_name: One of "web_search", "read_url", "screenshot_url".
        args: Parsed JSON arguments from the LLM tool call.
        serper_key: Serper API key (used by web_search only).

    Returns:
        dict with keys:
        - "content": str result for the LLM
        - "image": optional base64 PNG (for screenshot_url)
        - "display": dict with display-friendly data for frontend
    """
    if tool_name == "web_search":
        query = args.get("query", "")
        search_output = execute_web_search(query, serper_key, args.get("num_results", 5))
        return {
            "content": search_output,
            "display": {"type": "search", "query": query, "results": search_output},
        }

    if tool_name == "read_url":
        url = args.get("url", "")
        page_markdown = execute_read_url(url)
        return {
            "content": page_markdown,
            "display": {"type": "page", "url": url, "length": len(page_markdown)},
        }

    if tool_name == "screenshot_url":
        url = args.get("url", "")
        png_b64 = execute_screenshot_url(url)
        if not png_b64:
            return {
                "content": f"Failed to take screenshot of {url}. The page may require JavaScript or be inaccessible.",
                "display": {"type": "screenshot_error", "url": url},
            }
        return {
            "content": "Screenshot captured successfully. The image is attached.",
            "image": png_b64,
            "display": {"type": "screenshot", "url": url},
        }

    # Unrecognized tool name — surface it to the LLM rather than raising.
    return {"content": f"Unknown tool: {tool_name}", "display": {"type": "error"}}
88
+
89
+
90
def stream_agent_execution(
    client,
    model: str,
    messages: List[Dict],
    serper_key: str,
    extra_params: Optional[Dict] = None
):
    """
    Run the agent tool-calling loop.

    NOTE: `messages` is mutated in place — assistant/tool messages are
    appended as the loop progresses, so the caller sees the full conversation
    history afterwards.

    Yields dicts with SSE event types:
    - thinking: { content }
    - content: { content }
    - tool_start: { tool, args }
    - tool_result: { tool, result, image? }
    - result_preview: { content }
    - result: { content }
    - generating: {}
    - retry: { attempt, max_attempts, delay, message }
    - error: { content }
    - done: {}
    """
    turns = 0
    done = False

    while not done and turns < MAX_TURNS:
        turns += 1

        # --- LLM call with retry ---
        response = None
        last_error = None

        for attempt in range(MAX_RETRIES):
            try:
                call_params = {
                    "messages": messages,
                    "model": model,
                    "tools": TOOLS,
                    "tool_choice": "auto",
                }
                if extra_params:
                    # Provider-specific options ride along via extra_body.
                    call_params["extra_body"] = extra_params
                response = client.chat.completions.create(**call_params)
                break
            except Exception as e:
                last_error = e
                error_info = parse_llm_error(e)
                if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
                    delay = RETRY_DELAYS[attempt]
                    yield {
                        "type": "retry",
                        "attempt": attempt + 1,
                        "max_attempts": MAX_RETRIES,
                        "delay": delay,
                        "message": error_info["message"],
                    }
                    # Blocks the generator (and its consumer) for the backoff period.
                    time.sleep(delay)
                else:
                    # Non-retryable or out of attempts: report and stop the stream.
                    yield {"type": "error", "content": error_info["message"]}
                    return

        if response is None:
            yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
            return

        # --- Parse response ---
        assistant_message = response.choices[0].message
        content = assistant_message.content or ""
        tool_calls = assistant_message.tool_calls or []

        # Check for <result> tags
        result_match = re.search(r'<result>(.*?)</result>', content, re.DOTALL | re.IGNORECASE)
        result_content = None
        thinking_content = content

        if result_match:
            result_content = result_match.group(1).strip()
            # Everything outside the <result> tag is treated as reasoning text.
            thinking_content = re.sub(r'<result>.*?</result>', '', content, flags=re.DOTALL | re.IGNORECASE).strip()

        # Send thinking/content
        if thinking_content.strip():
            if tool_calls:
                yield {"type": "thinking", "content": thinking_content}
            else:
                yield {"type": "content", "content": thinking_content}

        # Send result preview
        if result_content:
            yield {"type": "result_preview", "content": result_content}

        # --- Handle tool calls ---
        if tool_calls:
            # NOTE(review): one assistant message is appended per tool call,
            # each repeating the full `content` — for responses carrying
            # multiple tool calls this duplicates the assistant text in
            # history; confirm the target API accepts this shape.
            for tool_call in tool_calls:
                func_name = tool_call.function.name

                # Parse arguments
                try:
                    args = json.loads(tool_call.function.arguments)
                except json.JSONDecodeError as e:
                    # Feed the parse failure back to the LLM so it can retry the call.
                    output = f"Error parsing arguments: {e}"
                    messages.append({
                        "role": "assistant",
                        "content": content,
                        "tool_calls": [{"id": tool_call.id, "type": "function", "function": {"name": func_name, "arguments": tool_call.function.arguments}}]
                    })
                    messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": output})
                    yield {"type": "error", "content": output}
                    continue

                # Signal tool start (include IDs for history reconstruction)
                yield {
                    "type": "tool_start",
                    "tool": func_name,
                    "args": args,
                    "tool_call_id": tool_call.id,
                    "arguments": tool_call.function.arguments,
                    "thinking": content,
                }

                # Execute tool
                result = execute_tool(func_name, args, serper_key)

                # Build tool response message for LLM
                if result.get("image"):
                    # For screenshots, send image as vision content so LLM can see it
                    tool_response_content = [
                        {"type": "text", "text": result["content"]},
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{result['image']}"}}
                    ]
                else:
                    tool_response_content = result["content"]

                # Tool messages must be strings, so vision content is JSON-encoded.
                tool_response_str = tool_response_content if isinstance(tool_response_content, str) else json.dumps(tool_response_content)

                # Add to message history
                messages.append({
                    "role": "assistant",
                    "content": content,
                    "tool_calls": [{"id": tool_call.id, "type": "function", "function": {"name": func_name, "arguments": tool_call.function.arguments}}]
                })
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_response_str
                })

                # Signal tool result to frontend (include response for history)
                tool_result_event = {
                    "type": "tool_result",
                    "tool": func_name,
                    "tool_call_id": tool_call.id,
                    "result": result.get("display", {}),
                    "response": tool_response_str,
                }
                if result.get("image"):
                    tool_result_event["image"] = result["image"]
                yield tool_result_event

        else:
            # No tool calls — we're done
            messages.append({"role": "assistant", "content": content})
            done = True

            # Send result if found
            if result_content:
                yield {"type": "result", "content": result_content}

        # Signal between-turn processing
        if not done:
            yield {"type": "generating"}

    yield {"type": "done"}
backend/agents.py CHANGED
@@ -74,13 +74,33 @@ AGENT_REGISTRY = {
74
  "agent": {
75
  "label": "AGENT",
76
  "system_prompt": (
77
- "You are an autonomous agent assistant specialized in breaking down and executing multi-step tasks.\n\n"
78
- "Your role is to:\n"
79
- "- Understand complex tasks and break them down into clear steps\n"
80
- "- Execute tasks methodically\n"
81
- "- Keep track of progress and next steps\n"
82
- "- Provide clear status updates\n\n"
83
- "Focus on being proactive, organized, and thorough in completing multi-step workflows.\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  ),
85
  "tool": {
86
  "type": "function",
@@ -105,7 +125,7 @@ AGENT_REGISTRY = {
105
  },
106
  "tool_arg": "task",
107
  "has_counter": True,
108
- "in_menu": False,
109
  "in_launcher": True,
110
  "placeholder": "Enter message...",
111
  },
@@ -272,6 +292,65 @@ AGENT_REGISTRY = {
272
  "in_launcher": True,
273
  "placeholder": "Enter message...",
274
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  }
276
 
277
 
 
74
  "agent": {
75
  "label": "AGENT",
76
  "system_prompt": (
77
+ "You are an autonomous agent with web access, specialized in research and multi-step tasks.\n\n"
78
+ "## Available Tools\n\n"
79
+ "You have three tools:\n"
80
+ "- **web_search(query)**: Search Google. Returns titles, URLs, and snippets. "
81
+ "Use this first to discover information and find relevant pages.\n"
82
+ "- **read_url(url)**: Fetch a web page and extract its content as clean markdown (includes images). "
83
+ "Use this when you need detailed content from a specific page.\n"
84
+ "- **screenshot_url(url)**: Take a screenshot of a web page. "
85
+ "Use this when you need to see the visual layout, images, charts, or design.\n\n"
86
+ "## Strategy\n\n"
87
+ "1. **Search first** — use web_search to find relevant pages\n"
88
+ "2. **Evaluate snippets** — often the search snippets contain enough info to answer\n"
89
+ "3. **Read selectively** — only use read_url on 1-3 most relevant pages when you need more detail\n"
90
+ "4. **Screenshot when visual** — use screenshot_url when images, charts, or layout matter\n"
91
+ "5. **Synthesize** — combine findings into a clear, concise answer\n\n"
92
+ "## Guidelines\n\n"
93
+ "- Be efficient with tool calls — don't read every search result\n"
94
+ "- Break complex tasks into steps and explain your reasoning\n"
95
+ "- Cite sources with URLs when presenting findings\n\n"
96
+ "## CRITICAL: You MUST provide a <result> tag\n\n"
97
+ "When you have completed the task, you MUST provide a brief summary using the <result> tag. "
98
+ "This is REQUIRED - without it, your work will not be visible in the command center.\n\n"
99
+ "Keep results SHORT - 1-3 sentences summarizing what you found or did.\n\n"
100
+ "Example:\n"
101
+ "<result>\n"
102
+ "Python 3.13 was released on Oct 7, 2024 with key features including ...\n"
103
+ "</result>\n"
104
  ),
105
  "tool": {
106
  "type": "function",
 
125
  },
126
  "tool_arg": "task",
127
  "has_counter": True,
128
+ "in_menu": True,
129
  "in_launcher": True,
130
  "placeholder": "Enter message...",
131
  },
 
292
  "in_launcher": True,
293
  "placeholder": "Enter message...",
294
  },
295
+
296
+ "image": {
297
+ "label": "IMAGE",
298
+ "system_prompt": (
299
+ "You are a creative AI assistant with access to image generation and editing tools.\n\n"
300
+ "## Available Tools\n\n"
301
+ "- **generate_image(prompt)**: Generate a new image from a text description. "
302
+ "Returns an image reference (e.g., 'image_1') that you can see.\n"
303
+ "- **edit_image(prompt, source)**: Edit or transform an existing image. "
304
+ "The source can be a URL or an image reference from a previous tool call (e.g., 'image_1').\n"
305
+ "- **read_image_url(url)**: Download an image from a URL. "
306
+ "Returns an image reference that you can see and use with edit_image.\n\n"
307
+ "## Strategy\n\n"
308
+ "1. If the user provides an image URL, use read_image_url first to load it\n"
309
+ "2. Use generate_image for creating new images from text descriptions\n"
310
+ "3. Use edit_image to transform existing images (style transfer, edits, variations)\n"
311
+ "4. You can see all generated/loaded images — describe what you see and iterate if needed\n"
312
+ "5. Write detailed, descriptive prompts for best results\n\n"
313
+ "## Guidelines\n\n"
314
+ "- Be creative and descriptive in your image prompts\n"
315
+ "- When editing, reference the source image by its name (e.g., 'image_1')\n"
316
+ "- Describe what you see in generated images to confirm they match the request\n\n"
317
+ "## CRITICAL: You MUST provide a <result> tag\n\n"
318
+ "When you have completed the task, you MUST provide a brief summary using the <result> tag. "
319
+ "This is REQUIRED - without it, your work will not be visible in the command center.\n\n"
320
+ "Include image references in your result using self-closing tags like <image_1> (NOT </image_1>).\n\n"
321
+ "Example:\n"
322
+ "<result>\n"
323
+ "Here's the comic version of your image:\n\n"
324
+ "<image_2>\n"
325
+ "</result>\n"
326
+ ),
327
+ "tool": {
328
+ "type": "function",
329
+ "function": {
330
+ "name": "launch_image_notebook",
331
+ "description": "Launch an image notebook for generating or editing images using AI models. Use this for creating images from text, applying style transfers, editing photos, or any visual content creation.",
332
+ "parameters": {
333
+ "type": "object",
334
+ "properties": {
335
+ "task": {
336
+ "type": "string",
337
+ "description": "The image task or description. Should contain all necessary context including any image URLs."
338
+ },
339
+ "task_id": {
340
+ "type": "string",
341
+ "description": "A 2-3 word summary of the task, separated by dashes."
342
+ }
343
+ },
344
+ "required": ["task", "task_id"]
345
+ }
346
+ }
347
+ },
348
+ "tool_arg": "task",
349
+ "has_counter": True,
350
+ "in_menu": True,
351
+ "in_launcher": True,
352
+ "placeholder": "Describe an image or paste a URL...",
353
+ },
354
  }
355
 
356
 
backend/code.py CHANGED
@@ -8,77 +8,11 @@ import re
8
  from typing import List, Dict, Optional
9
  from e2b_code_interpreter import Sandbox
10
 
11
- logger = logging.getLogger(__name__)
12
 
 
13
 
14
- TOOLS = [
15
- {
16
- "type": "function",
17
- "function": {
18
- "name": "execute_code",
19
- "description": "Execute Python code in a stateful environment. Variables and imports persist between executions.",
20
- "parameters": {
21
- "type": "object",
22
- "properties": {
23
- "code": {
24
- "type": "string",
25
- "description": "The Python code to execute."
26
- }
27
- },
28
- "required": ["code"]
29
- }
30
- }
31
- },
32
- {
33
- "type": "function",
34
- "function": {
35
- "name": "upload_files",
36
- "description": "Upload files from the local workspace to the code execution environment for analysis. Files will be available at /home/user/<filename>. Use this to load data files, scripts, or any files you need to analyze.",
37
- "parameters": {
38
- "type": "object",
39
- "properties": {
40
- "paths": {
41
- "type": "array",
42
- "items": {"type": "string"},
43
- "description": "List of file paths relative to the workspace root (e.g., ['data/sales.csv', 'config.json'])"
44
- }
45
- },
46
- "required": ["paths"]
47
- }
48
- }
49
- },
50
- {
51
- "type": "function",
52
- "function": {
53
- "name": "download_files",
54
- "description": "Download files from the code execution environment to the local workspace. Use this to save generated files, processed data, or any output files you want to keep.",
55
- "parameters": {
56
- "type": "object",
57
- "properties": {
58
- "files": {
59
- "type": "array",
60
- "items": {
61
- "type": "object",
62
- "properties": {
63
- "sandbox_path": {
64
- "type": "string",
65
- "description": "Path in the sandbox (e.g., '/home/user/output.csv')"
66
- },
67
- "local_path": {
68
- "type": "string",
69
- "description": "Destination path relative to workspace (e.g., 'results/output.csv')"
70
- }
71
- },
72
- "required": ["sandbox_path", "local_path"]
73
- },
74
- "description": "List of files to download with their sandbox and local paths"
75
- }
76
- },
77
- "required": ["files"]
78
- }
79
- }
80
- }
81
- ]
82
 
83
  MAX_TURNS = 40
84
  MAX_RETRIES = 3 # Maximum retries for LLM calls
 
8
  from typing import List, Dict, Optional
9
  from e2b_code_interpreter import Sandbox
10
 
11
+ from tools import execute_code, upload_files, download_files
12
 
13
+ logger = logging.getLogger(__name__)
14
 
15
+ TOOLS = [execute_code, upload_files, download_files]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  MAX_TURNS = 40
18
  MAX_RETRIES = 3 # Maximum retries for LLM calls
backend/image.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Image notebook backend — multimodal agent with HuggingFace image generation tools.
3
+
4
+ Uses the same tool-calling loop pattern as agent.py:
5
+ LLM call → parse tool_calls → execute → update history → repeat
6
+
7
+ Key difference: maintains an image store (Dict[str, str]) mapping names like
8
+ "image_1" to base64 data, so the VLM can reference images across tool calls
9
+ without passing huge base64 strings in arguments.
10
+ """
11
+ import base64
12
+ import json
13
+ import logging
14
+ import re
15
+ import time
16
+ from typing import List, Dict, Optional
17
+
18
+ try:
19
+ from .tools import (
20
+ generate_image, edit_image, read_image_url,
21
+ execute_generate_image, execute_edit_image, execute_read_image_url,
22
+ )
23
+ except ImportError:
24
+ from tools import (
25
+ generate_image, edit_image, read_image_url,
26
+ execute_generate_image, execute_edit_image, execute_read_image_url,
27
+ )
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
# Tool definitions imported from tools.py; passed to the LLM as the available tool set.
TOOLS = [generate_image, edit_image, read_image_url]

# Max dimension for images sent to the VLM context (keeps token count manageable)
VLM_IMAGE_MAX_DIM = 512
VLM_IMAGE_JPEG_QUALITY = 70  # JPEG quality for VLM context thumbnails
36
+
37
+
38
def resize_image_for_vlm(base64_png: str) -> str:
    """Resize and compress an image for the VLM context to avoid token overflow.

    Takes a full-resolution base64 PNG and returns a smaller base64 JPEG
    thumbnail that fits within VLM_IMAGE_MAX_DIM on its longest side.

    Args:
        base64_png: Base64-encoded source image (any PIL-readable format).

    Returns:
        Base64-encoded JPEG thumbnail, or the original string unchanged when
        Pillow is unavailable or decoding/conversion fails.
    """
    try:
        from PIL import Image
        import io as _io

        img_bytes = base64.b64decode(base64_png)
        img = Image.open(_io.BytesIO(img_bytes))

        # Downscale in place if larger than the max dimension (preserves aspect ratio).
        if max(img.size) > VLM_IMAGE_MAX_DIM:
            img.thumbnail((VLM_IMAGE_MAX_DIM, VLM_IMAGE_MAX_DIM), Image.LANCZOS)

        # JPEG only supports RGB/L. The original check covered just RGBA and P,
        # so modes like LA or CMYK raised in img.save() and silently fell back
        # to the full-size PNG — defeating the token-budget purpose. Convert
        # every non-RGB/L mode instead.
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        # Re-encode as JPEG for a much smaller base64 payload.
        buffer = _io.BytesIO()
        img.save(buffer, format="JPEG", quality=VLM_IMAGE_JPEG_QUALITY)
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
    except Exception as e:
        logging.getLogger(__name__).error(f"Failed to resize image for VLM: {e}")
        # Fall back to the original — better to try than to lose the image entirely
        return base64_png
67
+
68
MAX_TURNS = 20             # hard cap on LLM round-trips per run
MAX_RETRIES = 3            # max attempts per LLM call
RETRY_DELAYS = [2, 5, 10]  # backoff in seconds for retry attempts 1..MAX_RETRIES-1
71
+
72
+
73
def parse_llm_error(error: Exception) -> dict:
    """Parse an LLM API error into a frontend-friendly dict.

    Args:
        error: Exception raised by the LLM client.

    Returns:
        dict with:
        - "message": human-readable message, extracted from an embedded JSON
          payload when one is present, otherwise ``str(error)``.
        - "retryable": True when the error looks like a rate-limit/overload
          condition worth retrying.
    """
    error_str = str(error)
    try:
        # Many providers embed a JSON error payload inside the exception text.
        json_match = re.search(r'\{.*\}', error_str)
        if json_match:
            error_data = json.loads(json_match.group())
            return {
                "message": error_data.get("message", error_str),
                "retryable": error_data.get("type") == "too_many_requests_error" or "429" in error_str
            }
    # Narrowed from a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt; only JSON-extraction failures should fall through.
    except (json.JSONDecodeError, TypeError, AttributeError):
        pass

    # Heuristic fallback: treat common rate-limit/overload markers as retryable.
    retryable = any(x in error_str.lower() for x in ["429", "rate limit", "too many requests", "overloaded"])
    return {"message": error_str, "retryable": retryable}
89
+
90
+
91
def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, image_counter: int, default_gen_model: str = None, default_edit_model: str = None) -> dict:
    """Dispatch a single image-tool call and package its outcome.

    Args:
        tool_name: "generate_image", "edit_image", or "read_image_url".
        args: Parsed JSON arguments from the LLM tool call.
        hf_token: HuggingFace API token for inference calls.
        image_store: Mutable mapping of reference names ("image_N") to base64
            data; successful results are added here as a side effect.
        image_counter: Current counter; the (possibly incremented) value is
            returned under "image_counter".
        default_gen_model: Fallback generation model when args omit "model".
        default_edit_model: Fallback edit model when args omit "model".

    Returns:
        dict with keys:
        - "content": str result for the LLM
        - "image": optional base64 PNG
        - "image_name": optional image reference name (e.g., "image_1")
        - "display": dict with display-friendly data for frontend
        - "image_counter": updated counter
    """
    def _store(png_b64: str) -> str:
        # Register a new image under the next sequential reference name.
        nonlocal image_counter
        image_counter += 1
        ref = f"image_{image_counter}"
        image_store[ref] = png_b64
        return ref

    if tool_name == "generate_image":
        prompt = args.get("prompt", "")
        gen_model = args.get("model") or default_gen_model or "black-forest-labs/FLUX.1-schnell"
        png_b64 = execute_generate_image(prompt, hf_token, gen_model)
        if not png_b64:
            return {
                "content": f"Failed to generate image. The model may be unavailable or the prompt may be invalid.",
                "display": {"type": "generate_error", "prompt": prompt},
                "image_counter": image_counter,
            }
        ref = _store(png_b64)
        return {
            "content": f"Image generated successfully as '{ref}'. The image is attached.",
            "image": png_b64,
            "image_name": ref,
            "display": {"type": "generate", "prompt": prompt, "model": gen_model, "image_name": ref},
            "image_counter": image_counter,
        }

    if tool_name == "edit_image":
        prompt = args.get("prompt", "")
        source = args.get("source", "")
        edit_model = args.get("model") or default_edit_model or "black-forest-labs/FLUX.1-Kontext-dev"

        # Resolve the source image: stored reference first, then URL download.
        source_bytes = None
        if source in image_store:
            source_bytes = base64.b64decode(image_store[source])
        elif source.startswith(("http://", "https://")):
            fetched = execute_read_image_url(source)
            if fetched:
                source_bytes = base64.b64decode(fetched)

        if source_bytes is None:
            return {
                "content": f"Could not resolve image source '{source}'. Use a URL or a reference from a previous tool call (e.g., 'image_1').",
                "display": {"type": "edit_error", "source": source},
                "image_counter": image_counter,
            }

        png_b64 = execute_edit_image(prompt, source_bytes, hf_token, edit_model)
        if not png_b64:
            return {
                "content": f"Failed to edit image. The model may be unavailable or the request may be invalid.",
                "display": {"type": "edit_error", "source": source},
                "image_counter": image_counter,
            }
        ref = _store(png_b64)
        return {
            "content": f"Image edited successfully as '{ref}'. The image is attached.",
            "image": png_b64,
            "image_name": ref,
            "display": {"type": "edit", "prompt": prompt, "source": source, "model": edit_model, "image_name": ref},
            "image_counter": image_counter,
        }

    if tool_name == "read_image_url":
        url = args.get("url", "")
        png_b64 = execute_read_image_url(url)
        if not png_b64:
            return {
                "content": f"Failed to download image from {url}. The URL may be invalid or inaccessible.",
                "display": {"type": "read_image_error", "url": url},
                "image_counter": image_counter,
            }
        ref = _store(png_b64)
        return {
            "content": f"Image downloaded successfully as '{ref}'. The image is attached.",
            "image": png_b64,
            "image_name": ref,
            "display": {"type": "read_image", "url": url, "image_name": ref},
            "image_counter": image_counter,
        }

    # Unrecognized tool name — surface it to the LLM rather than raising.
    return {
        "content": f"Unknown tool: {tool_name}",
        "display": {"type": "error"},
        "image_counter": image_counter,
    }
194
+
195
+
196
def stream_image_execution(
    client,
    model: str,
    messages: List[Dict],
    hf_token: str,
    image_gen_model: Optional[str] = None,
    image_edit_model: Optional[str] = None,
    extra_params: Optional[Dict] = None
):
    """
    Run the image agent tool-calling loop.

    NOTE: `messages` is mutated in place — assistant/tool messages are
    appended as the loop progresses.

    Yields dicts with SSE event types:
    - thinking: { content }
    - content: { content }
    - tool_start: { tool, args }
    - tool_result: { tool, result, image? }
    - result_preview: { content }
    - result: { content, images? }
    - generating: {}
    - retry: { attempt, max_attempts, delay, message }
    - error: { content }
    - done: {}
    """
    turns = 0
    done = False
    # Maps reference names like "image_1" to full-resolution base64 data so
    # the LLM can refer to images by name instead of passing base64 around.
    image_store = {}
    image_counter = 0

    while not done and turns < MAX_TURNS:
        turns += 1

        # --- LLM call with retry ---
        response = None
        last_error = None

        for attempt in range(MAX_RETRIES):
            try:
                call_params = {
                    "messages": messages,
                    "model": model,
                    "tools": TOOLS,
                    "tool_choice": "auto",
                }
                if extra_params:
                    # Provider-specific options ride along via extra_body.
                    call_params["extra_body"] = extra_params
                response = client.chat.completions.create(**call_params)
                break
            except Exception as e:
                last_error = e
                error_info = parse_llm_error(e)
                if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
                    delay = RETRY_DELAYS[attempt]
                    yield {
                        "type": "retry",
                        "attempt": attempt + 1,
                        "max_attempts": MAX_RETRIES,
                        "delay": delay,
                        "message": error_info["message"],
                    }
                    # Blocks the generator (and its consumer) for the backoff period.
                    time.sleep(delay)
                else:
                    # Non-retryable or out of attempts: report and stop the stream.
                    yield {"type": "error", "content": error_info["message"]}
                    return

        if response is None:
            yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
            return

        # --- Parse response ---
        assistant_message = response.choices[0].message
        content = assistant_message.content or ""
        tool_calls = assistant_message.tool_calls or []

        # Check for <result> tags
        result_match = re.search(r'<result>(.*?)</result>', content, re.DOTALL | re.IGNORECASE)
        result_content = None
        thinking_content = content

        if result_match:
            result_content = result_match.group(1).strip()
            # Everything outside the <result> tag is treated as reasoning text.
            thinking_content = re.sub(r'<result>.*?</result>', '', content, flags=re.DOTALL | re.IGNORECASE).strip()

        # Send thinking/content
        if thinking_content.strip():
            if tool_calls:
                yield {"type": "thinking", "content": thinking_content}
            else:
                yield {"type": "content", "content": thinking_content}

        # Send result preview
        if result_content:
            # Include image store so frontend can resolve <image_N> references
            yield {"type": "result_preview", "content": result_content, "images": image_store}

        # --- Handle tool calls ---
        if tool_calls:
            # NOTE(review): one assistant message is appended per tool call,
            # each repeating the full `content` — for responses carrying
            # multiple tool calls this duplicates the assistant text in
            # history; confirm the target API accepts this shape.
            for tool_call in tool_calls:
                func_name = tool_call.function.name

                # Parse arguments
                try:
                    args = json.loads(tool_call.function.arguments)
                except json.JSONDecodeError as e:
                    # Feed the parse failure back to the LLM so it can retry the call.
                    output = f"Error parsing arguments: {e}"
                    messages.append({
                        "role": "assistant",
                        "content": content,
                        "tool_calls": [{"id": tool_call.id, "type": "function", "function": {"name": func_name, "arguments": tool_call.function.arguments}}]
                    })
                    messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": output})
                    yield {"type": "error", "content": output}
                    continue

                # Signal tool start
                yield {
                    "type": "tool_start",
                    "tool": func_name,
                    "args": args,
                    "tool_call_id": tool_call.id,
                    "arguments": tool_call.function.arguments,
                    "thinking": content,
                }

                # Execute tool (mutates image_store; counter threaded through the result)
                result = execute_tool(func_name, args, hf_token, image_store, image_counter, default_gen_model=image_gen_model, default_edit_model=image_edit_model)
                image_counter = result.get("image_counter", image_counter)

                # Build tool response message for LLM
                if result.get("image"):
                    # Resize image for VLM context to avoid token overflow
                    vlm_image = resize_image_for_vlm(result["image"])
                    tool_response_content = [
                        {"type": "text", "text": result["content"]},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{vlm_image}"}}
                    ]
                else:
                    tool_response_content = result["content"]

                # Tool messages must be strings, so vision content is JSON-encoded.
                tool_response_str = tool_response_content if isinstance(tool_response_content, str) else json.dumps(tool_response_content)

                # Add to message history
                messages.append({
                    "role": "assistant",
                    "content": content,
                    "tool_calls": [{"id": tool_call.id, "type": "function", "function": {"name": func_name, "arguments": tool_call.function.arguments}}]
                })
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_response_str
                })

                # Signal tool result to frontend
                tool_result_event = {
                    "type": "tool_result",
                    "tool": func_name,
                    "tool_call_id": tool_call.id,
                    "result": result.get("display", {}),
                    "response": tool_response_str,
                }
                if result.get("image"):
                    # Frontend receives the full-resolution image, not the VLM thumbnail.
                    tool_result_event["image"] = result["image"]
                if result.get("image_name"):
                    tool_result_event["image_name"] = result["image_name"]
                yield tool_result_event

        else:
            # No tool calls — we're done
            messages.append({"role": "assistant", "content": content})
            done = True

            # Send result if found
            if result_content:
                yield {"type": "result", "content": result_content, "images": image_store}

        # Signal between-turn processing
        if not done:
            yield {"type": "generating"}

    yield {"type": "done"}
backend/main.py CHANGED
@@ -99,6 +99,28 @@ except ImportError:
99
  COMMAND_AVAILABLE = False
100
  logger.warning("Command center tool handling not available.")
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  # Session management for sandboxes
103
  SANDBOXES: Dict[str, any] = {}
104
  SANDBOX_TIMEOUT = 300
@@ -196,6 +218,9 @@ class ChatRequest(BaseModel):
196
  extra_params: Optional[Dict] = None # Extra parameters for API calls (e.g., enable_thinking)
197
  e2b_key: Optional[str] = None # E2B API key for code execution
198
  serper_key: Optional[str] = None # Serper API key for research
 
 
 
199
  research_sub_agent_model: Optional[str] = None # Model for research sub-tasks
200
  research_sub_agent_endpoint: Optional[str] = None # Endpoint for research sub-agent (may differ from main)
201
  research_sub_agent_token: Optional[str] = None # Token for research sub-agent endpoint
@@ -489,6 +514,113 @@ async def stream_command_center_notebook(
489
  yield f"data: {json.dumps({'type': 'error', 'content': error_message})}\n\n"
490
 
491
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  async def stream_chat_response(
493
  messages: List[dict],
494
  endpoint: str,
@@ -682,7 +814,11 @@ async def chat_stream(request: ChatRequest):
682
  # Apply environment variable fallbacks for API keys
683
  e2b_key = get_env_fallback(request.e2b_key, "E2B_API_KEY")
684
  serper_key = get_env_fallback(request.serper_key, "SERPER_API_KEY")
 
685
  token = get_env_fallback(request.token, "LLM_API_KEY")
 
 
 
686
 
687
  # Route to code execution handler for code notebooks
688
  if request.notebook_type == "code":
@@ -739,6 +875,48 @@ async def chat_stream(request: ChatRequest):
739
  }
740
  )
741
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
742
  # Route to command center handler for command center (with tool-based launching)
743
  if request.notebook_type == "command":
744
  return StreamingResponse(
@@ -932,6 +1110,7 @@ async def get_settings():
932
  if os.path.exists(SETTINGS_FILE):
933
  with open(SETTINGS_FILE, "r") as f:
934
  settings = json.load(f)
 
935
  return settings
936
  else:
937
  # Return default settings if file doesn't exist
 
99
  COMMAND_AVAILABLE = False
100
  logger.warning("Command center tool handling not available.")
101
 
102
+ # For agent with web tools
103
+ try:
104
+ try:
105
+ from .agent import stream_agent_execution
106
+ except ImportError:
107
+ from agent import stream_agent_execution
108
+ AGENT_AVAILABLE = True
109
+ except ImportError:
110
+ AGENT_AVAILABLE = False
111
+ logger.warning("Agent web tools not available. Install with: pip install readability-lxml markdownify")
112
+
113
+ # For image agent with HuggingFace tools
114
+ try:
115
+ try:
116
+ from .image import stream_image_execution
117
+ except ImportError:
118
+ from image import stream_image_execution
119
+ IMAGE_AVAILABLE = True
120
+ except ImportError:
121
+ IMAGE_AVAILABLE = False
122
+ logger.warning("Image agent not available. Install with: pip install huggingface_hub Pillow")
123
+
124
  # Session management for sandboxes
125
  SANDBOXES: Dict[str, any] = {}
126
  SANDBOX_TIMEOUT = 300
 
218
  extra_params: Optional[Dict] = None # Extra parameters for API calls (e.g., enable_thinking)
219
  e2b_key: Optional[str] = None # E2B API key for code execution
220
  serper_key: Optional[str] = None # Serper API key for research
221
+ hf_token: Optional[str] = None # HuggingFace token for image generation
222
+ image_gen_model: Optional[str] = None # HuggingFace model for text-to-image
223
+ image_edit_model: Optional[str] = None # HuggingFace model for image-to-image
224
  research_sub_agent_model: Optional[str] = None # Model for research sub-tasks
225
  research_sub_agent_endpoint: Optional[str] = None # Endpoint for research sub-agent (may differ from main)
226
  research_sub_agent_token: Optional[str] = None # Token for research sub-agent endpoint
 
514
  yield f"data: {json.dumps({'type': 'error', 'content': error_message})}\n\n"
515
 
516
 
517
+ async def stream_agent_notebook(
518
+ messages: List[dict],
519
+ endpoint: str,
520
+ token: Optional[str],
521
+ model: str,
522
+ serper_key: str,
523
+ tab_id: str = "default",
524
+ extra_params: Optional[Dict] = None
525
+ ):
526
+ """Handle agent notebook with web tools (search, read, screenshot)"""
527
+
528
+ if not AGENT_AVAILABLE:
529
+ async for chunk in stream_chat_response(messages, endpoint, token, model, "agent", tab_id, extra_params):
530
+ yield chunk
531
+ return
532
+
533
+ try:
534
+ client = OpenAI(base_url=endpoint, api_key=token)
535
+
536
+ system_prompt = get_system_prompt("agent")
537
+ full_messages = [{"role": "system", "content": system_prompt}] + messages
538
+
539
+ record_api_call(tab_id, full_messages)
540
+
541
+ loop = asyncio.get_event_loop()
542
+ queue = asyncio.Queue()
543
+
544
+ def run_sync_generator():
545
+ try:
546
+ for update in stream_agent_execution(client, model, full_messages, serper_key, extra_params=extra_params):
547
+ loop.call_soon_threadsafe(queue.put_nowait, update)
548
+ finally:
549
+ loop.call_soon_threadsafe(queue.put_nowait, None)
550
+
551
+ future = loop.run_in_executor(_executor, run_sync_generator)
552
+
553
+ while True:
554
+ update = await queue.get()
555
+ if update is None:
556
+ break
557
+ yield f"data: {json.dumps(update)}\n\n"
558
+
559
+ await asyncio.wrap_future(future)
560
+
561
+ except Exception as e:
562
+ import traceback
563
+ error_message = f"Agent error: {str(e)}\n{traceback.format_exc()}"
564
+ logger.error(error_message)
565
+ yield f"data: {json.dumps({'type': 'error', 'content': error_message})}\n\n"
566
+
567
+
568
+ async def stream_image_notebook(
569
+ messages: List[dict],
570
+ endpoint: str,
571
+ token: Optional[str],
572
+ model: str,
573
+ hf_token: str,
574
+ image_gen_model: Optional[str] = None,
575
+ image_edit_model: Optional[str] = None,
576
+ tab_id: str = "default",
577
+ extra_params: Optional[Dict] = None
578
+ ):
579
+ """Handle image notebook with HuggingFace image generation tools"""
580
+
581
+ if not IMAGE_AVAILABLE:
582
+ yield f"data: {json.dumps({'type': 'error', 'content': 'Image agent not available. Install with: pip install huggingface_hub Pillow'})}\n\n"
583
+ return
584
+
585
+ if not hf_token:
586
+ yield f"data: {json.dumps({'type': 'error', 'content': 'HuggingFace token required for image generation. Please configure in settings or set HF_TOKEN environment variable.'})}\n\n"
587
+ return
588
+
589
+ try:
590
+ client = OpenAI(base_url=endpoint, api_key=token)
591
+
592
+ system_prompt = get_system_prompt("image")
593
+ full_messages = [{"role": "system", "content": system_prompt}] + messages
594
+
595
+ record_api_call(tab_id, full_messages)
596
+
597
+ loop = asyncio.get_event_loop()
598
+ queue = asyncio.Queue()
599
+
600
+ def run_sync_generator():
601
+ try:
602
+ for update in stream_image_execution(client, model, full_messages, hf_token, image_gen_model=image_gen_model, image_edit_model=image_edit_model, extra_params=extra_params):
603
+ loop.call_soon_threadsafe(queue.put_nowait, update)
604
+ finally:
605
+ loop.call_soon_threadsafe(queue.put_nowait, None)
606
+
607
+ future = loop.run_in_executor(_executor, run_sync_generator)
608
+
609
+ while True:
610
+ update = await queue.get()
611
+ if update is None:
612
+ break
613
+ yield f"data: {json.dumps(update)}\n\n"
614
+
615
+ await asyncio.wrap_future(future)
616
+
617
+ except Exception as e:
618
+ import traceback
619
+ error_message = f"Image agent error: {str(e)}\n{traceback.format_exc()}"
620
+ logger.error(error_message)
621
+ yield f"data: {json.dumps({'type': 'error', 'content': error_message})}\n\n"
622
+
623
+
624
  async def stream_chat_response(
625
  messages: List[dict],
626
  endpoint: str,
 
814
  # Apply environment variable fallbacks for API keys
815
  e2b_key = get_env_fallback(request.e2b_key, "E2B_API_KEY")
816
  serper_key = get_env_fallback(request.serper_key, "SERPER_API_KEY")
817
+ hf_token = get_env_fallback(request.hf_token, "HF_TOKEN")
818
  token = get_env_fallback(request.token, "LLM_API_KEY")
819
+ # For image generation: fall back to the LLM provider token (often the same HF token)
820
+ if not hf_token:
821
+ hf_token = token
822
 
823
  # Route to code execution handler for code notebooks
824
  if request.notebook_type == "code":
 
875
  }
876
  )
877
 
878
+ # Route to image handler with HuggingFace tools
879
+ if request.notebook_type == "image":
880
+ return StreamingResponse(
881
+ stream_image_notebook(
882
+ messages,
883
+ request.endpoint,
884
+ token,
885
+ request.model or "gpt-4",
886
+ hf_token or "",
887
+ request.image_gen_model,
888
+ request.image_edit_model,
889
+ tab_id,
890
+ request.extra_params
891
+ ),
892
+ media_type="text/event-stream",
893
+ headers={
894
+ "Cache-Control": "no-cache",
895
+ "Connection": "keep-alive",
896
+ "X-Accel-Buffering": "no",
897
+ }
898
+ )
899
+
900
+ # Route to agent handler with web tools
901
+ if request.notebook_type == "agent":
902
+ return StreamingResponse(
903
+ stream_agent_notebook(
904
+ messages,
905
+ request.endpoint,
906
+ token,
907
+ request.model or "gpt-4",
908
+ serper_key or "",
909
+ tab_id,
910
+ request.extra_params
911
+ ),
912
+ media_type="text/event-stream",
913
+ headers={
914
+ "Cache-Control": "no-cache",
915
+ "Connection": "keep-alive",
916
+ "X-Accel-Buffering": "no",
917
+ }
918
+ )
919
+
920
  # Route to command center handler for command center (with tool-based launching)
921
  if request.notebook_type == "command":
922
  return StreamingResponse(
 
1110
  if os.path.exists(SETTINGS_FILE):
1111
  with open(SETTINGS_FILE, "r") as f:
1112
  settings = json.load(f)
1113
+ settings["_settingsPath"] = SETTINGS_FILE
1114
  return settings
1115
  else:
1116
  # Return default settings if file doesn't exist
backend/tools.py ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Centralized Tool Definitions & Execution Functions.
3
+
4
+ All OpenAI function-calling tool definitions live here.
5
+ Agent handlers compose tools by importing what they need:
6
+
7
+ from tools import execute_code, upload_files, download_files
8
+ TOOLS = [execute_code, upload_files, download_files]
9
+
10
+ Execution functions for tools that run server-side (web tools)
11
+ are also defined here, prefixed with `execute_`.
12
+ """
13
+
14
+ import base64
15
+ import io
16
+ import json
17
+ import logging
18
+ from typing import List, Dict, Optional
19
+ from urllib.parse import urljoin, urlparse
20
+
21
+ import httpx
22
+ import requests
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ # ============================================================
28
+ # Code execution tools (used by code notebook)
29
+ # ============================================================
30
+
31
+ execute_code = {
32
+ "type": "function",
33
+ "function": {
34
+ "name": "execute_code",
35
+ "description": "Execute Python code in a stateful environment. Variables and imports persist between executions.",
36
+ "parameters": {
37
+ "type": "object",
38
+ "properties": {
39
+ "code": {
40
+ "type": "string",
41
+ "description": "The Python code to execute."
42
+ }
43
+ },
44
+ "required": ["code"]
45
+ }
46
+ }
47
+ }
48
+
49
+ upload_files = {
50
+ "type": "function",
51
+ "function": {
52
+ "name": "upload_files",
53
+ "description": "Upload files from the local workspace to the code execution environment for analysis. Files will be available at /home/user/<filename>. Use this to load data files, scripts, or any files you need to analyze.",
54
+ "parameters": {
55
+ "type": "object",
56
+ "properties": {
57
+ "paths": {
58
+ "type": "array",
59
+ "items": {"type": "string"},
60
+ "description": "List of file paths relative to the workspace root (e.g., ['data/sales.csv', 'config.json'])"
61
+ }
62
+ },
63
+ "required": ["paths"]
64
+ }
65
+ }
66
+ }
67
+
68
+ download_files = {
69
+ "type": "function",
70
+ "function": {
71
+ "name": "download_files",
72
+ "description": "Download files from the code execution environment to the local workspace. Use this to save generated files, processed data, or any output files you want to keep.",
73
+ "parameters": {
74
+ "type": "object",
75
+ "properties": {
76
+ "files": {
77
+ "type": "array",
78
+ "items": {
79
+ "type": "object",
80
+ "properties": {
81
+ "sandbox_path": {
82
+ "type": "string",
83
+ "description": "Path in the sandbox (e.g., '/home/user/output.csv')"
84
+ },
85
+ "local_path": {
86
+ "type": "string",
87
+ "description": "Destination path relative to workspace (e.g., 'results/output.csv')"
88
+ }
89
+ },
90
+ "required": ["sandbox_path", "local_path"]
91
+ },
92
+ "description": "List of files to download with their sandbox and local paths"
93
+ }
94
+ },
95
+ "required": ["files"]
96
+ }
97
+ }
98
+ }
99
+
100
+
101
+ # ============================================================
102
+ # Web tools (used by agent notebook)
103
+ # ============================================================
104
+
105
+ web_search = {
106
+ "type": "function",
107
+ "function": {
108
+ "name": "web_search",
109
+ "description": "Search the web using Google. Returns titles, URLs, and short snippets for each result. Use this to find information, discover relevant pages, and get an overview of a topic.",
110
+ "parameters": {
111
+ "type": "object",
112
+ "properties": {
113
+ "query": {
114
+ "type": "string",
115
+ "description": "The search query"
116
+ },
117
+ "num_results": {
118
+ "type": "integer",
119
+ "description": "Number of results to return (default: 5, max: 10)",
120
+ "default": 5
121
+ }
122
+ },
123
+ "required": ["query"]
124
+ }
125
+ }
126
+ }
127
+
128
+ read_url = {
129
+ "type": "function",
130
+ "function": {
131
+ "name": "read_url",
132
+ "description": "Fetch a web page and extract its main content as clean markdown. Includes text, headings, links, and image references. Use this when you need detailed content from a specific page.",
133
+ "parameters": {
134
+ "type": "object",
135
+ "properties": {
136
+ "url": {
137
+ "type": "string",
138
+ "description": "The URL to read"
139
+ }
140
+ },
141
+ "required": ["url"]
142
+ }
143
+ }
144
+ }
145
+
146
+ screenshot_url = {
147
+ "type": "function",
148
+ "function": {
149
+ "name": "screenshot_url",
150
+ "description": "Take a screenshot of a web page. Use this when you need to see the visual layout, images, charts, or design of a page. The screenshot will be sent to you as an image.",
151
+ "parameters": {
152
+ "type": "object",
153
+ "properties": {
154
+ "url": {
155
+ "type": "string",
156
+ "description": "The URL to screenshot"
157
+ }
158
+ },
159
+ "required": ["url"]
160
+ }
161
+ }
162
+ }
163
+
164
+
165
+ # ============================================================
166
+ # Web tool execution functions
167
+ # ============================================================
168
+
169
+ _USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
170
+
171
+
172
+ def execute_web_search(query: str, serper_key: str, num_results: int = 5) -> str:
173
+ """Search via Serper API, return formatted results as JSON string."""
174
+ url = "https://google.serper.dev/search"
175
+ payload = json.dumps({"q": query, "num": min(num_results, 10)})
176
+ headers = {
177
+ "X-API-KEY": serper_key,
178
+ "Content-Type": "application/json"
179
+ }
180
+
181
+ try:
182
+ response = requests.post(url, headers=headers, data=payload, timeout=10)
183
+ if response.status_code != 200:
184
+ return json.dumps({"error": f"Search API returned status {response.status_code}"})
185
+
186
+ data = response.json()
187
+ results = []
188
+ for item in data.get("organic", []):
189
+ results.append({
190
+ "title": item.get("title", ""),
191
+ "url": item.get("link", ""),
192
+ "snippet": item.get("snippet", "")
193
+ })
194
+ return json.dumps(results, indent=2)
195
+ except Exception as e:
196
+ logger.error(f"Web search error: {e}")
197
+ return json.dumps({"error": str(e)})
198
+
199
+
200
+ def execute_read_url(url: str) -> str:
201
+ """Fetch URL and extract main content as markdown with images."""
202
+ try:
203
+ from readability import Document
204
+ from markdownify import markdownify
205
+ except ImportError:
206
+ return "Error: readability-lxml and markdownify packages required. Install with: pip install readability-lxml markdownify"
207
+
208
+ try:
209
+ resp = httpx.get(
210
+ url,
211
+ follow_redirects=True,
212
+ timeout=15,
213
+ headers={"User-Agent": _USER_AGENT}
214
+ )
215
+ if resp.status_code != 200:
216
+ return f"Error: HTTP {resp.status_code} fetching {url}"
217
+
218
+ doc = Document(resp.text)
219
+ title = doc.title()
220
+ content_html = doc.summary()
221
+
222
+ # Convert to markdown (preserves images as ![alt](src))
223
+ md = markdownify(content_html, strip=["script", "style"])
224
+
225
+ # Resolve relative image URLs to absolute
226
+ parsed_base = urlparse(url)
227
+ base_url = f"{parsed_base.scheme}://{parsed_base.netloc}"
228
+
229
+ def resolve_url(match):
230
+ img_url = match.group(2)
231
+ if img_url.startswith(("http://", "https://", "data:")):
232
+ return match.group(0)
233
+ absolute = urljoin(url, img_url)
234
+ return f"![{match.group(1)}]({absolute})"
235
+
236
+ import re
237
+ md = re.sub(r'!\[([^\]]*)\]\(([^)]+)\)', resolve_url, md)
238
+
239
+ # Clean up excessive whitespace
240
+ md = re.sub(r'\n{3,}', '\n\n', md).strip()
241
+
242
+ # Truncate if very long
243
+ max_len = 15000
244
+ if len(md) > max_len:
245
+ md = md[:max_len] + f"\n\n[Content truncated - {len(md)} chars total]"
246
+
247
+ return f"# {title}\n\n{md}" if title else md
248
+ except Exception as e:
249
+ logger.error(f"Read URL error for {url}: {e}")
250
+ return f"Error reading {url}: {str(e)}"
251
+
252
+
253
+ def execute_screenshot_url(url: str) -> Optional[str]:
254
+ """Take a screenshot of a URL using Playwright, return base64 PNG."""
255
+ try:
256
+ from playwright.sync_api import sync_playwright
257
+ except ImportError:
258
+ return None # Caller should handle gracefully
259
+
260
+ try:
261
+ with sync_playwright() as p:
262
+ browser = p.chromium.launch(headless=True)
263
+ page = browser.new_page(viewport={"width": 1280, "height": 720})
264
+ page.goto(url, wait_until="networkidle", timeout=15000)
265
+ screenshot_bytes = page.screenshot(full_page=False)
266
+ browser.close()
267
+ return base64.b64encode(screenshot_bytes).decode("utf-8")
268
+ except Exception as e:
269
+ logger.error(f"Screenshot error for {url}: {e}")
270
+ return None
271
+
272
+
273
+ # ============================================================
274
+ # Image tools (used by image notebook)
275
+ # ============================================================
276
+
277
+ generate_image = {
278
+ "type": "function",
279
+ "function": {
280
+ "name": "generate_image",
281
+ "description": "Generate an image from a text prompt. Returns an image reference name (e.g., 'image_1') that you can see and use with edit_image.",
282
+ "parameters": {
283
+ "type": "object",
284
+ "properties": {
285
+ "prompt": {
286
+ "type": "string",
287
+ "description": "Detailed text description of the image to generate"
288
+ },
289
+ "model": {
290
+ "type": "string",
291
+ "description": "HuggingFace model to use (default: black-forest-labs/FLUX.1-schnell)",
292
+ "default": "black-forest-labs/FLUX.1-schnell"
293
+ }
294
+ },
295
+ "required": ["prompt"]
296
+ }
297
+ }
298
+ }
299
+
300
+ edit_image = {
301
+ "type": "function",
302
+ "function": {
303
+ "name": "edit_image",
304
+ "description": "Edit or transform an existing image using a text prompt. The source can be a URL (https://...) or a reference to a previously generated/loaded image (e.g., 'image_1').",
305
+ "parameters": {
306
+ "type": "object",
307
+ "properties": {
308
+ "prompt": {
309
+ "type": "string",
310
+ "description": "Text description of the edit or transformation to apply"
311
+ },
312
+ "source": {
313
+ "type": "string",
314
+ "description": "Image URL or reference name from a previous tool call (e.g., 'image_1')"
315
+ },
316
+ "model": {
317
+ "type": "string",
318
+ "description": "HuggingFace model to use (default: black-forest-labs/FLUX.1-Kontext-dev)",
319
+ "default": "black-forest-labs/FLUX.1-Kontext-dev"
320
+ }
321
+ },
322
+ "required": ["prompt", "source"]
323
+ }
324
+ }
325
+ }
326
+
327
+ read_image_url = {
328
+ "type": "function",
329
+ "function": {
330
+ "name": "read_image_url",
331
+ "description": "Download an image from a URL. Returns an image reference name (e.g., 'image_1') that you can see and use with edit_image.",
332
+ "parameters": {
333
+ "type": "object",
334
+ "properties": {
335
+ "url": {
336
+ "type": "string",
337
+ "description": "The image URL to download"
338
+ }
339
+ },
340
+ "required": ["url"]
341
+ }
342
+ }
343
+ }
344
+
345
+
346
+ # ============================================================
347
+ # Image tool execution functions
348
+ # ============================================================
349
+
350
+ def execute_generate_image(prompt: str, hf_token: str, model: str = "black-forest-labs/FLUX.1-schnell") -> Optional[str]:
351
+ """Text-to-image via HF InferenceClient. Returns base64 PNG or None on error."""
352
+ try:
353
+ from huggingface_hub import InferenceClient
354
+ except ImportError:
355
+ logger.error("huggingface_hub not installed")
356
+ return None
357
+
358
+ try:
359
+ client = InferenceClient(token=hf_token)
360
+ image = client.text_to_image(prompt, model=model)
361
+ buffer = io.BytesIO()
362
+ image.save(buffer, format="PNG")
363
+ return base64.b64encode(buffer.getvalue()).decode("utf-8")
364
+ except Exception as e:
365
+ logger.error(f"Generate image error: {e}")
366
+ return None
367
+
368
+
369
+ def execute_edit_image(prompt: str, source_image_bytes: bytes, hf_token: str, model: str = "black-forest-labs/FLUX.1-Kontext-dev") -> Optional[str]:
370
+ """Image-to-image via HF InferenceClient. source_image_bytes is raw image data. Returns base64 PNG or None."""
371
+ try:
372
+ from huggingface_hub import InferenceClient
373
+ from PIL import Image
374
+ except ImportError:
375
+ logger.error("huggingface_hub or Pillow not installed")
376
+ return None
377
+
378
+ try:
379
+ client = InferenceClient(token=hf_token)
380
+ input_image = Image.open(io.BytesIO(source_image_bytes))
381
+ result = client.image_to_image(input_image, prompt=prompt, model=model)
382
+ buffer = io.BytesIO()
383
+ result.save(buffer, format="PNG")
384
+ return base64.b64encode(buffer.getvalue()).decode("utf-8")
385
+ except Exception as e:
386
+ logger.error(f"Edit image error: {e}")
387
+ return None
388
+
389
+
390
+ def execute_read_image_url(url: str) -> Optional[str]:
391
+ """Download image from URL, return base64 string or None on error."""
392
+ try:
393
+ resp = httpx.get(
394
+ url,
395
+ follow_redirects=True,
396
+ timeout=15,
397
+ headers={"User-Agent": _USER_AGENT}
398
+ )
399
+ if resp.status_code != 200:
400
+ logger.error(f"Read image URL error: HTTP {resp.status_code} for {url}")
401
+ return None
402
+ return base64.b64encode(resp.content).decode("utf-8")
403
+ except Exception as e:
404
+ logger.error(f"Read image URL error for {url}: {e}")
405
+ return None
frontend/index.html CHANGED
@@ -7,7 +7,7 @@
7
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&display=swap" rel="stylesheet">
8
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism.min.css">
9
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
10
- <link rel="stylesheet" href="style.css?v=62">
11
  </head>
12
  <body>
13
  <div class="app-container">
@@ -175,6 +175,7 @@
175
  <div class="settings-panel" id="settingsPanel">
176
  <div class="settings-panel-header">
177
  <h3>SETTINGS</h3>
 
178
  <button class="settings-panel-close" id="settingsPanelClose">×</button>
179
  </div>
180
  <div class="settings-panel-body" id="settingsPanelBody">
@@ -227,6 +228,31 @@
227
  <input type="password" id="setting-serper-key" class="settings-input" placeholder="Leave empty if not using research">
228
  </div>
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  <!-- Research Settings -->
231
  <div class="settings-section">
232
  <label class="settings-label">
@@ -457,6 +483,6 @@
457
  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
458
  <script src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>
459
  <script src="research-ui.js?v=23"></script>
460
- <script src="script.js?v=57"></script>
461
  </body>
462
  </html>
 
7
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&display=swap" rel="stylesheet">
8
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism.min.css">
9
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
10
+ <link rel="stylesheet" href="style.css?v=64">
11
  </head>
12
  <body>
13
  <div class="app-container">
 
175
  <div class="settings-panel" id="settingsPanel">
176
  <div class="settings-panel-header">
177
  <h3>SETTINGS</h3>
178
+ <span class="settings-path" id="settingsPath"></span>
179
  <button class="settings-panel-close" id="settingsPanelClose">×</button>
180
  </div>
181
  <div class="settings-panel-body" id="settingsPanelBody">
 
228
  <input type="password" id="setting-serper-key" class="settings-input" placeholder="Leave empty if not using research">
229
  </div>
230
 
231
+ <div class="settings-section">
232
+ <label class="settings-label">
233
+ <span class="label-text">HUGGINGFACE TOKEN (OPTIONAL)</span>
234
+ <span class="label-description">Required for image generation in IMAGE notebooks</span>
235
+ </label>
236
+ <input type="password" id="setting-hf-token" class="settings-input" placeholder="Leave empty to use provider token">
237
+ </div>
238
+
239
+ <!-- Image Model Settings -->
240
+ <div class="settings-section">
241
+ <label class="settings-label">
242
+ <span class="label-text">IMAGE GENERATION MODEL (OPTIONAL)</span>
243
+ <span class="label-description">Model for text-to-image generation in IMAGE notebooks</span>
244
+ </label>
245
+ <select id="setting-image-gen-model" class="settings-select"></select>
246
+ </div>
247
+
248
+ <div class="settings-section">
249
+ <label class="settings-label">
250
+ <span class="label-text">IMAGE EDIT MODEL (OPTIONAL)</span>
251
+ <span class="label-description">Model for image-to-image editing in IMAGE notebooks</span>
252
+ </label>
253
+ <select id="setting-image-edit-model" class="settings-select"></select>
254
+ </div>
255
+
256
  <!-- Research Settings -->
257
  <div class="settings-section">
258
  <label class="settings-label">
 
483
  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
484
  <script src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>
485
  <script src="research-ui.js?v=23"></script>
486
+ <script src="script.js?v=59"></script>
487
  </body>
488
  </html>
frontend/script.js CHANGED
@@ -4,10 +4,11 @@
4
  // ============================================================
5
  const AGENT_REGISTRY = {
6
  command: { label: 'TASKS', hasCounter: false, inMenu: false, inLauncher: false, placeholder: 'Enter message...' },
7
- agent: { label: 'AGENT', hasCounter: true, inMenu: false, inLauncher: true, placeholder: 'Enter message...' },
8
  code: { label: 'CODE', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
9
  research: { label: 'RESEARCH', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
10
  chat: { label: 'CHAT', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
 
11
  };
12
  // Virtual types used only in timeline rendering (not real agents)
13
  const VIRTUAL_TYPE_LABELS = { search: 'SEARCH', browse: 'BROWSE' };
@@ -57,6 +58,10 @@ let settings = {
57
  // Service API keys
58
  e2bKey: '',
59
  serperKey: '',
 
 
 
 
60
  // Research settings
61
  researchSubAgentModel: '',
62
  researchParallelWorkers: null,
@@ -1562,22 +1567,32 @@ function getConversationHistory(chatContainer) {
1562
  // Check if this message has a tool call
1563
  const toolCallData = msg.getAttribute('data-tool-call');
1564
  if (toolCallData) {
1565
- // This is a tool call message - add it in the proper format
1566
- // Include content if the message also has thinking text
1567
  const toolCall = JSON.parse(toolCallData);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1568
  messages.push({
1569
  role: 'assistant',
1570
- content: content || '', // Preserve thinking content if present
1571
  tool_calls: [{
1572
  id: toolCall.tool_call_id || 'tool_' + Date.now(),
1573
  type: 'function',
1574
  function: {
1575
- name: `launch_${toolCall.notebook_type}_notebook`,
1576
- arguments: JSON.stringify({
1577
- task: toolCall.message,
1578
- topic: toolCall.message,
1579
- message: toolCall.message
1580
- })
1581
  }
1582
  }]
1583
  });
@@ -1637,6 +1652,14 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
1637
  }
1638
  }
1639
 
 
 
 
 
 
 
 
 
1640
  try {
1641
  const response = await fetch(`${backendEndpoint}/chat/stream`, {
1642
  method: 'POST',
@@ -1651,6 +1674,9 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
1651
  extra_params: modelConfig.extraParams || null,
1652
  e2b_key: currentSettings.e2bKey || null,
1653
  serper_key: currentSettings.serperKey || null,
 
 
 
1654
  research_sub_agent_model: researchSubAgentConfig?.model || null,
1655
  research_sub_agent_endpoint: researchSubAgentConfig?.endpoint || null,
1656
  research_sub_agent_token: researchSubAgentConfig?.token || null,
@@ -1737,7 +1763,7 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
1737
 
1738
  } else if (data.type === 'result') {
1739
  // Notebook result - update command center widget
1740
- updateActionWidgetWithResult(tabId, data.content, data.figures);
1741
 
1742
  } else if (data.type === 'result_preview') {
1743
  // Show result preview
@@ -1762,6 +1788,19 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
1762
  }
1763
  }
1764
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1765
  // Process markdown
1766
  let html = parseMarkdown(previewContent);
1767
 
@@ -1857,6 +1896,108 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
1857
  // Add to timeline
1858
  addTimelineEvent(tabId, 'assistant', `[report] ${data.sources_count || 0} sources, ${data.websites_visited || 0} sites`);
1859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1860
  } else if (data.type === 'content') {
1861
  // Regular streaming content (non-code notebooks)
1862
  if (!currentMessageEl) {
@@ -2191,7 +2332,7 @@ function showActionWidget(chatContainer, action, message, targetTabId, taskId =
2191
  actionWidgets[targetTabId] = widget;
2192
  }
2193
 
2194
- async function updateActionWidgetWithResult(tabId, resultContent, figures) {
2195
  const widget = actionWidgets[tabId];
2196
  if (!widget) return;
2197
 
@@ -2225,6 +2366,19 @@ async function updateActionWidgetWithResult(tabId, resultContent, figures) {
2225
  }
2226
  }
2227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2228
  // Process markdown
2229
  let html = parseMarkdown(processedContent);
2230
 
@@ -2253,9 +2407,26 @@ async function updateActionWidgetWithResult(tabId, resultContent, figures) {
2253
  body.appendChild(resultSection);
2254
  }
2255
 
2256
- // Send result back to backend to update conversation history (non-blocking)
2257
  const toolCallId = toolCallIds[tabId];
2258
  if (toolCallId) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2259
  fetch('/api/conversation/add-tool-response', {
2260
  method: 'POST',
2261
  headers: { 'Content-Type': 'application/json' },
@@ -3055,6 +3226,9 @@ function migrateSettings(oldSettings) {
3055
  },
3056
  e2bKey: oldSettings.e2bKey || '',
3057
  serperKey: oldSettings.serperKey || '',
 
 
 
3058
  researchSubAgentModel: oldSettings.researchSubAgentModel || '',
3059
  researchParallelWorkers: oldSettings.researchParallelWorkers || null,
3060
  researchMaxWebsites: oldSettings.researchMaxWebsites || null,
@@ -3234,7 +3408,9 @@ function populateModelDropdowns() {
3234
  // Build dropdown IDs from registry + special dropdowns
3235
  const dropdownIds = [
3236
  ...Object.keys(AGENT_REGISTRY).map(t => `setting-notebook-${t}`),
3237
- 'setting-research-sub-agent-model'
 
 
3238
  ];
3239
 
3240
  dropdownIds.forEach(dropdownId => {
@@ -3267,6 +3443,10 @@ function populateModelDropdowns() {
3267
  }
3268
  const subAgentDropdown = document.getElementById('setting-research-sub-agent-model');
3269
  if (subAgentDropdown) subAgentDropdown.value = settings.researchSubAgentModel || '';
 
 
 
 
3270
  }
3271
 
3272
  // Show add/edit provider dialog
@@ -3433,6 +3613,10 @@ function deleteModel(modelId) {
3433
  }
3434
 
3435
  function openSettings() {
 
 
 
 
3436
  // Render providers and models lists
3437
  renderProvidersList();
3438
  renderModelsList();
@@ -3441,6 +3625,7 @@ function openSettings() {
3441
  // Populate service keys
3442
  document.getElementById('setting-e2b-key').value = settings.e2bKey || '';
3443
  document.getElementById('setting-serper-key').value = settings.serperKey || '';
 
3444
 
3445
  // Populate research settings
3446
  document.getElementById('setting-research-parallel-workers').value = settings.researchParallelWorkers || '';
@@ -3478,6 +3663,9 @@ async function saveSettings() {
3478
  // Get other settings
3479
  const e2bKey = document.getElementById('setting-e2b-key').value.trim();
3480
  const serperKey = document.getElementById('setting-serper-key').value.trim();
 
 
 
3481
  const researchParallelWorkers = document.getElementById('setting-research-parallel-workers').value.trim();
3482
  const researchMaxWebsites = document.getElementById('setting-research-max-websites').value.trim();
3483
  const themeColor = document.getElementById('setting-theme-color').value || 'forest';
@@ -3497,6 +3685,9 @@ async function saveSettings() {
3497
  settings.notebooks = notebookModels;
3498
  settings.e2bKey = e2bKey;
3499
  settings.serperKey = serperKey;
 
 
 
3500
  settings.researchSubAgentModel = researchSubAgentModel;
3501
  settings.researchParallelWorkers = researchParallelWorkers ? parseInt(researchParallelWorkers) : null;
3502
  settings.researchMaxWebsites = researchMaxWebsites ? parseInt(researchMaxWebsites) : null;
 
4
  // ============================================================
5
  const AGENT_REGISTRY = {
6
  command: { label: 'TASKS', hasCounter: false, inMenu: false, inLauncher: false, placeholder: 'Enter message...' },
7
+ agent: { label: 'AGENT', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
8
  code: { label: 'CODE', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
9
  research: { label: 'RESEARCH', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
10
  chat: { label: 'CHAT', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
11
+ image: { label: 'IMAGE', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Describe an image or paste a URL...' },
12
  };
13
  // Virtual types used only in timeline rendering (not real agents)
14
  const VIRTUAL_TYPE_LABELS = { search: 'SEARCH', browse: 'BROWSE' };
 
58
  // Service API keys
59
  e2bKey: '',
60
  serperKey: '',
61
+ hfToken: '',
62
+ // Image model selections (model IDs from the models list)
63
+ imageGenModel: '',
64
+ imageEditModel: '',
65
  // Research settings
66
  researchSubAgentModel: '',
67
  researchParallelWorkers: null,
 
1567
  // Check if this message has a tool call
1568
  const toolCallData = msg.getAttribute('data-tool-call');
1569
  if (toolCallData) {
 
 
1570
  const toolCall = JSON.parse(toolCallData);
1571
+ let funcName, funcArgs;
1572
+
1573
+ if (toolCall.function_name) {
1574
+ // Agent-style tool call (web_search, read_url, etc.)
1575
+ funcName = toolCall.function_name;
1576
+ funcArgs = toolCall.arguments;
1577
+ } else {
1578
+ // Command center-style tool call (launch_*_notebook)
1579
+ funcName = `launch_${toolCall.notebook_type}_notebook`;
1580
+ funcArgs = JSON.stringify({
1581
+ task: toolCall.message,
1582
+ topic: toolCall.message,
1583
+ message: toolCall.message
1584
+ });
1585
+ }
1586
+
1587
  messages.push({
1588
  role: 'assistant',
1589
+ content: toolCall.thinking || content || '',
1590
  tool_calls: [{
1591
  id: toolCall.tool_call_id || 'tool_' + Date.now(),
1592
  type: 'function',
1593
  function: {
1594
+ name: funcName,
1595
+ arguments: funcArgs
 
 
 
 
1596
  }
1597
  }]
1598
  });
 
1652
  }
1653
  }
1654
 
1655
+ // Resolve image model selections to HF model ID strings
1656
+ const imageGenModelId = currentSettings.imageGenModel
1657
+ ? currentSettings.models?.[currentSettings.imageGenModel]?.modelId || null
1658
+ : null;
1659
+ const imageEditModelId = currentSettings.imageEditModel
1660
+ ? currentSettings.models?.[currentSettings.imageEditModel]?.modelId || null
1661
+ : null;
1662
+
1663
  try {
1664
  const response = await fetch(`${backendEndpoint}/chat/stream`, {
1665
  method: 'POST',
 
1674
  extra_params: modelConfig.extraParams || null,
1675
  e2b_key: currentSettings.e2bKey || null,
1676
  serper_key: currentSettings.serperKey || null,
1677
+ hf_token: currentSettings.hfToken || null,
1678
+ image_gen_model: imageGenModelId,
1679
+ image_edit_model: imageEditModelId,
1680
  research_sub_agent_model: researchSubAgentConfig?.model || null,
1681
  research_sub_agent_endpoint: researchSubAgentConfig?.endpoint || null,
1682
  research_sub_agent_token: researchSubAgentConfig?.token || null,
 
1763
 
1764
  } else if (data.type === 'result') {
1765
  // Notebook result - update command center widget
1766
+ updateActionWidgetWithResult(tabId, data.content, data.figures, data.images);
1767
 
1768
  } else if (data.type === 'result_preview') {
1769
  // Show result preview
 
1788
  }
1789
  }
1790
 
1791
+ // Handle <image_N> references from image agent
1792
+ if (data.images) {
1793
+ for (const [imageName, imageBase64] of Object.entries(data.images)) {
1794
+ const placeholderId = `%%%IMAGE_${imageName}%%%`;
1795
+ figurePlaceholders[placeholderId] = { type: 'png', data: imageBase64, isGenerated: true };
1796
+
1797
+ const pairedTag = new RegExp(`<${imageName}></${imageName}>`, 'gi');
1798
+ previewContent = previewContent.replace(pairedTag, `\n\n${placeholderId}\n\n`);
1799
+ const singleTag = new RegExp(`</?${imageName}>`, 'gi');
1800
+ previewContent = previewContent.replace(singleTag, `\n\n${placeholderId}\n\n`);
1801
+ }
1802
+ }
1803
+
1804
  // Process markdown
1805
  let html = parseMarkdown(previewContent);
1806
 
 
1896
  // Add to timeline
1897
  addTimelineEvent(tabId, 'assistant', `[report] ${data.sources_count || 0} sources, ${data.websites_visited || 0} sites`);
1898
 
1899
+ } else if (data.type === 'tool_start') {
1900
+ // Agent tool execution starting — create a tool-cell box (like code cells)
1901
+ currentMessageEl = null;
1902
+ fullResponse = '';
1903
+
1904
+ const toolLabels = {
1905
+ 'web_search': 'SEARCH',
1906
+ 'read_url': 'READ',
1907
+ 'screenshot_url': 'SCREENSHOT',
1908
+ 'generate_image': 'GENERATE',
1909
+ 'edit_image': 'EDIT',
1910
+ 'read_image_url': 'LOAD IMAGE'
1911
+ };
1912
+ const toolDescriptions = {
1913
+ 'web_search': data.args?.query || '',
1914
+ 'read_url': data.args?.url || '',
1915
+ 'screenshot_url': data.args?.url || '',
1916
+ 'generate_image': data.args?.prompt || '',
1917
+ 'edit_image': `${data.args?.prompt || ''} (from ${data.args?.source || ''})`,
1918
+ 'read_image_url': data.args?.url || ''
1919
+ };
1920
+ const label = toolLabels[data.tool] || data.tool.toUpperCase();
1921
+ const description = toolDescriptions[data.tool] || '';
1922
+
1923
+ // Store tool call in DOM for history reconstruction
1924
+ const toolCallMsg = document.createElement('div');
1925
+ toolCallMsg.className = 'message assistant';
1926
+ toolCallMsg.style.display = 'none';
1927
+ toolCallMsg.setAttribute('data-tool-call', JSON.stringify({
1928
+ tool_call_id: data.tool_call_id,
1929
+ function_name: data.tool,
1930
+ arguments: data.arguments,
1931
+ thinking: data.thinking || ''
1932
+ }));
1933
+ chatContainer.appendChild(toolCallMsg);
1934
+
1935
+ // Create tool-cell box (similar to code-cell)
1936
+ const toolCell = document.createElement('div');
1937
+ toolCell.className = 'tool-cell';
1938
+ toolCell.setAttribute('data-tool-name', data.tool);
1939
+ toolCell.innerHTML = `
1940
+ <div class="tool-cell-label"><span>${label}</span>${createSpinnerHtml()}</div>
1941
+ <div class="tool-cell-input">${escapeHtml(description)}</div>
1942
+ `;
1943
+ chatContainer.appendChild(toolCell);
1944
+ scrollChatToBottom(chatContainer);
1945
+ addTimelineEvent(tabId, 'assistant', `[${data.tool}] ${description}`);
1946
+
1947
+ } else if (data.type === 'tool_result') {
1948
+ // Agent tool result — populate the last tool-cell with output
1949
+ const lastToolCell = chatContainer.querySelector('.tool-cell:last-of-type');
1950
+
1951
+ // Remove spinner
1952
+ if (lastToolCell) {
1953
+ const spinner = lastToolCell.querySelector('.tool-spinner');
1954
+ if (spinner) spinner.remove();
1955
+ }
1956
+
1957
+ // Store tool response in DOM for history reconstruction
1958
+ const toolResponseMsg = document.createElement('div');
1959
+ toolResponseMsg.className = 'message tool';
1960
+ toolResponseMsg.style.display = 'none';
1961
+ toolResponseMsg.setAttribute('data-tool-response', JSON.stringify({
1962
+ tool_call_id: data.tool_call_id,
1963
+ content: data.response || ''
1964
+ }));
1965
+ chatContainer.appendChild(toolResponseMsg);
1966
+
1967
+ // Build output HTML based on tool type
1968
+ let outputHtml = '';
1969
+
1970
+ if (data.tool === 'web_search' && data.result?.results) {
1971
+ try {
1972
+ const results = typeof data.result.results === 'string' ? JSON.parse(data.result.results) : data.result.results;
1973
+ if (Array.isArray(results)) {
1974
+ outputHtml = '<div class="search-results-content">' +
1975
+ results.map(r =>
1976
+ `<div class="search-result-item"><a href="${escapeHtml(r.url)}" target="_blank">${escapeHtml(r.title)}</a><span class="search-snippet">${escapeHtml(r.snippet)}</span></div>`
1977
+ ).join('') + '</div>';
1978
+ }
1979
+ } catch(e) { /* ignore parse errors */ }
1980
+ } else if (data.tool === 'read_url') {
1981
+ const len = data.result?.length || 0;
1982
+ outputHtml = `<div class="tool-cell-read-summary">${len > 0 ? `Extracted ${(len / 1000).toFixed(1)}k chars` : 'No content extracted'}</div>`;
1983
+ } else if (data.tool === 'screenshot_url' && data.image) {
1984
+ outputHtml = `<img src="data:image/png;base64,${data.image}" alt="Screenshot" class="screenshot-img" />`;
1985
+ } else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url') && data.image) {
1986
+ const imgName = data.image_name || 'image';
1987
+ outputHtml = `<img src="data:image/png;base64,${data.image}" alt="${escapeHtml(imgName)}" class="generated-img" />`;
1988
+ } else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url') && !data.image) {
1989
+ outputHtml = `<div class="tool-cell-read-summary">Failed to process image</div>`;
1990
+ }
1991
+
1992
+ if (outputHtml && lastToolCell) {
1993
+ const outputEl = document.createElement('div');
1994
+ outputEl.className = 'tool-cell-output';
1995
+ outputEl.innerHTML = outputHtml;
1996
+ lastToolCell.appendChild(outputEl);
1997
+ }
1998
+
1999
+ scrollChatToBottom(chatContainer);
2000
+
2001
  } else if (data.type === 'content') {
2002
  // Regular streaming content (non-code notebooks)
2003
  if (!currentMessageEl) {
 
2332
  actionWidgets[targetTabId] = widget;
2333
  }
2334
 
2335
+ async function updateActionWidgetWithResult(tabId, resultContent, figures, images) {
2336
  const widget = actionWidgets[tabId];
2337
  if (!widget) return;
2338
 
 
2366
  }
2367
  }
2368
 
2369
+ // Handle <image_N> references from image agent
2370
+ if (images) {
2371
+ for (const [imageName, imageBase64] of Object.entries(images)) {
2372
+ const placeholderId = `%%%IMAGE_${imageName}%%%`;
2373
+ figurePlaceholders[placeholderId] = { type: 'png', data: imageBase64 };
2374
+
2375
+ const pairedTag = new RegExp(`<${imageName}></${imageName}>`, 'gi');
2376
+ processedContent = processedContent.replace(pairedTag, `\n\n${placeholderId}\n\n`);
2377
+ const singleTag = new RegExp(`</?${imageName}>`, 'gi');
2378
+ processedContent = processedContent.replace(singleTag, `\n\n${placeholderId}\n\n`);
2379
+ }
2380
+ }
2381
+
2382
  // Process markdown
2383
  let html = parseMarkdown(processedContent);
2384
 
 
2407
  body.appendChild(resultSection);
2408
  }
2409
 
2410
+ // Update the tool response DOM element so getConversationHistory picks up actual results
2411
  const toolCallId = toolCallIds[tabId];
2412
  if (toolCallId) {
2413
+ // Find the hidden tool response element with this tool_call_id in the command center
2414
+ const commandContainer = document.getElementById('messages-command');
2415
+ if (commandContainer) {
2416
+ const toolMsgs = commandContainer.querySelectorAll('.message.tool[data-tool-response]');
2417
+ for (const toolMsg of toolMsgs) {
2418
+ try {
2419
+ const data = JSON.parse(toolMsg.getAttribute('data-tool-response'));
2420
+ if (data.tool_call_id === toolCallId) {
2421
+ data.content = resultContent;
2422
+ toolMsg.setAttribute('data-tool-response', JSON.stringify(data));
2423
+ break;
2424
+ }
2425
+ } catch (e) { /* ignore parse errors */ }
2426
+ }
2427
+ }
2428
+
2429
+ // Also send to backend (non-blocking)
2430
  fetch('/api/conversation/add-tool-response', {
2431
  method: 'POST',
2432
  headers: { 'Content-Type': 'application/json' },
 
3226
  },
3227
  e2bKey: oldSettings.e2bKey || '',
3228
  serperKey: oldSettings.serperKey || '',
3229
+ hfToken: oldSettings.hfToken || '',
3230
+ imageGenModel: oldSettings.imageGenModel || '',
3231
+ imageEditModel: oldSettings.imageEditModel || '',
3232
  researchSubAgentModel: oldSettings.researchSubAgentModel || '',
3233
  researchParallelWorkers: oldSettings.researchParallelWorkers || null,
3234
  researchMaxWebsites: oldSettings.researchMaxWebsites || null,
 
3408
  // Build dropdown IDs from registry + special dropdowns
3409
  const dropdownIds = [
3410
  ...Object.keys(AGENT_REGISTRY).map(t => `setting-notebook-${t}`),
3411
+ 'setting-research-sub-agent-model',
3412
+ 'setting-image-gen-model',
3413
+ 'setting-image-edit-model'
3414
  ];
3415
 
3416
  dropdownIds.forEach(dropdownId => {
 
3443
  }
3444
  const subAgentDropdown = document.getElementById('setting-research-sub-agent-model');
3445
  if (subAgentDropdown) subAgentDropdown.value = settings.researchSubAgentModel || '';
3446
+ const imageGenDropdown = document.getElementById('setting-image-gen-model');
3447
+ if (imageGenDropdown) imageGenDropdown.value = settings.imageGenModel || '';
3448
+ const imageEditDropdown = document.getElementById('setting-image-edit-model');
3449
+ if (imageEditDropdown) imageEditDropdown.value = settings.imageEditModel || '';
3450
  }
3451
 
3452
  // Show add/edit provider dialog
 
3613
  }
3614
 
3615
  function openSettings() {
3616
+ // Show settings file path
3617
+ const pathEl = document.getElementById('settingsPath');
3618
+ if (pathEl) pathEl.textContent = settings._settingsPath || '';
3619
+
3620
  // Render providers and models lists
3621
  renderProvidersList();
3622
  renderModelsList();
 
3625
  // Populate service keys
3626
  document.getElementById('setting-e2b-key').value = settings.e2bKey || '';
3627
  document.getElementById('setting-serper-key').value = settings.serperKey || '';
3628
+ document.getElementById('setting-hf-token').value = settings.hfToken || '';
3629
 
3630
  // Populate research settings
3631
  document.getElementById('setting-research-parallel-workers').value = settings.researchParallelWorkers || '';
 
3663
  // Get other settings
3664
  const e2bKey = document.getElementById('setting-e2b-key').value.trim();
3665
  const serperKey = document.getElementById('setting-serper-key').value.trim();
3666
+ const hfToken = document.getElementById('setting-hf-token').value.trim();
3667
+ const imageGenModel = document.getElementById('setting-image-gen-model')?.value || '';
3668
+ const imageEditModel = document.getElementById('setting-image-edit-model')?.value || '';
3669
  const researchParallelWorkers = document.getElementById('setting-research-parallel-workers').value.trim();
3670
  const researchMaxWebsites = document.getElementById('setting-research-max-websites').value.trim();
3671
  const themeColor = document.getElementById('setting-theme-color').value || 'forest';
 
3685
  settings.notebooks = notebookModels;
3686
  settings.e2bKey = e2bKey;
3687
  settings.serperKey = serperKey;
3688
+ settings.hfToken = hfToken;
3689
+ settings.imageGenModel = imageGenModel;
3690
+ settings.imageEditModel = imageEditModel;
3691
  settings.researchSubAgentModel = researchSubAgentModel;
3692
  settings.researchParallelWorkers = researchParallelWorkers ? parseInt(researchParallelWorkers) : null;
3693
  settings.researchMaxWebsites = researchMaxWebsites ? parseInt(researchMaxWebsites) : null;
frontend/style.css CHANGED
@@ -2729,6 +2729,17 @@ pre code [class*="token"] {
2729
  letter-spacing: 0.5px;
2730
  }
2731
 
 
 
 
 
 
 
 
 
 
 
 
2732
  .settings-panel-close {
2733
  background: none;
2734
  border: none;
@@ -3778,3 +3789,91 @@ pre code [class*="token"] {
3778
  }
3779
 
3780
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2729
  letter-spacing: 0.5px;
2730
  }
2731
 
2732
+ .settings-path {
2733
+ font-size: 10px;
2734
+ color: rgba(255, 255, 255, 0.6);
2735
+ font-weight: 400;
2736
+ overflow: hidden;
2737
+ text-overflow: ellipsis;
2738
+ white-space: nowrap;
2739
+ flex: 1;
2740
+ margin: 0 12px;
2741
+ }
2742
+
2743
  .settings-panel-close {
2744
  background: none;
2745
  border: none;
 
3789
  }
3790
 
3791
 
3792
+ /* ============================================================
3793
+ Agent Tool Cells (search, read, screenshot)
3794
+ ============================================================ */
3795
+
3796
+ .tool-cell {
3797
+ margin: 16px 0;
3798
+ overflow: hidden;
3799
+ }
3800
+
3801
+ .tool-cell-label {
3802
+ font-size: 10px;
3803
+ font-weight: 600;
3804
+ color: var(--bg-primary);
3805
+ text-transform: uppercase;
3806
+ letter-spacing: 0.5px;
3807
+ padding: 6px 12px;
3808
+ background: var(--theme-accent);
3809
+ display: flex;
3810
+ align-items: center;
3811
+ gap: 8px;
3812
+ }
3813
+
3814
+ .tool-cell-input {
3815
+ background: var(--bg-tertiary);
3816
+ padding: 10px 12px;
3817
+ border: 1px solid var(--border-primary);
3818
+ border-top: none;
3819
+ font-family: 'JetBrains Mono', monospace;
3820
+ font-size: 12px;
3821
+ color: var(--text-primary);
3822
+ word-break: break-all;
3823
+ }
3824
+
3825
+ .tool-cell-output {
3826
+ padding: 10px 12px;
3827
+ background: var(--bg-tertiary);
3828
+ border: 1px solid var(--border-primary);
3829
+ border-top: none;
3830
+ font-size: 12px;
3831
+ }
3832
+
3833
+ .search-results-content {
3834
+ display: flex;
3835
+ flex-direction: column;
3836
+ gap: 8px;
3837
+ }
3838
+
3839
+ .search-result-item {
3840
+ display: flex;
3841
+ flex-direction: column;
3842
+ gap: 1px;
3843
+ }
3844
+
3845
+ .search-result-item a {
3846
+ color: var(--accent-primary);
3847
+ text-decoration: none;
3848
+ font-weight: 500;
3849
+ font-size: 12px;
3850
+ }
3851
+
3852
+ .search-result-item a:hover {
3853
+ text-decoration: underline;
3854
+ }
3855
+
3856
+ .search-snippet {
3857
+ color: var(--text-muted);
3858
+ font-size: 11px;
3859
+ line-height: 1.3;
3860
+ }
3861
+
3862
+ .tool-cell-read-summary {
3863
+ color: var(--text-muted);
3864
+ font-size: 11px;
3865
+ }
3866
+
3867
+ .screenshot-img {
3868
+ max-width: 100%;
3869
+ max-height: 400px;
3870
+ border-radius: 2px;
3871
+ }
3872
+
3873
+ .generated-img {
3874
+ max-width: 100%;
3875
+ border-radius: 4px;
3876
+ cursor: pointer;
3877
+ }
3878
+
3879
+