Spaces:
Running
Running
Add IMAGE agent, web tools for AGENT, and fix conversation history
Browse files- New IMAGE notebook type with HuggingFace image generation/editing tools
(generate_image, edit_image, read_image_url via FLUX.1 models)
- New backend/image.py streaming handler with image store and VLM context
resize (512px JPEG thumbnails to avoid token overflow)
- New backend/tools.py centralizing all tool definitions and execution functions
- New backend/agent.py with web tools (web_search, read_url, screenshot_url)
- Image model settings use dropdowns populated from configured models
- Settings panel shows active settings.json file path
- Fix conversation history: sub-notebook results now update the tool response
DOM element so follow-up questions have full context
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- backend/agent.py +261 -0
- backend/agents.py +87 -8
- backend/code.py +3 -69
- backend/image.py +376 -0
- backend/main.py +179 -0
- backend/tools.py +405 -0
- frontend/index.html +28 -2
- frontend/script.js +205 -14
- frontend/style.css +99 -0
backend/agent.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agent notebook backend - autonomous agent with web tools (search, read, screenshot).
|
| 3 |
+
|
| 4 |
+
Uses the same tool-calling loop pattern as code.py:
|
| 5 |
+
LLM call → parse tool_calls → execute → update history → repeat
|
| 6 |
+
"""
|
| 7 |
+
import json
|
| 8 |
+
import logging
|
| 9 |
+
import re
|
| 10 |
+
import time
|
| 11 |
+
from typing import List, Dict, Optional
|
| 12 |
+
|
| 13 |
+
from tools import (
|
| 14 |
+
web_search, read_url, screenshot_url,
|
| 15 |
+
execute_web_search, execute_read_url, execute_screenshot_url,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
TOOLS = [web_search, read_url, screenshot_url]
|
| 21 |
+
|
| 22 |
+
MAX_TURNS = 20
|
| 23 |
+
MAX_RETRIES = 3
|
| 24 |
+
RETRY_DELAYS = [2, 5, 10]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def parse_llm_error(error: Exception) -> dict:
    """Parse an LLM client error into a frontend-friendly dict.

    Many providers embed a JSON payload in the exception text; try to
    extract it for a cleaner message, then fall back to keyword heuristics
    for retryability.

    Args:
        error: the exception raised by the LLM client call.

    Returns:
        dict with keys:
        - "message": str human-readable error message
        - "retryable": bool, True for rate-limit/overload-style errors
    """
    error_str = str(error)
    try:
        # Grab the first {...} span — providers often wrap details in JSON.
        json_match = re.search(r'\{.*\}', error_str)
        if json_match:
            error_data = json.loads(json_match.group())
            return {
                "message": error_data.get("message", error_str),
                "retryable": error_data.get("type") == "too_many_requests_error" or "429" in error_str
            }
    except (json.JSONDecodeError, TypeError, AttributeError):
        # Malformed or partial JSON in the error text — use heuristics below.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
        pass

    retryable = any(x in error_str.lower() for x in ["429", "rate limit", "too many requests", "overloaded"])
    return {"message": error_str, "retryable": retryable}
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def execute_tool(tool_name: str, args: dict, serper_key: str) -> dict:
    """
    Dispatch a single web-tool invocation and package the outcome.

    Args:
        tool_name: one of "web_search", "read_url", "screenshot_url".
        args: parsed tool-call arguments from the LLM.
        serper_key: API key forwarded to the search backend.

    Returns:
        dict with keys:
        - "content": str result text for the LLM
        - "image": optional base64 PNG (screenshot_url only)
        - "display": dict with display-friendly data for the frontend
    """
    if tool_name == "web_search":
        search_query = args.get("query", "")
        limit = args.get("num_results", 5)
        hits = execute_web_search(search_query, serper_key, limit)
        return {
            "content": hits,
            "display": {"type": "search", "query": search_query, "results": hits},
        }

    if tool_name == "read_url":
        page_url = args.get("url", "")
        page_text = execute_read_url(page_url)
        return {
            "content": page_text,
            "display": {"type": "page", "url": page_url, "length": len(page_text)},
        }

    if tool_name == "screenshot_url":
        page_url = args.get("url", "")
        shot = execute_screenshot_url(page_url)
        if not shot:
            # Headless capture failed — report a readable error to the LLM.
            return {
                "content": f"Failed to take screenshot of {page_url}. The page may require JavaScript or be inaccessible.",
                "display": {"type": "screenshot_error", "url": page_url},
            }
        return {
            "content": "Screenshot captured successfully. The image is attached.",
            "image": shot,
            "display": {"type": "screenshot", "url": page_url},
        }

    return {"content": f"Unknown tool: {tool_name}", "display": {"type": "error"}}
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def stream_agent_execution(
    client,
    model: str,
    messages: List[Dict],
    serper_key: str,
    extra_params: Optional[Dict] = None
):
    """
    Run the agent tool-calling loop.

    Repeatedly calls the LLM, executes any requested tools via execute_tool,
    appends the exchange to `messages` (MUTATED IN PLACE), and loops until
    the model stops requesting tools or MAX_TURNS is reached.

    Args:
        client: OpenAI-compatible client exposing chat.completions.create.
        model: model identifier passed to the client.
        messages: chat history; mutated in place as turns are appended.
        serper_key: API key forwarded to web_search execution.
        extra_params: optional dict passed as `extra_body` on each call.

    Yields dicts with SSE event types:
    - thinking: { content }
    - content: { content }
    - tool_start: { tool, args }
    - tool_result: { tool, result, image? }
    - result_preview: { content }
    - result: { content }
    - generating: {}
    - retry: { attempt, max_attempts, delay, message }
    - error: { content }
    - done: {}
    """
    turns = 0
    done = False

    while not done and turns < MAX_TURNS:
        turns += 1

        # --- LLM call with retry ---
        response = None
        last_error = None

        for attempt in range(MAX_RETRIES):
            try:
                call_params = {
                    "messages": messages,
                    "model": model,
                    "tools": TOOLS,
                    "tool_choice": "auto",
                }
                if extra_params:
                    call_params["extra_body"] = extra_params
                response = client.chat.completions.create(**call_params)
                break
            except Exception as e:
                last_error = e
                error_info = parse_llm_error(e)
                # Only retry transient (rate-limit/overload) errors, with
                # escalating delays from RETRY_DELAYS; anything else is fatal.
                if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
                    delay = RETRY_DELAYS[attempt]
                    yield {
                        "type": "retry",
                        "attempt": attempt + 1,
                        "max_attempts": MAX_RETRIES,
                        "delay": delay,
                        "message": error_info["message"],
                    }
                    # NOTE(review): time.sleep blocks the whole worker; if this
                    # generator runs on an async server, confirm it is driven
                    # from a thread pool.
                    time.sleep(delay)
                else:
                    yield {"type": "error", "content": error_info["message"]}
                    return

        if response is None:
            # Defensive: the retry loop above either breaks with a response or
            # returns, so this should be unreachable — kept as a safety net.
            yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
            return

        # --- Parse response ---
        assistant_message = response.choices[0].message
        content = assistant_message.content or ""
        tool_calls = assistant_message.tool_calls or []

        # Check for <result> tags
        # The first <result>...</result> span becomes the final answer; all
        # such spans are stripped from the visible thinking text.
        result_match = re.search(r'<result>(.*?)</result>', content, re.DOTALL | re.IGNORECASE)
        result_content = None
        thinking_content = content

        if result_match:
            result_content = result_match.group(1).strip()
            thinking_content = re.sub(r'<result>.*?</result>', '', content, flags=re.DOTALL | re.IGNORECASE).strip()

        # Send thinking/content
        # With pending tool calls the text is intermediate reasoning
        # ("thinking"); without them it is the final visible message.
        if thinking_content.strip():
            if tool_calls:
                yield {"type": "thinking", "content": thinking_content}
            else:
                yield {"type": "content", "content": thinking_content}

        # Send result preview
        if result_content:
            yield {"type": "result_preview", "content": result_content}

        # --- Handle tool calls ---
        if tool_calls:
            # NOTE(review): each tool_call appends its OWN assistant message
            # carrying the full `content` and a single-element tool_calls list.
            # With multiple tool calls per turn this duplicates the assistant
            # text in history; most chat APIs expect one assistant message
            # with all tool_calls followed by the tool messages — confirm the
            # target API tolerates this shape.
            for tool_call in tool_calls:
                func_name = tool_call.function.name

                # Parse arguments
                try:
                    args = json.loads(tool_call.function.arguments)
                except json.JSONDecodeError as e:
                    # Malformed arguments: record the failed call plus an error
                    # tool response so the model can correct itself next turn.
                    output = f"Error parsing arguments: {e}"
                    messages.append({
                        "role": "assistant",
                        "content": content,
                        "tool_calls": [{"id": tool_call.id, "type": "function", "function": {"name": func_name, "arguments": tool_call.function.arguments}}]
                    })
                    messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": output})
                    yield {"type": "error", "content": output}
                    continue

                # Signal tool start (include IDs for history reconstruction)
                yield {
                    "type": "tool_start",
                    "tool": func_name,
                    "args": args,
                    "tool_call_id": tool_call.id,
                    "arguments": tool_call.function.arguments,
                    "thinking": content,
                }

                # Execute tool
                result = execute_tool(func_name, args, serper_key)

                # Build tool response message for LLM
                if result.get("image"):
                    # For screenshots, send image as vision content so LLM can see it
                    tool_response_content = [
                        {"type": "text", "text": result["content"]},
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{result['image']}"}}
                    ]
                else:
                    tool_response_content = result["content"]

                # Tool messages must be strings for history persistence, so
                # vision-content lists are JSON-encoded.
                tool_response_str = tool_response_content if isinstance(tool_response_content, str) else json.dumps(tool_response_content)

                # Add to message history
                messages.append({
                    "role": "assistant",
                    "content": content,
                    "tool_calls": [{"id": tool_call.id, "type": "function", "function": {"name": func_name, "arguments": tool_call.function.arguments}}]
                })
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_response_str
                })

                # Signal tool result to frontend (include response for history)
                tool_result_event = {
                    "type": "tool_result",
                    "tool": func_name,
                    "tool_call_id": tool_call.id,
                    "result": result.get("display", {}),
                    "response": tool_response_str,
                }
                if result.get("image"):
                    tool_result_event["image"] = result["image"]
                yield tool_result_event

        else:
            # No tool calls — we're done
            messages.append({"role": "assistant", "content": content})
            done = True

            # Send result if found
            if result_content:
                yield {"type": "result", "content": result_content}

        # Signal between-turn processing
        if not done:
            yield {"type": "generating"}

    yield {"type": "done"}
|
backend/agents.py
CHANGED
|
@@ -74,13 +74,33 @@ AGENT_REGISTRY = {
|
|
| 74 |
"agent": {
|
| 75 |
"label": "AGENT",
|
| 76 |
"system_prompt": (
|
| 77 |
-
"You are an autonomous agent
|
| 78 |
-
"
|
| 79 |
-
"
|
| 80 |
-
"-
|
| 81 |
-
"
|
| 82 |
-
"-
|
| 83 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
),
|
| 85 |
"tool": {
|
| 86 |
"type": "function",
|
|
@@ -105,7 +125,7 @@ AGENT_REGISTRY = {
|
|
| 105 |
},
|
| 106 |
"tool_arg": "task",
|
| 107 |
"has_counter": True,
|
| 108 |
-
"in_menu":
|
| 109 |
"in_launcher": True,
|
| 110 |
"placeholder": "Enter message...",
|
| 111 |
},
|
|
@@ -272,6 +292,65 @@ AGENT_REGISTRY = {
|
|
| 272 |
"in_launcher": True,
|
| 273 |
"placeholder": "Enter message...",
|
| 274 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
}
|
| 276 |
|
| 277 |
|
|
|
|
| 74 |
"agent": {
|
| 75 |
"label": "AGENT",
|
| 76 |
"system_prompt": (
|
| 77 |
+
"You are an autonomous agent with web access, specialized in research and multi-step tasks.\n\n"
|
| 78 |
+
"## Available Tools\n\n"
|
| 79 |
+
"You have three tools:\n"
|
| 80 |
+
"- **web_search(query)**: Search Google. Returns titles, URLs, and snippets. "
|
| 81 |
+
"Use this first to discover information and find relevant pages.\n"
|
| 82 |
+
"- **read_url(url)**: Fetch a web page and extract its content as clean markdown (includes images). "
|
| 83 |
+
"Use this when you need detailed content from a specific page.\n"
|
| 84 |
+
"- **screenshot_url(url)**: Take a screenshot of a web page. "
|
| 85 |
+
"Use this when you need to see the visual layout, images, charts, or design.\n\n"
|
| 86 |
+
"## Strategy\n\n"
|
| 87 |
+
"1. **Search first** — use web_search to find relevant pages\n"
|
| 88 |
+
"2. **Evaluate snippets** — often the search snippets contain enough info to answer\n"
|
| 89 |
+
"3. **Read selectively** — only use read_url on 1-3 most relevant pages when you need more detail\n"
|
| 90 |
+
"4. **Screenshot when visual** — use screenshot_url when images, charts, or layout matter\n"
|
| 91 |
+
"5. **Synthesize** — combine findings into a clear, concise answer\n\n"
|
| 92 |
+
"## Guidelines\n\n"
|
| 93 |
+
"- Be efficient with tool calls — don't read every search result\n"
|
| 94 |
+
"- Break complex tasks into steps and explain your reasoning\n"
|
| 95 |
+
"- Cite sources with URLs when presenting findings\n\n"
|
| 96 |
+
"## CRITICAL: You MUST provide a <result> tag\n\n"
|
| 97 |
+
"When you have completed the task, you MUST provide a brief summary using the <result> tag. "
|
| 98 |
+
"This is REQUIRED - without it, your work will not be visible in the command center.\n\n"
|
| 99 |
+
"Keep results SHORT - 1-3 sentences summarizing what you found or did.\n\n"
|
| 100 |
+
"Example:\n"
|
| 101 |
+
"<result>\n"
|
| 102 |
+
"Python 3.13 was released on Oct 7, 2024 with key features including ...\n"
|
| 103 |
+
"</result>\n"
|
| 104 |
),
|
| 105 |
"tool": {
|
| 106 |
"type": "function",
|
|
|
|
| 125 |
},
|
| 126 |
"tool_arg": "task",
|
| 127 |
"has_counter": True,
|
| 128 |
+
"in_menu": True,
|
| 129 |
"in_launcher": True,
|
| 130 |
"placeholder": "Enter message...",
|
| 131 |
},
|
|
|
|
| 292 |
"in_launcher": True,
|
| 293 |
"placeholder": "Enter message...",
|
| 294 |
},
|
| 295 |
+
|
| 296 |
+
"image": {
|
| 297 |
+
"label": "IMAGE",
|
| 298 |
+
"system_prompt": (
|
| 299 |
+
"You are a creative AI assistant with access to image generation and editing tools.\n\n"
|
| 300 |
+
"## Available Tools\n\n"
|
| 301 |
+
"- **generate_image(prompt)**: Generate a new image from a text description. "
|
| 302 |
+
"Returns an image reference (e.g., 'image_1') that you can see.\n"
|
| 303 |
+
"- **edit_image(prompt, source)**: Edit or transform an existing image. "
|
| 304 |
+
"The source can be a URL or an image reference from a previous tool call (e.g., 'image_1').\n"
|
| 305 |
+
"- **read_image_url(url)**: Download an image from a URL. "
|
| 306 |
+
"Returns an image reference that you can see and use with edit_image.\n\n"
|
| 307 |
+
"## Strategy\n\n"
|
| 308 |
+
"1. If the user provides an image URL, use read_image_url first to load it\n"
|
| 309 |
+
"2. Use generate_image for creating new images from text descriptions\n"
|
| 310 |
+
"3. Use edit_image to transform existing images (style transfer, edits, variations)\n"
|
| 311 |
+
"4. You can see all generated/loaded images — describe what you see and iterate if needed\n"
|
| 312 |
+
"5. Write detailed, descriptive prompts for best results\n\n"
|
| 313 |
+
"## Guidelines\n\n"
|
| 314 |
+
"- Be creative and descriptive in your image prompts\n"
|
| 315 |
+
"- When editing, reference the source image by its name (e.g., 'image_1')\n"
|
| 316 |
+
"- Describe what you see in generated images to confirm they match the request\n\n"
|
| 317 |
+
"## CRITICAL: You MUST provide a <result> tag\n\n"
|
| 318 |
+
"When you have completed the task, you MUST provide a brief summary using the <result> tag. "
|
| 319 |
+
"This is REQUIRED - without it, your work will not be visible in the command center.\n\n"
|
| 320 |
+
"Include image references in your result using self-closing tags like <image_1> (NOT </image_1>).\n\n"
|
| 321 |
+
"Example:\n"
|
| 322 |
+
"<result>\n"
|
| 323 |
+
"Here's the comic version of your image:\n\n"
|
| 324 |
+
"<image_2>\n"
|
| 325 |
+
"</result>\n"
|
| 326 |
+
),
|
| 327 |
+
"tool": {
|
| 328 |
+
"type": "function",
|
| 329 |
+
"function": {
|
| 330 |
+
"name": "launch_image_notebook",
|
| 331 |
+
"description": "Launch an image notebook for generating or editing images using AI models. Use this for creating images from text, applying style transfers, editing photos, or any visual content creation.",
|
| 332 |
+
"parameters": {
|
| 333 |
+
"type": "object",
|
| 334 |
+
"properties": {
|
| 335 |
+
"task": {
|
| 336 |
+
"type": "string",
|
| 337 |
+
"description": "The image task or description. Should contain all necessary context including any image URLs."
|
| 338 |
+
},
|
| 339 |
+
"task_id": {
|
| 340 |
+
"type": "string",
|
| 341 |
+
"description": "A 2-3 word summary of the task, separated by dashes."
|
| 342 |
+
}
|
| 343 |
+
},
|
| 344 |
+
"required": ["task", "task_id"]
|
| 345 |
+
}
|
| 346 |
+
}
|
| 347 |
+
},
|
| 348 |
+
"tool_arg": "task",
|
| 349 |
+
"has_counter": True,
|
| 350 |
+
"in_menu": True,
|
| 351 |
+
"in_launcher": True,
|
| 352 |
+
"placeholder": "Describe an image or paste a URL...",
|
| 353 |
+
},
|
| 354 |
}
|
| 355 |
|
| 356 |
|
backend/code.py
CHANGED
|
@@ -8,77 +8,11 @@ import re
|
|
| 8 |
from typing import List, Dict, Optional
|
| 9 |
from e2b_code_interpreter import Sandbox
|
| 10 |
|
| 11 |
-
|
| 12 |
|
|
|
|
| 13 |
|
| 14 |
-
TOOLS = [
|
| 15 |
-
{
|
| 16 |
-
"type": "function",
|
| 17 |
-
"function": {
|
| 18 |
-
"name": "execute_code",
|
| 19 |
-
"description": "Execute Python code in a stateful environment. Variables and imports persist between executions.",
|
| 20 |
-
"parameters": {
|
| 21 |
-
"type": "object",
|
| 22 |
-
"properties": {
|
| 23 |
-
"code": {
|
| 24 |
-
"type": "string",
|
| 25 |
-
"description": "The Python code to execute."
|
| 26 |
-
}
|
| 27 |
-
},
|
| 28 |
-
"required": ["code"]
|
| 29 |
-
}
|
| 30 |
-
}
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"type": "function",
|
| 34 |
-
"function": {
|
| 35 |
-
"name": "upload_files",
|
| 36 |
-
"description": "Upload files from the local workspace to the code execution environment for analysis. Files will be available at /home/user/<filename>. Use this to load data files, scripts, or any files you need to analyze.",
|
| 37 |
-
"parameters": {
|
| 38 |
-
"type": "object",
|
| 39 |
-
"properties": {
|
| 40 |
-
"paths": {
|
| 41 |
-
"type": "array",
|
| 42 |
-
"items": {"type": "string"},
|
| 43 |
-
"description": "List of file paths relative to the workspace root (e.g., ['data/sales.csv', 'config.json'])"
|
| 44 |
-
}
|
| 45 |
-
},
|
| 46 |
-
"required": ["paths"]
|
| 47 |
-
}
|
| 48 |
-
}
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"type": "function",
|
| 52 |
-
"function": {
|
| 53 |
-
"name": "download_files",
|
| 54 |
-
"description": "Download files from the code execution environment to the local workspace. Use this to save generated files, processed data, or any output files you want to keep.",
|
| 55 |
-
"parameters": {
|
| 56 |
-
"type": "object",
|
| 57 |
-
"properties": {
|
| 58 |
-
"files": {
|
| 59 |
-
"type": "array",
|
| 60 |
-
"items": {
|
| 61 |
-
"type": "object",
|
| 62 |
-
"properties": {
|
| 63 |
-
"sandbox_path": {
|
| 64 |
-
"type": "string",
|
| 65 |
-
"description": "Path in the sandbox (e.g., '/home/user/output.csv')"
|
| 66 |
-
},
|
| 67 |
-
"local_path": {
|
| 68 |
-
"type": "string",
|
| 69 |
-
"description": "Destination path relative to workspace (e.g., 'results/output.csv')"
|
| 70 |
-
}
|
| 71 |
-
},
|
| 72 |
-
"required": ["sandbox_path", "local_path"]
|
| 73 |
-
},
|
| 74 |
-
"description": "List of files to download with their sandbox and local paths"
|
| 75 |
-
}
|
| 76 |
-
},
|
| 77 |
-
"required": ["files"]
|
| 78 |
-
}
|
| 79 |
-
}
|
| 80 |
-
}
|
| 81 |
-
]
|
| 82 |
|
| 83 |
MAX_TURNS = 40
|
| 84 |
MAX_RETRIES = 3 # Maximum retries for LLM calls
|
|
|
|
| 8 |
from typing import List, Dict, Optional
|
| 9 |
from e2b_code_interpreter import Sandbox
|
| 10 |
|
| 11 |
+
from tools import execute_code, upload_files, download_files
|
| 12 |
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
+
TOOLS = [execute_code, upload_files, download_files]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
MAX_TURNS = 40
|
| 18 |
MAX_RETRIES = 3 # Maximum retries for LLM calls
|
backend/image.py
ADDED
|
@@ -0,0 +1,376 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Image notebook backend — multimodal agent with HuggingFace image generation tools.
|
| 3 |
+
|
| 4 |
+
Uses the same tool-calling loop pattern as agent.py:
|
| 5 |
+
LLM call → parse tool_calls → execute → update history → repeat
|
| 6 |
+
|
| 7 |
+
Key difference: maintains an image store (Dict[str, str]) mapping names like
|
| 8 |
+
"image_1" to base64 data, so the VLM can reference images across tool calls
|
| 9 |
+
without passing huge base64 strings in arguments.
|
| 10 |
+
"""
|
| 11 |
+
import base64
|
| 12 |
+
import json
|
| 13 |
+
import logging
|
| 14 |
+
import re
|
| 15 |
+
import time
|
| 16 |
+
from typing import List, Dict, Optional
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
from .tools import (
|
| 20 |
+
generate_image, edit_image, read_image_url,
|
| 21 |
+
execute_generate_image, execute_edit_image, execute_read_image_url,
|
| 22 |
+
)
|
| 23 |
+
except ImportError:
|
| 24 |
+
from tools import (
|
| 25 |
+
generate_image, edit_image, read_image_url,
|
| 26 |
+
execute_generate_image, execute_edit_image, execute_read_image_url,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger(__name__)
|
| 30 |
+
|
| 31 |
+
TOOLS = [generate_image, edit_image, read_image_url]
|
| 32 |
+
|
| 33 |
+
# Max dimension for images sent to the VLM context (keeps token count manageable)
|
| 34 |
+
VLM_IMAGE_MAX_DIM = 512
|
| 35 |
+
VLM_IMAGE_JPEG_QUALITY = 70
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def resize_image_for_vlm(base64_png: str) -> str:
    """Resize and compress an image for VLM context to avoid token overflow.

    Takes a full-res base64 PNG and returns a smaller base64 JPEG thumbnail
    that fits within VLM_IMAGE_MAX_DIM on its longest side.

    On any failure (Pillow missing, undecodable data), the input is returned
    unchanged — better to try the original than to lose the image entirely.
    """
    try:
        from PIL import Image
        import io as _io

        img_bytes = base64.b64decode(base64_png)
        img = Image.open(_io.BytesIO(img_bytes))

        # Downscale in place if larger than max dimension (preserves aspect ratio)
        if max(img.size) > VLM_IMAGE_MAX_DIM:
            img.thumbnail((VLM_IMAGE_MAX_DIM, VLM_IMAGE_MAX_DIM), Image.LANCZOS)

        # JPEG supports only RGB/L: flatten every other mode (RGBA, P, LA, I, ...).
        # The original checked only ("RGBA", "P"), so e.g. LA (grayscale+alpha)
        # inputs raised in img.save, hit the except-branch, and skipped
        # compression entirely.
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        # Re-encode as JPEG for a much smaller base64 payload
        buffer = _io.BytesIO()
        img.save(buffer, format="JPEG", quality=VLM_IMAGE_JPEG_QUALITY)
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
    except Exception as e:
        logger.error(f"Failed to resize image for VLM: {e}")
        # Fall back to original — better to try than to lose the image entirely
        return base64_png
|
| 67 |
+
|
| 68 |
+
MAX_TURNS = 20
|
| 69 |
+
MAX_RETRIES = 3
|
| 70 |
+
RETRY_DELAYS = [2, 5, 10]
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def parse_llm_error(error: Exception) -> dict:
    """Parse an LLM client error into a frontend-friendly dict.

    Many providers embed a JSON payload in the exception text; try to
    extract it for a cleaner message, then fall back to keyword heuristics
    for retryability.

    Args:
        error: the exception raised by the LLM client call.

    Returns:
        dict with keys:
        - "message": str human-readable error message
        - "retryable": bool, True for rate-limit/overload-style errors
    """
    error_str = str(error)
    try:
        # Grab the first {...} span — providers often wrap details in JSON.
        json_match = re.search(r'\{.*\}', error_str)
        if json_match:
            error_data = json.loads(json_match.group())
            return {
                "message": error_data.get("message", error_str),
                "retryable": error_data.get("type") == "too_many_requests_error" or "429" in error_str
            }
    except (json.JSONDecodeError, TypeError, AttributeError):
        # Malformed or partial JSON in the error text — use heuristics below.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
        pass

    retryable = any(x in error_str.lower() for x in ["429", "rate limit", "too many requests", "overloaded"])
    return {"message": error_str, "retryable": retryable}
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def execute_tool(tool_name: str, args: dict, hf_token: str, image_store: dict, image_counter: int, default_gen_model: Optional[str] = None, default_edit_model: Optional[str] = None) -> dict:
    """
    Execute an image tool by name and return result dict.

    Args:
        tool_name: one of "generate_image", "edit_image", "read_image_url".
        args: parsed tool-call arguments from the LLM.
        hf_token: HuggingFace API token forwarded to the image backends.
        image_store: name -> base64 image map; MUTATED IN PLACE when a new
            image is produced.
        image_counter: current image sequence number. Ints are immutable, so
            the caller must read the updated value back from the returned
            dict's "image_counter" key.
        default_gen_model: fallback generation model when args omit "model".
        default_edit_model: fallback edit model when args omit "model".

    Returns:
        dict with keys:
        - "content": str result for the LLM
        - "image": optional base64 PNG
        - "image_name": optional image reference name (e.g., "image_1")
        - "display": dict with display-friendly data for frontend
        - "image_counter": updated counter
    """
    if tool_name == "generate_image":
        prompt = args.get("prompt", "")
        # Explicit arg wins, then the configured default, then a hard fallback.
        model = args.get("model") or default_gen_model or "black-forest-labs/FLUX.1-schnell"
        base64_png = execute_generate_image(prompt, hf_token, model)

        if base64_png:
            # Register the new image under a fresh sequential name.
            image_counter += 1
            name = f"image_{image_counter}"
            image_store[name] = base64_png
            return {
                "content": f"Image generated successfully as '{name}'. The image is attached.",
                "image": base64_png,
                "image_name": name,
                "display": {"type": "generate", "prompt": prompt, "model": model, "image_name": name},
                "image_counter": image_counter,
            }
        else:
            return {
                "content": f"Failed to generate image. The model may be unavailable or the prompt may be invalid.",
                "display": {"type": "generate_error", "prompt": prompt},
                "image_counter": image_counter,
            }

    elif tool_name == "edit_image":
        prompt = args.get("prompt", "")
        source = args.get("source", "")
        model = args.get("model") or default_edit_model or "black-forest-labs/FLUX.1-Kontext-dev"

        # Resolve source: image store reference or URL
        source_bytes = None
        if source in image_store:
            source_bytes = base64.b64decode(image_store[source])
        elif source.startswith(("http://", "https://")):
            source_base64 = execute_read_image_url(source)
            if source_base64:
                source_bytes = base64.b64decode(source_base64)

        if source_bytes is None:
            # Neither a known reference nor a fetchable URL — tell the LLM
            # how to supply a valid source instead of failing silently.
            return {
                "content": f"Could not resolve image source '{source}'. Use a URL or a reference from a previous tool call (e.g., 'image_1').",
                "display": {"type": "edit_error", "source": source},
                "image_counter": image_counter,
            }

        base64_png = execute_edit_image(prompt, source_bytes, hf_token, model)

        if base64_png:
            # The edited image gets its own name; the source stays in the store.
            image_counter += 1
            name = f"image_{image_counter}"
            image_store[name] = base64_png
            return {
                "content": f"Image edited successfully as '{name}'. The image is attached.",
                "image": base64_png,
                "image_name": name,
                "display": {"type": "edit", "prompt": prompt, "source": source, "model": model, "image_name": name},
                "image_counter": image_counter,
            }
        else:
            return {
                "content": f"Failed to edit image. The model may be unavailable or the request may be invalid.",
                "display": {"type": "edit_error", "source": source},
                "image_counter": image_counter,
            }

    elif tool_name == "read_image_url":
        url = args.get("url", "")
        base64_png = execute_read_image_url(url)

        if base64_png:
            image_counter += 1
            name = f"image_{image_counter}"
            image_store[name] = base64_png
            return {
                "content": f"Image downloaded successfully as '{name}'. The image is attached.",
                "image": base64_png,
                "image_name": name,
                "display": {"type": "read_image", "url": url, "image_name": name},
                "image_counter": image_counter,
            }
        else:
            return {
                "content": f"Failed to download image from {url}. The URL may be invalid or inaccessible.",
                "display": {"type": "read_image_error", "url": url},
                "image_counter": image_counter,
            }

    # Fallthrough: the LLM asked for a tool this notebook does not provide.
    return {
        "content": f"Unknown tool: {tool_name}",
        "display": {"type": "error"},
        "image_counter": image_counter,
    }
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def stream_image_execution(
    client,
    model: str,
    messages: List[Dict],
    hf_token: str,
    image_gen_model: Optional[str] = None,
    image_edit_model: Optional[str] = None,
    extra_params: Optional[Dict] = None
):
    """
    Run the image agent tool-calling loop.

    Args:
        client: OpenAI-compatible client used for chat completions.
        model: Chat model name for the agent LLM.
        messages: Full message history (mutated in place as turns progress).
        hf_token: HuggingFace token passed through to image tool execution.
        image_gen_model: Default text-to-image model for generate_image.
        image_edit_model: Default image-to-image model for edit_image.
        extra_params: Optional dict forwarded as `extra_body` on each LLM call.

    Yields dicts with SSE event types:
    - thinking: { content }
    - content: { content }
    - tool_start: { tool, args }
    - tool_result: { tool, result, image? }
    - result_preview: { content }
    - result: { content, images? }
    - generating: {}
    - retry: { attempt, max_attempts, delay, message }
    - error: { content }
    - done: {}
    """
    turns = 0
    done = False
    # image_store maps "image_N" names -> base64 PNG; image_counter numbers them.
    image_store = {}
    image_counter = 0

    while not done and turns < MAX_TURNS:
        turns += 1

        # --- LLM call with retry ---
        response = None
        last_error = None

        for attempt in range(MAX_RETRIES):
            try:
                call_params = {
                    "messages": messages,
                    "model": model,
                    "tools": TOOLS,
                    "tool_choice": "auto",
                }
                if extra_params:
                    call_params["extra_body"] = extra_params
                response = client.chat.completions.create(**call_params)
                break
            except Exception as e:
                last_error = e
                error_info = parse_llm_error(e)
                # Only retry errors classified as retryable, and never on the last attempt.
                if attempt < MAX_RETRIES - 1 and error_info["retryable"]:
                    delay = RETRY_DELAYS[attempt]
                    yield {
                        "type": "retry",
                        "attempt": attempt + 1,
                        "max_attempts": MAX_RETRIES,
                        "delay": delay,
                        "message": error_info["message"],
                    }
                    time.sleep(delay)
                else:
                    yield {"type": "error", "content": error_info["message"]}
                    return

        if response is None:
            yield {"type": "error", "content": f"LLM error after {MAX_RETRIES} attempts: {str(last_error)}"}
            return

        # --- Parse response ---
        assistant_message = response.choices[0].message
        content = assistant_message.content or ""
        tool_calls = assistant_message.tool_calls or []

        # Check for <result> tags (the agent's final-answer convention).
        result_match = re.search(r'<result>(.*?)</result>', content, re.DOTALL | re.IGNORECASE)
        result_content = None
        thinking_content = content

        if result_match:
            result_content = result_match.group(1).strip()
            thinking_content = re.sub(r'<result>.*?</result>', '', content, flags=re.DOTALL | re.IGNORECASE).strip()

        # Send thinking/content: text alongside tool calls is "thinking",
        # text with no tool calls is the final streamed "content".
        if thinking_content.strip():
            if tool_calls:
                yield {"type": "thinking", "content": thinking_content}
            else:
                yield {"type": "content", "content": thinking_content}

        # Send result preview
        if result_content:
            # Include image store so frontend can resolve <image_N> references
            yield {"type": "result_preview", "content": result_content, "images": image_store}

        # --- Handle tool calls ---
        if tool_calls:
            for tool_call in tool_calls:
                func_name = tool_call.function.name

                # Parse arguments
                try:
                    args = json.loads(tool_call.function.arguments)
                except json.JSONDecodeError as e:
                    # Record the malformed call in history so the LLM can self-correct.
                    output = f"Error parsing arguments: {e}"
                    messages.append({
                        "role": "assistant",
                        "content": content,
                        "tool_calls": [{"id": tool_call.id, "type": "function", "function": {"name": func_name, "arguments": tool_call.function.arguments}}]
                    })
                    messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": output})
                    yield {"type": "error", "content": output}
                    continue

                # Signal tool start
                yield {
                    "type": "tool_start",
                    "tool": func_name,
                    "args": args,
                    "tool_call_id": tool_call.id,
                    "arguments": tool_call.function.arguments,
                    "thinking": content,
                }

                # Execute tool
                result = execute_tool(func_name, args, hf_token, image_store, image_counter, default_gen_model=image_gen_model, default_edit_model=image_edit_model)
                image_counter = result.get("image_counter", image_counter)

                # Build tool response message for LLM
                if result.get("image"):
                    # Resize image for VLM context to avoid token overflow
                    vlm_image = resize_image_for_vlm(result["image"])
                    tool_response_content = [
                        {"type": "text", "text": result["content"]},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{vlm_image}"}}
                    ]
                else:
                    tool_response_content = result["content"]

                # Tool messages must be strings; serialize multimodal content parts.
                tool_response_str = tool_response_content if isinstance(tool_response_content, str) else json.dumps(tool_response_content)

                # Add to message history
                messages.append({
                    "role": "assistant",
                    "content": content,
                    "tool_calls": [{"id": tool_call.id, "type": "function", "function": {"name": func_name, "arguments": tool_call.function.arguments}}]
                })
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_response_str
                })

                # Signal tool result to frontend
                tool_result_event = {
                    "type": "tool_result",
                    "tool": func_name,
                    "tool_call_id": tool_call.id,
                    "result": result.get("display", {}),
                    "response": tool_response_str,
                }
                if result.get("image"):
                    tool_result_event["image"] = result["image"]
                if result.get("image_name"):
                    tool_result_event["image_name"] = result["image_name"]
                yield tool_result_event

        else:
            # No tool calls — we're done
            messages.append({"role": "assistant", "content": content})
            done = True

        # Send result if found
        if result_content:
            yield {"type": "result", "content": result_content, "images": image_store}

        # Signal between-turn processing
        if not done:
            yield {"type": "generating"}

    yield {"type": "done"}
|
backend/main.py
CHANGED
|
@@ -99,6 +99,28 @@ except ImportError:
|
|
| 99 |
COMMAND_AVAILABLE = False
|
| 100 |
logger.warning("Command center tool handling not available.")
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
# Session management for sandboxes
|
| 103 |
SANDBOXES: Dict[str, any] = {}
|
| 104 |
SANDBOX_TIMEOUT = 300
|
|
@@ -196,6 +218,9 @@ class ChatRequest(BaseModel):
|
|
| 196 |
extra_params: Optional[Dict] = None # Extra parameters for API calls (e.g., enable_thinking)
|
| 197 |
e2b_key: Optional[str] = None # E2B API key for code execution
|
| 198 |
serper_key: Optional[str] = None # Serper API key for research
|
|
|
|
|
|
|
|
|
|
| 199 |
research_sub_agent_model: Optional[str] = None # Model for research sub-tasks
|
| 200 |
research_sub_agent_endpoint: Optional[str] = None # Endpoint for research sub-agent (may differ from main)
|
| 201 |
research_sub_agent_token: Optional[str] = None # Token for research sub-agent endpoint
|
|
@@ -489,6 +514,113 @@ async def stream_command_center_notebook(
|
|
| 489 |
yield f"data: {json.dumps({'type': 'error', 'content': error_message})}\n\n"
|
| 490 |
|
| 491 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
async def stream_chat_response(
|
| 493 |
messages: List[dict],
|
| 494 |
endpoint: str,
|
|
@@ -682,7 +814,11 @@ async def chat_stream(request: ChatRequest):
|
|
| 682 |
# Apply environment variable fallbacks for API keys
|
| 683 |
e2b_key = get_env_fallback(request.e2b_key, "E2B_API_KEY")
|
| 684 |
serper_key = get_env_fallback(request.serper_key, "SERPER_API_KEY")
|
|
|
|
| 685 |
token = get_env_fallback(request.token, "LLM_API_KEY")
|
|
|
|
|
|
|
|
|
|
| 686 |
|
| 687 |
# Route to code execution handler for code notebooks
|
| 688 |
if request.notebook_type == "code":
|
|
@@ -739,6 +875,48 @@ async def chat_stream(request: ChatRequest):
|
|
| 739 |
}
|
| 740 |
)
|
| 741 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 742 |
# Route to command center handler for command center (with tool-based launching)
|
| 743 |
if request.notebook_type == "command":
|
| 744 |
return StreamingResponse(
|
|
@@ -932,6 +1110,7 @@ async def get_settings():
|
|
| 932 |
if os.path.exists(SETTINGS_FILE):
|
| 933 |
with open(SETTINGS_FILE, "r") as f:
|
| 934 |
settings = json.load(f)
|
|
|
|
| 935 |
return settings
|
| 936 |
else:
|
| 937 |
# Return default settings if file doesn't exist
|
|
|
|
| 99 |
COMMAND_AVAILABLE = False
|
| 100 |
logger.warning("Command center tool handling not available.")
|
| 101 |
|
| 102 |
+
# For agent with web tools
try:
    try:
        from .agent import stream_agent_execution  # package-relative import (run as a package)
    except ImportError:
        from agent import stream_agent_execution  # flat-layout fallback (run as a script)
    AGENT_AVAILABLE = True
except ImportError:
    # Agent handlers check this flag and fall back to plain chat streaming.
    AGENT_AVAILABLE = False
    logger.warning("Agent web tools not available. Install with: pip install readability-lxml markdownify")

# For image agent with HuggingFace tools
try:
    try:
        from .image import stream_image_execution  # package-relative import (run as a package)
    except ImportError:
        from image import stream_image_execution  # flat-layout fallback (run as a script)
    IMAGE_AVAILABLE = True
except ImportError:
    # Image handler reports an SSE error to the client when this is False.
    IMAGE_AVAILABLE = False
    logger.warning("Image agent not available. Install with: pip install huggingface_hub Pillow")
|
| 123 |
+
|
| 124 |
# Session management for sandboxes
|
| 125 |
SANDBOXES: Dict[str, any] = {}
|
| 126 |
SANDBOX_TIMEOUT = 300
|
|
|
|
| 218 |
extra_params: Optional[Dict] = None # Extra parameters for API calls (e.g., enable_thinking)
|
| 219 |
e2b_key: Optional[str] = None # E2B API key for code execution
|
| 220 |
serper_key: Optional[str] = None # Serper API key for research
|
| 221 |
+
hf_token: Optional[str] = None # HuggingFace token for image generation
|
| 222 |
+
image_gen_model: Optional[str] = None # HuggingFace model for text-to-image
|
| 223 |
+
image_edit_model: Optional[str] = None # HuggingFace model for image-to-image
|
| 224 |
research_sub_agent_model: Optional[str] = None # Model for research sub-tasks
|
| 225 |
research_sub_agent_endpoint: Optional[str] = None # Endpoint for research sub-agent (may differ from main)
|
| 226 |
research_sub_agent_token: Optional[str] = None # Token for research sub-agent endpoint
|
|
|
|
| 514 |
yield f"data: {json.dumps({'type': 'error', 'content': error_message})}\n\n"
|
| 515 |
|
| 516 |
|
| 517 |
+
async def stream_agent_notebook(
    messages: List[dict],
    endpoint: str,
    token: Optional[str],
    model: str,
    serper_key: str,
    tab_id: str = "default",
    extra_params: Optional[Dict] = None
):
    """Handle agent notebook with web tools (search, read, screenshot).

    Bridges the synchronous tool-calling loop in stream_agent_execution into
    an async Server-Sent-Events stream of ``data: <json>\\n\\n`` chunks.
    Falls back to plain chat streaming when the agent module is unavailable.
    """

    if not AGENT_AVAILABLE:
        # Degrade gracefully: stream a normal chat completion instead.
        async for chunk in stream_chat_response(messages, endpoint, token, model, "agent", tab_id, extra_params):
            yield chunk
        return

    try:
        client = OpenAI(base_url=endpoint, api_key=token)

        system_prompt = get_system_prompt("agent")
        full_messages = [{"role": "system", "content": system_prompt}] + messages

        record_api_call(tab_id, full_messages)

        # The agent loop is synchronous; run it on the thread pool and relay
        # its updates back to this coroutine through an asyncio queue.
        loop = asyncio.get_event_loop()
        queue = asyncio.Queue()

        def run_sync_generator():
            # Runs in a worker thread: only thread-safe loop calls are allowed here.
            try:
                for update in stream_agent_execution(client, model, full_messages, serper_key, extra_params=extra_params):
                    loop.call_soon_threadsafe(queue.put_nowait, update)
            finally:
                # None is the sentinel telling the consumer the generator finished.
                loop.call_soon_threadsafe(queue.put_nowait, None)

        future = loop.run_in_executor(_executor, run_sync_generator)

        while True:
            update = await queue.get()
            if update is None:
                break
            yield f"data: {json.dumps(update)}\n\n"

        # Re-raise any exception that escaped the worker thread.
        await asyncio.wrap_future(future)

    except Exception as e:
        import traceback
        error_message = f"Agent error: {str(e)}\n{traceback.format_exc()}"
        logger.error(error_message)
        yield f"data: {json.dumps({'type': 'error', 'content': error_message})}\n\n"
|
| 566 |
+
|
| 567 |
+
|
| 568 |
+
async def stream_image_notebook(
|
| 569 |
+
messages: List[dict],
|
| 570 |
+
endpoint: str,
|
| 571 |
+
token: Optional[str],
|
| 572 |
+
model: str,
|
| 573 |
+
hf_token: str,
|
| 574 |
+
image_gen_model: Optional[str] = None,
|
| 575 |
+
image_edit_model: Optional[str] = None,
|
| 576 |
+
tab_id: str = "default",
|
| 577 |
+
extra_params: Optional[Dict] = None
|
| 578 |
+
):
|
| 579 |
+
"""Handle image notebook with HuggingFace image generation tools"""
|
| 580 |
+
|
| 581 |
+
if not IMAGE_AVAILABLE:
|
| 582 |
+
yield f"data: {json.dumps({'type': 'error', 'content': 'Image agent not available. Install with: pip install huggingface_hub Pillow'})}\n\n"
|
| 583 |
+
return
|
| 584 |
+
|
| 585 |
+
if not hf_token:
|
| 586 |
+
yield f"data: {json.dumps({'type': 'error', 'content': 'HuggingFace token required for image generation. Please configure in settings or set HF_TOKEN environment variable.'})}\n\n"
|
| 587 |
+
return
|
| 588 |
+
|
| 589 |
+
try:
|
| 590 |
+
client = OpenAI(base_url=endpoint, api_key=token)
|
| 591 |
+
|
| 592 |
+
system_prompt = get_system_prompt("image")
|
| 593 |
+
full_messages = [{"role": "system", "content": system_prompt}] + messages
|
| 594 |
+
|
| 595 |
+
record_api_call(tab_id, full_messages)
|
| 596 |
+
|
| 597 |
+
loop = asyncio.get_event_loop()
|
| 598 |
+
queue = asyncio.Queue()
|
| 599 |
+
|
| 600 |
+
def run_sync_generator():
|
| 601 |
+
try:
|
| 602 |
+
for update in stream_image_execution(client, model, full_messages, hf_token, image_gen_model=image_gen_model, image_edit_model=image_edit_model, extra_params=extra_params):
|
| 603 |
+
loop.call_soon_threadsafe(queue.put_nowait, update)
|
| 604 |
+
finally:
|
| 605 |
+
loop.call_soon_threadsafe(queue.put_nowait, None)
|
| 606 |
+
|
| 607 |
+
future = loop.run_in_executor(_executor, run_sync_generator)
|
| 608 |
+
|
| 609 |
+
while True:
|
| 610 |
+
update = await queue.get()
|
| 611 |
+
if update is None:
|
| 612 |
+
break
|
| 613 |
+
yield f"data: {json.dumps(update)}\n\n"
|
| 614 |
+
|
| 615 |
+
await asyncio.wrap_future(future)
|
| 616 |
+
|
| 617 |
+
except Exception as e:
|
| 618 |
+
import traceback
|
| 619 |
+
error_message = f"Image agent error: {str(e)}\n{traceback.format_exc()}"
|
| 620 |
+
logger.error(error_message)
|
| 621 |
+
yield f"data: {json.dumps({'type': 'error', 'content': error_message})}\n\n"
|
| 622 |
+
|
| 623 |
+
|
| 624 |
async def stream_chat_response(
|
| 625 |
messages: List[dict],
|
| 626 |
endpoint: str,
|
|
|
|
| 814 |
# Apply environment variable fallbacks for API keys
|
| 815 |
e2b_key = get_env_fallback(request.e2b_key, "E2B_API_KEY")
|
| 816 |
serper_key = get_env_fallback(request.serper_key, "SERPER_API_KEY")
|
| 817 |
+
hf_token = get_env_fallback(request.hf_token, "HF_TOKEN")
|
| 818 |
token = get_env_fallback(request.token, "LLM_API_KEY")
|
| 819 |
+
# For image generation: fall back to the LLM provider token (often the same HF token)
|
| 820 |
+
if not hf_token:
|
| 821 |
+
hf_token = token
|
| 822 |
|
| 823 |
# Route to code execution handler for code notebooks
|
| 824 |
if request.notebook_type == "code":
|
|
|
|
| 875 |
}
|
| 876 |
)
|
| 877 |
|
| 878 |
+
# Route to image handler with HuggingFace tools
|
| 879 |
+
if request.notebook_type == "image":
|
| 880 |
+
return StreamingResponse(
|
| 881 |
+
stream_image_notebook(
|
| 882 |
+
messages,
|
| 883 |
+
request.endpoint,
|
| 884 |
+
token,
|
| 885 |
+
request.model or "gpt-4",
|
| 886 |
+
hf_token or "",
|
| 887 |
+
request.image_gen_model,
|
| 888 |
+
request.image_edit_model,
|
| 889 |
+
tab_id,
|
| 890 |
+
request.extra_params
|
| 891 |
+
),
|
| 892 |
+
media_type="text/event-stream",
|
| 893 |
+
headers={
|
| 894 |
+
"Cache-Control": "no-cache",
|
| 895 |
+
"Connection": "keep-alive",
|
| 896 |
+
"X-Accel-Buffering": "no",
|
| 897 |
+
}
|
| 898 |
+
)
|
| 899 |
+
|
| 900 |
+
# Route to agent handler with web tools
|
| 901 |
+
if request.notebook_type == "agent":
|
| 902 |
+
return StreamingResponse(
|
| 903 |
+
stream_agent_notebook(
|
| 904 |
+
messages,
|
| 905 |
+
request.endpoint,
|
| 906 |
+
token,
|
| 907 |
+
request.model or "gpt-4",
|
| 908 |
+
serper_key or "",
|
| 909 |
+
tab_id,
|
| 910 |
+
request.extra_params
|
| 911 |
+
),
|
| 912 |
+
media_type="text/event-stream",
|
| 913 |
+
headers={
|
| 914 |
+
"Cache-Control": "no-cache",
|
| 915 |
+
"Connection": "keep-alive",
|
| 916 |
+
"X-Accel-Buffering": "no",
|
| 917 |
+
}
|
| 918 |
+
)
|
| 919 |
+
|
| 920 |
# Route to command center handler for command center (with tool-based launching)
|
| 921 |
if request.notebook_type == "command":
|
| 922 |
return StreamingResponse(
|
|
|
|
| 1110 |
if os.path.exists(SETTINGS_FILE):
|
| 1111 |
with open(SETTINGS_FILE, "r") as f:
|
| 1112 |
settings = json.load(f)
|
| 1113 |
+
settings["_settingsPath"] = SETTINGS_FILE
|
| 1114 |
return settings
|
| 1115 |
else:
|
| 1116 |
# Return default settings if file doesn't exist
|
backend/tools.py
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Centralized Tool Definitions & Execution Functions.
|
| 3 |
+
|
| 4 |
+
All OpenAI function-calling tool definitions live here.
|
| 5 |
+
Agent handlers compose tools by importing what they need:
|
| 6 |
+
|
| 7 |
+
from tools import execute_code, upload_files, download_files
|
| 8 |
+
TOOLS = [execute_code, upload_files, download_files]
|
| 9 |
+
|
| 10 |
+
Execution functions for tools that run server-side (web tools)
|
| 11 |
+
are also defined here, prefixed with `execute_`.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import base64
|
| 15 |
+
import io
|
| 16 |
+
import json
|
| 17 |
+
import logging
|
| 18 |
+
from typing import List, Dict, Optional
|
| 19 |
+
from urllib.parse import urljoin, urlparse
|
| 20 |
+
|
| 21 |
+
import httpx
|
| 22 |
+
import requests
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ============================================================
|
| 28 |
+
# Code execution tools (used by code notebook)
|
| 29 |
+
# ============================================================
|
| 30 |
+
|
| 31 |
+
# OpenAI function-calling schema for the code notebook: run Python inside the
# persistent sandbox (variables and imports survive between calls).
execute_code = {
    "type": "function",
    "function": {
        "name": "execute_code",
        "description": "Execute Python code in a stateful environment. Variables and imports persist between executions.",
        "parameters": {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "The Python code to execute."
                }
            },
            "required": ["code"]
        }
    }
}
|
| 48 |
+
|
| 49 |
+
# Tool schema: copy workspace files into the sandbox at /home/user/<filename>.
upload_files = {
    "type": "function",
    "function": {
        "name": "upload_files",
        "description": "Upload files from the local workspace to the code execution environment for analysis. Files will be available at /home/user/<filename>. Use this to load data files, scripts, or any files you need to analyze.",
        "parameters": {
            "type": "object",
            "properties": {
                "paths": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of file paths relative to the workspace root (e.g., ['data/sales.csv', 'config.json'])"
                }
            },
            "required": ["paths"]
        }
    }
}
|
| 67 |
+
|
| 68 |
+
# Tool schema: copy files out of the sandbox back into the local workspace.
# Each entry pairs a sandbox_path with its destination local_path.
download_files = {
    "type": "function",
    "function": {
        "name": "download_files",
        "description": "Download files from the code execution environment to the local workspace. Use this to save generated files, processed data, or any output files you want to keep.",
        "parameters": {
            "type": "object",
            "properties": {
                "files": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "sandbox_path": {
                                "type": "string",
                                "description": "Path in the sandbox (e.g., '/home/user/output.csv')"
                            },
                            "local_path": {
                                "type": "string",
                                "description": "Destination path relative to workspace (e.g., 'results/output.csv')"
                            }
                        },
                        "required": ["sandbox_path", "local_path"]
                    },
                    "description": "List of files to download with their sandbox and local paths"
                }
            },
            "required": ["files"]
        }
    }
}
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ============================================================
|
| 102 |
+
# Web tools (used by agent notebook)
|
| 103 |
+
# ============================================================
|
| 104 |
+
|
| 105 |
+
# Tool schema: Google search via the Serper API (see execute_web_search below).
web_search = {
    "type": "function",
    "function": {
        "name": "web_search",
        "description": "Search the web using Google. Returns titles, URLs, and short snippets for each result. Use this to find information, discover relevant pages, and get an overview of a topic.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query"
                },
                "num_results": {
                    "type": "integer",
                    "description": "Number of results to return (default: 5, max: 10)",
                    "default": 5
                }
            },
            "required": ["query"]
        }
    }
}
|
| 127 |
+
|
| 128 |
+
# Tool schema: fetch a page and return its main content as markdown
# (backed by execute_read_url below).
read_url = {
    "type": "function",
    "function": {
        "name": "read_url",
        "description": "Fetch a web page and extract its main content as clean markdown. Includes text, headings, links, and image references. Use this when you need detailed content from a specific page.",
        "parameters": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to read"
                }
            },
            "required": ["url"]
        }
    }
}
|
| 145 |
+
|
| 146 |
+
# Tool schema: capture a page screenshot (backed by execute_screenshot_url below).
screenshot_url = {
    "type": "function",
    "function": {
        "name": "screenshot_url",
        "description": "Take a screenshot of a web page. Use this when you need to see the visual layout, images, charts, or design of a page. The screenshot will be sent to you as an image.",
        "parameters": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to screenshot"
                }
            },
            "required": ["url"]
        }
    }
}
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
# ============================================================
|
| 166 |
+
# Web tool execution functions
|
| 167 |
+
# ============================================================
|
| 168 |
+
|
| 169 |
+
_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def execute_web_search(query: str, serper_key: str, num_results: int = 5) -> str:
    """Run a Google search through the Serper API.

    Returns a JSON string: either a list of {title, url, snippet} objects,
    or an {"error": ...} object when the request fails.
    """
    endpoint = "https://google.serper.dev/search"
    request_headers = {
        "X-API-KEY": serper_key,
        "Content-Type": "application/json"
    }
    # Serper caps useful result counts; never request more than 10.
    body = json.dumps({"q": query, "num": min(num_results, 10)})

    try:
        resp = requests.post(endpoint, headers=request_headers, data=body, timeout=10)
        if resp.status_code != 200:
            return json.dumps({"error": f"Search API returned status {resp.status_code}"})

        hits = [
            {
                "title": entry.get("title", ""),
                "url": entry.get("link", ""),
                "snippet": entry.get("snippet", "")
            }
            for entry in resp.json().get("organic", [])
        ]
        return json.dumps(hits, indent=2)
    except Exception as e:
        logger.error(f"Web search error: {e}")
        return json.dumps({"error": str(e)})
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def execute_read_url(url: str) -> str:
    """Fetch a URL and extract its main article content as markdown.

    Args:
        url: Absolute URL of the page to read.

    Returns:
        Markdown text (prefixed with "# <title>" when a title is found),
        or a human-readable "Error ..." string on any failure. Relative
        image URLs in the extracted content are resolved to absolute ones.
    """
    try:
        from readability import Document
        from markdownify import markdownify
    except ImportError:
        return "Error: readability-lxml and markdownify packages required. Install with: pip install readability-lxml markdownify"

    try:
        resp = httpx.get(
            url,
            follow_redirects=True,
            timeout=15,
            headers={"User-Agent": _USER_AGENT}
        )
        if resp.status_code != 200:
            return f"Error: HTTP {resp.status_code} fetching {url}"

        doc = Document(resp.text)
        title = doc.title()
        content_html = doc.summary()

        # Convert to markdown (preserves images as ![alt](src))
        md = markdownify(content_html, strip=["script", "style"])

        # Resolve relative image URLs to absolute
        def resolve_url(match):
            img_url = match.group(2)
            if img_url.startswith(("http://", "https://", "data:")):
                return match.group(0)
            absolute = urljoin(url, img_url)
            # Fix: previously returned an empty string here, which silently
            # deleted every relative-URL image from the output. Re-emit the
            # markdown image with its alt text and the absolute URL.
            return f"![{match.group(1)}]({absolute})"

        import re
        md = re.sub(r'!\[([^\]]*)\]\(([^)]+)\)', resolve_url, md)

        # Clean up excessive whitespace
        md = re.sub(r'\n{3,}', '\n\n', md).strip()

        # Truncate if very long (the note reports the pre-truncation length).
        max_len = 15000
        if len(md) > max_len:
            md = md[:max_len] + f"\n\n[Content truncated - {len(md)} chars total]"

        return f"# {title}\n\n{md}" if title else md
    except Exception as e:
        logger.error(f"Read URL error for {url}: {e}")
        return f"Error reading {url}: {str(e)}"
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def execute_screenshot_url(url: str) -> Optional[str]:
    """Capture a viewport screenshot of a page with headless Chromium.

    Returns the screenshot as a base64-encoded PNG string, or None when
    Playwright is not installed or the capture fails (callers treat None
    as "screenshot unavailable").
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        # Playwright is an optional dependency.
        return None  # Caller should handle gracefully

    try:
        with sync_playwright() as pw:
            chromium = pw.chromium.launch(headless=True)
            tab = chromium.new_page(viewport={"width": 1280, "height": 720})
            tab.goto(url, wait_until="networkidle", timeout=15000)
            raw_png = tab.screenshot(full_page=False)
            chromium.close()
        return base64.b64encode(raw_png).decode("utf-8")
    except Exception as e:
        logger.error(f"Screenshot error for {url}: {e}")
        return None
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# ============================================================
|
| 274 |
+
# Image tools (used by image notebook)
|
| 275 |
+
# ============================================================
|
| 276 |
+
|
| 277 |
+
# OpenAI-style tool schema: text-to-image generation.
# Exposed to the LLM in IMAGE notebooks; execution happens in
# execute_generate_image.
generate_image = dict(
    type="function",
    function=dict(
        name="generate_image",
        description="Generate an image from a text prompt. Returns an image reference name (e.g., 'image_1') that you can see and use with edit_image.",
        parameters=dict(
            type="object",
            properties=dict(
                prompt=dict(
                    type="string",
                    description="Detailed text description of the image to generate",
                ),
                model=dict(
                    type="string",
                    description="HuggingFace model to use (default: black-forest-labs/FLUX.1-schnell)",
                    default="black-forest-labs/FLUX.1-schnell",
                ),
            ),
            # Only the prompt is mandatory; the model falls back to FLUX schnell.
            required=["prompt"],
        ),
    ),
)
|
| 299 |
+
|
| 300 |
+
# OpenAI-style tool schema: image-to-image editing.
# The 'source' may be a URL or a reference name produced by a previous
# generate_image / read_image_url call; execution happens in
# execute_edit_image.
edit_image = dict(
    type="function",
    function=dict(
        name="edit_image",
        description="Edit or transform an existing image using a text prompt. The source can be a URL (https://...) or a reference to a previously generated/loaded image (e.g., 'image_1').",
        parameters=dict(
            type="object",
            properties=dict(
                prompt=dict(
                    type="string",
                    description="Text description of the edit or transformation to apply",
                ),
                source=dict(
                    type="string",
                    description="Image URL or reference name from a previous tool call (e.g., 'image_1')",
                ),
                model=dict(
                    type="string",
                    description="HuggingFace model to use (default: black-forest-labs/FLUX.1-Kontext-dev)",
                    default="black-forest-labs/FLUX.1-Kontext-dev",
                ),
            ),
            # Both the edit instruction and the source image are mandatory.
            required=["prompt", "source"],
        ),
    ),
)
|
| 326 |
+
|
| 327 |
+
# OpenAI-style tool schema: download an image from a URL into the image
# store; execution happens in execute_read_image_url.
read_image_url = dict(
    type="function",
    function=dict(
        name="read_image_url",
        description="Download an image from a URL. Returns an image reference name (e.g., 'image_1') that you can see and use with edit_image.",
        parameters=dict(
            type="object",
            properties=dict(
                url=dict(
                    type="string",
                    description="The image URL to download",
                ),
            ),
            required=["url"],
        ),
    ),
)
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
# ============================================================
|
| 347 |
+
# Image tool execution functions
|
| 348 |
+
# ============================================================
|
| 349 |
+
|
| 350 |
+
def execute_generate_image(prompt: str, hf_token: str, model: str = "black-forest-labs/FLUX.1-schnell") -> Optional[str]:
    """Run text-to-image through the HF Inference API.

    Args:
        prompt: Text description of the desired image.
        hf_token: HuggingFace API token.
        model: HF model id to invoke.

    Returns:
        The generated picture as a base64-encoded PNG string, or None
        when huggingface_hub is missing or the call fails.
    """
    try:
        from huggingface_hub import InferenceClient
    except ImportError:
        logger.error("huggingface_hub not installed")
        return None

    try:
        result = InferenceClient(token=hf_token).text_to_image(prompt, model=model)
        # Re-encode the PIL image as PNG bytes for transport.
        out = io.BytesIO()
        result.save(out, format="PNG")
        return base64.b64encode(out.getvalue()).decode("utf-8")
    except Exception as e:
        logger.error(f"Generate image error: {e}")
        return None
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def execute_edit_image(prompt: str, source_image_bytes: bytes, hf_token: str, model: str = "black-forest-labs/FLUX.1-Kontext-dev") -> Optional[str]:
    """Run image-to-image editing through the HF Inference API.

    Args:
        prompt: Text description of the edit to apply.
        source_image_bytes: Raw bytes of the input image.
        hf_token: HuggingFace API token.
        model: HF model id to invoke.

    Returns:
        The edited picture as a base64-encoded PNG string, or None when
        dependencies are missing or the call fails.
    """
    try:
        from huggingface_hub import InferenceClient
        from PIL import Image
    except ImportError:
        logger.error("huggingface_hub or Pillow not installed")
        return None

    try:
        client = InferenceClient(token=hf_token)
        src = Image.open(io.BytesIO(source_image_bytes))
        edited = client.image_to_image(src, prompt=prompt, model=model)
        # Re-encode the PIL result as PNG bytes for transport.
        out = io.BytesIO()
        edited.save(out, format="PNG")
        return base64.b64encode(out.getvalue()).decode("utf-8")
    except Exception as e:
        logger.error(f"Edit image error: {e}")
        return None
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def execute_read_image_url(url: str) -> Optional[str]:
    """Download an image from a URL.

    Args:
        url: The image URL to fetch (redirects are followed).

    Returns:
        Base64-encoded raw response bytes, or None on any failure
        (non-200 status, network error, or a text/HTML response).
    """
    try:
        resp = httpx.get(
            url,
            follow_redirects=True,
            timeout=15,
            headers={"User-Agent": _USER_AGENT}
        )
        if resp.status_code != 200:
            logger.error(f"Read image URL error: HTTP {resp.status_code} for {url}")
            return None
        # Guard against 200 responses that are clearly not images (e.g. an
        # HTML error/consent page) — blindly base64-encoding those bytes
        # would poison downstream image handling. Only text/* is rejected;
        # missing or generic content types (application/octet-stream) are
        # still accepted since many image hosts omit proper types.
        content_type = resp.headers.get("content-type", "").lower()
        if content_type.startswith("text/"):
            logger.error(f"Read image URL error: non-image content-type '{content_type}' for {url}")
            return None
        return base64.b64encode(resp.content).decode("utf-8")
    except Exception as e:
        logger.error(f"Read image URL error for {url}: {e}")
        return None
|
frontend/index.html
CHANGED
|
@@ -7,7 +7,7 @@
|
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&display=swap" rel="stylesheet">
|
| 8 |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism.min.css">
|
| 9 |
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
|
| 10 |
-
<link rel="stylesheet" href="style.css?v=
|
| 11 |
</head>
|
| 12 |
<body>
|
| 13 |
<div class="app-container">
|
|
@@ -175,6 +175,7 @@
|
|
| 175 |
<div class="settings-panel" id="settingsPanel">
|
| 176 |
<div class="settings-panel-header">
|
| 177 |
<h3>SETTINGS</h3>
|
|
|
|
| 178 |
<button class="settings-panel-close" id="settingsPanelClose">×</button>
|
| 179 |
</div>
|
| 180 |
<div class="settings-panel-body" id="settingsPanelBody">
|
|
@@ -227,6 +228,31 @@
|
|
| 227 |
<input type="password" id="setting-serper-key" class="settings-input" placeholder="Leave empty if not using research">
|
| 228 |
</div>
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
<!-- Research Settings -->
|
| 231 |
<div class="settings-section">
|
| 232 |
<label class="settings-label">
|
|
@@ -457,6 +483,6 @@
|
|
| 457 |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 458 |
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>
|
| 459 |
<script src="research-ui.js?v=23"></script>
|
| 460 |
-
<script src="script.js?v=
|
| 461 |
</body>
|
| 462 |
</html>
|
|
|
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&display=swap" rel="stylesheet">
|
| 8 |
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism.min.css">
|
| 9 |
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
|
| 10 |
+
<link rel="stylesheet" href="style.css?v=64">
|
| 11 |
</head>
|
| 12 |
<body>
|
| 13 |
<div class="app-container">
|
|
|
|
| 175 |
<div class="settings-panel" id="settingsPanel">
|
| 176 |
<div class="settings-panel-header">
|
| 177 |
<h3>SETTINGS</h3>
|
| 178 |
+
<span class="settings-path" id="settingsPath"></span>
|
| 179 |
<button class="settings-panel-close" id="settingsPanelClose">×</button>
|
| 180 |
</div>
|
| 181 |
<div class="settings-panel-body" id="settingsPanelBody">
|
|
|
|
| 228 |
<input type="password" id="setting-serper-key" class="settings-input" placeholder="Leave empty if not using research">
|
| 229 |
</div>
|
| 230 |
|
| 231 |
+
<div class="settings-section">
|
| 232 |
+
<label class="settings-label">
|
| 233 |
+
<span class="label-text">HUGGINGFACE TOKEN (OPTIONAL)</span>
|
| 234 |
+
<span class="label-description">Required for image generation in IMAGE notebooks</span>
|
| 235 |
+
</label>
|
| 236 |
+
<input type="password" id="setting-hf-token" class="settings-input" placeholder="Leave empty to use provider token">
|
| 237 |
+
</div>
|
| 238 |
+
|
| 239 |
+
<!-- Image Model Settings -->
|
| 240 |
+
<div class="settings-section">
|
| 241 |
+
<label class="settings-label">
|
| 242 |
+
<span class="label-text">IMAGE GENERATION MODEL (OPTIONAL)</span>
|
| 243 |
+
<span class="label-description">Model for text-to-image generation in IMAGE notebooks</span>
|
| 244 |
+
</label>
|
| 245 |
+
<select id="setting-image-gen-model" class="settings-select"></select>
|
| 246 |
+
</div>
|
| 247 |
+
|
| 248 |
+
<div class="settings-section">
|
| 249 |
+
<label class="settings-label">
|
| 250 |
+
<span class="label-text">IMAGE EDIT MODEL (OPTIONAL)</span>
|
| 251 |
+
<span class="label-description">Model for image-to-image editing in IMAGE notebooks</span>
|
| 252 |
+
</label>
|
| 253 |
+
<select id="setting-image-edit-model" class="settings-select"></select>
|
| 254 |
+
</div>
|
| 255 |
+
|
| 256 |
<!-- Research Settings -->
|
| 257 |
<div class="settings-section">
|
| 258 |
<label class="settings-label">
|
|
|
|
| 483 |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 484 |
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>
|
| 485 |
<script src="research-ui.js?v=23"></script>
|
| 486 |
+
<script src="script.js?v=59"></script>
|
| 487 |
</body>
|
| 488 |
</html>
|
frontend/script.js
CHANGED
|
@@ -4,10 +4,11 @@
|
|
| 4 |
// ============================================================
|
| 5 |
const AGENT_REGISTRY = {
|
| 6 |
command: { label: 'TASKS', hasCounter: false, inMenu: false, inLauncher: false, placeholder: 'Enter message...' },
|
| 7 |
-
agent: { label: 'AGENT', hasCounter: true, inMenu:
|
| 8 |
code: { label: 'CODE', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
|
| 9 |
research: { label: 'RESEARCH', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
|
| 10 |
chat: { label: 'CHAT', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
|
|
|
|
| 11 |
};
|
| 12 |
// Virtual types used only in timeline rendering (not real agents)
|
| 13 |
const VIRTUAL_TYPE_LABELS = { search: 'SEARCH', browse: 'BROWSE' };
|
|
@@ -57,6 +58,10 @@ let settings = {
|
|
| 57 |
// Service API keys
|
| 58 |
e2bKey: '',
|
| 59 |
serperKey: '',
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
// Research settings
|
| 61 |
researchSubAgentModel: '',
|
| 62 |
researchParallelWorkers: null,
|
|
@@ -1562,22 +1567,32 @@ function getConversationHistory(chatContainer) {
|
|
| 1562 |
// Check if this message has a tool call
|
| 1563 |
const toolCallData = msg.getAttribute('data-tool-call');
|
| 1564 |
if (toolCallData) {
|
| 1565 |
-
// This is a tool call message - add it in the proper format
|
| 1566 |
-
// Include content if the message also has thinking text
|
| 1567 |
const toolCall = JSON.parse(toolCallData);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1568 |
messages.push({
|
| 1569 |
role: 'assistant',
|
| 1570 |
-
content: content || '',
|
| 1571 |
tool_calls: [{
|
| 1572 |
id: toolCall.tool_call_id || 'tool_' + Date.now(),
|
| 1573 |
type: 'function',
|
| 1574 |
function: {
|
| 1575 |
-
name:
|
| 1576 |
-
arguments:
|
| 1577 |
-
task: toolCall.message,
|
| 1578 |
-
topic: toolCall.message,
|
| 1579 |
-
message: toolCall.message
|
| 1580 |
-
})
|
| 1581 |
}
|
| 1582 |
}]
|
| 1583 |
});
|
|
@@ -1637,6 +1652,14 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
|
|
| 1637 |
}
|
| 1638 |
}
|
| 1639 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1640 |
try {
|
| 1641 |
const response = await fetch(`${backendEndpoint}/chat/stream`, {
|
| 1642 |
method: 'POST',
|
|
@@ -1651,6 +1674,9 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
|
|
| 1651 |
extra_params: modelConfig.extraParams || null,
|
| 1652 |
e2b_key: currentSettings.e2bKey || null,
|
| 1653 |
serper_key: currentSettings.serperKey || null,
|
|
|
|
|
|
|
|
|
|
| 1654 |
research_sub_agent_model: researchSubAgentConfig?.model || null,
|
| 1655 |
research_sub_agent_endpoint: researchSubAgentConfig?.endpoint || null,
|
| 1656 |
research_sub_agent_token: researchSubAgentConfig?.token || null,
|
|
@@ -1737,7 +1763,7 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
|
|
| 1737 |
|
| 1738 |
} else if (data.type === 'result') {
|
| 1739 |
// Notebook result - update command center widget
|
| 1740 |
-
updateActionWidgetWithResult(tabId, data.content, data.figures);
|
| 1741 |
|
| 1742 |
} else if (data.type === 'result_preview') {
|
| 1743 |
// Show result preview
|
|
@@ -1762,6 +1788,19 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
|
|
| 1762 |
}
|
| 1763 |
}
|
| 1764 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1765 |
// Process markdown
|
| 1766 |
let html = parseMarkdown(previewContent);
|
| 1767 |
|
|
@@ -1857,6 +1896,108 @@ async function streamChatResponse(messages, chatContainer, notebookType, tabId)
|
|
| 1857 |
// Add to timeline
|
| 1858 |
addTimelineEvent(tabId, 'assistant', `[report] ${data.sources_count || 0} sources, ${data.websites_visited || 0} sites`);
|
| 1859 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1860 |
} else if (data.type === 'content') {
|
| 1861 |
// Regular streaming content (non-code notebooks)
|
| 1862 |
if (!currentMessageEl) {
|
|
@@ -2191,7 +2332,7 @@ function showActionWidget(chatContainer, action, message, targetTabId, taskId =
|
|
| 2191 |
actionWidgets[targetTabId] = widget;
|
| 2192 |
}
|
| 2193 |
|
| 2194 |
-
async function updateActionWidgetWithResult(tabId, resultContent, figures) {
|
| 2195 |
const widget = actionWidgets[tabId];
|
| 2196 |
if (!widget) return;
|
| 2197 |
|
|
@@ -2225,6 +2366,19 @@ async function updateActionWidgetWithResult(tabId, resultContent, figures) {
|
|
| 2225 |
}
|
| 2226 |
}
|
| 2227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2228 |
// Process markdown
|
| 2229 |
let html = parseMarkdown(processedContent);
|
| 2230 |
|
|
@@ -2253,9 +2407,26 @@ async function updateActionWidgetWithResult(tabId, resultContent, figures) {
|
|
| 2253 |
body.appendChild(resultSection);
|
| 2254 |
}
|
| 2255 |
|
| 2256 |
-
//
|
| 2257 |
const toolCallId = toolCallIds[tabId];
|
| 2258 |
if (toolCallId) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2259 |
fetch('/api/conversation/add-tool-response', {
|
| 2260 |
method: 'POST',
|
| 2261 |
headers: { 'Content-Type': 'application/json' },
|
|
@@ -3055,6 +3226,9 @@ function migrateSettings(oldSettings) {
|
|
| 3055 |
},
|
| 3056 |
e2bKey: oldSettings.e2bKey || '',
|
| 3057 |
serperKey: oldSettings.serperKey || '',
|
|
|
|
|
|
|
|
|
|
| 3058 |
researchSubAgentModel: oldSettings.researchSubAgentModel || '',
|
| 3059 |
researchParallelWorkers: oldSettings.researchParallelWorkers || null,
|
| 3060 |
researchMaxWebsites: oldSettings.researchMaxWebsites || null,
|
|
@@ -3234,7 +3408,9 @@ function populateModelDropdowns() {
|
|
| 3234 |
// Build dropdown IDs from registry + special dropdowns
|
| 3235 |
const dropdownIds = [
|
| 3236 |
...Object.keys(AGENT_REGISTRY).map(t => `setting-notebook-${t}`),
|
| 3237 |
-
'setting-research-sub-agent-model'
|
|
|
|
|
|
|
| 3238 |
];
|
| 3239 |
|
| 3240 |
dropdownIds.forEach(dropdownId => {
|
|
@@ -3267,6 +3443,10 @@ function populateModelDropdowns() {
|
|
| 3267 |
}
|
| 3268 |
const subAgentDropdown = document.getElementById('setting-research-sub-agent-model');
|
| 3269 |
if (subAgentDropdown) subAgentDropdown.value = settings.researchSubAgentModel || '';
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3270 |
}
|
| 3271 |
|
| 3272 |
// Show add/edit provider dialog
|
|
@@ -3433,6 +3613,10 @@ function deleteModel(modelId) {
|
|
| 3433 |
}
|
| 3434 |
|
| 3435 |
function openSettings() {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3436 |
// Render providers and models lists
|
| 3437 |
renderProvidersList();
|
| 3438 |
renderModelsList();
|
|
@@ -3441,6 +3625,7 @@ function openSettings() {
|
|
| 3441 |
// Populate service keys
|
| 3442 |
document.getElementById('setting-e2b-key').value = settings.e2bKey || '';
|
| 3443 |
document.getElementById('setting-serper-key').value = settings.serperKey || '';
|
|
|
|
| 3444 |
|
| 3445 |
// Populate research settings
|
| 3446 |
document.getElementById('setting-research-parallel-workers').value = settings.researchParallelWorkers || '';
|
|
@@ -3478,6 +3663,9 @@ async function saveSettings() {
|
|
| 3478 |
// Get other settings
|
| 3479 |
const e2bKey = document.getElementById('setting-e2b-key').value.trim();
|
| 3480 |
const serperKey = document.getElementById('setting-serper-key').value.trim();
|
|
|
|
|
|
|
|
|
|
| 3481 |
const researchParallelWorkers = document.getElementById('setting-research-parallel-workers').value.trim();
|
| 3482 |
const researchMaxWebsites = document.getElementById('setting-research-max-websites').value.trim();
|
| 3483 |
const themeColor = document.getElementById('setting-theme-color').value || 'forest';
|
|
@@ -3497,6 +3685,9 @@ async function saveSettings() {
|
|
| 3497 |
settings.notebooks = notebookModels;
|
| 3498 |
settings.e2bKey = e2bKey;
|
| 3499 |
settings.serperKey = serperKey;
|
|
|
|
|
|
|
|
|
|
| 3500 |
settings.researchSubAgentModel = researchSubAgentModel;
|
| 3501 |
settings.researchParallelWorkers = researchParallelWorkers ? parseInt(researchParallelWorkers) : null;
|
| 3502 |
settings.researchMaxWebsites = researchMaxWebsites ? parseInt(researchMaxWebsites) : null;
|
|
|
|
| 4 |
// ============================================================
|
| 5 |
const AGENT_REGISTRY = {
|
| 6 |
command: { label: 'TASKS', hasCounter: false, inMenu: false, inLauncher: false, placeholder: 'Enter message...' },
|
| 7 |
+
agent: { label: 'AGENT', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
|
| 8 |
code: { label: 'CODE', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
|
| 9 |
research: { label: 'RESEARCH', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
|
| 10 |
chat: { label: 'CHAT', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Enter message...' },
|
| 11 |
+
image: { label: 'IMAGE', hasCounter: true, inMenu: true, inLauncher: true, placeholder: 'Describe an image or paste a URL...' },
|
| 12 |
};
|
| 13 |
// Virtual types used only in timeline rendering (not real agents)
|
| 14 |
const VIRTUAL_TYPE_LABELS = { search: 'SEARCH', browse: 'BROWSE' };
|
|
|
|
| 58 |
// Service API keys
|
| 59 |
e2bKey: '',
|
| 60 |
serperKey: '',
|
| 61 |
+
hfToken: '',
|
| 62 |
+
// Image model selections (model IDs from the models list)
|
| 63 |
+
imageGenModel: '',
|
| 64 |
+
imageEditModel: '',
|
| 65 |
// Research settings
|
| 66 |
researchSubAgentModel: '',
|
| 67 |
researchParallelWorkers: null,
|
|
|
|
| 1567 |
// Check if this message has a tool call
|
| 1568 |
const toolCallData = msg.getAttribute('data-tool-call');
|
| 1569 |
if (toolCallData) {
|
|
|
|
|
|
|
| 1570 |
const toolCall = JSON.parse(toolCallData);
|
| 1571 |
+
let funcName, funcArgs;
|
| 1572 |
+
|
| 1573 |
+
if (toolCall.function_name) {
|
| 1574 |
+
// Agent-style tool call (web_search, read_url, etc.)
|
| 1575 |
+
funcName = toolCall.function_name;
|
| 1576 |
+
funcArgs = toolCall.arguments;
|
| 1577 |
+
} else {
|
| 1578 |
+
// Command center-style tool call (launch_*_notebook)
|
| 1579 |
+
funcName = `launch_${toolCall.notebook_type}_notebook`;
|
| 1580 |
+
funcArgs = JSON.stringify({
|
| 1581 |
+
task: toolCall.message,
|
| 1582 |
+
topic: toolCall.message,
|
| 1583 |
+
message: toolCall.message
|
| 1584 |
+
});
|
| 1585 |
+
}
|
| 1586 |
+
|
| 1587 |
messages.push({
|
| 1588 |
role: 'assistant',
|
| 1589 |
+
content: toolCall.thinking || content || '',
|
| 1590 |
tool_calls: [{
|
| 1591 |
id: toolCall.tool_call_id || 'tool_' + Date.now(),
|
| 1592 |
type: 'function',
|
| 1593 |
function: {
|
| 1594 |
+
name: funcName,
|
| 1595 |
+
arguments: funcArgs
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1596 |
}
|
| 1597 |
}]
|
| 1598 |
});
|
|
|
|
| 1652 |
}
|
| 1653 |
}
|
| 1654 |
|
| 1655 |
+
// Resolve image model selections to HF model ID strings
|
| 1656 |
+
const imageGenModelId = currentSettings.imageGenModel
|
| 1657 |
+
? currentSettings.models?.[currentSettings.imageGenModel]?.modelId || null
|
| 1658 |
+
: null;
|
| 1659 |
+
const imageEditModelId = currentSettings.imageEditModel
|
| 1660 |
+
? currentSettings.models?.[currentSettings.imageEditModel]?.modelId || null
|
| 1661 |
+
: null;
|
| 1662 |
+
|
| 1663 |
try {
|
| 1664 |
const response = await fetch(`${backendEndpoint}/chat/stream`, {
|
| 1665 |
method: 'POST',
|
|
|
|
| 1674 |
extra_params: modelConfig.extraParams || null,
|
| 1675 |
e2b_key: currentSettings.e2bKey || null,
|
| 1676 |
serper_key: currentSettings.serperKey || null,
|
| 1677 |
+
hf_token: currentSettings.hfToken || null,
|
| 1678 |
+
image_gen_model: imageGenModelId,
|
| 1679 |
+
image_edit_model: imageEditModelId,
|
| 1680 |
research_sub_agent_model: researchSubAgentConfig?.model || null,
|
| 1681 |
research_sub_agent_endpoint: researchSubAgentConfig?.endpoint || null,
|
| 1682 |
research_sub_agent_token: researchSubAgentConfig?.token || null,
|
|
|
|
| 1763 |
|
| 1764 |
} else if (data.type === 'result') {
|
| 1765 |
// Notebook result - update command center widget
|
| 1766 |
+
updateActionWidgetWithResult(tabId, data.content, data.figures, data.images);
|
| 1767 |
|
| 1768 |
} else if (data.type === 'result_preview') {
|
| 1769 |
// Show result preview
|
|
|
|
| 1788 |
}
|
| 1789 |
}
|
| 1790 |
|
| 1791 |
+
// Handle <image_N> references from image agent
|
| 1792 |
+
if (data.images) {
|
| 1793 |
+
for (const [imageName, imageBase64] of Object.entries(data.images)) {
|
| 1794 |
+
const placeholderId = `%%%IMAGE_${imageName}%%%`;
|
| 1795 |
+
figurePlaceholders[placeholderId] = { type: 'png', data: imageBase64, isGenerated: true };
|
| 1796 |
+
|
| 1797 |
+
const pairedTag = new RegExp(`<${imageName}></${imageName}>`, 'gi');
|
| 1798 |
+
previewContent = previewContent.replace(pairedTag, `\n\n${placeholderId}\n\n`);
|
| 1799 |
+
const singleTag = new RegExp(`</?${imageName}>`, 'gi');
|
| 1800 |
+
previewContent = previewContent.replace(singleTag, `\n\n${placeholderId}\n\n`);
|
| 1801 |
+
}
|
| 1802 |
+
}
|
| 1803 |
+
|
| 1804 |
// Process markdown
|
| 1805 |
let html = parseMarkdown(previewContent);
|
| 1806 |
|
|
|
|
| 1896 |
// Add to timeline
|
| 1897 |
addTimelineEvent(tabId, 'assistant', `[report] ${data.sources_count || 0} sources, ${data.websites_visited || 0} sites`);
|
| 1898 |
|
| 1899 |
+
} else if (data.type === 'tool_start') {
|
| 1900 |
+
// Agent tool execution starting — create a tool-cell box (like code cells)
|
| 1901 |
+
currentMessageEl = null;
|
| 1902 |
+
fullResponse = '';
|
| 1903 |
+
|
| 1904 |
+
const toolLabels = {
|
| 1905 |
+
'web_search': 'SEARCH',
|
| 1906 |
+
'read_url': 'READ',
|
| 1907 |
+
'screenshot_url': 'SCREENSHOT',
|
| 1908 |
+
'generate_image': 'GENERATE',
|
| 1909 |
+
'edit_image': 'EDIT',
|
| 1910 |
+
'read_image_url': 'LOAD IMAGE'
|
| 1911 |
+
};
|
| 1912 |
+
const toolDescriptions = {
|
| 1913 |
+
'web_search': data.args?.query || '',
|
| 1914 |
+
'read_url': data.args?.url || '',
|
| 1915 |
+
'screenshot_url': data.args?.url || '',
|
| 1916 |
+
'generate_image': data.args?.prompt || '',
|
| 1917 |
+
'edit_image': `${data.args?.prompt || ''} (from ${data.args?.source || ''})`,
|
| 1918 |
+
'read_image_url': data.args?.url || ''
|
| 1919 |
+
};
|
| 1920 |
+
const label = toolLabels[data.tool] || data.tool.toUpperCase();
|
| 1921 |
+
const description = toolDescriptions[data.tool] || '';
|
| 1922 |
+
|
| 1923 |
+
// Store tool call in DOM for history reconstruction
|
| 1924 |
+
const toolCallMsg = document.createElement('div');
|
| 1925 |
+
toolCallMsg.className = 'message assistant';
|
| 1926 |
+
toolCallMsg.style.display = 'none';
|
| 1927 |
+
toolCallMsg.setAttribute('data-tool-call', JSON.stringify({
|
| 1928 |
+
tool_call_id: data.tool_call_id,
|
| 1929 |
+
function_name: data.tool,
|
| 1930 |
+
arguments: data.arguments,
|
| 1931 |
+
thinking: data.thinking || ''
|
| 1932 |
+
}));
|
| 1933 |
+
chatContainer.appendChild(toolCallMsg);
|
| 1934 |
+
|
| 1935 |
+
// Create tool-cell box (similar to code-cell)
|
| 1936 |
+
const toolCell = document.createElement('div');
|
| 1937 |
+
toolCell.className = 'tool-cell';
|
| 1938 |
+
toolCell.setAttribute('data-tool-name', data.tool);
|
| 1939 |
+
toolCell.innerHTML = `
|
| 1940 |
+
<div class="tool-cell-label"><span>${label}</span>${createSpinnerHtml()}</div>
|
| 1941 |
+
<div class="tool-cell-input">${escapeHtml(description)}</div>
|
| 1942 |
+
`;
|
| 1943 |
+
chatContainer.appendChild(toolCell);
|
| 1944 |
+
scrollChatToBottom(chatContainer);
|
| 1945 |
+
addTimelineEvent(tabId, 'assistant', `[${data.tool}] ${description}`);
|
| 1946 |
+
|
| 1947 |
+
} else if (data.type === 'tool_result') {
|
| 1948 |
+
// Agent tool result — populate the last tool-cell with output
|
| 1949 |
+
const lastToolCell = chatContainer.querySelector('.tool-cell:last-of-type');
|
| 1950 |
+
|
| 1951 |
+
// Remove spinner
|
| 1952 |
+
if (lastToolCell) {
|
| 1953 |
+
const spinner = lastToolCell.querySelector('.tool-spinner');
|
| 1954 |
+
if (spinner) spinner.remove();
|
| 1955 |
+
}
|
| 1956 |
+
|
| 1957 |
+
// Store tool response in DOM for history reconstruction
|
| 1958 |
+
const toolResponseMsg = document.createElement('div');
|
| 1959 |
+
toolResponseMsg.className = 'message tool';
|
| 1960 |
+
toolResponseMsg.style.display = 'none';
|
| 1961 |
+
toolResponseMsg.setAttribute('data-tool-response', JSON.stringify({
|
| 1962 |
+
tool_call_id: data.tool_call_id,
|
| 1963 |
+
content: data.response || ''
|
| 1964 |
+
}));
|
| 1965 |
+
chatContainer.appendChild(toolResponseMsg);
|
| 1966 |
+
|
| 1967 |
+
// Build output HTML based on tool type
|
| 1968 |
+
let outputHtml = '';
|
| 1969 |
+
|
| 1970 |
+
if (data.tool === 'web_search' && data.result?.results) {
|
| 1971 |
+
try {
|
| 1972 |
+
const results = typeof data.result.results === 'string' ? JSON.parse(data.result.results) : data.result.results;
|
| 1973 |
+
if (Array.isArray(results)) {
|
| 1974 |
+
outputHtml = '<div class="search-results-content">' +
|
| 1975 |
+
results.map(r =>
|
| 1976 |
+
`<div class="search-result-item"><a href="${escapeHtml(r.url)}" target="_blank">${escapeHtml(r.title)}</a><span class="search-snippet">${escapeHtml(r.snippet)}</span></div>`
|
| 1977 |
+
).join('') + '</div>';
|
| 1978 |
+
}
|
| 1979 |
+
} catch(e) { /* ignore parse errors */ }
|
| 1980 |
+
} else if (data.tool === 'read_url') {
|
| 1981 |
+
const len = data.result?.length || 0;
|
| 1982 |
+
outputHtml = `<div class="tool-cell-read-summary">${len > 0 ? `Extracted ${(len / 1000).toFixed(1)}k chars` : 'No content extracted'}</div>`;
|
| 1983 |
+
} else if (data.tool === 'screenshot_url' && data.image) {
|
| 1984 |
+
outputHtml = `<img src="data:image/png;base64,${data.image}" alt="Screenshot" class="screenshot-img" />`;
|
| 1985 |
+
} else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url') && data.image) {
|
| 1986 |
+
const imgName = data.image_name || 'image';
|
| 1987 |
+
outputHtml = `<img src="data:image/png;base64,${data.image}" alt="${escapeHtml(imgName)}" class="generated-img" />`;
|
| 1988 |
+
} else if ((data.tool === 'generate_image' || data.tool === 'edit_image' || data.tool === 'read_image_url') && !data.image) {
|
| 1989 |
+
outputHtml = `<div class="tool-cell-read-summary">Failed to process image</div>`;
|
| 1990 |
+
}
|
| 1991 |
+
|
| 1992 |
+
if (outputHtml && lastToolCell) {
|
| 1993 |
+
const outputEl = document.createElement('div');
|
| 1994 |
+
outputEl.className = 'tool-cell-output';
|
| 1995 |
+
outputEl.innerHTML = outputHtml;
|
| 1996 |
+
lastToolCell.appendChild(outputEl);
|
| 1997 |
+
}
|
| 1998 |
+
|
| 1999 |
+
scrollChatToBottom(chatContainer);
|
| 2000 |
+
|
| 2001 |
} else if (data.type === 'content') {
|
| 2002 |
// Regular streaming content (non-code notebooks)
|
| 2003 |
if (!currentMessageEl) {
|
|
|
|
| 2332 |
actionWidgets[targetTabId] = widget;
|
| 2333 |
}
|
| 2334 |
|
| 2335 |
+
async function updateActionWidgetWithResult(tabId, resultContent, figures, images) {
|
| 2336 |
const widget = actionWidgets[tabId];
|
| 2337 |
if (!widget) return;
|
| 2338 |
|
|
|
|
| 2366 |
}
|
| 2367 |
}
|
| 2368 |
|
| 2369 |
+
// Handle <image_N> references from image agent
|
| 2370 |
+
if (images) {
|
| 2371 |
+
for (const [imageName, imageBase64] of Object.entries(images)) {
|
| 2372 |
+
const placeholderId = `%%%IMAGE_${imageName}%%%`;
|
| 2373 |
+
figurePlaceholders[placeholderId] = { type: 'png', data: imageBase64 };
|
| 2374 |
+
|
| 2375 |
+
const pairedTag = new RegExp(`<${imageName}></${imageName}>`, 'gi');
|
| 2376 |
+
processedContent = processedContent.replace(pairedTag, `\n\n${placeholderId}\n\n`);
|
| 2377 |
+
const singleTag = new RegExp(`</?${imageName}>`, 'gi');
|
| 2378 |
+
processedContent = processedContent.replace(singleTag, `\n\n${placeholderId}\n\n`);
|
| 2379 |
+
}
|
| 2380 |
+
}
|
| 2381 |
+
|
| 2382 |
// Process markdown
|
| 2383 |
let html = parseMarkdown(processedContent);
|
| 2384 |
|
|
|
|
| 2407 |
body.appendChild(resultSection);
|
| 2408 |
}
|
| 2409 |
|
| 2410 |
+
// Update the tool response DOM element so getConversationHistory picks up actual results
|
| 2411 |
const toolCallId = toolCallIds[tabId];
|
| 2412 |
if (toolCallId) {
|
| 2413 |
+
// Find the hidden tool response element with this tool_call_id in the command center
|
| 2414 |
+
const commandContainer = document.getElementById('messages-command');
|
| 2415 |
+
if (commandContainer) {
|
| 2416 |
+
const toolMsgs = commandContainer.querySelectorAll('.message.tool[data-tool-response]');
|
| 2417 |
+
for (const toolMsg of toolMsgs) {
|
| 2418 |
+
try {
|
| 2419 |
+
const data = JSON.parse(toolMsg.getAttribute('data-tool-response'));
|
| 2420 |
+
if (data.tool_call_id === toolCallId) {
|
| 2421 |
+
data.content = resultContent;
|
| 2422 |
+
toolMsg.setAttribute('data-tool-response', JSON.stringify(data));
|
| 2423 |
+
break;
|
| 2424 |
+
}
|
| 2425 |
+
} catch (e) { /* ignore parse errors */ }
|
| 2426 |
+
}
|
| 2427 |
+
}
|
| 2428 |
+
|
| 2429 |
+
// Also send to backend (non-blocking)
|
| 2430 |
fetch('/api/conversation/add-tool-response', {
|
| 2431 |
method: 'POST',
|
| 2432 |
headers: { 'Content-Type': 'application/json' },
|
|
|
|
| 3226 |
},
|
| 3227 |
e2bKey: oldSettings.e2bKey || '',
|
| 3228 |
serperKey: oldSettings.serperKey || '',
|
| 3229 |
+
hfToken: oldSettings.hfToken || '',
|
| 3230 |
+
imageGenModel: oldSettings.imageGenModel || '',
|
| 3231 |
+
imageEditModel: oldSettings.imageEditModel || '',
|
| 3232 |
researchSubAgentModel: oldSettings.researchSubAgentModel || '',
|
| 3233 |
researchParallelWorkers: oldSettings.researchParallelWorkers || null,
|
| 3234 |
researchMaxWebsites: oldSettings.researchMaxWebsites || null,
|
|
|
|
| 3408 |
// Build dropdown IDs from registry + special dropdowns
|
| 3409 |
const dropdownIds = [
|
| 3410 |
...Object.keys(AGENT_REGISTRY).map(t => `setting-notebook-${t}`),
|
| 3411 |
+
'setting-research-sub-agent-model',
|
| 3412 |
+
'setting-image-gen-model',
|
| 3413 |
+
'setting-image-edit-model'
|
| 3414 |
];
|
| 3415 |
|
| 3416 |
dropdownIds.forEach(dropdownId => {
|
|
|
|
| 3443 |
}
|
| 3444 |
const subAgentDropdown = document.getElementById('setting-research-sub-agent-model');
|
| 3445 |
if (subAgentDropdown) subAgentDropdown.value = settings.researchSubAgentModel || '';
|
| 3446 |
+
const imageGenDropdown = document.getElementById('setting-image-gen-model');
|
| 3447 |
+
if (imageGenDropdown) imageGenDropdown.value = settings.imageGenModel || '';
|
| 3448 |
+
const imageEditDropdown = document.getElementById('setting-image-edit-model');
|
| 3449 |
+
if (imageEditDropdown) imageEditDropdown.value = settings.imageEditModel || '';
|
| 3450 |
}
|
| 3451 |
|
| 3452 |
// Show add/edit provider dialog
|
|
|
|
| 3613 |
}
|
| 3614 |
|
| 3615 |
function openSettings() {
|
| 3616 |
+
// Show settings file path
|
| 3617 |
+
const pathEl = document.getElementById('settingsPath');
|
| 3618 |
+
if (pathEl) pathEl.textContent = settings._settingsPath || '';
|
| 3619 |
+
|
| 3620 |
// Render providers and models lists
|
| 3621 |
renderProvidersList();
|
| 3622 |
renderModelsList();
|
|
|
|
| 3625 |
// Populate service keys
|
| 3626 |
document.getElementById('setting-e2b-key').value = settings.e2bKey || '';
|
| 3627 |
document.getElementById('setting-serper-key').value = settings.serperKey || '';
|
| 3628 |
+
document.getElementById('setting-hf-token').value = settings.hfToken || '';
|
| 3629 |
|
| 3630 |
// Populate research settings
|
| 3631 |
document.getElementById('setting-research-parallel-workers').value = settings.researchParallelWorkers || '';
|
|
|
|
| 3663 |
// Get other settings
|
| 3664 |
const e2bKey = document.getElementById('setting-e2b-key').value.trim();
|
| 3665 |
const serperKey = document.getElementById('setting-serper-key').value.trim();
|
| 3666 |
+
const hfToken = document.getElementById('setting-hf-token').value.trim();
|
| 3667 |
+
const imageGenModel = document.getElementById('setting-image-gen-model')?.value || '';
|
| 3668 |
+
const imageEditModel = document.getElementById('setting-image-edit-model')?.value || '';
|
| 3669 |
const researchParallelWorkers = document.getElementById('setting-research-parallel-workers').value.trim();
|
| 3670 |
const researchMaxWebsites = document.getElementById('setting-research-max-websites').value.trim();
|
| 3671 |
const themeColor = document.getElementById('setting-theme-color').value || 'forest';
|
|
|
|
| 3685 |
settings.notebooks = notebookModels;
|
| 3686 |
settings.e2bKey = e2bKey;
|
| 3687 |
settings.serperKey = serperKey;
|
| 3688 |
+
settings.hfToken = hfToken;
|
| 3689 |
+
settings.imageGenModel = imageGenModel;
|
| 3690 |
+
settings.imageEditModel = imageEditModel;
|
| 3691 |
settings.researchSubAgentModel = researchSubAgentModel;
|
| 3692 |
settings.researchParallelWorkers = researchParallelWorkers ? parseInt(researchParallelWorkers) : null;
|
| 3693 |
settings.researchMaxWebsites = researchMaxWebsites ? parseInt(researchMaxWebsites) : null;
|
frontend/style.css
CHANGED
|
@@ -2729,6 +2729,17 @@ pre code [class*="token"] {
|
|
| 2729 |
letter-spacing: 0.5px;
|
| 2730 |
}
|
| 2731 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2732 |
.settings-panel-close {
|
| 2733 |
background: none;
|
| 2734 |
border: none;
|
|
@@ -3778,3 +3789,91 @@ pre code [class*="token"] {
|
|
| 3778 |
}
|
| 3779 |
|
| 3780 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2729 |
letter-spacing: 0.5px;
|
| 2730 |
}
|
| 2731 |
|
| 2732 |
+
.settings-path {
|
| 2733 |
+
font-size: 10px;
|
| 2734 |
+
color: rgba(255, 255, 255, 0.6);
|
| 2735 |
+
font-weight: 400;
|
| 2736 |
+
overflow: hidden;
|
| 2737 |
+
text-overflow: ellipsis;
|
| 2738 |
+
white-space: nowrap;
|
| 2739 |
+
flex: 1;
|
| 2740 |
+
margin: 0 12px;
|
| 2741 |
+
}
|
| 2742 |
+
|
| 2743 |
.settings-panel-close {
|
| 2744 |
background: none;
|
| 2745 |
border: none;
|
|
|
|
| 3789 |
}
|
| 3790 |
|
| 3791 |
|
| 3792 |
+
/* ============================================================
|
| 3793 |
+
Agent Tool Cells (search, read, screenshot)
|
| 3794 |
+
============================================================ */
|
| 3795 |
+
|
| 3796 |
+
.tool-cell {
|
| 3797 |
+
margin: 16px 0;
|
| 3798 |
+
overflow: hidden;
|
| 3799 |
+
}
|
| 3800 |
+
|
| 3801 |
+
.tool-cell-label {
|
| 3802 |
+
font-size: 10px;
|
| 3803 |
+
font-weight: 600;
|
| 3804 |
+
color: var(--bg-primary);
|
| 3805 |
+
text-transform: uppercase;
|
| 3806 |
+
letter-spacing: 0.5px;
|
| 3807 |
+
padding: 6px 12px;
|
| 3808 |
+
background: var(--theme-accent);
|
| 3809 |
+
display: flex;
|
| 3810 |
+
align-items: center;
|
| 3811 |
+
gap: 8px;
|
| 3812 |
+
}
|
| 3813 |
+
|
| 3814 |
+
.tool-cell-input {
|
| 3815 |
+
background: var(--bg-tertiary);
|
| 3816 |
+
padding: 10px 12px;
|
| 3817 |
+
border: 1px solid var(--border-primary);
|
| 3818 |
+
border-top: none;
|
| 3819 |
+
font-family: 'JetBrains Mono', monospace;
|
| 3820 |
+
font-size: 12px;
|
| 3821 |
+
color: var(--text-primary);
|
| 3822 |
+
word-break: break-all;
|
| 3823 |
+
}
|
| 3824 |
+
|
| 3825 |
+
.tool-cell-output {
|
| 3826 |
+
padding: 10px 12px;
|
| 3827 |
+
background: var(--bg-tertiary);
|
| 3828 |
+
border: 1px solid var(--border-primary);
|
| 3829 |
+
border-top: none;
|
| 3830 |
+
font-size: 12px;
|
| 3831 |
+
}
|
| 3832 |
+
|
| 3833 |
+
.search-results-content {
|
| 3834 |
+
display: flex;
|
| 3835 |
+
flex-direction: column;
|
| 3836 |
+
gap: 8px;
|
| 3837 |
+
}
|
| 3838 |
+
|
| 3839 |
+
.search-result-item {
|
| 3840 |
+
display: flex;
|
| 3841 |
+
flex-direction: column;
|
| 3842 |
+
gap: 1px;
|
| 3843 |
+
}
|
| 3844 |
+
|
| 3845 |
+
.search-result-item a {
|
| 3846 |
+
color: var(--accent-primary);
|
| 3847 |
+
text-decoration: none;
|
| 3848 |
+
font-weight: 500;
|
| 3849 |
+
font-size: 12px;
|
| 3850 |
+
}
|
| 3851 |
+
|
| 3852 |
+
.search-result-item a:hover {
|
| 3853 |
+
text-decoration: underline;
|
| 3854 |
+
}
|
| 3855 |
+
|
| 3856 |
+
.search-snippet {
|
| 3857 |
+
color: var(--text-muted);
|
| 3858 |
+
font-size: 11px;
|
| 3859 |
+
line-height: 1.3;
|
| 3860 |
+
}
|
| 3861 |
+
|
| 3862 |
+
.tool-cell-read-summary {
|
| 3863 |
+
color: var(--text-muted);
|
| 3864 |
+
font-size: 11px;
|
| 3865 |
+
}
|
| 3866 |
+
|
| 3867 |
+
.screenshot-img {
|
| 3868 |
+
max-width: 100%;
|
| 3869 |
+
max-height: 400px;
|
| 3870 |
+
border-radius: 2px;
|
| 3871 |
+
}
|
| 3872 |
+
|
| 3873 |
+
.generated-img {
|
| 3874 |
+
max-width: 100%;
|
| 3875 |
+
border-radius: 4px;
|
| 3876 |
+
cursor: pointer;
|
| 3877 |
+
}
|
| 3878 |
+
|
| 3879 |
+
|