Spaces:
Running
Running
Commit ·
1598bb4
1
Parent(s): b63e2df
reworked job tool descriptions + implemented batch processing for hf_job tool calls in cli
Browse files- agent/core/agent_loop.py +122 -57
- agent/main.py +85 -45
- agent/tools/jobs_tool.py +38 -91
- run_search_agent.py +6 -2
agent/core/agent_loop.py
CHANGED
|
@@ -103,32 +103,23 @@ class Handlers:
|
|
| 103 |
Event(event_type="assistant_message", data={"content": content})
|
| 104 |
)
|
| 105 |
|
| 106 |
-
#
|
|
|
|
|
|
|
|
|
|
| 107 |
for tc in tool_calls:
|
| 108 |
tool_name = tc.function.name
|
| 109 |
tool_args = json.loads(tc.function.arguments)
|
| 110 |
|
| 111 |
-
# Check if this tool requires user approval
|
| 112 |
if _needs_approval(tool_name, tool_args):
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
data={
|
| 117 |
-
"tool": tool_name,
|
| 118 |
-
"arguments": tool_args,
|
| 119 |
-
"tool_call_id": tc.id,
|
| 120 |
-
},
|
| 121 |
-
)
|
| 122 |
-
)
|
| 123 |
-
|
| 124 |
-
# Store pending approval and return early
|
| 125 |
-
session.pending_approval = {
|
| 126 |
-
"tool_call": tc,
|
| 127 |
-
"arguments": tool_args,
|
| 128 |
-
}
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
| 132 |
|
| 133 |
await session.send_event(
|
| 134 |
Event(
|
|
@@ -161,6 +152,37 @@ class Handlers:
|
|
| 161 |
)
|
| 162 |
)
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
iteration += 1
|
| 165 |
|
| 166 |
except Exception as e:
|
|
@@ -225,10 +247,8 @@ class Handlers:
|
|
| 225 |
await session.send_event(Event(event_type="undo_complete"))
|
| 226 |
|
| 227 |
@staticmethod
|
| 228 |
-
async def exec_approval(
|
| 229 |
-
|
| 230 |
-
) -> None:
|
| 231 |
-
"""Handle job execution approval"""
|
| 232 |
if not session.pending_approval:
|
| 233 |
await session.send_event(
|
| 234 |
Event(
|
|
@@ -238,12 +258,36 @@ class Handlers:
|
|
| 238 |
)
|
| 239 |
return
|
| 240 |
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
await session.send_event(
|
| 248 |
Event(
|
| 249 |
event_type="tool_call",
|
|
@@ -251,36 +295,58 @@ class Handlers:
|
|
| 251 |
)
|
| 252 |
)
|
| 253 |
|
| 254 |
-
output, success = await session.tool_router.call_tool(
|
|
|
|
|
|
|
| 255 |
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
|
|
|
| 262 |
)
|
| 263 |
-
session.context_manager.add_message(tool_msg)
|
| 264 |
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
)
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
tool_msg = Message(
|
| 282 |
role="tool",
|
| 283 |
-
content=
|
| 284 |
tool_call_id=tc.id,
|
| 285 |
name=tool_name,
|
| 286 |
)
|
|
@@ -291,7 +357,7 @@ class Handlers:
|
|
| 291 |
event_type="tool_output",
|
| 292 |
data={
|
| 293 |
"tool": tool_name,
|
| 294 |
-
"output":
|
| 295 |
"success": False,
|
| 296 |
},
|
| 297 |
)
|
|
@@ -300,7 +366,7 @@ class Handlers:
|
|
| 300 |
# Clear pending approval
|
| 301 |
session.pending_approval = None
|
| 302 |
|
| 303 |
-
# Continue agent loop with empty input to process the tool
|
| 304 |
await Handlers.run_agent(session, "")
|
| 305 |
|
| 306 |
@staticmethod
|
|
@@ -339,9 +405,8 @@ async def process_submission(session: Session, submission) -> bool:
|
|
| 339 |
return True
|
| 340 |
|
| 341 |
if op.op_type == OpType.EXEC_APPROVAL:
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
await Handlers.exec_approval(session, approved, feedback)
|
| 345 |
return True
|
| 346 |
|
| 347 |
if op.op_type == OpType.SHUTDOWN:
|
|
|
|
| 103 |
Event(event_type="assistant_message", data={"content": content})
|
| 104 |
)
|
| 105 |
|
| 106 |
+
# Separate tools into those requiring approval and those that don't
|
| 107 |
+
approval_required_tools = []
|
| 108 |
+
non_approval_tools = []
|
| 109 |
+
|
| 110 |
for tc in tool_calls:
|
| 111 |
tool_name = tc.function.name
|
| 112 |
tool_args = json.loads(tc.function.arguments)
|
| 113 |
|
|
|
|
| 114 |
if _needs_approval(tool_name, tool_args):
|
| 115 |
+
approval_required_tools.append(tc)
|
| 116 |
+
else:
|
| 117 |
+
non_approval_tools.append(tc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
+
# Execute non-approval tools first
|
| 120 |
+
for tc in non_approval_tools:
|
| 121 |
+
tool_name = tc.function.name
|
| 122 |
+
tool_args = json.loads(tc.function.arguments)
|
| 123 |
|
| 124 |
await session.send_event(
|
| 125 |
Event(
|
|
|
|
| 152 |
)
|
| 153 |
)
|
| 154 |
|
| 155 |
+
# If there are tools requiring approval, ask for batch approval
|
| 156 |
+
if approval_required_tools:
|
| 157 |
+
# Prepare batch approval data
|
| 158 |
+
tools_data = []
|
| 159 |
+
for tc in approval_required_tools:
|
| 160 |
+
tool_name = tc.function.name
|
| 161 |
+
tool_args = json.loads(tc.function.arguments)
|
| 162 |
+
tools_data.append({
|
| 163 |
+
"tool": tool_name,
|
| 164 |
+
"arguments": tool_args,
|
| 165 |
+
"tool_call_id": tc.id,
|
| 166 |
+
})
|
| 167 |
+
|
| 168 |
+
await session.send_event(
|
| 169 |
+
Event(
|
| 170 |
+
event_type="approval_required",
|
| 171 |
+
data={
|
| 172 |
+
"tools": tools_data, # Batch of tools
|
| 173 |
+
"count": len(tools_data),
|
| 174 |
+
},
|
| 175 |
+
)
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
# Store all approval-requiring tools
|
| 179 |
+
session.pending_approval = {
|
| 180 |
+
"tool_calls": approval_required_tools,
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
# Return early - wait for EXEC_APPROVAL operation
|
| 184 |
+
return None
|
| 185 |
+
|
| 186 |
iteration += 1
|
| 187 |
|
| 188 |
except Exception as e:
|
|
|
|
| 247 |
await session.send_event(Event(event_type="undo_complete"))
|
| 248 |
|
| 249 |
@staticmethod
|
| 250 |
+
async def exec_approval(session: Session, approvals: list[dict]) -> None:
|
| 251 |
+
"""Handle batch job execution approval"""
|
|
|
|
|
|
|
| 252 |
if not session.pending_approval:
|
| 253 |
await session.send_event(
|
| 254 |
Event(
|
|
|
|
| 258 |
)
|
| 259 |
return
|
| 260 |
|
| 261 |
+
tool_calls = session.pending_approval.get("tool_calls", [])
|
| 262 |
+
if not tool_calls:
|
| 263 |
+
await session.send_event(
|
| 264 |
+
Event(
|
| 265 |
+
event_type="error",
|
| 266 |
+
data={"error": "No pending tool calls found"},
|
| 267 |
+
)
|
| 268 |
+
)
|
| 269 |
+
return
|
| 270 |
+
|
| 271 |
+
# Create a map of tool_call_id -> approval decision
|
| 272 |
+
approval_map = {a["tool_call_id"]: a for a in approvals}
|
| 273 |
+
|
| 274 |
+
# Separate approved and rejected tool calls
|
| 275 |
+
approved_tasks = []
|
| 276 |
+
rejected_tasks = []
|
| 277 |
+
|
| 278 |
+
for tc in tool_calls:
|
| 279 |
+
tool_name = tc.function.name
|
| 280 |
+
tool_args = json.loads(tc.function.arguments)
|
| 281 |
+
approval_decision = approval_map.get(tc.id, {"approved": False})
|
| 282 |
|
| 283 |
+
if approval_decision.get("approved", False):
|
| 284 |
+
approved_tasks.append((tc, tool_name, tool_args))
|
| 285 |
+
else:
|
| 286 |
+
rejected_tasks.append((tc, tool_name, approval_decision))
|
| 287 |
+
|
| 288 |
+
# Execute all approved tools concurrently
|
| 289 |
+
async def execute_tool(tc, tool_name, tool_args):
|
| 290 |
+
"""Execute a single tool and return its result"""
|
| 291 |
await session.send_event(
|
| 292 |
Event(
|
| 293 |
event_type="tool_call",
|
|
|
|
| 295 |
)
|
| 296 |
)
|
| 297 |
|
| 298 |
+
output, success = await session.tool_router.call_tool(
|
| 299 |
+
tool_name, tool_args
|
| 300 |
+
)
|
| 301 |
|
| 302 |
+
return (tc, tool_name, output, success)
|
| 303 |
+
|
| 304 |
+
# Execute all approved tools concurrently and wait for ALL to complete
|
| 305 |
+
if approved_tasks:
|
| 306 |
+
results = await asyncio.gather(
|
| 307 |
+
*[execute_tool(tc, tool_name, tool_args) for tc, tool_name, tool_args in approved_tasks],
|
| 308 |
+
return_exceptions=True
|
| 309 |
)
|
|
|
|
| 310 |
|
| 311 |
+
# Process results and add to context
|
| 312 |
+
for result in results:
|
| 313 |
+
if isinstance(result, Exception):
|
| 314 |
+
# Handle execution error
|
| 315 |
+
print(f"Tool execution error: {result}")
|
| 316 |
+
continue
|
| 317 |
+
|
| 318 |
+
tc, tool_name, output, success = result
|
| 319 |
+
|
| 320 |
+
# Add tool result to context
|
| 321 |
+
tool_msg = Message(
|
| 322 |
+
role="tool",
|
| 323 |
+
content=output,
|
| 324 |
+
tool_call_id=tc.id,
|
| 325 |
+
name=tool_name,
|
| 326 |
)
|
| 327 |
+
session.context_manager.add_message(tool_msg)
|
| 328 |
+
|
| 329 |
+
await session.send_event(
|
| 330 |
+
Event(
|
| 331 |
+
event_type="tool_output",
|
| 332 |
+
data={
|
| 333 |
+
"tool": tool_name,
|
| 334 |
+
"output": output,
|
| 335 |
+
"success": success,
|
| 336 |
+
},
|
| 337 |
+
)
|
| 338 |
+
)
|
| 339 |
+
|
| 340 |
+
# Process rejected tools
|
| 341 |
+
for tc, tool_name, approval_decision in rejected_tasks:
|
| 342 |
+
rejection_msg = "Job execution cancelled by user"
|
| 343 |
+
user_feedback = approval_decision.get("feedback")
|
| 344 |
+
if user_feedback:
|
| 345 |
+
rejection_msg += f". User feedback: {user_feedback}"
|
| 346 |
|
| 347 |
tool_msg = Message(
|
| 348 |
role="tool",
|
| 349 |
+
content=rejection_msg,
|
| 350 |
tool_call_id=tc.id,
|
| 351 |
name=tool_name,
|
| 352 |
)
|
|
|
|
| 357 |
event_type="tool_output",
|
| 358 |
data={
|
| 359 |
"tool": tool_name,
|
| 360 |
+
"output": rejection_msg,
|
| 361 |
"success": False,
|
| 362 |
},
|
| 363 |
)
|
|
|
|
| 366 |
# Clear pending approval
|
| 367 |
session.pending_approval = None
|
| 368 |
|
| 369 |
+
# Continue agent loop with empty input to process the tool results
|
| 370 |
await Handlers.run_agent(session, "")
|
| 371 |
|
| 372 |
@staticmethod
|
|
|
|
| 405 |
return True
|
| 406 |
|
| 407 |
if op.op_type == OpType.EXEC_APPROVAL:
|
| 408 |
+
approvals = op.data.get("approvals", []) if op.data else []
|
| 409 |
+
await Handlers.exec_approval(session, approvals)
|
|
|
|
| 410 |
return True
|
| 411 |
|
| 412 |
if op.op_type == OpType.SHUTDOWN:
|
agent/main.py
CHANGED
|
@@ -116,61 +116,101 @@ async def event_listener(
|
|
| 116 |
new_tokens = event.data.get("new_tokens", 0) if event.data else 0
|
| 117 |
print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
|
| 118 |
elif event.event_type == "approval_required":
|
| 119 |
-
#
|
| 120 |
-
|
| 121 |
-
|
| 122 |
|
| 123 |
-
operation = arguments.get("operation", "")
|
| 124 |
-
args = arguments.get("args", {})
|
| 125 |
-
|
| 126 |
-
print(f"\nOperation: {operation}")
|
| 127 |
-
|
| 128 |
-
if operation == "uv":
|
| 129 |
-
script = args.get("script", "")
|
| 130 |
-
dependencies = args.get("dependencies", [])
|
| 131 |
-
print(f"Script to run:\n{script}")
|
| 132 |
-
if dependencies:
|
| 133 |
-
print(f"Dependencies: {', '.join(dependencies)}")
|
| 134 |
-
elif operation == "run":
|
| 135 |
-
image = args.get("image", "")
|
| 136 |
-
command = args.get("command", "")
|
| 137 |
-
print(f"Docker image: {image}")
|
| 138 |
-
print(f"Command: {command}")
|
| 139 |
-
|
| 140 |
-
# Common parameters
|
| 141 |
-
flavor = args.get("flavor", "cpu-basic")
|
| 142 |
-
detached = args.get("detached", False)
|
| 143 |
-
print(f"Hardware: {flavor}")
|
| 144 |
-
print(f"Detached mode: {detached}")
|
| 145 |
-
|
| 146 |
-
secrets = args.get("secrets", [])
|
| 147 |
-
if secrets:
|
| 148 |
-
print(f"Secrets: {', '.join(secrets)}")
|
| 149 |
-
|
| 150 |
-
# Get user decision
|
| 151 |
print("\n" + format_separator())
|
| 152 |
-
print(
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
None,
|
| 157 |
-
input,
|
| 158 |
-
"Approve? (y=yes, n=no, or provide feedback to reject): ",
|
| 159 |
)
|
|
|
|
| 160 |
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
feedback = (
|
| 164 |
-
None if approved or response.lower() in ["n", "no"] else response
|
| 165 |
-
)
|
| 166 |
|
| 167 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
submission_id[0] += 1
|
| 169 |
approval_submission = Submission(
|
| 170 |
id=f"approval_{submission_id[0]}",
|
| 171 |
operation=Operation(
|
| 172 |
op_type=OpType.EXEC_APPROVAL,
|
| 173 |
-
data={"
|
| 174 |
),
|
| 175 |
)
|
| 176 |
await submission_queue.put(approval_submission)
|
|
|
|
| 116 |
new_tokens = event.data.get("new_tokens", 0) if event.data else 0
|
| 117 |
print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
|
| 118 |
elif event.event_type == "approval_required":
|
| 119 |
+
# Handle batch approval format
|
| 120 |
+
tools_data = event.data.get("tools", []) if event.data else []
|
| 121 |
+
count = event.data.get("count", 0) if event.data else 0
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
print("\n" + format_separator())
|
| 124 |
+
print(
|
| 125 |
+
format_header(
|
| 126 |
+
f"JOB EXECUTION APPROVAL REQUIRED ({count} job{'s' if count != 1 else ''})"
|
| 127 |
+
)
|
|
|
|
|
|
|
|
|
|
| 128 |
)
|
| 129 |
+
print(format_separator())
|
| 130 |
|
| 131 |
+
approvals = []
|
| 132 |
+
loop = asyncio.get_event_loop()
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
+
# Ask for approval for each tool
|
| 135 |
+
for i, tool_info in enumerate(tools_data, 1):
|
| 136 |
+
tool_name = tool_info.get("tool", "")
|
| 137 |
+
arguments = tool_info.get("arguments", {})
|
| 138 |
+
tool_call_id = tool_info.get("tool_call_id", "")
|
| 139 |
+
|
| 140 |
+
# Handle case where arguments might be a JSON string
|
| 141 |
+
if isinstance(arguments, str):
|
| 142 |
+
try:
|
| 143 |
+
arguments = json.loads(arguments)
|
| 144 |
+
except json.JSONDecodeError:
|
| 145 |
+
print(f"Warning: Failed to parse arguments for {tool_name}")
|
| 146 |
+
arguments = {}
|
| 147 |
+
|
| 148 |
+
operation = arguments.get("operation", "")
|
| 149 |
+
args = arguments.get("args", {})
|
| 150 |
+
|
| 151 |
+
# Handle case where args might be a JSON string
|
| 152 |
+
if isinstance(args, str):
|
| 153 |
+
try:
|
| 154 |
+
args = json.loads(args)
|
| 155 |
+
except json.JSONDecodeError:
|
| 156 |
+
print(f"Warning: Failed to parse args for {tool_name}")
|
| 157 |
+
args = {}
|
| 158 |
+
|
| 159 |
+
print(f"\n[Job {i}/{count}]")
|
| 160 |
+
print(f"Operation: {operation}")
|
| 161 |
+
|
| 162 |
+
if operation == "uv":
|
| 163 |
+
script = args.get("script", "")
|
| 164 |
+
dependencies = args.get("dependencies", [])
|
| 165 |
+
print("Script:\n" + script)
|
| 166 |
+
if dependencies:
|
| 167 |
+
print(f"Dependencies: {', '.join(dependencies)}")
|
| 168 |
+
elif operation == "run":
|
| 169 |
+
image = args.get("image", "")
|
| 170 |
+
command = args.get("command", "")
|
| 171 |
+
print(f"Docker image: {image}")
|
| 172 |
+
print(f"Command: {command}")
|
| 173 |
+
|
| 174 |
+
# Common parameters
|
| 175 |
+
flavor = args.get("flavor", "cpu-basic")
|
| 176 |
+
detached = args.get("detached", False)
|
| 177 |
+
print(f"Hardware: {flavor}")
|
| 178 |
+
print(f"Detached mode: {detached}")
|
| 179 |
+
|
| 180 |
+
secrets = args.get("secrets", [])
|
| 181 |
+
if secrets:
|
| 182 |
+
print(f"Secrets: {', '.join(secrets)}")
|
| 183 |
+
|
| 184 |
+
# Get user decision for this job
|
| 185 |
+
response = await loop.run_in_executor(
|
| 186 |
+
None,
|
| 187 |
+
input,
|
| 188 |
+
f"Approve job {i}? (y=yes, n=no, or provide feedback to reject): ",
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
+
response = response.strip()
|
| 192 |
+
approved = response.lower() in ["y", "yes"]
|
| 193 |
+
feedback = (
|
| 194 |
+
None
|
| 195 |
+
if approved or response.lower() in ["n", "no"]
|
| 196 |
+
else response
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
approvals.append(
|
| 200 |
+
{
|
| 201 |
+
"tool_call_id": tool_call_id,
|
| 202 |
+
"approved": approved,
|
| 203 |
+
"feedback": feedback,
|
| 204 |
+
}
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
# Submit batch approval
|
| 208 |
submission_id[0] += 1
|
| 209 |
approval_submission = Submission(
|
| 210 |
id=f"approval_{submission_id[0]}",
|
| 211 |
operation=Operation(
|
| 212 |
op_type=OpType.EXEC_APPROVAL,
|
| 213 |
+
data={"approvals": approvals},
|
| 214 |
),
|
| 215 |
)
|
| 216 |
await submission_queue.put(approval_submission)
|
agent/tools/jobs_tool.py
CHANGED
|
@@ -360,7 +360,7 @@ class HfJobsTool:
|
|
| 360 |
self.api.run_job,
|
| 361 |
image=image,
|
| 362 |
command=command,
|
| 363 |
-
env=
|
| 364 |
secrets=_add_environment_variables(args.get("secrets")),
|
| 365 |
flavor=args.get("hardware_flavor", "cpu-basic"),
|
| 366 |
timeout=args.get("timeout", "30m"),
|
|
@@ -575,7 +575,7 @@ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}
|
|
| 575 |
image=image,
|
| 576 |
command=command,
|
| 577 |
schedule=schedule,
|
| 578 |
-
env=
|
| 579 |
secrets=_add_environment_variables(args.get("secrets")),
|
| 580 |
flavor=args.get("hardware_flavor", "cpu-basic"),
|
| 581 |
timeout=args.get("timeout", "30m"),
|
|
@@ -735,41 +735,29 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_
|
|
| 735 |
HF_JOBS_TOOL_SPEC = {
|
| 736 |
"name": "hf_jobs",
|
| 737 |
"description": (
|
| 738 |
-
"
|
| 739 |
-
"
|
| 740 |
-
"
|
| 741 |
-
"
|
| 742 |
-
"**
|
| 743 |
-
"
|
| 744 |
-
"
|
| 745 |
-
"
|
| 746 |
-
"-
|
| 747 |
-
"-
|
| 748 |
-
"##
|
| 749 |
-
"**
|
| 750 |
-
"
|
| 751 |
-
"**
|
| 752 |
-
"
|
| 753 |
-
"**Run
|
| 754 |
-
"{'operation': 'run', '
|
| 755 |
-
"**
|
| 756 |
-
"{'operation': '
|
| 757 |
-
"
|
| 758 |
-
"{'operation': '
|
| 759 |
-
"
|
| 760 |
-
"
|
| 761 |
-
"**Get job logs:**\n"
|
| 762 |
-
"{'operation': 'logs', 'job_id': 'xxx'}\n\n"
|
| 763 |
-
"**Cancel job:**\n"
|
| 764 |
-
"{'operation': 'cancel', 'job_id': 'xxx'}\n\n"
|
| 765 |
-
"**Schedule daily Python job:**\n"
|
| 766 |
-
"{'operation': 'scheduled run', 'script': 'print(\"daily task\")', 'schedule': '@daily'}\n\n"
|
| 767 |
-
"## Important Notes\n"
|
| 768 |
-
"- **CRITICAL: Job files are EPHEMERAL** - ALL files created in HF Jobs (trained models, datasets, outputs, completions etc.) are DELETED when the job completes. You MUST upload any outputs to HF Hub in the script itself (using model.push_to_hub() when training models, dataset.push_to_hub() when creating text based outputs, etc.)."
|
| 769 |
-
"- Always pass full script content - no local files available on server\n"
|
| 770 |
-
"- Use array format for commands: ['/bin/sh', '-lc', 'cmd'] for shell features\n"
|
| 771 |
-
"- hf-transfer is auto-included in uv jobs for faster downloads\n"
|
| 772 |
-
"- **Remember to upload outputs to Hub before job finishes!**"
|
| 773 |
),
|
| 774 |
"parameters": {
|
| 775 |
"type": "object",
|
|
@@ -798,90 +786,49 @@ HF_JOBS_TOOL_SPEC = {
|
|
| 798 |
# Python/UV specific parameters
|
| 799 |
"script": {
|
| 800 |
"type": "string",
|
| 801 |
-
"description": (
|
| 802 |
-
"Python code to execute. Can be inline code or a raw GitHub URL. "
|
| 803 |
-
"Auto-uses UV image and builds UV command. "
|
| 804 |
-
"USED with: 'run', 'scheduled run' (triggers Python mode). "
|
| 805 |
-
"MUTUALLY EXCLUSIVE with 'command'. "
|
| 806 |
-
"NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
|
| 807 |
-
),
|
| 808 |
},
|
| 809 |
"dependencies": {
|
| 810 |
"type": "array",
|
| 811 |
"items": {"type": "string"},
|
| 812 |
-
"description":
|
| 813 |
-
"List of pip packages to install. Example: ['torch', 'transformers']. "
|
| 814 |
-
"Only used when 'script' is provided. "
|
| 815 |
-
"USED with: 'run', 'scheduled run' (optional, only with script). "
|
| 816 |
-
"NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
|
| 817 |
-
),
|
| 818 |
},
|
| 819 |
# Docker specific parameters
|
| 820 |
"image": {
|
| 821 |
"type": "string",
|
| 822 |
-
"description": (
|
| 823 |
-
"Docker image to use. Default: UV image if 'script' provided, else 'python:3.12'. "
|
| 824 |
-
"Can override the default UV image when using 'script'. "
|
| 825 |
-
"USED with: 'run', 'scheduled run' (optional). "
|
| 826 |
-
"NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
|
| 827 |
-
),
|
| 828 |
},
|
| 829 |
"command": {
|
| 830 |
"type": "array",
|
| 831 |
"items": {"type": "string"},
|
| 832 |
-
"description":
|
| 833 |
-
"Command to execute as array. Example: ['python', '-c', 'print(42)']. "
|
| 834 |
-
"Use this for full Docker control. "
|
| 835 |
-
"USED with: 'run', 'scheduled run' (triggers Docker mode). "
|
| 836 |
-
"MUTUALLY EXCLUSIVE with 'script'. "
|
| 837 |
-
"NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
|
| 838 |
-
),
|
| 839 |
},
|
| 840 |
# Hardware and environment
|
| 841 |
"hardware_flavor": {
|
| 842 |
"type": "string",
|
| 843 |
-
"description": (
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
"NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
|
| 849 |
-
),
|
| 850 |
},
|
| 851 |
"secrets": {
|
| 852 |
"type": "object",
|
| 853 |
-
"description": (
|
| 854 |
-
"Secret environment variables. Format: {'KEY': 'VALUE'}. HF_TOKEN is loaded automatically. "
|
| 855 |
-
"USED with: 'run', 'uv', 'scheduled run', 'scheduled uv' (optional). "
|
| 856 |
-
"NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
|
| 857 |
-
),
|
| 858 |
},
|
| 859 |
# Job management parameters
|
| 860 |
"job_id": {
|
| 861 |
"type": "string",
|
| 862 |
-
"description":
|
| 863 |
-
"Job ID to operate on. "
|
| 864 |
-
"REQUIRED for: 'logs', 'inspect', 'cancel'. "
|
| 865 |
-
"NOT USED with: 'run', 'uv', 'ps', 'scheduled run/uv/ps/inspect/delete/suspend/resume'."
|
| 866 |
-
),
|
| 867 |
},
|
| 868 |
# Scheduled job parameters
|
| 869 |
"scheduled_job_id": {
|
| 870 |
"type": "string",
|
| 871 |
-
"description":
|
| 872 |
-
"Scheduled job ID to operate on. "
|
| 873 |
-
"REQUIRED for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'. "
|
| 874 |
-
"NOT USED with: 'run', 'uv', 'ps', 'logs', 'inspect', 'cancel', 'scheduled run', 'scheduled uv', 'scheduled ps'."
|
| 875 |
-
),
|
| 876 |
},
|
| 877 |
"schedule": {
|
| 878 |
"type": "string",
|
| 879 |
-
"description": (
|
| 880 |
-
"Cron schedule or preset. Presets: '@hourly', '@daily', '@weekly', '@monthly', '@yearly'. "
|
| 881 |
-
"Cron example: '0 9 * * 1' (9 AM every Monday). "
|
| 882 |
-
"REQUIRED for: 'scheduled run', 'scheduled uv'. "
|
| 883 |
-
"NOT USED with: 'run', 'uv', 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
|
| 884 |
-
),
|
| 885 |
},
|
| 886 |
},
|
| 887 |
"required": ["operation"],
|
|
|
|
| 360 |
self.api.run_job,
|
| 361 |
image=image,
|
| 362 |
command=command,
|
| 363 |
+
env=args.get("env"),
|
| 364 |
secrets=_add_environment_variables(args.get("secrets")),
|
| 365 |
flavor=args.get("hardware_flavor", "cpu-basic"),
|
| 366 |
timeout=args.get("timeout", "30m"),
|
|
|
|
| 575 |
image=image,
|
| 576 |
command=command,
|
| 577 |
schedule=schedule,
|
| 578 |
+
env=args.get("env"),
|
| 579 |
secrets=_add_environment_variables(args.get("secrets")),
|
| 580 |
flavor=args.get("hardware_flavor", "cpu-basic"),
|
| 581 |
timeout=args.get("timeout", "30m"),
|
|
|
|
| 735 |
HF_JOBS_TOOL_SPEC = {
|
| 736 |
"name": "hf_jobs",
|
| 737 |
"description": (
|
| 738 |
+
"Run Python scripts or Docker containers on HF cloud GPUs/CPUs.\n\n"
|
| 739 |
+
"## Operations:\n"
|
| 740 |
+
"run, ps, logs, inspect, cancel, scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume\n\n"
|
| 741 |
+
"## Two modes:\n"
|
| 742 |
+
"1. **Python mode:** Provide 'script' + 'dependencies' → auto-handles pip install\n"
|
| 743 |
+
"2. **Docker mode:** Provide 'image' + 'command' → full control\n"
|
| 744 |
+
"(script and command are mutually exclusive)\n\n"
|
| 745 |
+
"## Hardware:\n"
|
| 746 |
+
"CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl\n"
|
| 747 |
+
"GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100\n\n"
|
| 748 |
+
"## Examples:\n\n"
|
| 749 |
+
"**Fine-tune LLM and push to Hub:**\n"
|
| 750 |
+
"{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"gpt2\")\\n# ... training code ...\\nmodel.push_to_hub(\"my-finetuned-model\")', 'dependencies': ['transformers', 'torch', 'datasets'], 'hardware_flavor': 'a10g-large', 'timeout': '4h', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}\n\n"
|
| 751 |
+
"**Generate dataset daily and upload:**\n"
|
| 752 |
+
"{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}\n\n"
|
| 753 |
+
"**Run custom training with Docker:**\n"
|
| 754 |
+
"{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
|
| 755 |
+
"**Monitor jobs:**\n"
|
| 756 |
+
"{'operation': 'ps'} - list running\n"
|
| 757 |
+
"{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
|
| 758 |
+
"{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
|
| 759 |
+
"## CRITICAL: Files are ephemeral!\n"
|
| 760 |
+
"Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 761 |
),
|
| 762 |
"parameters": {
|
| 763 |
"type": "object",
|
|
|
|
| 786 |
# Python/UV specific parameters
|
| 787 |
"script": {
|
| 788 |
"type": "string",
|
| 789 |
+
"description": "Python code to execute. Triggers Python mode (auto pip install). Use with 'run'/'scheduled run'. Mutually exclusive with 'command'.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 790 |
},
|
| 791 |
"dependencies": {
|
| 792 |
"type": "array",
|
| 793 |
"items": {"type": "string"},
|
| 794 |
+
"description": "Pip packages to install. Example: ['trl', 'torch', 'datasets', 'transformers']. Only used with 'script'.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 795 |
},
|
| 796 |
# Docker specific parameters
|
| 797 |
"image": {
|
| 798 |
"type": "string",
|
| 799 |
+
"description": "Docker image. Example: 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime'. Use with 'run'/'scheduled run'. Optional (auto-selected if not provided).",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 800 |
},
|
| 801 |
"command": {
|
| 802 |
"type": "array",
|
| 803 |
"items": {"type": "string"},
|
| 804 |
+
"description": "Command to execute as list. Example: ['python', 'train.py', '--epochs', '10']. Triggers Docker mode. Use with 'run'/'scheduled run'. Mutually exclusive with 'script'.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 805 |
},
|
| 806 |
# Hardware and environment
|
| 807 |
"hardware_flavor": {
|
| 808 |
"type": "string",
|
| 809 |
+
"description": "Hardware type. CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl. GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100. Use with 'run'/'scheduled run'.",
|
| 810 |
+
},
|
| 811 |
+
"timeout": {
|
| 812 |
+
"type": "string",
|
| 813 |
+
"description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
|
|
|
|
|
|
|
| 814 |
},
|
| 815 |
"secrets": {
|
| 816 |
"type": "object",
|
| 817 |
+
"description": "Environment variables (private). Format: {'KEY': 'VALUE'}. Use {'HF_TOKEN': '$HF_TOKEN'} for Hub auth. Use with 'run'/'scheduled run'.",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
},
|
| 819 |
# Job management parameters
|
| 820 |
"job_id": {
|
| 821 |
"type": "string",
|
| 822 |
+
"description": "Job ID to operate on. Required for: 'logs', 'inspect', 'cancel'.",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 823 |
},
|
| 824 |
# Scheduled job parameters
|
| 825 |
"scheduled_job_id": {
|
| 826 |
"type": "string",
|
| 827 |
+
"description": "Scheduled job ID. Required for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'.",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 828 |
},
|
| 829 |
"schedule": {
|
| 830 |
"type": "string",
|
| 831 |
+
"description": "Schedule for recurring job. Presets: '@hourly', '@daily', '@weekly', '@monthly'. Cron: '0 9 * * 1' (Mon 9am). Required for: 'scheduled run'.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 832 |
},
|
| 833 |
},
|
| 834 |
"required": ["operation"],
|
run_search_agent.py
CHANGED
|
@@ -21,13 +21,16 @@ async def test_search_agent(query: str):
|
|
| 21 |
|
| 22 |
# Import at runtime
|
| 23 |
from pathlib import Path
|
|
|
|
| 24 |
from agent.config import load_config
|
| 25 |
|
| 26 |
# Create event queue for the sub-agent
|
| 27 |
sub_event_queue = asyncio.Queue()
|
| 28 |
|
| 29 |
# Load the search agent's own config file with GitHub MCP server
|
| 30 |
-
search_agent_config_path =
|
|
|
|
|
|
|
| 31 |
search_agent_config = load_config(search_agent_config_path)
|
| 32 |
|
| 33 |
# Extract GitHub MCP config from search agent config
|
|
@@ -122,7 +125,8 @@ async def main():
|
|
| 122 |
# Example queries to test
|
| 123 |
test_queries = [
|
| 124 |
# "Explore the TRL documentation structure and find information about DPO trainer",
|
| 125 |
-
"is there a way to get the logs from a served huggingface space",
|
|
|
|
| 126 |
# "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
|
| 127 |
# "can i stream logs through the api for a served huggingface space",
|
| 128 |
# 'what tools do you have access to?',
|
|
|
|
| 21 |
|
| 22 |
# Import at runtime
|
| 23 |
from pathlib import Path
|
| 24 |
+
|
| 25 |
from agent.config import load_config
|
| 26 |
|
| 27 |
# Create event queue for the sub-agent
|
| 28 |
sub_event_queue = asyncio.Queue()
|
| 29 |
|
| 30 |
# Load the search agent's own config file with GitHub MCP server
|
| 31 |
+
search_agent_config_path = (
|
| 32 |
+
Path(__file__).parent / "configs" / "_subagent_config_search_agent.json"
|
| 33 |
+
)
|
| 34 |
search_agent_config = load_config(search_agent_config_path)
|
| 35 |
|
| 36 |
# Extract GitHub MCP config from search agent config
|
|
|
|
| 125 |
# Example queries to test
|
| 126 |
test_queries = [
|
| 127 |
# "Explore the TRL documentation structure and find information about DPO trainer",
|
| 128 |
+
# "is there a way to get the logs from a served huggingface space",
|
| 129 |
+
"""use exactly this call {\"tool_name\": \"search_hf_docs\", \"arguments\": {\"query\": \"vLLM offline batch inference Hugging Face models\"}}""",
|
| 130 |
# "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
|
| 131 |
# "can i stream logs through the api for a served huggingface space",
|
| 132 |
# 'what tools do you have access to?',
|