akseljoonas HF Staff commited on
Commit
1598bb4
·
1 Parent(s): b63e2df

Reworked job tool descriptions and implemented batch processing for hf_jobs tool calls in the CLI

Browse files
agent/core/agent_loop.py CHANGED
@@ -103,32 +103,23 @@ class Handlers:
103
  Event(event_type="assistant_message", data={"content": content})
104
  )
105
 
106
- # Execute tools
 
 
 
107
  for tc in tool_calls:
108
  tool_name = tc.function.name
109
  tool_args = json.loads(tc.function.arguments)
110
 
111
- # Check if this tool requires user approval
112
  if _needs_approval(tool_name, tool_args):
113
- await session.send_event(
114
- Event(
115
- event_type="approval_required",
116
- data={
117
- "tool": tool_name,
118
- "arguments": tool_args,
119
- "tool_call_id": tc.id,
120
- },
121
- )
122
- )
123
-
124
- # Store pending approval and return early
125
- session.pending_approval = {
126
- "tool_call": tc,
127
- "arguments": tool_args,
128
- }
129
 
130
- # Return early - wait for EXEC_APPROVAL operation
131
- return None
 
 
132
 
133
  await session.send_event(
134
  Event(
@@ -161,6 +152,37 @@ class Handlers:
161
  )
162
  )
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  iteration += 1
165
 
166
  except Exception as e:
@@ -225,10 +247,8 @@ class Handlers:
225
  await session.send_event(Event(event_type="undo_complete"))
226
 
227
  @staticmethod
228
- async def exec_approval(
229
- session: Session, approved: bool, feedback: str | None = None
230
- ) -> None:
231
- """Handle job execution approval"""
232
  if not session.pending_approval:
233
  await session.send_event(
234
  Event(
@@ -238,12 +258,36 @@ class Handlers:
238
  )
239
  return
240
 
241
- tc = session.pending_approval["tool_call"]
242
- tool_args = session.pending_approval["arguments"]
243
- tool_name = tc.function.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- if approved:
246
- # Execute the pending tool
 
 
 
 
 
 
247
  await session.send_event(
248
  Event(
249
  event_type="tool_call",
@@ -251,36 +295,58 @@ class Handlers:
251
  )
252
  )
253
 
254
- output, success = await session.tool_router.call_tool(tool_name, tool_args)
 
 
255
 
256
- # Add tool result to context
257
- tool_msg = Message(
258
- role="tool",
259
- content=output,
260
- tool_call_id=tc.id,
261
- name=tool_name,
 
262
  )
263
- session.context_manager.add_message(tool_msg)
264
 
265
- await session.send_event(
266
- Event(
267
- event_type="tool_output",
268
- data={
269
- "tool": tool_name,
270
- "output": output,
271
- "success": success,
272
- },
 
 
 
 
 
 
 
273
  )
274
- )
275
- else:
276
- # User rejected - add cancellation message to context
277
- cancellation_msg = "Job execution cancelled by user"
278
- if feedback:
279
- cancellation_msg += f". User feedback: {feedback}"
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
  tool_msg = Message(
282
  role="tool",
283
- content=cancellation_msg,
284
  tool_call_id=tc.id,
285
  name=tool_name,
286
  )
@@ -291,7 +357,7 @@ class Handlers:
291
  event_type="tool_output",
292
  data={
293
  "tool": tool_name,
294
- "output": cancellation_msg,
295
  "success": False,
296
  },
297
  )
@@ -300,7 +366,7 @@ class Handlers:
300
  # Clear pending approval
301
  session.pending_approval = None
302
 
303
- # Continue agent loop with empty input to process the tool result
304
  await Handlers.run_agent(session, "")
305
 
306
  @staticmethod
@@ -339,9 +405,8 @@ async def process_submission(session: Session, submission) -> bool:
339
  return True
340
 
341
  if op.op_type == OpType.EXEC_APPROVAL:
342
- approved = op.data.get("approved", False) if op.data else False
343
- feedback = op.data.get("feedback") if op.data else None
344
- await Handlers.exec_approval(session, approved, feedback)
345
  return True
346
 
347
  if op.op_type == OpType.SHUTDOWN:
 
103
  Event(event_type="assistant_message", data={"content": content})
104
  )
105
 
106
+ # Separate tools into those requiring approval and those that don't
107
+ approval_required_tools = []
108
+ non_approval_tools = []
109
+
110
  for tc in tool_calls:
111
  tool_name = tc.function.name
112
  tool_args = json.loads(tc.function.arguments)
113
 
 
114
  if _needs_approval(tool_name, tool_args):
115
+ approval_required_tools.append(tc)
116
+ else:
117
+ non_approval_tools.append(tc)
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ # Execute non-approval tools first
120
+ for tc in non_approval_tools:
121
+ tool_name = tc.function.name
122
+ tool_args = json.loads(tc.function.arguments)
123
 
124
  await session.send_event(
125
  Event(
 
152
  )
153
  )
154
 
155
+ # If there are tools requiring approval, ask for batch approval
156
+ if approval_required_tools:
157
+ # Prepare batch approval data
158
+ tools_data = []
159
+ for tc in approval_required_tools:
160
+ tool_name = tc.function.name
161
+ tool_args = json.loads(tc.function.arguments)
162
+ tools_data.append({
163
+ "tool": tool_name,
164
+ "arguments": tool_args,
165
+ "tool_call_id": tc.id,
166
+ })
167
+
168
+ await session.send_event(
169
+ Event(
170
+ event_type="approval_required",
171
+ data={
172
+ "tools": tools_data, # Batch of tools
173
+ "count": len(tools_data),
174
+ },
175
+ )
176
+ )
177
+
178
+ # Store all approval-requiring tools
179
+ session.pending_approval = {
180
+ "tool_calls": approval_required_tools,
181
+ }
182
+
183
+ # Return early - wait for EXEC_APPROVAL operation
184
+ return None
185
+
186
  iteration += 1
187
 
188
  except Exception as e:
 
247
  await session.send_event(Event(event_type="undo_complete"))
248
 
249
  @staticmethod
250
+ async def exec_approval(session: Session, approvals: list[dict]) -> None:
251
+ """Handle batch job execution approval"""
 
 
252
  if not session.pending_approval:
253
  await session.send_event(
254
  Event(
 
258
  )
259
  return
260
 
261
+ tool_calls = session.pending_approval.get("tool_calls", [])
262
+ if not tool_calls:
263
+ await session.send_event(
264
+ Event(
265
+ event_type="error",
266
+ data={"error": "No pending tool calls found"},
267
+ )
268
+ )
269
+ return
270
+
271
+ # Create a map of tool_call_id -> approval decision
272
+ approval_map = {a["tool_call_id"]: a for a in approvals}
273
+
274
+ # Separate approved and rejected tool calls
275
+ approved_tasks = []
276
+ rejected_tasks = []
277
+
278
+ for tc in tool_calls:
279
+ tool_name = tc.function.name
280
+ tool_args = json.loads(tc.function.arguments)
281
+ approval_decision = approval_map.get(tc.id, {"approved": False})
282
 
283
+ if approval_decision.get("approved", False):
284
+ approved_tasks.append((tc, tool_name, tool_args))
285
+ else:
286
+ rejected_tasks.append((tc, tool_name, approval_decision))
287
+
288
+ # Execute all approved tools concurrently
289
+ async def execute_tool(tc, tool_name, tool_args):
290
+ """Execute a single tool and return its result"""
291
  await session.send_event(
292
  Event(
293
  event_type="tool_call",
 
295
  )
296
  )
297
 
298
+ output, success = await session.tool_router.call_tool(
299
+ tool_name, tool_args
300
+ )
301
 
302
+ return (tc, tool_name, output, success)
303
+
304
+ # Execute all approved tools concurrently and wait for ALL to complete
305
+ if approved_tasks:
306
+ results = await asyncio.gather(
307
+ *[execute_tool(tc, tool_name, tool_args) for tc, tool_name, tool_args in approved_tasks],
308
+ return_exceptions=True
309
  )
 
310
 
311
+ # Process results and add to context
312
+ for result in results:
313
+ if isinstance(result, Exception):
314
+ # Handle execution error
315
+ print(f"Tool execution error: {result}")
316
+ continue
317
+
318
+ tc, tool_name, output, success = result
319
+
320
+ # Add tool result to context
321
+ tool_msg = Message(
322
+ role="tool",
323
+ content=output,
324
+ tool_call_id=tc.id,
325
+ name=tool_name,
326
  )
327
+ session.context_manager.add_message(tool_msg)
328
+
329
+ await session.send_event(
330
+ Event(
331
+ event_type="tool_output",
332
+ data={
333
+ "tool": tool_name,
334
+ "output": output,
335
+ "success": success,
336
+ },
337
+ )
338
+ )
339
+
340
+ # Process rejected tools
341
+ for tc, tool_name, approval_decision in rejected_tasks:
342
+ rejection_msg = "Job execution cancelled by user"
343
+ user_feedback = approval_decision.get("feedback")
344
+ if user_feedback:
345
+ rejection_msg += f". User feedback: {user_feedback}"
346
 
347
  tool_msg = Message(
348
  role="tool",
349
+ content=rejection_msg,
350
  tool_call_id=tc.id,
351
  name=tool_name,
352
  )
 
357
  event_type="tool_output",
358
  data={
359
  "tool": tool_name,
360
+ "output": rejection_msg,
361
  "success": False,
362
  },
363
  )
 
366
  # Clear pending approval
367
  session.pending_approval = None
368
 
369
+ # Continue agent loop with empty input to process the tool results
370
  await Handlers.run_agent(session, "")
371
 
372
  @staticmethod
 
405
  return True
406
 
407
  if op.op_type == OpType.EXEC_APPROVAL:
408
+ approvals = op.data.get("approvals", []) if op.data else []
409
+ await Handlers.exec_approval(session, approvals)
 
410
  return True
411
 
412
  if op.op_type == OpType.SHUTDOWN:
agent/main.py CHANGED
@@ -116,61 +116,101 @@ async def event_listener(
116
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
117
  print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
118
  elif event.event_type == "approval_required":
119
- # Display job details and prompt for approval
120
- tool_name = event.data.get("tool", "") if event.data else ""
121
- arguments = event.data.get("arguments", {}) if event.data else {}
122
 
123
- operation = arguments.get("operation", "")
124
- args = arguments.get("args", {})
125
-
126
- print(f"\nOperation: {operation}")
127
-
128
- if operation == "uv":
129
- script = args.get("script", "")
130
- dependencies = args.get("dependencies", [])
131
- print(f"Script to run:\n{script}")
132
- if dependencies:
133
- print(f"Dependencies: {', '.join(dependencies)}")
134
- elif operation == "run":
135
- image = args.get("image", "")
136
- command = args.get("command", "")
137
- print(f"Docker image: {image}")
138
- print(f"Command: {command}")
139
-
140
- # Common parameters
141
- flavor = args.get("flavor", "cpu-basic")
142
- detached = args.get("detached", False)
143
- print(f"Hardware: {flavor}")
144
- print(f"Detached mode: {detached}")
145
-
146
- secrets = args.get("secrets", [])
147
- if secrets:
148
- print(f"Secrets: {', '.join(secrets)}")
149
-
150
- # Get user decision
151
  print("\n" + format_separator())
152
- print(format_header("JOB EXECUTION APPROVAL REQUIRED"))
153
- print(format_separator())
154
- loop = asyncio.get_event_loop()
155
- response = await loop.run_in_executor(
156
- None,
157
- input,
158
- "Approve? (y=yes, n=no, or provide feedback to reject): ",
159
  )
 
160
 
161
- response = response.strip()
162
- approved = response.lower() in ["y", "yes"]
163
- feedback = (
164
- None if approved or response.lower() in ["n", "no"] else response
165
- )
166
 
167
- # Submit approval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  submission_id[0] += 1
169
  approval_submission = Submission(
170
  id=f"approval_{submission_id[0]}",
171
  operation=Operation(
172
  op_type=OpType.EXEC_APPROVAL,
173
- data={"approved": approved, "feedback": feedback},
174
  ),
175
  )
176
  await submission_queue.put(approval_submission)
 
116
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
117
  print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
118
  elif event.event_type == "approval_required":
119
+ # Handle batch approval format
120
+ tools_data = event.data.get("tools", []) if event.data else []
121
+ count = event.data.get("count", 0) if event.data else 0
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  print("\n" + format_separator())
124
+ print(
125
+ format_header(
126
+ f"JOB EXECUTION APPROVAL REQUIRED ({count} job{'s' if count != 1 else ''})"
127
+ )
 
 
 
128
  )
129
+ print(format_separator())
130
 
131
+ approvals = []
132
+ loop = asyncio.get_event_loop()
 
 
 
133
 
134
+ # Ask for approval for each tool
135
+ for i, tool_info in enumerate(tools_data, 1):
136
+ tool_name = tool_info.get("tool", "")
137
+ arguments = tool_info.get("arguments", {})
138
+ tool_call_id = tool_info.get("tool_call_id", "")
139
+
140
+ # Handle case where arguments might be a JSON string
141
+ if isinstance(arguments, str):
142
+ try:
143
+ arguments = json.loads(arguments)
144
+ except json.JSONDecodeError:
145
+ print(f"Warning: Failed to parse arguments for {tool_name}")
146
+ arguments = {}
147
+
148
+ operation = arguments.get("operation", "")
149
+ args = arguments.get("args", {})
150
+
151
+ # Handle case where args might be a JSON string
152
+ if isinstance(args, str):
153
+ try:
154
+ args = json.loads(args)
155
+ except json.JSONDecodeError:
156
+ print(f"Warning: Failed to parse args for {tool_name}")
157
+ args = {}
158
+
159
+ print(f"\n[Job {i}/{count}]")
160
+ print(f"Operation: {operation}")
161
+
162
+ if operation == "uv":
163
+ script = args.get("script", "")
164
+ dependencies = args.get("dependencies", [])
165
+ print("Script:\n" + script)
166
+ if dependencies:
167
+ print(f"Dependencies: {', '.join(dependencies)}")
168
+ elif operation == "run":
169
+ image = args.get("image", "")
170
+ command = args.get("command", "")
171
+ print(f"Docker image: {image}")
172
+ print(f"Command: {command}")
173
+
174
+ # Common parameters
175
+ flavor = args.get("flavor", "cpu-basic")
176
+ detached = args.get("detached", False)
177
+ print(f"Hardware: {flavor}")
178
+ print(f"Detached mode: {detached}")
179
+
180
+ secrets = args.get("secrets", [])
181
+ if secrets:
182
+ print(f"Secrets: {', '.join(secrets)}")
183
+
184
+ # Get user decision for this job
185
+ response = await loop.run_in_executor(
186
+ None,
187
+ input,
188
+ f"Approve job {i}? (y=yes, n=no, or provide feedback to reject): ",
189
+ )
190
+
191
+ response = response.strip()
192
+ approved = response.lower() in ["y", "yes"]
193
+ feedback = (
194
+ None
195
+ if approved or response.lower() in ["n", "no"]
196
+ else response
197
+ )
198
+
199
+ approvals.append(
200
+ {
201
+ "tool_call_id": tool_call_id,
202
+ "approved": approved,
203
+ "feedback": feedback,
204
+ }
205
+ )
206
+
207
+ # Submit batch approval
208
  submission_id[0] += 1
209
  approval_submission = Submission(
210
  id=f"approval_{submission_id[0]}",
211
  operation=Operation(
212
  op_type=OpType.EXEC_APPROVAL,
213
+ data={"approvals": approvals},
214
  ),
215
  )
216
  await submission_queue.put(approval_submission)
agent/tools/jobs_tool.py CHANGED
@@ -360,7 +360,7 @@ class HfJobsTool:
360
  self.api.run_job,
361
  image=image,
362
  command=command,
363
- env=_add_environment_variables(args.get("env")),
364
  secrets=_add_environment_variables(args.get("secrets")),
365
  flavor=args.get("hardware_flavor", "cpu-basic"),
366
  timeout=args.get("timeout", "30m"),
@@ -575,7 +575,7 @@ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}
575
  image=image,
576
  command=command,
577
  schedule=schedule,
578
- env=_add_environment_variables(args.get("env")),
579
  secrets=_add_environment_variables(args.get("secrets")),
580
  flavor=args.get("hardware_flavor", "cpu-basic"),
581
  timeout=args.get("timeout", "30m"),
@@ -735,41 +735,29 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_
735
  HF_JOBS_TOOL_SPEC = {
736
  "name": "hf_jobs",
737
  "description": (
738
- "Manage Hugging Face CPU/GPU compute jobs. Run Python scripts with UV or custom Docker commands. "
739
- "List, schedule and monitor jobs/logs.\n\n"
740
- "## Operations\n"
741
- "**Jobs:** run, ps, logs, inspect, cancel\n"
742
- "**Scheduled Jobs:** scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume\n\n"
743
- "## 'run' Operation Behavior\n"
744
- "The 'run' operation automatically detects what you want to do:\n"
745
- "- If 'script' provided → runs a Python script and auto installs dependencies in the env\n"
746
- "- If 'command' provided → runs a custom Docker command (full control)\n"
747
- "- 'script' and 'command' are MUTUALLY EXCLUSIVE - provide one or the other, not both\n\n"
748
- "## Available Hardware Flavors\n"
749
- "**CPU:** cpu-basic, cpu-upgrade, cpu-performance, cpu-xl\n"
750
- "**GPU:** t4-small, t4-medium, l4x1, l4x4, a10g-small, a10g-large, a10g-largex2, a10g-largex4, a100-large, h100, h100x8\n"
751
- "**Specialized:** inf2x6\n\n"
752
- "## Usage Examples\n"
753
- "**Run Python script with dependencies:**\n"
754
- "{'operation': 'run', 'script': 'import torch\\nprint(torch.cuda.is_available())', 'dependencies': ['torch', 'transformers'], 'hardware_flavor': 'a10g-small'}\n\n"
755
- "**Run Python with secrets:**\n"
756
- "{'operation': 'run', 'script': 'from huggingface_hub import HfApi\\napi = HfApi()\\nprint(api.whoami())', 'dependencies': ['huggingface-hub']}\n\n"
757
- "**Run custom Docker command:**\n"
758
- "{'operation': 'run', 'image': 'nvidia/cuda:12.0-base', 'command': ['nvidia-smi']}\n\n"
759
- "**List running jobs:**\n"
760
- "{'operation': 'ps'}\n\n"
761
- "**Get job logs:**\n"
762
- "{'operation': 'logs', 'job_id': 'xxx'}\n\n"
763
- "**Cancel job:**\n"
764
- "{'operation': 'cancel', 'job_id': 'xxx'}\n\n"
765
- "**Schedule daily Python job:**\n"
766
- "{'operation': 'scheduled run', 'script': 'print(\"daily task\")', 'schedule': '@daily'}\n\n"
767
- "## Important Notes\n"
768
- "- **CRITICAL: Job files are EPHEMERAL** - ALL files created in HF Jobs (trained models, datasets, outputs, completions etc.) are DELETED when the job completes. You MUST upload any outputs to HF Hub in the script itself (using model.push_to_hub() when training models, dataset.push_to_hub() when creating text based outputs, etc.)."
769
- "- Always pass full script content - no local files available on server\n"
770
- "- Use array format for commands: ['/bin/sh', '-lc', 'cmd'] for shell features\n"
771
- "- hf-transfer is auto-included in uv jobs for faster downloads\n"
772
- "- **Remember to upload outputs to Hub before job finishes!**"
773
  ),
774
  "parameters": {
775
  "type": "object",
@@ -798,90 +786,49 @@ HF_JOBS_TOOL_SPEC = {
798
  # Python/UV specific parameters
799
  "script": {
800
  "type": "string",
801
- "description": (
802
- "Python code to execute. Can be inline code or a raw GitHub URL. "
803
- "Auto-uses UV image and builds UV command. "
804
- "USED with: 'run', 'scheduled run' (triggers Python mode). "
805
- "MUTUALLY EXCLUSIVE with 'command'. "
806
- "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
807
- ),
808
  },
809
  "dependencies": {
810
  "type": "array",
811
  "items": {"type": "string"},
812
- "description": (
813
- "List of pip packages to install. Example: ['torch', 'transformers']. "
814
- "Only used when 'script' is provided. "
815
- "USED with: 'run', 'scheduled run' (optional, only with script). "
816
- "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
817
- ),
818
  },
819
  # Docker specific parameters
820
  "image": {
821
  "type": "string",
822
- "description": (
823
- "Docker image to use. Default: UV image if 'script' provided, else 'python:3.12'. "
824
- "Can override the default UV image when using 'script'. "
825
- "USED with: 'run', 'scheduled run' (optional). "
826
- "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
827
- ),
828
  },
829
  "command": {
830
  "type": "array",
831
  "items": {"type": "string"},
832
- "description": (
833
- "Command to execute as array. Example: ['python', '-c', 'print(42)']. "
834
- "Use this for full Docker control. "
835
- "USED with: 'run', 'scheduled run' (triggers Docker mode). "
836
- "MUTUALLY EXCLUSIVE with 'script'. "
837
- "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
838
- ),
839
  },
840
  # Hardware and environment
841
  "hardware_flavor": {
842
  "type": "string",
843
- "description": (
844
- "Hardware flavor. CPU: cpu-basic, cpu-upgrade, cpu-performance, cpu-xl. "
845
- "GPU: t4-small, t4-medium, l4x1, l4x4, a10g-small, a10g-large, a10g-largex2, a10g-largex4, a100-large, h100, h100x8. "
846
- "Default: cpu-basic. "
847
- "USED with: 'run', 'uv', 'scheduled run', 'scheduled uv' (optional). "
848
- "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
849
- ),
850
  },
851
  "secrets": {
852
  "type": "object",
853
- "description": (
854
- "Secret environment variables. Format: {'KEY': 'VALUE'}. HF_TOKEN is loaded automatically. "
855
- "USED with: 'run', 'uv', 'scheduled run', 'scheduled uv' (optional). "
856
- "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
857
- ),
858
  },
859
  # Job management parameters
860
  "job_id": {
861
  "type": "string",
862
- "description": (
863
- "Job ID to operate on. "
864
- "REQUIRED for: 'logs', 'inspect', 'cancel'. "
865
- "NOT USED with: 'run', 'uv', 'ps', 'scheduled run/uv/ps/inspect/delete/suspend/resume'."
866
- ),
867
  },
868
  # Scheduled job parameters
869
  "scheduled_job_id": {
870
  "type": "string",
871
- "description": (
872
- "Scheduled job ID to operate on. "
873
- "REQUIRED for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'. "
874
- "NOT USED with: 'run', 'uv', 'ps', 'logs', 'inspect', 'cancel', 'scheduled run', 'scheduled uv', 'scheduled ps'."
875
- ),
876
  },
877
  "schedule": {
878
  "type": "string",
879
- "description": (
880
- "Cron schedule or preset. Presets: '@hourly', '@daily', '@weekly', '@monthly', '@yearly'. "
881
- "Cron example: '0 9 * * 1' (9 AM every Monday). "
882
- "REQUIRED for: 'scheduled run', 'scheduled uv'. "
883
- "NOT USED with: 'run', 'uv', 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
884
- ),
885
  },
886
  },
887
  "required": ["operation"],
 
360
  self.api.run_job,
361
  image=image,
362
  command=command,
363
+ env=args.get("env"),
364
  secrets=_add_environment_variables(args.get("secrets")),
365
  flavor=args.get("hardware_flavor", "cpu-basic"),
366
  timeout=args.get("timeout", "30m"),
 
575
  image=image,
576
  command=command,
577
  schedule=schedule,
578
+ env=args.get("env"),
579
  secrets=_add_environment_variables(args.get("secrets")),
580
  flavor=args.get("hardware_flavor", "cpu-basic"),
581
  timeout=args.get("timeout", "30m"),
 
735
  HF_JOBS_TOOL_SPEC = {
736
  "name": "hf_jobs",
737
  "description": (
738
+ "Run Python scripts or Docker containers on HF cloud GPUs/CPUs.\n\n"
739
+ "## Operations:\n"
740
+ "run, ps, logs, inspect, cancel, scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume\n\n"
741
+ "## Two modes:\n"
742
+ "1. **Python mode:** Provide 'script' + 'dependencies' auto-handles pip install\n"
743
+ "2. **Docker mode:** Provide 'image' + 'command' → full control\n"
744
+ "(script and command are mutually exclusive)\n\n"
745
+ "## Hardware:\n"
746
+ "CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl\n"
747
+ "GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100\n\n"
748
+ "## Examples:\n\n"
749
+ "**Fine-tune LLM and push to Hub:**\n"
750
+ "{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"gpt2\")\\n# ... training code ...\\nmodel.push_to_hub(\"my-finetuned-model\")', 'dependencies': ['transformers', 'torch', 'datasets'], 'hardware_flavor': 'a10g-large', 'timeout': '4h', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}\n\n"
751
+ "**Generate dataset daily and upload:**\n"
752
+ "{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}\n\n"
753
+ "**Run custom training with Docker:**\n"
754
+ "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
755
+ "**Monitor jobs:**\n"
756
+ "{'operation': 'ps'} - list running\n"
757
+ "{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
758
+ "{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
759
+ "## CRITICAL: Files are ephemeral!\n"
760
+ "Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script."
 
 
 
 
 
 
 
 
 
 
 
 
761
  ),
762
  "parameters": {
763
  "type": "object",
 
786
  # Python/UV specific parameters
787
  "script": {
788
  "type": "string",
789
+ "description": "Python code to execute. Triggers Python mode (auto pip install). Use with 'run'/'scheduled run'. Mutually exclusive with 'command'.",
 
 
 
 
 
 
790
  },
791
  "dependencies": {
792
  "type": "array",
793
  "items": {"type": "string"},
794
+ "description": "Pip packages to install. Example: ['trl', 'torch', 'datasets', 'transformers']. Only used with 'script'.",
 
 
 
 
 
795
  },
796
  # Docker specific parameters
797
  "image": {
798
  "type": "string",
799
+ "description": "Docker image. Example: 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime'. Use with 'run'/'scheduled run'. Optional (auto-selected if not provided).",
 
 
 
 
 
800
  },
801
  "command": {
802
  "type": "array",
803
  "items": {"type": "string"},
804
+ "description": "Command to execute as list. Example: ['python', 'train.py', '--epochs', '10']. Triggers Docker mode. Use with 'run'/'scheduled run'. Mutually exclusive with 'script'.",
 
 
 
 
 
 
805
  },
806
  # Hardware and environment
807
  "hardware_flavor": {
808
  "type": "string",
809
+ "description": "Hardware type. CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl. GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100. Use with 'run'/'scheduled run'.",
810
+ },
811
+ "timeout": {
812
+ "type": "string",
813
+ "description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
 
 
814
  },
815
  "secrets": {
816
  "type": "object",
817
+ "description": "Environment variables (private). Format: {'KEY': 'VALUE'}. Use {'HF_TOKEN': '$HF_TOKEN'} for Hub auth. Use with 'run'/'scheduled run'.",
 
 
 
 
818
  },
819
  # Job management parameters
820
  "job_id": {
821
  "type": "string",
822
+ "description": "Job ID to operate on. Required for: 'logs', 'inspect', 'cancel'.",
 
 
 
 
823
  },
824
  # Scheduled job parameters
825
  "scheduled_job_id": {
826
  "type": "string",
827
+ "description": "Scheduled job ID. Required for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'.",
 
 
 
 
828
  },
829
  "schedule": {
830
  "type": "string",
831
+ "description": "Schedule for recurring job. Presets: '@hourly', '@daily', '@weekly', '@monthly'. Cron: '0 9 * * 1' (Mon 9am). Required for: 'scheduled run'.",
 
 
 
 
 
832
  },
833
  },
834
  "required": ["operation"],
run_search_agent.py CHANGED
@@ -21,13 +21,16 @@ async def test_search_agent(query: str):
21
 
22
  # Import at runtime
23
  from pathlib import Path
 
24
  from agent.config import load_config
25
 
26
  # Create event queue for the sub-agent
27
  sub_event_queue = asyncio.Queue()
28
 
29
  # Load the search agent's own config file with GitHub MCP server
30
- search_agent_config_path = Path(__file__).parent / "configs" / "_subagent_config_search_agent.json"
 
 
31
  search_agent_config = load_config(search_agent_config_path)
32
 
33
  # Extract GitHub MCP config from search agent config
@@ -122,7 +125,8 @@ async def main():
122
  # Example queries to test
123
  test_queries = [
124
  # "Explore the TRL documentation structure and find information about DPO trainer",
125
- "is there a way to get the logs from a served huggingface space",
 
126
  # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
127
  # "can i stream logs through the api for a served huggingface space",
128
  # 'what tools do you have access to?',
 
21
 
22
  # Import at runtime
23
  from pathlib import Path
24
+
25
  from agent.config import load_config
26
 
27
  # Create event queue for the sub-agent
28
  sub_event_queue = asyncio.Queue()
29
 
30
  # Load the search agent's own config file with GitHub MCP server
31
+ search_agent_config_path = (
32
+ Path(__file__).parent / "configs" / "_subagent_config_search_agent.json"
33
+ )
34
  search_agent_config = load_config(search_agent_config_path)
35
 
36
  # Extract GitHub MCP config from search agent config
 
125
  # Example queries to test
126
  test_queries = [
127
  # "Explore the TRL documentation structure and find information about DPO trainer",
128
+ # "is there a way to get the logs from a served huggingface space",
129
+ """use exactly this call {\"tool_name\": \"search_hf_docs\", \"arguments\": {\"query\": \"vLLM offline batch inference Hugging Face models\"}}""",
130
  # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
131
  # "can i stream logs through the api for a served huggingface space",
132
  # 'what tools do you have access to?',