Spaces:

smolagents
/

ml-agent

Running

App Files Files Community

akseljoonas HF Staff committed on Jan 3

Commit

1598bb4

1 Parent(s): b63e2df

reworked job tool descriptions + implemented batch processing for hf_job tool calls in cli

Browse files

Files changed (4) hide show

agent/core/agent_loop.py +122 -57
agent/main.py +85 -45
agent/tools/jobs_tool.py +38 -91
run_search_agent.py +6 -2

agent/core/agent_loop.py CHANGED Viewed

@@ -103,32 +103,23 @@ class Handlers:
                         Event(event_type="assistant_message", data={"content": content})
                     )
-                # Execute tools
                 for tc in tool_calls:
                     tool_name = tc.function.name
                     tool_args = json.loads(tc.function.arguments)
-                    # Check if this tool requires user approval
                     if _needs_approval(tool_name, tool_args):
-                        await session.send_event(
-                            Event(
-                                event_type="approval_required",
-                                data={
-                                    "tool": tool_name,
-                                    "arguments": tool_args,
-                                    "tool_call_id": tc.id,
-                                },
-                            )
-                        )
-                        # Store pending approval and return early
-                        session.pending_approval = {
-                            "tool_call": tc,
-                            "arguments": tool_args,
-                        }
-                        # Return early - wait for EXEC_APPROVAL operation
-                        return None
                     await session.send_event(
                         Event(
@@ -161,6 +152,37 @@ class Handlers:
                         )
                     )
                 iteration += 1
             except Exception as e:
@@ -225,10 +247,8 @@ class Handlers:
         await session.send_event(Event(event_type="undo_complete"))
     @staticmethod
-    async def exec_approval(
-        session: Session, approved: bool, feedback: str | None = None
-    ) -> None:
-        """Handle job execution approval"""
         if not session.pending_approval:
             await session.send_event(
                 Event(
@@ -238,12 +258,36 @@ class Handlers:
             )
             return
-        tc = session.pending_approval["tool_call"]
-        tool_args = session.pending_approval["arguments"]
-        tool_name = tc.function.name
-        if approved:
-            # Execute the pending tool
             await session.send_event(
                 Event(
                     event_type="tool_call",
@@ -251,36 +295,58 @@ class Handlers:
                 )
             )
-            output, success = await session.tool_router.call_tool(tool_name, tool_args)
-            # Add tool result to context
-            tool_msg = Message(
-                role="tool",
-                content=output,
-                tool_call_id=tc.id,
-                name=tool_name,
             )
-            session.context_manager.add_message(tool_msg)
-            await session.send_event(
-                Event(
-                    event_type="tool_output",
-                    data={
-                        "tool": tool_name,
-                        "output": output,
-                        "success": success,
-                    },
                 )
-            )
-        else:
-            # User rejected - add cancellation message to context
-            cancellation_msg = "Job execution cancelled by user"
-            if feedback:
-                cancellation_msg += f". User feedback: {feedback}"
             tool_msg = Message(
                 role="tool",
-                content=cancellation_msg,
                 tool_call_id=tc.id,
                 name=tool_name,
             )
@@ -291,7 +357,7 @@ class Handlers:
                     event_type="tool_output",
                     data={
                         "tool": tool_name,
-                        "output": cancellation_msg,
                         "success": False,
                     },
                 )
@@ -300,7 +366,7 @@ class Handlers:
         # Clear pending approval
         session.pending_approval = None
-        # Continue agent loop with empty input to process the tool result
         await Handlers.run_agent(session, "")
     @staticmethod
@@ -339,9 +405,8 @@ async def process_submission(session: Session, submission) -> bool:
         return True
     if op.op_type == OpType.EXEC_APPROVAL:
-        approved = op.data.get("approved", False) if op.data else False
-        feedback = op.data.get("feedback") if op.data else None
-        await Handlers.exec_approval(session, approved, feedback)
         return True
     if op.op_type == OpType.SHUTDOWN:

                         Event(event_type="assistant_message", data={"content": content})
                     )
+                # Separate tools into those requiring approval and those that don't
+                approval_required_tools = []
+                non_approval_tools = []
                 for tc in tool_calls:
                     tool_name = tc.function.name
                     tool_args = json.loads(tc.function.arguments)
                     if _needs_approval(tool_name, tool_args):
+                        approval_required_tools.append(tc)
+                    else:
+                        non_approval_tools.append(tc)
+                # Execute non-approval tools first
+                for tc in non_approval_tools:
+                    tool_name = tc.function.name
+                    tool_args = json.loads(tc.function.arguments)
                     await session.send_event(
                         Event(
                         )
                     )
+                # If there are tools requiring approval, ask for batch approval
+                if approval_required_tools:
+                    # Prepare batch approval data
+                    tools_data = []
+                    for tc in approval_required_tools:
+                        tool_name = tc.function.name
+                        tool_args = json.loads(tc.function.arguments)
+                        tools_data.append({
+                            "tool": tool_name,
+                            "arguments": tool_args,
+                            "tool_call_id": tc.id,
+                        })
+                    await session.send_event(
+                        Event(
+                            event_type="approval_required",
+                            data={
+                                "tools": tools_data,  # Batch of tools
+                                "count": len(tools_data),
+                            },
+                        )
+                    )
+                    # Store all approval-requiring tools
+                    session.pending_approval = {
+                        "tool_calls": approval_required_tools,
+                    }
+                    # Return early - wait for EXEC_APPROVAL operation
+                    return None
                 iteration += 1
             except Exception as e:
         await session.send_event(Event(event_type="undo_complete"))
     @staticmethod
+    async def exec_approval(session: Session, approvals: list[dict]) -> None:
+        """Handle batch job execution approval"""
         if not session.pending_approval:
             await session.send_event(
                 Event(
             )
             return
+        tool_calls = session.pending_approval.get("tool_calls", [])
+        if not tool_calls:
+            await session.send_event(
+                Event(
+                    event_type="error",
+                    data={"error": "No pending tool calls found"},
+                )
+            )
+            return
+        # Create a map of tool_call_id -> approval decision
+        approval_map = {a["tool_call_id"]: a for a in approvals}
+        # Separate approved and rejected tool calls
+        approved_tasks = []
+        rejected_tasks = []
+        for tc in tool_calls:
+            tool_name = tc.function.name
+            tool_args = json.loads(tc.function.arguments)
+            approval_decision = approval_map.get(tc.id, {"approved": False})
+            if approval_decision.get("approved", False):
+                approved_tasks.append((tc, tool_name, tool_args))
+            else:
+                rejected_tasks.append((tc, tool_name, approval_decision))
+        # Execute all approved tools concurrently
+        async def execute_tool(tc, tool_name, tool_args):
+            """Execute a single tool and return its result"""
             await session.send_event(
                 Event(
                     event_type="tool_call",
                 )
             )
+            output, success = await session.tool_router.call_tool(
+                tool_name, tool_args
+            )
+            return (tc, tool_name, output, success)
+        # Execute all approved tools concurrently and wait for ALL to complete
+        if approved_tasks:
+            results = await asyncio.gather(
+                *[execute_tool(tc, tool_name, tool_args) for tc, tool_name, tool_args in approved_tasks],
+                return_exceptions=True
             )
+            # Process results and add to context
+            for result in results:
+                if isinstance(result, Exception):
+                    # Handle execution error
+                    print(f"Tool execution error: {result}")
+                    continue
+                tc, tool_name, output, success = result
+                # Add tool result to context
+                tool_msg = Message(
+                    role="tool",
+                    content=output,
+                    tool_call_id=tc.id,
+                    name=tool_name,
                 )
+                session.context_manager.add_message(tool_msg)
+                await session.send_event(
+                    Event(
+                        event_type="tool_output",
+                        data={
+                            "tool": tool_name,
+                            "output": output,
+                            "success": success,
+                        },
+                    )
+                )
+        # Process rejected tools
+        for tc, tool_name, approval_decision in rejected_tasks:
+            rejection_msg = "Job execution cancelled by user"
+            user_feedback = approval_decision.get("feedback")
+            if user_feedback:
+                rejection_msg += f". User feedback: {user_feedback}"
             tool_msg = Message(
                 role="tool",
+                content=rejection_msg,
                 tool_call_id=tc.id,
                 name=tool_name,
             )
                     event_type="tool_output",
                     data={
                         "tool": tool_name,
+                        "output": rejection_msg,
                         "success": False,
                     },
                 )
         # Clear pending approval
         session.pending_approval = None
+        # Continue agent loop with empty input to process the tool results
         await Handlers.run_agent(session, "")
     @staticmethod
         return True
     if op.op_type == OpType.EXEC_APPROVAL:
+        approvals = op.data.get("approvals", []) if op.data else []
+        await Handlers.exec_approval(session, approvals)
         return True
     if op.op_type == OpType.SHUTDOWN:

agent/main.py CHANGED Viewed

@@ -116,61 +116,101 @@ async def event_listener(
                 new_tokens = event.data.get("new_tokens", 0) if event.data else 0
                 print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
             elif event.event_type == "approval_required":
-                # Display job details and prompt for approval
-                tool_name = event.data.get("tool", "") if event.data else ""
-                arguments = event.data.get("arguments", {}) if event.data else {}
-                operation = arguments.get("operation", "")
-                args = arguments.get("args", {})
-                print(f"\nOperation: {operation}")
-                if operation == "uv":
-                    script = args.get("script", "")
-                    dependencies = args.get("dependencies", [])
-                    print(f"Script to run:\n{script}")
-                    if dependencies:
-                        print(f"Dependencies: {', '.join(dependencies)}")
-                elif operation == "run":
-                    image = args.get("image", "")
-                    command = args.get("command", "")
-                    print(f"Docker image: {image}")
-                    print(f"Command: {command}")
-                # Common parameters
-                flavor = args.get("flavor", "cpu-basic")
-                detached = args.get("detached", False)
-                print(f"Hardware: {flavor}")
-                print(f"Detached mode: {detached}")
-                secrets = args.get("secrets", [])
-                if secrets:
-                    print(f"Secrets: {', '.join(secrets)}")
-                # Get user decision
                 print("\n" + format_separator())
-                print(format_header("JOB EXECUTION APPROVAL REQUIRED"))
-                print(format_separator())
-                loop = asyncio.get_event_loop()
-                response = await loop.run_in_executor(
-                    None,
-                    input,
-                    "Approve? (y=yes, n=no, or provide feedback to reject): ",
                 )
-                response = response.strip()
-                approved = response.lower() in ["y", "yes"]
-                feedback = (
-                    None if approved or response.lower() in ["n", "no"] else response
-                )
-                # Submit approval
                 submission_id[0] += 1
                 approval_submission = Submission(
                     id=f"approval_{submission_id[0]}",
                     operation=Operation(
                         op_type=OpType.EXEC_APPROVAL,
-                        data={"approved": approved, "feedback": feedback},
                     ),
                 )
                 await submission_queue.put(approval_submission)

                 new_tokens = event.data.get("new_tokens", 0) if event.data else 0
                 print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
             elif event.event_type == "approval_required":
+                # Handle batch approval format
+                tools_data = event.data.get("tools", []) if event.data else []
+                count = event.data.get("count", 0) if event.data else 0
                 print("\n" + format_separator())
+                print(
+                    format_header(
+                        f"JOB EXECUTION APPROVAL REQUIRED ({count} job{'s' if count != 1 else ''})"
+                    )
                 )
+                print(format_separator())
+                approvals = []
+                loop = asyncio.get_event_loop()
+                # Ask for approval for each tool
+                for i, tool_info in enumerate(tools_data, 1):
+                    tool_name = tool_info.get("tool", "")
+                    arguments = tool_info.get("arguments", {})
+                    tool_call_id = tool_info.get("tool_call_id", "")
+                    # Handle case where arguments might be a JSON string
+                    if isinstance(arguments, str):
+                        try:
+                            arguments = json.loads(arguments)
+                        except json.JSONDecodeError:
+                            print(f"Warning: Failed to parse arguments for {tool_name}")
+                            arguments = {}
+                    operation = arguments.get("operation", "")
+                    args = arguments.get("args", {})
+                    # Handle case where args might be a JSON string
+                    if isinstance(args, str):
+                        try:
+                            args = json.loads(args)
+                        except json.JSONDecodeError:
+                            print(f"Warning: Failed to parse args for {tool_name}")
+                            args = {}
+                    print(f"\n[Job {i}/{count}]")
+                    print(f"Operation: {operation}")
+                    if operation == "uv":
+                        script = args.get("script", "")
+                        dependencies = args.get("dependencies", [])
+                        print("Script:\n" + script)
+                        if dependencies:
+                            print(f"Dependencies: {', '.join(dependencies)}")
+                    elif operation == "run":
+                        image = args.get("image", "")
+                        command = args.get("command", "")
+                        print(f"Docker image: {image}")
+                        print(f"Command: {command}")
+                    # Common parameters
+                    flavor = args.get("flavor", "cpu-basic")
+                    detached = args.get("detached", False)
+                    print(f"Hardware: {flavor}")
+                    print(f"Detached mode: {detached}")
+                    secrets = args.get("secrets", [])
+                    if secrets:
+                        print(f"Secrets: {', '.join(secrets)}")
+                    # Get user decision for this job
+                    response = await loop.run_in_executor(
+                        None,
+                        input,
+                        f"Approve job {i}? (y=yes, n=no, or provide feedback to reject): ",
+                    )
+                    response = response.strip()
+                    approved = response.lower() in ["y", "yes"]
+                    feedback = (
+                        None
+                        if approved or response.lower() in ["n", "no"]
+                        else response
+                    )
+                    approvals.append(
+                        {
+                            "tool_call_id": tool_call_id,
+                            "approved": approved,
+                            "feedback": feedback,
+                        }
+                    )
+                # Submit batch approval
                 submission_id[0] += 1
                 approval_submission = Submission(
                     id=f"approval_{submission_id[0]}",
                     operation=Operation(
                         op_type=OpType.EXEC_APPROVAL,
+                        data={"approvals": approvals},
                     ),
                 )
                 await submission_queue.put(approval_submission)

agent/tools/jobs_tool.py CHANGED Viewed

@@ -360,7 +360,7 @@ class HfJobsTool:
                 self.api.run_job,
                 image=image,
                 command=command,
-                env=_add_environment_variables(args.get("env")),
                 secrets=_add_environment_variables(args.get("secrets")),
                 flavor=args.get("hardware_flavor", "cpu-basic"),
                 timeout=args.get("timeout", "30m"),
@@ -575,7 +575,7 @@ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}
                 image=image,
                 command=command,
                 schedule=schedule,
-                env=_add_environment_variables(args.get("env")),
                 secrets=_add_environment_variables(args.get("secrets")),
                 flavor=args.get("hardware_flavor", "cpu-basic"),
                 timeout=args.get("timeout", "30m"),
@@ -735,41 +735,29 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_
 HF_JOBS_TOOL_SPEC = {
     "name": "hf_jobs",
     "description": (
-        "Manage Hugging Face CPU/GPU compute jobs. Run Python scripts with UV or custom Docker commands. "
-        "List, schedule and monitor jobs/logs.\n\n"
-        "## Operations\n"
-        "**Jobs:** run, ps, logs, inspect, cancel\n"
-        "**Scheduled Jobs:** scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume\n\n"
-        "## 'run' Operation Behavior\n"
-        "The 'run' operation automatically detects what you want to do:\n"
-        "- If 'script' provided → runs a Python script and auto installs dependencies in the env\n"
-        "- If 'command' provided → runs a custom Docker command (full control)\n"
-        "- 'script' and 'command' are MUTUALLY EXCLUSIVE - provide one or the other, not both\n\n"
-        "## Available Hardware Flavors\n"
-        "**CPU:** cpu-basic, cpu-upgrade, cpu-performance, cpu-xl\n"
-        "**GPU:** t4-small, t4-medium, l4x1, l4x4, a10g-small, a10g-large, a10g-largex2, a10g-largex4, a100-large, h100, h100x8\n"
-        "**Specialized:** inf2x6\n\n"
-        "## Usage Examples\n"
-        "**Run Python script with dependencies:**\n"
-        "{'operation': 'run', 'script': 'import torch\\nprint(torch.cuda.is_available())', 'dependencies': ['torch', 'transformers'], 'hardware_flavor': 'a10g-small'}\n\n"
-        "**Run Python with secrets:**\n"
-        "{'operation': 'run', 'script': 'from huggingface_hub import HfApi\\napi = HfApi()\\nprint(api.whoami())', 'dependencies': ['huggingface-hub']}\n\n"
-        "**Run custom Docker command:**\n"
-        "{'operation': 'run', 'image': 'nvidia/cuda:12.0-base', 'command': ['nvidia-smi']}\n\n"
-        "**List running jobs:**\n"
-        "{'operation': 'ps'}\n\n"
-        "**Get job logs:**\n"
-        "{'operation': 'logs', 'job_id': 'xxx'}\n\n"
-        "**Cancel job:**\n"
-        "{'operation': 'cancel', 'job_id': 'xxx'}\n\n"
-        "**Schedule daily Python job:**\n"
-        "{'operation': 'scheduled run', 'script': 'print(\"daily task\")', 'schedule': '@daily'}\n\n"
-        "## Important Notes\n"
-        "- **CRITICAL: Job files are EPHEMERAL** - ALL files created in HF Jobs (trained models, datasets, outputs, completions etc.) are DELETED when the job completes. You MUST upload any outputs to HF Hub in the script itself (using model.push_to_hub() when training models, dataset.push_to_hub() when creating text based outputs, etc.)."
-        "- Always pass full script content - no local files available on server\n"
-        "- Use array format for commands: ['/bin/sh', '-lc', 'cmd'] for shell features\n"
-        "- hf-transfer is auto-included in uv jobs for faster downloads\n"
-        "- **Remember to upload outputs to Hub before job finishes!**"
     ),
     "parameters": {
         "type": "object",
@@ -798,90 +786,49 @@ HF_JOBS_TOOL_SPEC = {
             # Python/UV specific parameters
             "script": {
                 "type": "string",
-                "description": (
-                    "Python code to execute. Can be inline code or a raw GitHub URL. "
-                    "Auto-uses UV image and builds UV command. "
-                    "USED with: 'run', 'scheduled run' (triggers Python mode). "
-                    "MUTUALLY EXCLUSIVE with 'command'. "
-                    "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
-                ),
             },
             "dependencies": {
                 "type": "array",
                 "items": {"type": "string"},
-                "description": (
-                    "List of pip packages to install. Example: ['torch', 'transformers']. "
-                    "Only used when 'script' is provided. "
-                    "USED with: 'run', 'scheduled run' (optional, only with script). "
-                    "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
-                ),
             },
             # Docker specific parameters
             "image": {
                 "type": "string",
-                "description": (
-                    "Docker image to use. Default: UV image if 'script' provided, else 'python:3.12'. "
-                    "Can override the default UV image when using 'script'. "
-                    "USED with: 'run', 'scheduled run' (optional). "
-                    "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
-                ),
             },
             "command": {
                 "type": "array",
                 "items": {"type": "string"},
-                "description": (
-                    "Command to execute as array. Example: ['python', '-c', 'print(42)']. "
-                    "Use this for full Docker control. "
-                    "USED with: 'run', 'scheduled run' (triggers Docker mode). "
-                    "MUTUALLY EXCLUSIVE with 'script'. "
-                    "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
-                ),
             },
             # Hardware and environment
             "hardware_flavor": {
                 "type": "string",
-                "description": (
-                    "Hardware flavor. CPU: cpu-basic, cpu-upgrade, cpu-performance, cpu-xl. "
-                    "GPU: t4-small, t4-medium, l4x1, l4x4, a10g-small, a10g-large, a10g-largex2, a10g-largex4, a100-large, h100, h100x8. "
-                    "Default: cpu-basic. "
-                    "USED with: 'run', 'uv', 'scheduled run', 'scheduled uv' (optional). "
-                    "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
-                ),
             },
             "secrets": {
                 "type": "object",
-                "description": (
-                    "Secret environment variables. Format: {'KEY': 'VALUE'}. HF_TOKEN is loaded automatically. "
-                    "USED with: 'run', 'uv', 'scheduled run', 'scheduled uv' (optional). "
-                    "NOT USED with: 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
-                ),
             },
             # Job management parameters
             "job_id": {
                 "type": "string",
-                "description": (
-                    "Job ID to operate on. "
-                    "REQUIRED for: 'logs', 'inspect', 'cancel'. "
-                    "NOT USED with: 'run', 'uv', 'ps', 'scheduled run/uv/ps/inspect/delete/suspend/resume'."
-                ),
             },
             # Scheduled job parameters
             "scheduled_job_id": {
                 "type": "string",
-                "description": (
-                    "Scheduled job ID to operate on. "
-                    "REQUIRED for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'. "
-                    "NOT USED with: 'run', 'uv', 'ps', 'logs', 'inspect', 'cancel', 'scheduled run', 'scheduled uv', 'scheduled ps'."
-                ),
             },
             "schedule": {
                 "type": "string",
-                "description": (
-                    "Cron schedule or preset. Presets: '@hourly', '@daily', '@weekly', '@monthly', '@yearly'. "
-                    "Cron example: '0 9 * * 1' (9 AM every Monday). "
-                    "REQUIRED for: 'scheduled run', 'scheduled uv'. "
-                    "NOT USED with: 'run', 'uv', 'ps', 'logs', 'inspect', 'cancel', 'scheduled ps/inspect/delete/suspend/resume'."
-                ),
             },
         },
         "required": ["operation"],

                 self.api.run_job,
                 image=image,
                 command=command,
+                env=args.get("env"),
                 secrets=_add_environment_variables(args.get("secrets")),
                 flavor=args.get("hardware_flavor", "cpu-basic"),
                 timeout=args.get("timeout", "30m"),
                 image=image,
                 command=command,
                 schedule=schedule,
+                env=args.get("env"),
                 secrets=_add_environment_variables(args.get("secrets")),
                 flavor=args.get("hardware_flavor", "cpu-basic"),
                 timeout=args.get("timeout", "30m"),
 HF_JOBS_TOOL_SPEC = {
     "name": "hf_jobs",
     "description": (
+        "Run Python scripts or Docker containers on HF cloud GPUs/CPUs.\n\n"
+        "## Operations:\n"
+        "run, ps, logs, inspect, cancel, scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume\n\n"
+        "## Two modes:\n"
+        "1. **Python mode:** Provide 'script' + 'dependencies' → auto-handles pip install\n"
+        "2. **Docker mode:** Provide 'image' + 'command' → full control\n"
+        "(script and command are mutually exclusive)\n\n"
+        "## Hardware:\n"
+        "CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl\n"
+        "GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100\n\n"
+        "## Examples:\n\n"
+        "**Fine-tune LLM and push to Hub:**\n"
+        "{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"gpt2\")\\n# ... training code ...\\nmodel.push_to_hub(\"my-finetuned-model\")', 'dependencies': ['transformers', 'torch', 'datasets'], 'hardware_flavor': 'a10g-large', 'timeout': '4h', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}\n\n"
+        "**Generate dataset daily and upload:**\n"
+        "{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}\n\n"
+        "**Run custom training with Docker:**\n"
+        "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
+        "**Monitor jobs:**\n"
+        "{'operation': 'ps'} - list running\n"
+        "{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
+        "{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
+        "## CRITICAL: Files are ephemeral!\n"
+        "Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script."
     ),
     "parameters": {
         "type": "object",
             # Python/UV specific parameters
             "script": {
                 "type": "string",
+                "description": "Python code to execute. Triggers Python mode (auto pip install). Use with 'run'/'scheduled run'. Mutually exclusive with 'command'.",
             },
             "dependencies": {
                 "type": "array",
                 "items": {"type": "string"},
+                "description": "Pip packages to install. Example: ['trl', 'torch', 'datasets', 'transformers']. Only used with 'script'.",
             },
             # Docker specific parameters
             "image": {
                 "type": "string",
+                "description": "Docker image. Example: 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime'. Use with 'run'/'scheduled run'. Optional (auto-selected if not provided).",
             },
             "command": {
                 "type": "array",
                 "items": {"type": "string"},
+                "description": "Command to execute as list. Example: ['python', 'train.py', '--epochs', '10']. Triggers Docker mode. Use with 'run'/'scheduled run'. Mutually exclusive with 'script'.",
             },
             # Hardware and environment
             "hardware_flavor": {
                 "type": "string",
+                "description": "Hardware type. CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl. GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100. Use with 'run'/'scheduled run'.",
+            },
+            "timeout": {
+                "type": "string",
+                "description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
             },
             "secrets": {
                 "type": "object",
+                "description": "Environment variables (private). Format: {'KEY': 'VALUE'}. Use {'HF_TOKEN': '$HF_TOKEN'} for Hub auth. Use with 'run'/'scheduled run'.",
             },
             # Job management parameters
             "job_id": {
                 "type": "string",
+                "description": "Job ID to operate on. Required for: 'logs', 'inspect', 'cancel'.",
             },
             # Scheduled job parameters
             "scheduled_job_id": {
                 "type": "string",
+                "description": "Scheduled job ID. Required for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'.",
             },
             "schedule": {
                 "type": "string",
+                "description": "Schedule for recurring job. Presets: '@hourly', '@daily', '@weekly', '@monthly'. Cron: '0 9 * * 1' (Mon 9am). Required for: 'scheduled run'.",
             },
         },
         "required": ["operation"],

run_search_agent.py CHANGED Viewed

@@ -21,13 +21,16 @@ async def test_search_agent(query: str):
     # Import at runtime
     from pathlib import Path
     from agent.config import load_config
     # Create event queue for the sub-agent
     sub_event_queue = asyncio.Queue()
     # Load the search agent's own config file with GitHub MCP server
-    search_agent_config_path = Path(__file__).parent / "configs" / "_subagent_config_search_agent.json"
     search_agent_config = load_config(search_agent_config_path)
     # Extract GitHub MCP config from search agent config
@@ -122,7 +125,8 @@ async def main():
     # Example queries to test
     test_queries = [
         # "Explore the TRL documentation structure and find information about DPO trainer",
-        "is there a way to get the logs from a served huggingface space",
         # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
         # "can i stream logs through the api for a served huggingface space",
         # 'what tools do you have access to?',

     # Import at runtime
     from pathlib import Path
     from agent.config import load_config
     # Create event queue for the sub-agent
     sub_event_queue = asyncio.Queue()
     # Load the search agent's own config file with GitHub MCP server
+    search_agent_config_path = (
+        Path(__file__).parent / "configs" / "_subagent_config_search_agent.json"
+    )
     search_agent_config = load_config(search_agent_config_path)
     # Extract GitHub MCP config from search agent config
     # Example queries to test
     test_queries = [
         # "Explore the TRL documentation structure and find information about DPO trainer",
+        # "is there a way to get the logs from a served huggingface space",
+        """use exactly this call {\"tool_name\": \"search_hf_docs\", \"arguments\": {\"query\": \"vLLM offline batch inference Hugging Face models\"}}""",
         # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
         # "can i stream logs through the api for a served huggingface space",
         # 'what tools do you have access to?',