Spaces:

smolagents
/

ml-agent

Running

App Files Community

akseljoonas HF Staff committed on Dec 22, 2025

Commit

cd123dd

1 Parent(s): fb5f967

prompt update and loading from file

Browse files

Files changed (6) hide show

agent/codex_agent_demo.py +1 -1
agent/config.py +0 -1
agent/context_manager/manager.py +18 -6
agent/core/agent_loop.py +4 -7
agent/core/session.py +11 -3
agent/prompts/system_prompt.yaml +112 -0

agent/codex_agent_demo.py CHANGED Viewed

@@ -201,7 +201,7 @@ class Session:
     """
     def __init__(self, event_queue: asyncio.Queue):
-        self.context_manager = ContextManager()
         self.event_queue = event_queue
         self.is_running = True
         self.current_task: Optional[asyncio.Task] = None

     """
     def __init__(self, event_queue: asyncio.Queue):
+        self.context_manager = ContextManager(tool_specs=[])
         self.event_queue = event_queue
         self.is_running = True
         self.current_task: Optional[asyncio.Task] = None

agent/config.py CHANGED Viewed

@@ -20,7 +20,6 @@ class Config(BaseModel):
     model_name: str
     tools: list[Tool] = []
-    system_prompt_path: str = ""
     mcpServers: dict[str, MCPServerConfig] = {}

     model_name: str
     tools: list[Tool] = []
     mcpServers: dict[str, MCPServerConfig] = {}

agent/context_manager/manager.py CHANGED Viewed

@@ -2,8 +2,11 @@
 Context management for conversation history
 """
-import asyncio
 from litellm import Message, acompletion
@@ -15,19 +18,28 @@ class ContextManager:
         max_context: int = 180_000,
         compact_size: float = 0.1,
         untouched_messages: int = 5,
     ):
-        self.system_prompt = self._load_system_prompt()
         self.max_context = max_context
         self.compact_size = int(max_context * compact_size)
         self.context_length = len(self.system_prompt) // 4
         self.untouched_messages = untouched_messages
         self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
-    def _load_system_prompt(self):
-        """Load the system prompt"""
-        # TODO: get system prompt from jinja template
-        return "You are a helpful assistant, primarly for ML. Do the task you are asked to as efficiently as possible. Don't invent tasks."
     def add_message(self, message: Message, token_count: int = None) -> None:
         """Add a message to the history"""

 Context management for conversation history
 """
+from pathlib import Path
+from typing import Any
+import yaml
+from jinja2 import Template
 from litellm import Message, acompletion
         max_context: int = 180_000,
         compact_size: float = 0.1,
         untouched_messages: int = 5,
+        tool_specs: list[dict[str, Any]] | None = None,
     ):
+        self.system_prompt = self._load_system_prompt(tool_specs or [])
         self.max_context = max_context
         self.compact_size = int(max_context * compact_size)
         self.context_length = len(self.system_prompt) // 4
         self.untouched_messages = untouched_messages
         self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
+    def _load_system_prompt(self, tool_specs: list[dict[str, Any]]):
+        """Load and render the system prompt from YAML file with Jinja2"""
+        prompt_file = Path(__file__).parent.parent / "prompts" / "system_prompt.yaml"
+        with open(prompt_file, "r") as f:
+            prompt_data = yaml.safe_load(f)
+            template_str = prompt_data.get("system_prompt", "")
+        template = Template(template_str)
+        return template.render(
+            tools=tool_specs,
+            num_tools=len(tool_specs),
+        )
     def add_message(self, message: Message, token_count: int = None) -> None:
         """Add a message to the history"""

agent/core/agent_loop.py CHANGED Viewed

@@ -5,8 +5,7 @@ Main agent implementation with integrated tool system and MCP support
 import asyncio
 import json
-from litellm import (ChatCompletionMessageToolCall, Message, ModelResponse,
-                     acompletion)
 from lmnr import observe
 from agent.config import Config
@@ -68,8 +67,7 @@ class Handlers:
                 # If no tool calls, add assistant message and we're done
                 if not tool_calls:
                     if content:
-                        assistant_msg = Message(
-                            role="assistant", content=content)
                         session.context_manager.add_message(assistant_msg, token_count)
                         await session.send_event(
                             Event(
@@ -247,9 +245,8 @@ async def submission_loop(
     This is the core of the agent (like submission_loop in codex.rs:1259-1340)
     """
-    # Create session and assign tool router
-    session = Session(event_queue, config=config)
-    session.tool_router = tool_router
     print("🤖 Agent loop started")
     # Main processing loop

 import asyncio
 import json
+from litellm import ChatCompletionMessageToolCall, Message, ModelResponse, acompletion
 from lmnr import observe
 from agent.config import Config
                 # If no tool calls, add assistant message and we're done
                 if not tool_calls:
                     if content:
+                        assistant_msg = Message(role="assistant", content=content)
                         session.context_manager.add_message(assistant_msg, token_count)
                         await session.send_event(
                             Event(
     This is the core of the agent (like submission_loop in codex.rs:1259-1340)
     """
+    # Create session with tool router
+    session = Session(event_queue, config=config, tool_router=tool_router)
     print("🤖 Agent loop started")
     # Main processing loop

agent/core/session.py CHANGED Viewed

@@ -4,6 +4,8 @@ from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Optional
 from agent.config import Config
 from agent.context_manager.manager import ContextManager
@@ -33,18 +35,24 @@ class Session:
         self,
         event_queue: asyncio.Queue,
         config: Config | None = None,
     ):
-        self.context_manager = ContextManager(max_context=180_000, compact_size=0.1, untouched_messages=5)
         self.event_queue = event_queue
         self.session_id = str(uuid.uuid4())
         self.config = config or Config(
             model_name="anthropic/claude-sonnet-4-5-20250929",
             tools=[],
-            system_prompt_path="",
         )
         self.is_running = True
         self.current_task: asyncio.Task | None = None
-        self.tool_router = None  # Set by submission_loop
     async def send_event(self, event: Event) -> None:
         """Send event back to client"""

 from enum import Enum
 from typing import Any, Optional
+from litellm import get_max_tokens
 from agent.config import Config
 from agent.context_manager.manager import ContextManager
         self,
         event_queue: asyncio.Queue,
         config: Config | None = None,
+        tool_router=None,
     ):
+        self.tool_router = tool_router
+        tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
+        self.context_manager = ContextManager(
+            max_context=get_max_tokens(config.model_name),
+            compact_size=0.1,
+            untouched_messages=5,
+            tool_specs=tool_specs,
+        )
         self.event_queue = event_queue
         self.session_id = str(uuid.uuid4())
         self.config = config or Config(
             model_name="anthropic/claude-sonnet-4-5-20250929",
             tools=[],
         )
         self.is_running = True
         self.current_task: asyncio.Task | None = None
     async def send_event(self, event: Event) -> None:
         """Send event back to client"""

agent/prompts/system_prompt.yaml ADDED Viewed

@@ -0,0 +1,112 @@

+system_prompt: |
+  You are HF Agent, a powerful AI assistant for Machine Learning Engineering, particularly training Large Language Models. You have access to {{ num_tools }} tools for interacting with Hugging Face Hub and performing ML tasks.
+  # Available Tools
+  You have access to the following categories of tools:
+  - Hugging Face Hub: Search and interact with models, datasets, papers, and documentation
+  - Spaces: Use and discover ML applications
+  - Jobs: Manage compute jobs for training and inference
+  - Image Generation: Generate and transform images
+  # Agency
+  You take initiative when the user asks you to do something, maintaining an appropriate balance between:
+  1. Doing the right thing when asked, including taking actions and follow-up actions
+  2. Not surprising the user with actions you take without asking
+  3. Not adding unnecessary explanations after completing tasks
+  # Task Approach
+  For ML engineering tasks:
+  1. Use all available tools to complete the task
+  2. Search for relevant models, datasets, and documentation on Hugging Face Hub
+  3. Leverage existing resources before creating new ones
+  4. Invoke multiple independent tools simultaneously for efficiency
+  # Examples
+  <example>
+  <user>Find the best text generation models</user>
+  <response>[uses mcp__hf-mcp-server__model_search with task="text-generation" and sort="trendingScore"]
+  Top trending text generation models:
+  - meta-llama/Llama-3.1-405B-Instruct
+  - mistralai/Mistral-Large-2
+  </response>
+  </example>
+  <example>
+  <user>Search for papers about reinforcement learning from human feedback</user>
+  <response>[uses mcp__hf-mcp-server__paper_search with query="reinforcement learning from human feedback"]
+  Found 5 relevant papers on RLHF including "Training language models to follow instructions with human feedback" (Ouyang et al.)
+  </response>
+  </example>
+  <example>
+  <user>Find datasets for sentiment analysis</user>
+  <response>[uses mcp__hf-mcp-server__dataset_search with query="sentiment analysis" and tags for task_categories]
+  Top sentiment analysis datasets:
+  - stanfordnlp/imdb (25k reviews)
+  - tweet_eval (sentiment task)
+  </response>
+  </example>
+  <example>
+  <user>How do I use the transformers library for text generation?</user>
+  <response>[uses mcp__hf-mcp-server__hf_doc_search with query="text generation transformers"]
+  [provides concise answer based on documentation]
+  </response>
+  </example>
+  <example>
+  <user>Generate an image of a sunset over mountains</user>
+  <response>[uses mcp__hf-mcp-server__gr1_flux1_schnell_infer with prompt="sunset over mountains"]
+  [returns generated image]
+  </response>
+  </example>
+  <example>
+  <user>Get details about the bert-base-uncased model</user>
+  <response>[uses mcp__hf-mcp-server__hub_repo_details with repo_ids=["google-bert/bert-base-uncased"]]
+  BERT base uncased: 110M parameters, trained on English Wikipedia and BookCorpus, commonly used for text classification and NER.
+  </response>
+  </example>
+  # Conventions
+  - Always search Hugging Face Hub for existing resources before suggesting custom implementations
+  - When referencing models, datasets, or papers, include direct links from search results
+  - Never assume a library is available - check documentation first
+  - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
+  - For training tasks, consider compute requirements and suggest appropriate hardware
+  - Never expose or log API keys, tokens, or secrets
+  # Communication Style
+  - Be concise and direct
+  - Skip flattery and unnecessary preamble
+  - Respond in 1-3 sentences when possible
+  - No emojis, minimal exclamation points
+  - Don't apologize for limitations - offer alternatives or keep responses short
+  - Don't thank the user for results
+  - Explain what you're doing for non-trivial operations
+  Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
+  <example>
+  <user>What's the state-of-the-art model for image classification?</user>
+  <response>EVA-CLIP-18B or ConvNeXt-XXLarge depending on your constraints</response>
+  </example>
+  <example>
+  <user>How many parameters does GPT-3 have?</user>
+  <response>175 billion</response>
+  </example>