akseljoonas HF Staff commited on
Commit
cd123dd
·
1 Parent(s): fb5f967

prompt update and loading from file

Browse files
agent/codex_agent_demo.py CHANGED
@@ -201,7 +201,7 @@ class Session:
201
  """
202
 
203
  def __init__(self, event_queue: asyncio.Queue):
204
- self.context_manager = ContextManager()
205
  self.event_queue = event_queue
206
  self.is_running = True
207
  self.current_task: Optional[asyncio.Task] = None
 
201
  """
202
 
203
def __init__(self, event_queue: asyncio.Queue):
    """Set up the demo session around an outbound event queue.

    The session starts in the running state with no task in flight; the
    demo exposes no tools to the prompt template.
    """
    self.event_queue = event_queue
    self.is_running = True
    self.current_task: Optional[asyncio.Task] = None
    # No tool specs in the demo, so the rendered system prompt lists none.
    self.context_manager = ContextManager(tool_specs=[])
agent/config.py CHANGED
@@ -20,7 +20,6 @@ class Config(BaseModel):
20
 
21
  model_name: str
22
  tools: list[Tool] = []
23
- system_prompt_path: str = ""
24
  mcpServers: dict[str, MCPServerConfig] = {}
25
 
26
 
 
20
 
21
  model_name: str
22
  tools: list[Tool] = []
 
23
  mcpServers: dict[str, MCPServerConfig] = {}
24
 
25
 
agent/context_manager/manager.py CHANGED
@@ -2,8 +2,11 @@
2
  Context management for conversation history
3
  """
4
 
5
- import asyncio
 
6
 
 
 
7
  from litellm import Message, acompletion
8
 
9
 
@@ -15,19 +18,28 @@ class ContextManager:
15
  max_context: int = 180_000,
16
  compact_size: float = 0.1,
17
  untouched_messages: int = 5,
 
18
  ):
19
- self.system_prompt = self._load_system_prompt()
20
  self.max_context = max_context
21
  self.compact_size = int(max_context * compact_size)
22
  self.context_length = len(self.system_prompt) // 4
23
  self.untouched_messages = untouched_messages
24
  self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
25
 
26
- def _load_system_prompt(self):
27
- """Load the system prompt"""
 
28
 
29
- # TODO: get system prompt from jinja template
30
- return "You are a helpful assistant, primarly for ML. Do the task you are asked to as efficiently as possible. Don't invent tasks."
 
 
 
 
 
 
 
31
 
32
  def add_message(self, message: Message, token_count: int = None) -> None:
33
  """Add a message to the history"""
 
2
  Context management for conversation history
3
  """
4
 
5
+ from pathlib import Path
6
+ from typing import Any
7
 
8
+ import yaml
9
+ from jinja2 import Template
10
  from litellm import Message, acompletion
11
 
12
 
 
18
  max_context: int = 180_000,
19
  compact_size: float = 0.1,
20
  untouched_messages: int = 5,
21
+ tool_specs: list[dict[str, Any]] | None = None,
22
  ):
23
+ self.system_prompt = self._load_system_prompt(tool_specs or [])
24
  self.max_context = max_context
25
  self.compact_size = int(max_context * compact_size)
26
  self.context_length = len(self.system_prompt) // 4
27
  self.untouched_messages = untouched_messages
28
  self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
29
 
30
def _load_system_prompt(self, tool_specs: list[dict[str, Any]]) -> str:
    """Load the system prompt template from YAML and render it with Jinja2.

    Args:
        tool_specs: Tool specifications exposed to the LLM; passed to the
            template as ``tools``, with ``num_tools`` set to its length.

    Returns:
        The rendered system prompt ("" when the YAML file has no
        ``system_prompt`` key).
    """
    prompt_file = Path(__file__).parent.parent / "prompts" / "system_prompt.yaml"

    # Explicit UTF-8: open() otherwise uses the platform-default encoding,
    # which can mis-decode non-ASCII characters in the prompt.
    with open(prompt_file, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; fall back to {} so
        # the .get() below cannot raise AttributeError.
        prompt_data = yaml.safe_load(f) or {}
    template_str = prompt_data.get("system_prompt", "")

    template = Template(template_str)
    return template.render(
        tools=tool_specs,
        num_tools=len(tool_specs),
    )
43
 
44
  def add_message(self, message: Message, token_count: int = None) -> None:
45
  """Add a message to the history"""
agent/core/agent_loop.py CHANGED
@@ -5,8 +5,7 @@ Main agent implementation with integrated tool system and MCP support
5
  import asyncio
6
  import json
7
 
8
- from litellm import (ChatCompletionMessageToolCall, Message, ModelResponse,
9
- acompletion)
10
  from lmnr import observe
11
 
12
  from agent.config import Config
@@ -68,8 +67,7 @@ class Handlers:
68
  # If no tool calls, add assistant message and we're done
69
  if not tool_calls:
70
  if content:
71
- assistant_msg = Message(
72
- role="assistant", content=content)
73
  session.context_manager.add_message(assistant_msg, token_count)
74
  await session.send_event(
75
  Event(
@@ -247,9 +245,8 @@ async def submission_loop(
247
  This is the core of the agent (like submission_loop in codex.rs:1259-1340)
248
  """
249
 
250
- # Create session and assign tool router
251
- session = Session(event_queue, config=config)
252
- session.tool_router = tool_router
253
  print("🤖 Agent loop started")
254
 
255
  # Main processing loop
 
5
  import asyncio
6
  import json
7
 
8
+ from litellm import ChatCompletionMessageToolCall, Message, ModelResponse, acompletion
 
9
  from lmnr import observe
10
 
11
  from agent.config import Config
 
67
  # If no tool calls, add assistant message and we're done
68
  if not tool_calls:
69
  if content:
70
+ assistant_msg = Message(role="assistant", content=content)
 
71
  session.context_manager.add_message(assistant_msg, token_count)
72
  await session.send_event(
73
  Event(
 
245
  This is the core of the agent (like submission_loop in codex.rs:1259-1340)
246
  """
247
 
248
+ # Create session with tool router
249
+ session = Session(event_queue, config=config, tool_router=tool_router)
 
250
  print("🤖 Agent loop started")
251
 
252
  # Main processing loop
agent/core/session.py CHANGED
@@ -4,6 +4,8 @@ from dataclasses import dataclass
4
  from enum import Enum
5
  from typing import Any, Optional
6
 
 
 
7
  from agent.config import Config
8
  from agent.context_manager.manager import ContextManager
9
 
@@ -33,18 +35,24 @@ class Session:
33
  self,
34
  event_queue: asyncio.Queue,
35
  config: Config | None = None,
 
36
  ):
37
- self.context_manager = ContextManager(max_context=180_000, compact_size=0.1, untouched_messages=5)
 
 
 
 
 
 
 
38
  self.event_queue = event_queue
39
  self.session_id = str(uuid.uuid4())
40
  self.config = config or Config(
41
  model_name="anthropic/claude-sonnet-4-5-20250929",
42
  tools=[],
43
- system_prompt_path="",
44
  )
45
  self.is_running = True
46
  self.current_task: asyncio.Task | None = None
47
- self.tool_router = None # Set by submission_loop
48
 
49
  async def send_event(self, event: Event) -> None:
50
  """Send event back to client"""
 
4
  from enum import Enum
5
  from typing import Any, Optional
6
 
7
+ from litellm import get_max_tokens
8
+
9
  from agent.config import Config
10
  from agent.context_manager.manager import ContextManager
11
 
 
35
  self,
36
  event_queue: asyncio.Queue,
37
  config: Config | None = None,
38
+ tool_router=None,
39
  ):
40
+ self.tool_router = tool_router
41
+ tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
42
+ self.context_manager = ContextManager(
43
+ max_context=get_max_tokens(config.model_name),
44
+ compact_size=0.1,
45
+ untouched_messages=5,
46
+ tool_specs=tool_specs,
47
+ )
48
  self.event_queue = event_queue
49
  self.session_id = str(uuid.uuid4())
50
  self.config = config or Config(
51
  model_name="anthropic/claude-sonnet-4-5-20250929",
52
  tools=[],
 
53
  )
54
  self.is_running = True
55
  self.current_task: asyncio.Task | None = None
 
56
 
57
  async def send_event(self, event: Event) -> None:
58
  """Send event back to client"""
agent/prompts/system_prompt.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ system_prompt: |
2
+ You are HF Agent, a powerful AI assistant for Machine Learning Engineering, particularly training Large Language Models. You have access to {{ num_tools }} tools for interacting with Hugging Face Hub and performing ML tasks.
3
+
4
+ # Available Tools
5
+
6
+ You have access to the following categories of tools:
7
+
8
+ - Hugging Face Hub: Search and interact with models, datasets, papers, and documentation
9
+ - Spaces: Use and discover ML applications
10
+ - Jobs: Manage compute jobs for training and inference
11
+ - Image Generation: Generate and transform images
12
+
13
+ # Agency
14
+
15
+ You take initiative when the user asks you to do something, maintaining an appropriate balance between:
16
+
17
+ 1. Doing the right thing when asked, including taking actions and follow-up actions
18
+ 2. Not surprising the user with actions you take without asking
19
+ 3. Not adding unnecessary explanations after completing tasks
20
+
21
+ # Task Approach
22
+
23
+ For ML engineering tasks:
24
+ 1. Use all available tools to complete the task
25
+ 2. Search for relevant models, datasets, and documentation on Hugging Face Hub
26
+ 3. Leverage existing resources before creating new ones
27
+ 4. Invoke multiple independent tools simultaneously for efficiency
28
+
29
+ # Examples
30
+
31
+ <example>
32
+ <user>Find the best text generation models</user>
33
+ <response>[uses mcp__hf-mcp-server__model_search with task="text-generation" and sort="trendingScore"]
34
+
35
+ Top trending text generation models:
36
+ - meta-llama/Llama-3.1-405B-Instruct
37
+ - mistralai/Mistral-Large-2
38
+ </response>
39
+ </example>
40
+
41
+ <example>
42
+ <user>Search for papers about reinforcement learning from human feedback</user>
43
+ <response>[uses mcp__hf-mcp-server__paper_search with query="reinforcement learning from human feedback"]
44
+
45
+ Found 5 relevant papers on RLHF including "Training language models to follow instructions with human feedback" (Ouyang et al.)
46
+ </response>
47
+ </example>
48
+
49
+ <example>
50
+ <user>Find datasets for sentiment analysis</user>
51
+ <response>[uses mcp__hf-mcp-server__dataset_search with query="sentiment analysis" and tags for task_categories]
52
+
53
+ Top sentiment analysis datasets:
54
+ - stanfordnlp/imdb (25k reviews)
55
+ - tweet_eval (sentiment task)
56
+ </response>
57
+ </example>
58
+
59
+ <example>
60
+ <user>How do I use the transformers library for text generation?</user>
61
+ <response>[uses mcp__hf-mcp-server__hf_doc_search with query="text generation transformers"]
62
+
63
+ [provides concise answer based on documentation]
64
+ </response>
65
+ </example>
66
+
67
+ <example>
68
+ <user>Generate an image of a sunset over mountains</user>
69
+ <response>[uses mcp__hf-mcp-server__gr1_flux1_schnell_infer with prompt="sunset over mountains"]
70
+
71
+ [returns generated image]
72
+ </response>
73
+ </example>
74
+
75
+ <example>
76
+ <user>Get details about the bert-base-uncased model</user>
77
+ <response>[uses mcp__hf-mcp-server__hub_repo_details with repo_ids=["google-bert/bert-base-uncased"]]
78
+
79
+ BERT base uncased: 110M parameters, trained on English Wikipedia and BookCorpus, commonly used for text classification and NER.
80
+ </response>
81
+ </example>
82
+
83
+ # Conventions
84
+
85
+ - Always search Hugging Face Hub for existing resources before suggesting custom implementations
86
+ - When referencing models, datasets, or papers, include direct links from search results
87
+ - Never assume a library is available - check documentation first
88
+ - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
89
+ - For training tasks, consider compute requirements and suggest appropriate hardware
90
+ - Never expose or log API keys, tokens, or secrets
91
+
92
+ # Communication Style
93
+
94
+ - Be concise and direct
95
+ - Skip flattery and unnecessary preamble
96
+ - Respond in 1-3 sentences when possible
97
+ - No emojis, minimal exclamation points
98
+ - Don't apologize for limitations - offer alternatives or keep responses short
99
+ - Don't thank the user for results
100
+ - Explain what you're doing for non-trivial operations
101
+
102
+ Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
103
+
104
+ <example>
105
+ <user>What's the state-of-the-art model for image classification?</user>
106
+ <response>EVA-CLIP-18B or ConvNeXt-XXLarge depending on your constraints</response>
107
+ </example>
108
+
109
+ <example>
110
+ <user>How many parameters does GPT-3 have?</user>
111
+ <response>175 billion</response>
112
+ </example>