Spaces:
Running
Running
Commit
·
cd123dd
1
Parent(s):
fb5f967
prompt update and loading from file
Browse files
- agent/codex_agent_demo.py +1 -1
- agent/config.py +0 -1
- agent/context_manager/manager.py +18 -6
- agent/core/agent_loop.py +4 -7
- agent/core/session.py +11 -3
- agent/prompts/system_prompt.yaml +112 -0
agent/codex_agent_demo.py
CHANGED
|
@@ -201,7 +201,7 @@ class Session:
|
|
| 201 |
"""
|
| 202 |
|
| 203 |
def __init__(self, event_queue: asyncio.Queue):
|
| 204 |
-
self.context_manager = ContextManager()
|
| 205 |
self.event_queue = event_queue
|
| 206 |
self.is_running = True
|
| 207 |
self.current_task: Optional[asyncio.Task] = None
|
|
|
|
| 201 |
"""
|
| 202 |
|
| 203 |
def __init__(self, event_queue: asyncio.Queue):
|
| 204 |
+
self.context_manager = ContextManager(tool_specs=[])
|
| 205 |
self.event_queue = event_queue
|
| 206 |
self.is_running = True
|
| 207 |
self.current_task: Optional[asyncio.Task] = None
|
agent/config.py
CHANGED
|
@@ -20,7 +20,6 @@ class Config(BaseModel):
|
|
| 20 |
|
| 21 |
model_name: str
|
| 22 |
tools: list[Tool] = []
|
| 23 |
-
system_prompt_path: str = ""
|
| 24 |
mcpServers: dict[str, MCPServerConfig] = {}
|
| 25 |
|
| 26 |
|
|
|
|
| 20 |
|
| 21 |
model_name: str
|
| 22 |
tools: list[Tool] = []
|
|
|
|
| 23 |
mcpServers: dict[str, MCPServerConfig] = {}
|
| 24 |
|
| 25 |
|
agent/context_manager/manager.py
CHANGED
|
@@ -2,8 +2,11 @@
|
|
| 2 |
Context management for conversation history
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
import
|
|
|
|
| 6 |
|
|
|
|
|
|
|
| 7 |
from litellm import Message, acompletion
|
| 8 |
|
| 9 |
|
|
@@ -15,19 +18,28 @@ class ContextManager:
|
|
| 15 |
max_context: int = 180_000,
|
| 16 |
compact_size: float = 0.1,
|
| 17 |
untouched_messages: int = 5,
|
|
|
|
| 18 |
):
|
| 19 |
-
self.system_prompt = self._load_system_prompt()
|
| 20 |
self.max_context = max_context
|
| 21 |
self.compact_size = int(max_context * compact_size)
|
| 22 |
self.context_length = len(self.system_prompt) // 4
|
| 23 |
self.untouched_messages = untouched_messages
|
| 24 |
self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
|
| 25 |
|
| 26 |
-
def _load_system_prompt(self):
|
| 27 |
-
"""Load the system prompt"""
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def add_message(self, message: Message, token_count: int = None) -> None:
|
| 33 |
"""Add a message to the history"""
|
|
|
|
| 2 |
Context management for conversation history
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any
|
| 7 |
|
| 8 |
+
import yaml
|
| 9 |
+
from jinja2 import Template
|
| 10 |
from litellm import Message, acompletion
|
| 11 |
|
| 12 |
|
|
|
|
| 18 |
max_context: int = 180_000,
|
| 19 |
compact_size: float = 0.1,
|
| 20 |
untouched_messages: int = 5,
|
| 21 |
+
tool_specs: list[dict[str, Any]] | None = None,
|
| 22 |
):
|
| 23 |
+
self.system_prompt = self._load_system_prompt(tool_specs or [])
|
| 24 |
self.max_context = max_context
|
| 25 |
self.compact_size = int(max_context * compact_size)
|
| 26 |
self.context_length = len(self.system_prompt) // 4
|
| 27 |
self.untouched_messages = untouched_messages
|
| 28 |
self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
|
| 29 |
|
| 30 |
+
def _load_system_prompt(self, tool_specs: list[dict[str, Any]]) -> str:
    """Load the system prompt template from YAML and render it with Jinja2.

    The template is read from ``prompts/system_prompt.yaml`` located one
    directory above this module's directory, under the ``system_prompt`` key.

    Args:
        tool_specs: Tool specifications exposed to the template as ``tools``;
            their count is exposed as ``num_tools``.

    Returns:
        The rendered system prompt. An empty string is rendered when the
        YAML document is empty or has no ``system_prompt`` key.

    Raises:
        OSError: If the prompt file cannot be opened.
    """
    prompt_file = Path(__file__).parent.parent / "prompts" / "system_prompt.yaml"

    # Decode explicitly as UTF-8 so the prompt reads the same regardless of
    # the platform's locale default encoding.
    with open(prompt_file, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; fall back to an empty
        # mapping so the .get() default below still applies.
        prompt_data = yaml.safe_load(f) or {}

    template_str = prompt_data.get("system_prompt", "")
    template = Template(template_str)
    return template.render(
        tools=tool_specs,
        num_tools=len(tool_specs),
    )
|
| 43 |
|
| 44 |
def add_message(self, message: Message, token_count: int = None) -> None:
|
| 45 |
"""Add a message to the history"""
|
agent/core/agent_loop.py
CHANGED
|
@@ -5,8 +5,7 @@ Main agent implementation with integrated tool system and MCP support
|
|
| 5 |
import asyncio
|
| 6 |
import json
|
| 7 |
|
| 8 |
-
from litellm import
|
| 9 |
-
acompletion)
|
| 10 |
from lmnr import observe
|
| 11 |
|
| 12 |
from agent.config import Config
|
|
@@ -68,8 +67,7 @@ class Handlers:
|
|
| 68 |
# If no tool calls, add assistant message and we're done
|
| 69 |
if not tool_calls:
|
| 70 |
if content:
|
| 71 |
-
assistant_msg = Message(
|
| 72 |
-
role="assistant", content=content)
|
| 73 |
session.context_manager.add_message(assistant_msg, token_count)
|
| 74 |
await session.send_event(
|
| 75 |
Event(
|
|
@@ -247,9 +245,8 @@ async def submission_loop(
|
|
| 247 |
This is the core of the agent (like submission_loop in codex.rs:1259-1340)
|
| 248 |
"""
|
| 249 |
|
| 250 |
-
# Create session
|
| 251 |
-
session = Session(event_queue, config=config)
|
| 252 |
-
session.tool_router = tool_router
|
| 253 |
print("🤖 Agent loop started")
|
| 254 |
|
| 255 |
# Main processing loop
|
|
|
|
| 5 |
import asyncio
|
| 6 |
import json
|
| 7 |
|
| 8 |
+
from litellm import ChatCompletionMessageToolCall, Message, ModelResponse, acompletion
|
|
|
|
| 9 |
from lmnr import observe
|
| 10 |
|
| 11 |
from agent.config import Config
|
|
|
|
| 67 |
# If no tool calls, add assistant message and we're done
|
| 68 |
if not tool_calls:
|
| 69 |
if content:
|
| 70 |
+
assistant_msg = Message(role="assistant", content=content)
|
|
|
|
| 71 |
session.context_manager.add_message(assistant_msg, token_count)
|
| 72 |
await session.send_event(
|
| 73 |
Event(
|
|
|
|
| 245 |
This is the core of the agent (like submission_loop in codex.rs:1259-1340)
|
| 246 |
"""
|
| 247 |
|
| 248 |
+
# Create session with tool router
|
| 249 |
+
session = Session(event_queue, config=config, tool_router=tool_router)
|
|
|
|
| 250 |
print("🤖 Agent loop started")
|
| 251 |
|
| 252 |
# Main processing loop
|
agent/core/session.py
CHANGED
|
@@ -4,6 +4,8 @@ from dataclasses import dataclass
|
|
| 4 |
from enum import Enum
|
| 5 |
from typing import Any, Optional
|
| 6 |
|
|
|
|
|
|
|
| 7 |
from agent.config import Config
|
| 8 |
from agent.context_manager.manager import ContextManager
|
| 9 |
|
|
@@ -33,18 +35,24 @@ class Session:
|
|
| 33 |
self,
|
| 34 |
event_queue: asyncio.Queue,
|
| 35 |
config: Config | None = None,
|
|
|
|
| 36 |
):
|
| 37 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
self.event_queue = event_queue
|
| 39 |
self.session_id = str(uuid.uuid4())
|
| 40 |
self.config = config or Config(
|
| 41 |
model_name="anthropic/claude-sonnet-4-5-20250929",
|
| 42 |
tools=[],
|
| 43 |
-
system_prompt_path="",
|
| 44 |
)
|
| 45 |
self.is_running = True
|
| 46 |
self.current_task: asyncio.Task | None = None
|
| 47 |
-
self.tool_router = None # Set by submission_loop
|
| 48 |
|
| 49 |
async def send_event(self, event: Event) -> None:
|
| 50 |
"""Send event back to client"""
|
|
|
|
| 4 |
from enum import Enum
|
| 5 |
from typing import Any, Optional
|
| 6 |
|
| 7 |
+
from litellm import get_max_tokens
|
| 8 |
+
|
| 9 |
from agent.config import Config
|
| 10 |
from agent.context_manager.manager import ContextManager
|
| 11 |
|
|
|
|
| 35 |
self,
|
| 36 |
event_queue: asyncio.Queue,
|
| 37 |
config: Config | None = None,
|
| 38 |
+
tool_router=None,
|
| 39 |
):
|
| 40 |
+
self.tool_router = tool_router
|
| 41 |
+
tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
|
| 42 |
+
self.context_manager = ContextManager(
|
| 43 |
+
max_context=get_max_tokens(config.model_name),
|
| 44 |
+
compact_size=0.1,
|
| 45 |
+
untouched_messages=5,
|
| 46 |
+
tool_specs=tool_specs,
|
| 47 |
+
)
|
| 48 |
self.event_queue = event_queue
|
| 49 |
self.session_id = str(uuid.uuid4())
|
| 50 |
self.config = config or Config(
|
| 51 |
model_name="anthropic/claude-sonnet-4-5-20250929",
|
| 52 |
tools=[],
|
|
|
|
| 53 |
)
|
| 54 |
self.is_running = True
|
| 55 |
self.current_task: asyncio.Task | None = None
|
|
|
|
| 56 |
|
| 57 |
async def send_event(self, event: Event) -> None:
|
| 58 |
"""Send event back to client"""
|
agent/prompts/system_prompt.yaml
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
system_prompt: |
|
| 2 |
+
You are HF Agent, a powerful AI assistant for Machine Learning Engineering, particularly training Large Language Models. You have access to {{ num_tools }} tools for interacting with Hugging Face Hub and performing ML tasks.
|
| 3 |
+
|
| 4 |
+
# Available Tools
|
| 5 |
+
|
| 6 |
+
You have access to the following categories of tools:
|
| 7 |
+
|
| 8 |
+
- Hugging Face Hub: Search and interact with models, datasets, papers, and documentation
|
| 9 |
+
- Spaces: Use and discover ML applications
|
| 10 |
+
- Jobs: Manage compute jobs for training and inference
|
| 11 |
+
- Image Generation: Generate and transform images
|
| 12 |
+
|
| 13 |
+
# Agency
|
| 14 |
+
|
| 15 |
+
You take initiative when the user asks you to do something, maintaining an appropriate balance between:
|
| 16 |
+
|
| 17 |
+
1. Doing the right thing when asked, including taking actions and follow-up actions
|
| 18 |
+
2. Not surprising the user with actions you take without asking
|
| 19 |
+
3. Not adding unnecessary explanations after completing tasks
|
| 20 |
+
|
| 21 |
+
# Task Approach
|
| 22 |
+
|
| 23 |
+
For ML engineering tasks:
|
| 24 |
+
1. Use all available tools to complete the task
|
| 25 |
+
2. Search for relevant models, datasets, and documentation on Hugging Face Hub
|
| 26 |
+
3. Leverage existing resources before creating new ones
|
| 27 |
+
4. Invoke multiple independent tools simultaneously for efficiency
|
| 28 |
+
|
| 29 |
+
# Examples
|
| 30 |
+
|
| 31 |
+
<example>
|
| 32 |
+
<user>Find the best text generation models</user>
|
| 33 |
+
<response>[uses mcp__hf-mcp-server__model_search with task="text-generation" and sort="trendingScore"]
|
| 34 |
+
|
| 35 |
+
Top trending text generation models:
|
| 36 |
+
- meta-llama/Llama-3.1-405B-Instruct
|
| 37 |
+
- mistralai/Mistral-Large-2
|
| 38 |
+
</response>
|
| 39 |
+
</example>
|
| 40 |
+
|
| 41 |
+
<example>
|
| 42 |
+
<user>Search for papers about reinforcement learning from human feedback</user>
|
| 43 |
+
<response>[uses mcp__hf-mcp-server__paper_search with query="reinforcement learning from human feedback"]
|
| 44 |
+
|
| 45 |
+
Found 5 relevant papers on RLHF including "Training language models to follow instructions with human feedback" (Ouyang et al.)
|
| 46 |
+
</response>
|
| 47 |
+
</example>
|
| 48 |
+
|
| 49 |
+
<example>
|
| 50 |
+
<user>Find datasets for sentiment analysis</user>
|
| 51 |
+
<response>[uses mcp__hf-mcp-server__dataset_search with query="sentiment analysis" and tags for task_categories]
|
| 52 |
+
|
| 53 |
+
Top sentiment analysis datasets:
|
| 54 |
+
- stanfordnlp/imdb (25k reviews)
|
| 55 |
+
- tweet_eval (sentiment task)
|
| 56 |
+
</response>
|
| 57 |
+
</example>
|
| 58 |
+
|
| 59 |
+
<example>
|
| 60 |
+
<user>How do I use the transformers library for text generation?</user>
|
| 61 |
+
<response>[uses mcp__hf-mcp-server__hf_doc_search with query="text generation transformers"]
|
| 62 |
+
|
| 63 |
+
[provides concise answer based on documentation]
|
| 64 |
+
</response>
|
| 65 |
+
</example>
|
| 66 |
+
|
| 67 |
+
<example>
|
| 68 |
+
<user>Generate an image of a sunset over mountains</user>
|
| 69 |
+
<response>[uses mcp__hf-mcp-server__gr1_flux1_schnell_infer with prompt="sunset over mountains"]
|
| 70 |
+
|
| 71 |
+
[returns generated image]
|
| 72 |
+
</response>
|
| 73 |
+
</example>
|
| 74 |
+
|
| 75 |
+
<example>
|
| 76 |
+
<user>Get details about the bert-base-uncased model</user>
|
| 77 |
+
<response>[uses mcp__hf-mcp-server__hub_repo_details with repo_ids=["google-bert/bert-base-uncased"]]
|
| 78 |
+
|
| 79 |
+
BERT base uncased: 110M parameters, trained on English Wikipedia and BookCorpus, commonly used for text classification and NER.
|
| 80 |
+
</response>
|
| 81 |
+
</example>
|
| 82 |
+
|
| 83 |
+
# Conventions
|
| 84 |
+
|
| 85 |
+
- Always search Hugging Face Hub for existing resources before suggesting custom implementations
|
| 86 |
+
- When referencing models, datasets, or papers, include direct links from search results
|
| 87 |
+
- Never assume a library is available - check documentation first
|
| 88 |
+
- Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
|
| 89 |
+
- For training tasks, consider compute requirements and suggest appropriate hardware
|
| 90 |
+
- Never expose or log API keys, tokens, or secrets
|
| 91 |
+
|
| 92 |
+
# Communication Style
|
| 93 |
+
|
| 94 |
+
- Be concise and direct
|
| 95 |
+
- Skip flattery and unnecessary preamble
|
| 96 |
+
- Respond in 1-3 sentences when possible
|
| 97 |
+
- No emojis, minimal exclamation points
|
| 98 |
+
- Don't apologize for limitations - offer alternatives or keep responses short
|
| 99 |
+
- Don't thank the user for results
|
| 100 |
+
- Explain what you're doing for non-trivial operations
|
| 101 |
+
|
| 102 |
+
Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
|
| 103 |
+
|
| 104 |
+
<example>
|
| 105 |
+
<user>What's the state-of-the-art model for image classification?</user>
|
| 106 |
+
<response>EVA-CLIP-18B or ConvNeXt-XXLarge depending on your constraints</response>
|
| 107 |
+
</example>
|
| 108 |
+
|
| 109 |
+
<example>
|
| 110 |
+
<user>How many parameters does GPT-3 have?</user>
|
| 111 |
+
<response>175 billion</response>
|
| 112 |
+
</example>
|