Spaces:
Restarting
Restarting
Henri Bonamy committed on
Commit ·
d574d65
1
Parent(s): 8f4b322
added compaction every x tokens
Browse files- agent/context_manager/manager.py +52 -10
- agent/core/agent_loop.py +28 -12
- agent/core/session.py +1 -1
- agent/main.py +4 -0
agent/context_manager/manager.py
CHANGED
|
@@ -2,14 +2,25 @@
|
|
| 2 |
Context management for conversation history
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
class ContextManager:
|
| 9 |
"""Manages conversation context and message history for the agent"""
|
| 10 |
|
| 11 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
self.system_prompt = self._load_system_prompt()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
|
| 14 |
|
| 15 |
def _load_system_prompt(self):
|
|
@@ -18,27 +29,58 @@ class ContextManager:
|
|
| 18 |
# TODO: get system prompt from jinja template
|
| 19 |
return "You are a helpful assistant."
|
| 20 |
|
| 21 |
-
def add_message(self, message: Message) -> None:
|
| 22 |
"""Add a message to the history"""
|
|
|
|
|
|
|
|
|
|
| 23 |
self.items.append(message)
|
| 24 |
|
| 25 |
def get_messages(self) -> list[Message]:
|
| 26 |
"""Get all messages for sending to LLM"""
|
| 27 |
return self.items
|
| 28 |
|
| 29 |
-
def compact(self,
|
| 30 |
"""Remove old messages to keep history under target size"""
|
| 31 |
-
|
| 32 |
-
if len(self.items) <= target_size:
|
| 33 |
return
|
| 34 |
|
| 35 |
-
# Always keep system prompt
|
| 36 |
system_msg = (
|
| 37 |
self.items[0] if self.items and self.items[0].role == "system" else None
|
| 38 |
)
|
| 39 |
-
messages_to_keep = self.items[-(target_size - 1) :]
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
if system_msg:
|
| 42 |
-
self.items = [system_msg] +
|
| 43 |
else:
|
| 44 |
-
self.items =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
Context management for conversation history
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
import asyncio
|
| 6 |
+
|
| 7 |
+
from litellm import Message, acompletion
|
| 8 |
|
| 9 |
|
| 10 |
class ContextManager:
    """Manages conversation context and message history for the agent.

    Tracks an approximate token count (`context_length`) and, when it exceeds
    `max_context`, `compact()` replaces the middle of the history with an
    LLM-generated summary, keeping the system prompt and the most recent
    `untouched_messages` messages intact.
    """

    def __init__(
        self,
        max_context: int = 180_000,
        compact_size: float = 0.1,
        untouched_messages: int = 5,
    ):
        self.system_prompt = self._load_system_prompt()
        self.max_context = max_context
        # Budget (in tokens) for the generated summary: a fraction of max_context.
        self.compact_size = int(max_context * compact_size)
        # Rough token estimate: ~4 characters per token for the system prompt.
        self.context_length = len(self.system_prompt) // 4
        self.untouched_messages = untouched_messages
        self.items: list[Message] = [Message(role="system", content=self.system_prompt)]

    def _load_system_prompt(self):
        """Return the system prompt text."""
        # TODO: get system prompt from jinja template
        return "You are a helpful assistant."

    def add_message(self, message: Message, token_count: int | None = None) -> None:
        """Add a message to the history.

        Args:
            message: The message to append.
            token_count: Total token usage reported by the provider for the
                turn that produced this message; when given, it replaces the
                running context-length estimate.
        """
        # `is not None` (not truthiness): a reported count of 0 is still a
        # valid update, and must not be silently ignored.
        if token_count is not None:
            self.context_length = token_count
        self.items.append(message)

    def get_messages(self) -> list[Message]:
        """Get all messages for sending to LLM"""
        return self.items

    async def compact(self, model_name: str) -> None:
        """Summarize old messages to keep the context under `max_context` tokens.

        No-op while `context_length` is within budget. Otherwise asks the
        model for a summary of the middle of the conversation and rebuilds
        the history as: system prompt (if any) + summary + recent messages.

        Args:
            model_name: Model identifier passed to litellm's `acompletion`.
        """
        if (self.context_length <= self.max_context) or not self.items:
            return

        # Always keep the system prompt, when one is present at the head.
        system_msg = (
            self.items[0] if self.items and self.items[0].role == "system" else None
        )

        # Don't summarize a certain number of just-preceding messages.
        recent_messages = self.items[-self.untouched_messages :]

        # Summarize everything in between. Only skip index 0 when it really
        # is the system prompt — otherwise a regular message would be
        # silently dropped from the summary input.
        start = 1 if system_msg else 0
        messages_to_summarize = self.items[start : -self.untouched_messages]

        # Improbable: messages would have to be very long.
        if not messages_to_summarize:
            return

        messages_to_summarize.append(
            Message(
                role="user",
                content="Please provide a concise summary of the conversation above, focusing on key decisions, code changes, problems solved, and important context needed for future turns.",
            )
        )

        response = await acompletion(
            model=model_name,
            messages=messages_to_summarize,
            max_completion_tokens=self.compact_size,
        )
        summarized_message = Message(
            role="assistant", content=response.choices[0].message.content
        )

        # Reconstruct: system + summary + the untouched recent messages.
        if system_msg:
            self.items = [system_msg, summarized_message] + recent_messages
        else:
            self.items = [summarized_message] + recent_messages

        # New estimate: system prompt (~4 chars/token) + summary tokens.
        # NOTE(review): this ignores the tokens of `recent_messages` — the
        # next provider-reported count in add_message will correct it.
        self.context_length = (
            len(self.system_prompt) // 4 + response.usage.completion_tokens
        )
|
agent/core/agent_loop.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
-
"""
|
| 2 |
Main agent implementation with integrated tool system and MCP support
|
| 3 |
"""
|
| 4 |
|
| 5 |
import asyncio
|
| 6 |
import json
|
| 7 |
|
| 8 |
-
from litellm import ChatCompletionMessageToolCall, Message, ModelResponse,
|
|
|
|
| 9 |
from lmnr import observe
|
| 10 |
|
| 11 |
from agent.config import Config
|
|
@@ -58,17 +59,18 @@ class Handlers:
|
|
| 58 |
tool_choice="auto",
|
| 59 |
)
|
| 60 |
|
|
|
|
| 61 |
message = response.choices[0].message
|
| 62 |
-
|
| 63 |
-
# Extract content and tool calls
|
| 64 |
content = message.content
|
|
|
|
| 65 |
tool_calls: list[ToolCall] = message.get("tool_calls", [])
|
| 66 |
|
| 67 |
# If no tool calls, add assistant message and we're done
|
| 68 |
if not tool_calls:
|
| 69 |
if content:
|
| 70 |
-
assistant_msg = Message(
|
| 71 |
-
|
|
|
|
| 72 |
await session.send_event(
|
| 73 |
Event(
|
| 74 |
event_type="assistant_message",
|
|
@@ -81,9 +83,11 @@ class Handlers:
|
|
| 81 |
# Add assistant message with tool calls to history
|
| 82 |
# LiteLLM will format this correctly for the provider
|
| 83 |
assistant_msg = Message(
|
| 84 |
-
role="assistant",
|
|
|
|
|
|
|
| 85 |
)
|
| 86 |
-
session.context_manager.add_message(assistant_msg)
|
| 87 |
|
| 88 |
if content:
|
| 89 |
await session.send_event(
|
|
@@ -139,6 +143,18 @@ class Handlers:
|
|
| 139 |
)
|
| 140 |
break
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
await session.send_event(
|
| 143 |
Event(
|
| 144 |
event_type="turn_complete",
|
|
@@ -156,14 +172,14 @@ class Handlers:
|
|
| 156 |
@staticmethod
|
| 157 |
async def compact(session: Session) -> None:
|
| 158 |
"""Handle compact (like compact in codex.rs:1317)"""
|
| 159 |
-
|
| 160 |
-
session.context_manager.compact(
|
| 161 |
-
|
| 162 |
|
| 163 |
await session.send_event(
|
| 164 |
Event(
|
| 165 |
event_type="compacted",
|
| 166 |
-
data={"removed":
|
| 167 |
)
|
| 168 |
)
|
| 169 |
|
|
|
|
| 1 |
+
"""loop
|
| 2 |
Main agent implementation with integrated tool system and MCP support
|
| 3 |
"""
|
| 4 |
|
| 5 |
import asyncio
|
| 6 |
import json
|
| 7 |
|
| 8 |
+
from litellm import (ChatCompletionMessageToolCall, Message, ModelResponse,
|
| 9 |
+
acompletion)
|
| 10 |
from lmnr import observe
|
| 11 |
|
| 12 |
from agent.config import Config
|
|
|
|
| 59 |
tool_choice="auto",
|
| 60 |
)
|
| 61 |
|
| 62 |
+
# Extract text response, token usage, and tool calls
|
| 63 |
message = response.choices[0].message
|
|
|
|
|
|
|
| 64 |
content = message.content
|
| 65 |
+
token_count = response.usage.total_tokens
|
| 66 |
tool_calls: list[ToolCall] = message.get("tool_calls", [])
|
| 67 |
|
| 68 |
# If no tool calls, add assistant message and we're done
|
| 69 |
if not tool_calls:
|
| 70 |
if content:
|
| 71 |
+
assistant_msg = Message(
|
| 72 |
+
role="assistant", content=content)
|
| 73 |
+
session.context_manager.add_message(assistant_msg, token_count)
|
| 74 |
await session.send_event(
|
| 75 |
Event(
|
| 76 |
event_type="assistant_message",
|
|
|
|
| 83 |
# Add assistant message with tool calls to history
|
| 84 |
# LiteLLM will format this correctly for the provider
|
| 85 |
assistant_msg = Message(
|
| 86 |
+
role="assistant",
|
| 87 |
+
content=content,
|
| 88 |
+
tool_calls=tool_calls,
|
| 89 |
)
|
| 90 |
+
session.context_manager.add_message(assistant_msg, token_count)
|
| 91 |
|
| 92 |
if content:
|
| 93 |
await session.send_event(
|
|
|
|
| 143 |
)
|
| 144 |
break
|
| 145 |
|
| 146 |
+
old_length = session.context_manager.context_length
|
| 147 |
+
await session.context_manager.compact(model_name=session.config.model_name)
|
| 148 |
+
new_length = session.context_manager.context_length
|
| 149 |
+
|
| 150 |
+
if new_length != old_length:
|
| 151 |
+
await session.send_event(
|
| 152 |
+
Event(
|
| 153 |
+
event_type="compacted",
|
| 154 |
+
data={"old_tokens": old_length, "new_tokens": new_length},
|
| 155 |
+
)
|
| 156 |
+
)
|
| 157 |
+
|
| 158 |
await session.send_event(
|
| 159 |
Event(
|
| 160 |
event_type="turn_complete",
|
|
|
|
| 172 |
@staticmethod
async def compact(session: Session) -> None:
    """Handle an explicit compact request (like compact in codex.rs:1317).

    Runs the context manager's compaction and emits a "compacted" event
    reporting the token counts before and after.
    """
    old_length = session.context_manager.context_length
    await session.context_manager.compact(model_name=session.config.model_name)
    new_length = session.context_manager.context_length

    await session.send_event(
        Event(
            event_type="compacted",
            # Keys must match what the event listener reads
            # ("old_tokens"/"new_tokens") and what the turn-loop emitter
            # sends — the previous "removed"/"remaining" keys made the
            # listener print 0 -> 0 for events from this handler.
            data={"old_tokens": old_length, "new_tokens": new_length},
        )
    )
|
| 185 |
|
agent/core/session.py
CHANGED
|
@@ -34,7 +34,7 @@ class Session:
|
|
| 34 |
event_queue: asyncio.Queue,
|
| 35 |
config: Config | None = None,
|
| 36 |
):
|
| 37 |
-
self.context_manager = ContextManager()
|
| 38 |
self.event_queue = event_queue
|
| 39 |
self.session_id = str(uuid.uuid4())
|
| 40 |
self.config = config or Config(
|
|
|
|
| 34 |
event_queue: asyncio.Queue,
|
| 35 |
config: Config | None = None,
|
| 36 |
):
|
| 37 |
+
self.context_manager = ContextManager(max_context=4_000, compact_size=0.1, untouched_messages=5)
|
| 38 |
self.event_queue = event_queue
|
| 39 |
self.session_id = str(uuid.uuid4())
|
| 40 |
self.config = config or Config(
|
agent/main.py
CHANGED
|
@@ -88,6 +88,10 @@ async def event_listener(
|
|
| 88 |
break
|
| 89 |
elif event.event_type == "processing":
|
| 90 |
print("⏳ Processing...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
# Silently ignore other events
|
| 92 |
|
| 93 |
except asyncio.CancelledError:
|
|
|
|
| 88 |
break
|
| 89 |
elif event.event_type == "processing":
|
| 90 |
print("⏳ Processing...", flush=True)
|
| 91 |
+
elif event.event_type == "compacted":
|
| 92 |
+
old_tokens = event.data.get("old_tokens", 0) if event.data else 0
|
| 93 |
+
new_tokens = event.data.get("new_tokens", 0) if event.data else 0
|
| 94 |
+
print(f"📦 Compacted context: {old_tokens} → {new_tokens} tokens")
|
| 95 |
# Silently ignore other events
|
| 96 |
|
| 97 |
except asyncio.CancelledError:
|