Henri Bonamy committed on
Commit
d574d65
·
1 Parent(s): 8f4b322

added compaction every x tokens

Browse files
agent/context_manager/manager.py CHANGED
@@ -2,14 +2,25 @@
2
  Context management for conversation history
3
  """
4
 
5
- from litellm import Message
 
 
6
 
7
 
8
  class ContextManager:
9
  """Manages conversation context and message history for the agent"""
10
 
11
- def __init__(self):
 
 
 
 
 
12
  self.system_prompt = self._load_system_prompt()
 
 
 
 
13
  self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
14
 
15
  def _load_system_prompt(self):
@@ -18,27 +29,58 @@ class ContextManager:
18
  # TODO: get system prompt from jinja template
19
  return "You are a helpful assistant."
20
 
21
- def add_message(self, message: Message) -> None:
22
  """Add a message to the history"""
 
 
 
23
  self.items.append(message)
24
 
25
  def get_messages(self) -> list[Message]:
26
  """Get all messages for sending to LLM"""
27
  return self.items
28
 
29
- def compact(self, target_size: int) -> None:
30
  """Remove old messages to keep history under target size"""
31
- # Keep system prompt (first message) and remove oldest user/assistant messages
32
- if len(self.items) <= target_size:
33
  return
34
 
35
- # Always keep system prompt
36
  system_msg = (
37
  self.items[0] if self.items and self.items[0].role == "system" else None
38
  )
39
- messages_to_keep = self.items[-(target_size - 1) :]
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  if system_msg:
42
- self.items = [system_msg] + messages_to_keep
43
  else:
44
- self.items = messages_to_keep
 
 
 
 
 
2
  Context management for conversation history
3
  """
4
 
5
+ import asyncio
6
+
7
+ from litellm import Message, acompletion
8
 
9
 
10
  class ContextManager:
11
  """Manages conversation context and message history for the agent"""
12
 
13
+ def __init__(
14
+ self,
15
+ max_context: int = 180_000,
16
+ compact_size: float = 0.1,
17
+ untouched_messages: int = 5,
18
+ ):
19
  self.system_prompt = self._load_system_prompt()
20
+ self.max_context = max_context
21
+ self.compact_size = int(max_context * compact_size)
22
+ self.context_length = len(self.system_prompt) // 4
23
+ self.untouched_messages = untouched_messages
24
  self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
25
 
26
  def _load_system_prompt(self):
 
29
  # TODO: get system prompt from jinja template
30
  return "You are a helpful assistant."
31
 
32
+ def add_message(self, message: Message, token_count: int = None) -> None:
33
  """Add a message to the history"""
34
+ if token_count:
35
+ self.context_length = token_count
36
+ print(f"DEBUG : token_count = {self.context_length}")
37
  self.items.append(message)
38
 
39
  def get_messages(self) -> list[Message]:
40
  """Get all messages for sending to LLM"""
41
  return self.items
42
 
43
+ async def compact(self, model_name: str) -> None:
44
  """Remove old messages to keep history under target size"""
45
+ if (self.context_length <= self.max_context) or not self.items:
 
46
  return
47
 
 
48
  system_msg = (
49
  self.items[0] if self.items and self.items[0].role == "system" else None
50
  )
 
51
 
52
+ # Don't summarize a certain number of just-preceding messages
53
+ recent_messages = self.items[-self.untouched_messages :]
54
+
55
+ # Summarize everything in between (skip system prompt, skip preceding n)
56
+ messages_to_summarize = self.items[1 : -self.untouched_messages]
57
+
58
+ # improbable, messages would have to very long
59
+ if not messages_to_summarize:
60
+ return
61
+
62
+ messages_to_summarize.append(
63
+ Message(
64
+ role="user",
65
+ content="Please provide a concise summary of the conversation above, focusing on key decisions, code changes, problems solved, and important context needed for future turns.",
66
+ )
67
+ )
68
+
69
+ response = await acompletion(
70
+ model=model_name,
71
+ messages=messages_to_summarize,
72
+ max_completion_tokens=self.compact_size,
73
+ )
74
+ summarized_message = Message(
75
+ role="assistant", content=response.choices[0].message.content
76
+ )
77
+
78
+ # Reconstruct: system + summary + recent 2 messages
79
  if system_msg:
80
+ self.items = [system_msg, summarized_message] + recent_messages
81
  else:
82
+ self.items = [summarized_message] + recent_messages
83
+
84
+ self.context_length = (
85
+ len(self.system_prompt) // 4 + response.usage.completion_tokens
86
+ )
agent/core/agent_loop.py CHANGED
@@ -1,11 +1,12 @@
1
- """
2
  Main agent implementation with integrated tool system and MCP support
3
  """
4
 
5
  import asyncio
6
  import json
7
 
8
- from litellm import ChatCompletionMessageToolCall, Message, ModelResponse, acompletion
 
9
  from lmnr import observe
10
 
11
  from agent.config import Config
@@ -58,17 +59,18 @@ class Handlers:
58
  tool_choice="auto",
59
  )
60
 
 
61
  message = response.choices[0].message
62
-
63
- # Extract content and tool calls
64
  content = message.content
 
65
  tool_calls: list[ToolCall] = message.get("tool_calls", [])
66
 
67
  # If no tool calls, add assistant message and we're done
68
  if not tool_calls:
69
  if content:
70
- assistant_msg = Message(role="assistant", content=content)
71
- session.context_manager.add_message(assistant_msg)
 
72
  await session.send_event(
73
  Event(
74
  event_type="assistant_message",
@@ -81,9 +83,11 @@ class Handlers:
81
  # Add assistant message with tool calls to history
82
  # LiteLLM will format this correctly for the provider
83
  assistant_msg = Message(
84
- role="assistant", content=content, tool_calls=tool_calls
 
 
85
  )
86
- session.context_manager.add_message(assistant_msg)
87
 
88
  if content:
89
  await session.send_event(
@@ -139,6 +143,18 @@ class Handlers:
139
  )
140
  break
141
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  await session.send_event(
143
  Event(
144
  event_type="turn_complete",
@@ -156,14 +172,14 @@ class Handlers:
156
  @staticmethod
157
  async def compact(session: Session) -> None:
158
  """Handle compact (like compact in codex.rs:1317)"""
159
- old_size = len(session.context_manager.items)
160
- session.context_manager.compact(target_size=10)
161
- new_size = len(session.context_manager.items)
162
 
163
  await session.send_event(
164
  Event(
165
  event_type="compacted",
166
- data={"removed": old_size - new_size, "remaining": new_size},
167
  )
168
  )
169
 
 
1
+ """loop
2
  Main agent implementation with integrated tool system and MCP support
3
  """
4
 
5
  import asyncio
6
  import json
7
 
8
+ from litellm import (ChatCompletionMessageToolCall, Message, ModelResponse,
9
+ acompletion)
10
  from lmnr import observe
11
 
12
  from agent.config import Config
 
59
  tool_choice="auto",
60
  )
61
 
62
+ # Extract text response, token usage, and tool calls
63
  message = response.choices[0].message
 
 
64
  content = message.content
65
+ token_count = response.usage.total_tokens
66
  tool_calls: list[ToolCall] = message.get("tool_calls", [])
67
 
68
  # If no tool calls, add assistant message and we're done
69
  if not tool_calls:
70
  if content:
71
+ assistant_msg = Message(
72
+ role="assistant", content=content)
73
+ session.context_manager.add_message(assistant_msg, token_count)
74
  await session.send_event(
75
  Event(
76
  event_type="assistant_message",
 
83
  # Add assistant message with tool calls to history
84
  # LiteLLM will format this correctly for the provider
85
  assistant_msg = Message(
86
+ role="assistant",
87
+ content=content,
88
+ tool_calls=tool_calls,
89
  )
90
+ session.context_manager.add_message(assistant_msg, token_count)
91
 
92
  if content:
93
  await session.send_event(
 
143
  )
144
  break
145
 
146
+ old_length = session.context_manager.context_length
147
+ await session.context_manager.compact(model_name=session.config.model_name)
148
+ new_length = session.context_manager.context_length
149
+
150
+ if new_length != old_length:
151
+ await session.send_event(
152
+ Event(
153
+ event_type="compacted",
154
+ data={"old_tokens": old_length, "new_tokens": new_length},
155
+ )
156
+ )
157
+
158
  await session.send_event(
159
  Event(
160
  event_type="turn_complete",
 
172
  @staticmethod
173
  async def compact(session: Session) -> None:
174
  """Handle compact (like compact in codex.rs:1317)"""
175
+ old_length = session.context_manager.context_length
176
+ await session.context_manager.compact(model_name=session.config.model_name)
177
+ new_length = session.context_manager.context_length
178
 
179
  await session.send_event(
180
  Event(
181
  event_type="compacted",
182
+ data={"removed": old_length, "remaining": new_length},
183
  )
184
  )
185
 
agent/core/session.py CHANGED
@@ -34,7 +34,7 @@ class Session:
34
  event_queue: asyncio.Queue,
35
  config: Config | None = None,
36
  ):
37
- self.context_manager = ContextManager()
38
  self.event_queue = event_queue
39
  self.session_id = str(uuid.uuid4())
40
  self.config = config or Config(
 
34
  event_queue: asyncio.Queue,
35
  config: Config | None = None,
36
  ):
37
+ self.context_manager = ContextManager(max_context=4_000, compact_size=0.1, untouched_messages=5)
38
  self.event_queue = event_queue
39
  self.session_id = str(uuid.uuid4())
40
  self.config = config or Config(
agent/main.py CHANGED
@@ -88,6 +88,10 @@ async def event_listener(
88
  break
89
  elif event.event_type == "processing":
90
  print("⏳ Processing...", flush=True)
 
 
 
 
91
  # Silently ignore other events
92
 
93
  except asyncio.CancelledError:
 
88
  break
89
  elif event.event_type == "processing":
90
  print("⏳ Processing...", flush=True)
91
+ elif event.event_type == "compacted":
92
+ old_tokens = event.data.get("old_tokens", 0) if event.data else 0
93
+ new_tokens = event.data.get("new_tokens", 0) if event.data else 0
94
+ print(f"📦 Compacted context: {old_tokens} → {new_tokens} tokens")
95
  # Silently ignore other events
96
 
97
  except asyncio.CancelledError: