frdel committed on
Commit
c65967d
·
1 Parent(s): c69afa0

vision prototype, terminal sessions


Vision functionality, vision_load tool
History.py re-work to support attachments and optimize token usage
Code execution tool support for multiple sessions in parallel

agent.py CHANGED
@@ -10,7 +10,12 @@ import models
10
  from langchain_core.prompt_values import ChatPromptValue
11
  from python.helpers import extract_tools, rate_limiter, files, errors, history, tokens
12
  from python.helpers.print_style import PrintStyle
13
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate, StringPromptTemplate
14
  from langchain_core.prompts.image import ImagePromptTemplate
15
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, BaseMessage
16
  from langchain_core.language_models.chat_models import BaseChatModel
@@ -91,7 +96,7 @@ class AgentContext:
91
  else:
92
  current_agent = self.agent0
93
 
94
- self.task =self.run_task(current_agent.monologue)
95
  return self.task
96
 
97
  def communicate(self, msg: "UserMessage", broadcast_level: int = 1):
@@ -130,9 +135,9 @@ class AgentContext:
130
  async def _process_chain(self, agent: "Agent", msg: "UserMessage|str", user=True):
131
  try:
132
  msg_template = (
133
- await agent.hist_add_user_message(msg) # type: ignore
134
  if user
135
- else await agent.hist_add_tool_result(
136
  tool_name="call_subordinate", tool_result=msg # type: ignore
137
  )
138
  )
@@ -283,9 +288,6 @@ class Agent:
283
  printer.stream(chunk)
284
  self.log_from_stream(full, log)
285
 
286
- # store as last context window content
287
- self.set_data(Agent.DATA_NAME_CTX_WINDOW, prompt.format())
288
-
289
  agent_response = await self.call_chat_model(
290
  prompt, callback=stream_callback
291
  )
@@ -296,10 +298,10 @@ class Agent:
296
  self.loop_data.last_response == agent_response
297
  ): # if assistant_response is the same as last message in history, let him know
298
  # Append the assistant's response to the history
299
- await self.hist_add_ai_response(agent_response)
300
  # Append warning message to the history
301
  warning_msg = self.read_prompt("fw.msg_repeat.md")
302
- await self.hist_add_warning(message=warning_msg)
303
  PrintStyle(font_color="orange", padding=True).print(
304
  warning_msg
305
  )
@@ -307,7 +309,7 @@ class Agent:
307
 
308
  else: # otherwise proceed with tool
309
  # Append the assistant's response to the history
310
- await self.hist_add_ai_response(agent_response)
311
  # process tools requested in agent message
312
  tools_result = await self.process_tools(agent_response)
313
  if tools_result: # final response of message loop available
@@ -319,7 +321,7 @@ class Agent:
319
  except RepairableException as e:
320
  # Forward repairable errors to the LLM, maybe it can fix them
321
  error_message = errors.format_error(e)
322
- await self.hist_add_warning(error_message)
323
  PrintStyle(font_color="red", padding=True).print(error_message)
324
  self.context.log.log(type="error", content=error_message)
325
  except Exception as e:
@@ -358,23 +360,31 @@ class Agent:
358
  extras += history.Message(False, content=extra).output()
359
  loop_data.extras_temporary.clear()
360
 
361
- # combine history and extras
362
- history_combined: list[OutputMessage] = history.group_outputs_abab(loop_data.history_output + extras)
363
-
364
- # convert history to LLM format
365
- history_langchain: list[BaseMessage] = history.output_langchain(history_combined)
366
-
367
- PrintStyle(font_color="grey", background_color="black", bold=True, padding=True).print(
368
- f"History Langchain: {history_langchain}"
369
  )
370
 
371
  # build chain from system prompt, message history and model
 
372
  prompt = ChatPromptTemplate.from_messages(
373
  [
374
- SystemMessage(content="\n\n".join(loop_data.system)),
375
  *history_langchain,
376
  ]
377
  )
 
378
  return prompt
379
 
380
  def handle_critical_exception(self, exception: Exception):
@@ -441,12 +451,12 @@ class Agent:
441
  def set_data(self, field: str, value):
442
  self.data[field] = value
443
 
444
- def hist_add_message(self, ai: bool, content: history.MessageContent):
445
- return self.history.add_message(ai=ai, content=content)
446
-
447
- async def hist_add_user_message(
448
- self, message: UserMessage, intervention: bool = False
449
  ):
 
 
 
450
  self.history.new_topic() # user message starts a new topic in history
451
 
452
  # load message template based on intervention
@@ -476,19 +486,18 @@ class Agent:
476
  self.last_user_message = msg
477
  return msg
478
 
479
- async def hist_add_ai_response(self, message: str):
480
  self.loop_data.last_response = message
481
  content = self.parse_prompt("fw.ai_response.md", message=message)
482
  return self.hist_add_message(True, content=content)
483
 
484
- async def hist_add_warning(self, message: history.MessageContent):
485
  content = self.parse_prompt("fw.warning.md", message=message)
486
  return self.hist_add_message(False, content=content)
487
 
488
- async def hist_add_tool_result(self, tool_name: str, tool_result: str, attachments: list[str] = []):
489
- attachments_str = json.dumps(attachments).replace("\n", "")
490
  content = self.parse_prompt(
491
- "fw.tool_result.md", tool_name=tool_name, tool_result=tool_result, attachments=attachments_str
492
  )
493
  return self.hist_add_message(False, content=content)
494
 
@@ -620,9 +629,9 @@ class Agent:
620
  msg = self.intervention
621
  self.intervention = None # reset the intervention message
622
  if progress.strip():
623
- await self.hist_add_ai_response(progress)
624
  # append the intervention message
625
- await self.hist_add_user_message(msg, intervention=True)
626
  raise InterventionException(msg)
627
 
628
  async def wait_if_paused(self):
@@ -649,7 +658,7 @@ class Agent:
649
  return response.message
650
  else:
651
  msg = self.read_prompt("fw.msg_misformat.md")
652
- await self.hist_add_warning(msg)
653
  PrintStyle(font_color="red", padding=True).print(msg)
654
  self.context.log.log(
655
  type="error", content=f"{self.agent_name}: Message misformat"
 
10
  from langchain_core.prompt_values import ChatPromptValue
11
  from python.helpers import extract_tools, rate_limiter, files, errors, history, tokens
12
  from python.helpers.print_style import PrintStyle
13
+ from langchain_core.prompts import (
14
+ ChatPromptTemplate,
15
+ MessagesPlaceholder,
16
+ HumanMessagePromptTemplate,
17
+ StringPromptTemplate,
18
+ )
19
  from langchain_core.prompts.image import ImagePromptTemplate
20
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, BaseMessage
21
  from langchain_core.language_models.chat_models import BaseChatModel
 
96
  else:
97
  current_agent = self.agent0
98
 
99
+ self.task = self.run_task(current_agent.monologue)
100
  return self.task
101
 
102
  def communicate(self, msg: "UserMessage", broadcast_level: int = 1):
 
135
  async def _process_chain(self, agent: "Agent", msg: "UserMessage|str", user=True):
136
  try:
137
  msg_template = (
138
+ agent.hist_add_user_message(msg) # type: ignore
139
  if user
140
+ else agent.hist_add_tool_result(
141
  tool_name="call_subordinate", tool_result=msg # type: ignore
142
  )
143
  )
 
288
  printer.stream(chunk)
289
  self.log_from_stream(full, log)
290
 
 
 
 
291
  agent_response = await self.call_chat_model(
292
  prompt, callback=stream_callback
293
  )
 
298
  self.loop_data.last_response == agent_response
299
  ): # if assistant_response is the same as last message in history, let him know
300
  # Append the assistant's response to the history
301
+ self.hist_add_ai_response(agent_response)
302
  # Append warning message to the history
303
  warning_msg = self.read_prompt("fw.msg_repeat.md")
304
+ self.hist_add_warning(message=warning_msg)
305
  PrintStyle(font_color="orange", padding=True).print(
306
  warning_msg
307
  )
 
309
 
310
  else: # otherwise proceed with tool
311
  # Append the assistant's response to the history
312
+ self.hist_add_ai_response(agent_response)
313
  # process tools requested in agent message
314
  tools_result = await self.process_tools(agent_response)
315
  if tools_result: # final response of message loop available
 
321
  except RepairableException as e:
322
  # Forward repairable errors to the LLM, maybe it can fix them
323
  error_message = errors.format_error(e)
324
+ self.hist_add_warning(error_message)
325
  PrintStyle(font_color="red", padding=True).print(error_message)
326
  self.context.log.log(type="error", content=error_message)
327
  except Exception as e:
 
360
  extras += history.Message(False, content=extra).output()
361
  loop_data.extras_temporary.clear()
362
 
363
+ # convert history + extras to LLM format
364
+ history_langchain: list[BaseMessage] = history.output_langchain(
365
+ loop_data.history_output + extras
366
  )
367
 
368
  # build chain from system prompt, message history and model
369
+ system_text = "\n\n".join(loop_data.system)
370
  prompt = ChatPromptTemplate.from_messages(
371
  [
372
+ SystemMessage(content=system_text),
373
  *history_langchain,
374
  ]
375
  )
376
+
377
+ # store as last context window content
378
+ self.set_data(
379
+ Agent.DATA_NAME_CTX_WINDOW,
380
+ {
381
+ "text": prompt.format(),
382
+ "tokens": self.history.get_tokens()
383
+ + tokens.approximate_tokens(system_text)
384
+ + tokens.approximate_tokens(history.output_text(extras)),
385
+ },
386
+ )
387
+
388
  return prompt
389
 
390
  def handle_critical_exception(self, exception: Exception):
 
451
  def set_data(self, field: str, value):
452
  self.data[field] = value
453
 
454
+ def hist_add_message(
455
+ self, ai: bool, content: history.MessageContent, tokens: int = 0
 
 
 
456
  ):
457
+ return self.history.add_message(ai=ai, content=content, tokens=tokens)
458
+
459
+ def hist_add_user_message(self, message: UserMessage, intervention: bool = False):
460
  self.history.new_topic() # user message starts a new topic in history
461
 
462
  # load message template based on intervention
 
486
  self.last_user_message = msg
487
  return msg
488
 
489
+ def hist_add_ai_response(self, message: str):
490
  self.loop_data.last_response = message
491
  content = self.parse_prompt("fw.ai_response.md", message=message)
492
  return self.hist_add_message(True, content=content)
493
 
494
+ def hist_add_warning(self, message: history.MessageContent):
495
  content = self.parse_prompt("fw.warning.md", message=message)
496
  return self.hist_add_message(False, content=content)
497
 
498
+ def hist_add_tool_result(self, tool_name: str, tool_result: str):
 
499
  content = self.parse_prompt(
500
+ "fw.tool_result.md", tool_name=tool_name, tool_result=tool_result
501
  )
502
  return self.hist_add_message(False, content=content)
503
 
 
629
  msg = self.intervention
630
  self.intervention = None # reset the intervention message
631
  if progress.strip():
632
+ self.hist_add_ai_response(progress)
633
  # append the intervention message
634
+ self.hist_add_user_message(msg, intervention=True)
635
  raise InterventionException(msg)
636
 
637
  async def wait_if_paused(self):
 
658
  return response.message
659
  else:
660
  msg = self.read_prompt("fw.msg_misformat.md")
661
+ self.hist_add_warning(msg)
662
  PrintStyle(font_color="red", padding=True).print(msg)
663
  self.context.log.log(
664
  type="error", content=f"{self.agent_name}: Message misformat"
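Note on the context-window change above: instead of re-counting tokens from the full rendered prompt, the agent now caches a dict with the prompt text and a token estimate built from the history's cached count plus estimates for the system prompt and extras. A minimal sketch of that bookkeeping, assuming a simple characters-per-token heuristic (the real `tokens.approximate_tokens` implementation is not shown in this diff):

```python
# Hypothetical sketch of the context-window bookkeeping; the 4-chars-per-token
# ratio is an assumption, not the project's actual heuristic.

def approximate_tokens(text: str, chars_per_token: float = 4.0) -> int:
    return int(len(text) / chars_per_token)

def build_ctx_window(prompt_text: str, system_text: str, extras_text: str, history_tokens: int) -> dict:
    # history tokens are cached per message; only the per-call parts are re-estimated
    return {
        "text": prompt_text,
        "tokens": history_tokens
        + approximate_tokens(system_text)
        + approximate_tokens(extras_text),
    }

if __name__ == "__main__":
    window = build_ctx_window("...rendered prompt...", "You are a helpful agent.", "", 1200)
    print(window)  # {'text': '...rendered prompt...', 'tokens': 1206}
```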
prompts/default/agent.system.instruments.md CHANGED
@@ -1,4 +1,5 @@
1
  # Instruments
2
- - following are instruments at disposal:
 
3
 
4
- {{instruments}}
 
1
  # Instruments
2
+ - following are instruments at disposal
3
+ - do not overly rely on them they might not be relevant
4
 
5
+ {{instruments}}
prompts/default/agent.system.memories.md CHANGED
@@ -1,4 +1,5 @@
1
  # Memories on the topic
2
- - following are your memories about current topic:
 
3
 
4
  {{memories}}
 
1
  # Memories on the topic
2
+ - following are memories about current topic
3
+ - do not overly rely on them they might not be relevant
4
 
5
  {{memories}}
prompts/default/agent.system.solutions.md CHANGED
@@ -1,4 +1,5 @@
1
  # Solutions from the past
2
- - following are your memories about successful solutions of related problems:
 
3
 
4
  {{solutions}}
 
1
  # Solutions from the past
2
+ - following are memories about successful solutions of related problems
3
+ - do not overly rely on them they might not be relevant
4
 
5
  {{solutions}}
prompts/default/agent.system.tool.code_exe.md CHANGED
@@ -3,10 +3,9 @@
3
  execute terminal commands python nodejs code for computation or software tasks
4
  place code in "code" arg; escape carefully and indent properly
5
  select "runtime" arg: "terminal" "python" "nodejs" "output" "reset"
6
- for dialogues (Y/N etc.), use "terminal" runtime next step, send answer
7
  if code runs long, use "output" to wait, "reset" to kill process
8
  use "pip" "npm" "apt-get" in "terminal" to install packages
9
- important: never use implicit print/output—it doesn't work!
10
  to output, use print() or console.log()
11
  if tool outputs error, adjust code before retrying; knowledge_tool can help
12
  important: check code for placeholders or demo data; replace with real variables; don't reuse snippets
@@ -26,6 +25,7 @@ usage:
26
  "tool_name": "code_execution_tool",
27
  "tool_args": {
28
  "runtime": "python",
 
29
  "code": "import os\nprint(os.getcwd())",
30
  }
31
  }
@@ -41,6 +41,7 @@ usage:
41
  "tool_name": "code_execution_tool",
42
  "tool_args": {
43
  "runtime": "terminal",
 
44
  "code": "apt-get install zip",
45
  }
46
  }
@@ -55,6 +56,7 @@ usage:
55
  "tool_name": "code_execution_tool",
56
  "tool_args": {
57
  "runtime": "output",
 
58
  }
59
  }
60
  ~~~
@@ -68,6 +70,7 @@ usage:
68
  "tool_name": "code_execution_tool",
69
  "tool_args": {
70
  "runtime": "reset",
 
71
  }
72
  }
73
  ~~~
 
3
  execute terminal commands python nodejs code for computation or software tasks
4
  place code in "code" arg; escape carefully and indent properly
5
  select "runtime" arg: "terminal" "python" "nodejs" "output" "reset"
6
+ select "session" number, 0 default, others for multitasking
7
  if code runs long, use "output" to wait, "reset" to kill process
8
  use "pip" "npm" "apt-get" in "terminal" to install packages
 
9
  to output, use print() or console.log()
10
  if tool outputs error, adjust code before retrying; knowledge_tool can help
11
  important: check code for placeholders or demo data; replace with real variables; don't reuse snippets
 
25
  "tool_name": "code_execution_tool",
26
  "tool_args": {
27
  "runtime": "python",
28
+ "session": 0,
29
  "code": "import os\nprint(os.getcwd())",
30
  }
31
  }
 
41
  "tool_name": "code_execution_tool",
42
  "tool_args": {
43
  "runtime": "terminal",
44
+ "session": 0,
45
  "code": "apt-get install zip",
46
  }
47
  }
 
56
  "tool_name": "code_execution_tool",
57
  "tool_args": {
58
  "runtime": "output",
59
+ "session": 0,
60
  }
61
  }
62
  ~~~
 
70
  "tool_name": "code_execution_tool",
71
  "tool_args": {
72
  "runtime": "reset",
73
+ "session": 0,
74
  }
75
  }
76
  ~~~
prompts/default/agent.system.tools_vision.md CHANGED
@@ -1,3 +1,18 @@
1
  ## "Multimodal (Vision) Agent Tools" available:
2
 
3
- None yet. In future, this section will contain vision-only tools
 
1
  ## "Multimodal (Vision) Agent Tools" available:
2
 
3
+ ### vision_load:
4
+ load image data to LLM
5
+ use paths arg for attachments
6
+
7
+ **Example usage**:
8
+ ```json
9
+ {
10
+ "thoughts": [
11
+ "I need to see the image...",
12
+ ],
13
+ "tool_name": "vision_load",
14
+ "tool_args": {
15
+ "paths": ["/path/to/image.png"],
16
+ }
17
+ }
18
+ ```
prompts/default/fw.tool_result.md CHANGED
@@ -1,7 +1,6 @@
1
- ~~~json
2
  {
3
  "tool_name": {{tool_name}},
4
- "tool_result": {{tool_result}},
5
- "attachments": {{attachments}}
6
  }
7
- ~~~
 
1
+ ```json
2
  {
3
  "tool_name": {{tool_name}},
4
+ "tool_result": {{tool_result}}
 
5
  }
6
+ ```
python/api/ctx_window_get.py CHANGED
@@ -9,6 +9,10 @@ class GetCtxWindow(ApiHandler):
9
  context = self.get_context(ctxid)
10
  agent = context.streaming_agent or context.agent0
11
  window = agent.get_data(agent.DATA_NAME_CTX_WINDOW)
12
- size = tokens.approximate_tokens(window)
 
13
 
14
- return {"content": window, "tokens": size}
 
9
  context = self.get_context(ctxid)
10
  agent = context.streaming_agent or context.agent0
11
  window = agent.get_data(agent.DATA_NAME_CTX_WINDOW)
12
+ if not window or not isinstance(window, dict):
13
+ return {"content": "", "tokens": 0}
14
 
15
+ text = window["text"]
16
+ tokens = window["tokens"]
17
+
18
+ return {"content": text, "tokens": tokens}
python/api/history_get.py CHANGED
@@ -8,8 +8,8 @@ class GetHistory(ApiHandler):
8
  ctxid = input.get("context", [])
9
  context = self.get_context(ctxid)
10
  agent = context.streaming_agent or context.agent0
11
- history = agent.history.output()
12
- size = tokens.approximate_tokens(agent.history.output_text())
13
 
14
  return {
15
  "history": history,
 
8
  ctxid = input.get("context", [])
9
  context = self.get_context(ctxid)
10
  agent = context.streaming_agent or context.agent0
11
+ history = agent.history.output_text()
12
+ size = agent.history.get_tokens()
13
 
14
  return {
15
  "history": history,
python/helpers/files.py CHANGED
@@ -1,6 +1,7 @@
1
  from fnmatch import fnmatch
2
  import json
3
  import os, re
 
4
 
5
  import re
6
  import shutil
@@ -45,6 +46,32 @@ def read_file(_relative_path, _backup_dirs=None, _encoding="utf-8", **kwargs):
45
  return content
46
 
47
 
 
48
  def replace_placeholders_text(_content: str, **kwargs):
49
  # Replace placeholders with values from kwargs
50
  for key, value in kwargs.items():
@@ -175,6 +202,15 @@ def write_file_bin(relative_path: str, content: bytes):
175
  f.write(content)
176
 
177
 
 
178
  def delete_file(relative_path: str):
179
  abs_path = get_abs_path(relative_path)
180
  if os.path.exists(abs_path):
 
1
  from fnmatch import fnmatch
2
  import json
3
  import os, re
4
+ import base64
5
 
6
  import re
7
  import shutil
 
46
  return content
47
 
48
 
49
+ def read_file_bin(_relative_path, _backup_dirs=None):
50
+ # init backup dirs
51
+ if _backup_dirs is None:
52
+ _backup_dirs = []
53
+
54
+ # get absolute path
55
+ absolute_path = find_file_in_dirs(_relative_path, _backup_dirs)
56
+
57
+ # read binary content
58
+ with open(absolute_path, "rb") as f:
59
+ return f.read()
60
+
61
+
62
+ def read_file_base64(_relative_path, _backup_dirs=None):
63
+ # init backup dirs
64
+ if _backup_dirs is None:
65
+ _backup_dirs = []
66
+
67
+ # get absolute path
68
+ absolute_path = find_file_in_dirs(_relative_path, _backup_dirs)
69
+
70
+ # read binary content and encode to base64
71
+ with open(absolute_path, "rb") as f:
72
+ return base64.b64encode(f.read()).decode('utf-8')
73
+
74
+
75
  def replace_placeholders_text(_content: str, **kwargs):
76
  # Replace placeholders with values from kwargs
77
  for key, value in kwargs.items():
 
202
  f.write(content)
203
 
204
 
205
+ def write_file_base64(relative_path: str, content: str):
206
+ # decode base64 string to bytes
207
+ data = base64.b64decode(content)
208
+ abs_path = get_abs_path(relative_path)
209
+ os.makedirs(os.path.dirname(abs_path), exist_ok=True)
210
+ with open(abs_path, "wb") as f:
211
+ f.write(data)
212
+
213
+
214
  def delete_file(relative_path: str):
215
  abs_path = get_abs_path(relative_path)
216
  if os.path.exists(abs_path):
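The new `read_file_base64` / `write_file_base64` helpers are thin wrappers over the standard library. A self-contained round trip showing the encoding they perform (the temp-file path here is illustrative only):

```python
import base64
import os
import tempfile

# round-trip a small binary payload the same way the base64 helpers do
payload = bytes(range(16))
src = os.path.join(tempfile.gettempdir(), "demo.bin")

with open(src, "wb") as f:
    f.write(payload)

# read_file_base64: read raw bytes, then base64-encode to a utf-8 string
with open(src, "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")

# write_file_base64: decode the string back to bytes before writing
decoded = base64.b64decode(encoded)
assert decoded == payload
print(f"{len(payload)} bytes -> {len(encoded)} base64 chars")
```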
python/helpers/history.py CHANGED
@@ -1,16 +1,14 @@
1
  from abc import abstractmethod
2
  import asyncio
3
  from collections import OrderedDict
 
4
  import json
5
  import math
6
- import os
7
- from typing import Coroutine, Literal, TypedDict, cast
8
  from python.helpers import messages, tokens, settings, call_llm
9
  from enum import Enum
10
- from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, BaseMessage
11
- from python.helpers.print_style import PrintStyle
12
- from langchain_core.prompts import HumanMessagePromptTemplate
13
- from typing import Any
14
  BULK_MERGE_COUNT = 3
15
  TOPICS_KEEP_COUNT = 3
16
  CURRENT_TOPIC_RATIO = 0.5
@@ -18,14 +16,22 @@ HISTORY_TOPIC_RATIO = 0.3
18
  HISTORY_BULK_RATIO = 0.2
19
  TOPIC_COMPRESS_RATIO = 0.65
20
  LARGE_MESSAGE_TO_TOPIC_RATIO = 0.25
 
21
 
22
- MessageContent = (
23
- list["MessageContent"]
24
- | OrderedDict[str, "MessageContent"]
25
- | list[OrderedDict[str, "MessageContent"]]
26
- | str
27
- | list[str]
28
- )
 
29
 
30
 
31
  class OutputMessage(TypedDict):
@@ -37,9 +43,9 @@ class Record:
37
  def __init__(self):
38
  pass
39
 
 
40
  def get_tokens(self) -> int:
41
- out = self.output_text()
42
- return tokens.approximate_tokens(out)
43
 
44
  @abstractmethod
45
  async def compress(self) -> bool:
@@ -70,10 +76,25 @@ class Record:
70
 
71
 
72
  class Message(Record):
73
- def __init__(self, ai: bool, content: MessageContent):
74
  self.ai = ai
75
  self.content = content
76
- self.summary: MessageContent = ""
 
77
 
78
  async def compress(self):
79
  return False
@@ -93,12 +114,15 @@ class Message(Record):
93
  "ai": self.ai,
94
  "content": self.content,
95
  "summary": self.summary,
 
96
  }
97
 
98
  @staticmethod
99
  def from_dict(data: dict, history: "History"):
100
- msg = Message(ai=data["ai"], content=data.get("content", "Content lost"))
 
101
  msg.summary = data.get("summary", "")
 
102
  return msg
103
 
104
 
@@ -108,8 +132,16 @@ class Topic(Record):
108
  self.summary: str = ""
109
  self.messages: list[Message] = []
110
 
111
- def add_message(self, ai: bool, content: MessageContent):
112
- msg = Message(ai=ai, content=content)
 
113
  self.messages.append(msg)
114
  return msg
115
 
@@ -118,7 +150,7 @@ class Topic(Record):
118
  return [OutputMessage(ai=False, content=self.summary)]
119
  else:
120
  msgs = [m for r in self.messages for m in r.output()]
121
- return group_outputs_abab(msgs)
122
 
123
  async def summarize(self):
124
  self.summary = await self.summarize_messages(self.messages)
@@ -134,22 +166,31 @@ class Topic(Record):
134
  )
135
  large_msgs = []
136
  for m in (m for m in self.messages if not m.summary):
 
137
  out = m.output()
138
  text = output_text(out)
139
- tok = tokens.approximate_tokens(text)
140
  leng = len(text)
141
  if tok > msg_max_size:
142
  large_msgs.append((m, tok, leng, out))
143
  large_msgs.sort(key=lambda x: x[1], reverse=True)
144
  for msg, tok, leng, out in large_msgs:
145
  trim_to_chars = leng * (msg_max_size / tok)
146
- trunc = messages.truncate_dict_by_ratio(
147
- self.history.agent,
148
- out[0]["content"],
149
- trim_to_chars * 1.15,
150
- trim_to_chars * 0.85,
151
- )
152
- msg.summary = trunc
 
153
 
154
  return True
155
  return False
@@ -175,6 +216,7 @@ class Topic(Record):
175
  return False
176
 
177
  async def summarize_messages(self, messages: list[Message]):
 
178
  msg_txt = [m.output_text() for m in messages]
179
  summary = await self.history.agent.call_utility_model(
180
  system=self.history.agent.read_prompt("fw.topic_summary.sys.md"),
@@ -194,9 +236,9 @@ class Topic(Record):
194
  @staticmethod
195
  def from_dict(data: dict, history: "History"):
196
  topic = Topic(history=history)
197
- topic.summary = data["summary"]
198
  topic.messages = [
199
- Message.from_dict(m, history=history) for m in data["messages"]
200
  ]
201
  return topic
202
 
@@ -214,7 +256,7 @@ class Bulk(Record):
214
  return [OutputMessage(ai=False, content=self.summary)]
215
  else:
216
  msgs = [m for r in self.records for m in r.output()]
217
- return group_outputs_abab(msgs)
218
 
219
  async def compress(self):
220
  return False
@@ -253,8 +295,15 @@ class History(Record):
253
  self.current = Topic(history=self)
254
  self.agent: Agent = agent
255
 
 
 
 
 
 
 
 
256
  def is_over_limit(self):
257
- limit = get_ctx_size_for_history()
258
  total = self.get_tokens()
259
  return total > limit
260
 
@@ -267,15 +316,10 @@ class History(Record):
267
  def get_current_topic_tokens(self) -> int:
268
  return self.current.get_tokens()
269
 
270
- def get_tokens(self) -> int:
271
- return (
272
- self.get_bulks_tokens()
273
- + self.get_topics_tokens()
274
- + self.get_current_topic_tokens()
275
- )
276
-
277
- def add_message(self, ai: bool, content: MessageContent):
278
- return self.current.add_message(ai, content=content)
279
 
280
  def new_topic(self):
281
  if self.current.messages:
@@ -287,7 +331,6 @@ class History(Record):
287
  result += [m for b in self.bulks for m in b.output()]
288
  result += [m for t in self.topics for m in t.output()]
289
  result += self.current.output()
290
- result = group_outputs_abab(result)
291
  return result
292
 
293
  @staticmethod
@@ -307,7 +350,7 @@ class History(Record):
307
 
308
  def serialize(self):
309
  data = self.to_dict()
310
- return json.dumps(data)
311
 
312
  async def compress(self):
313
  compressed = False
@@ -317,7 +360,7 @@ class History(Record):
317
  self.get_topics_tokens(),
318
  self.get_bulks_tokens(),
319
  )
320
- total = get_ctx_size_for_history()
321
  ratios = [
322
  (curr, CURRENT_TOPIC_RATIO, "current_topic"),
323
  (hist, HISTORY_TOPIC_RATIO, "history_topic"),
@@ -392,25 +435,46 @@ class History(Record):
392
  def deserialize_history(json_data: str, agent) -> History:
393
  history = History(agent=agent)
394
  if json_data:
395
- data = json.loads(json_data)
396
  history = History.from_dict(data, history=history)
397
  return history
398
 
399
 
400
- def get_ctx_size_for_history() -> int:
401
  set = settings.get_settings()
402
  return int(set["chat_model_ctx_length"] * set["chat_model_ctx_history"])
403
 
404
 
405
- def serialize_output(output: OutputMessage, ai_label="ai", human_label="human"):
406
- return f'{ai_label if output["ai"] else human_label}: {serialize_content(output["content"])}'
407
 
408
 
409
- def serialize_content(content: MessageContent) -> str:
 
410
  if isinstance(content, str):
411
  return content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  try:
413
- return json.dumps(content)
414
  except Exception as e:
415
  raise e
416
 
@@ -421,98 +485,73 @@ def group_outputs_abab(outputs: list[OutputMessage]) -> list[OutputMessage]:
421
  if result and result[-1]["ai"] == out["ai"]:
422
  result[-1] = OutputMessage(
423
  ai=result[-1]["ai"],
424
- content=merge_outputs(result[-1]["content"], out["content"]),
425
  )
426
  else:
427
  result.append(out)
428
  return result
429
 
430
 
431
- def output_langchain(messages: list[OutputMessage]) -> list[BaseMessage]:
432
  result = []
433
- for m in messages:
434
- if m["ai"]:
435
- result.append(AIMessage(content=serialize_content(m["content"])))
 
436
  else:
437
- contents = m["content"]
438
-
439
- # sometimes content is a list sometimes not
440
- if not isinstance(contents, list):
441
- contents = [contents]
442
 
443
- PrintStyle(font_color="grey", background_color="black", bold=True, padding=True).print(
444
- f"Contents: {json.dumps(contents, indent=2)}"
445
- )
446
 
447
- template: list[dict[str, str]] = [] # type: ignore
448
- message = ""
449
- images = {}
450
- for _, content in enumerate(contents):
451
- if message:
452
- # the first message is the user message, then the memory and solutions
453
- message += "\n\n--- Memory & Solutions Section: ---\n\n"
454
-
455
- message += serialize_content(content)
456
-
457
- if isinstance(content, dict) and "attachments" in content:
458
- attachments: list[str] = cast(list[str], content["attachments"])
459
- for attachment in attachments:
460
- if not os.path.exists(str(attachment)):
461
- continue
462
- if attachment not in images:
463
- import base64
464
- from mimetypes import guess_type
465
- mime_type, _ = guess_type(str(attachment))
466
- if mime_type.startswith("image/"):
467
- # Read and encode the image file
468
- with open(str(attachment), "rb") as image_file:
469
- base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
470
- # Construct the data URL
471
- images[attachment] = f"data:{mime_type};base64,{base64_encoded_data}"
472
-
473
- if message:
474
- template.append({"type": "text", "text": message})
475
- if images:
476
- for _, image in images.items():
477
- template.append({"type": "image_url", "image_url": image})
478
- if template:
479
- # only jinja2 is safe for json, both mustache({{...}}) and f-string({...}) are not
480
- result.append(HumanMessagePromptTemplate.from_template(template=template, partial_variables={}, template_format="jinja2")) # type: ignore
481
-
482
- PrintStyle(font_color="grey", background_color="black", bold=True, padding=True).print(
483
- f"Result: {result}"
484
- )
485
  return result
486
 
487
 
488
  def output_text(messages: list[OutputMessage], ai_label="ai", human_label="human"):
489
- return "\n".join(serialize_output(o, ai_label, human_label) for o in messages)
 
490
 
 
 
 
491
 
492
- def merge_outputs(a: MessageContent, b: MessageContent) -> MessageContent:
493
  if not isinstance(a, list):
494
  a = [a]
495
  if not isinstance(b, list):
496
  b = [b]
497
- return a + b # type: ignore
498
- # return merge_properties(a, b)
499
 
500
 
501
- def merge_properties(a: MessageContent, b: MessageContent) -> MessageContent:
502
- if isinstance(a, list):
503
- if isinstance(b, list):
504
- return a + b # type: ignore
 
 
 
505
  else:
506
- return a + [b]
507
- elif isinstance(b, list):
508
- return [a] + b # type: ignore
509
- elif isinstance(a, dict) and isinstance(b, dict):
510
- for key, value in b.items():
511
- if key in a:
512
- a[key] = merge_properties(a[key], value)
513
- else:
514
- a[key] = value
515
- return a
516
- elif isinstance(a, str) and isinstance(b, str):
517
- return a + b
518
- raise ValueError(f"Cannot merge {a} and {b}")
 
 
1
  from abc import abstractmethod
2
  import asyncio
3
  from collections import OrderedDict
4
+ from collections.abc import Mapping
5
  import json
6
  import math
7
+ from typing import Coroutine, Literal, TypedDict, cast, Union, Dict, List, Any, override
 
8
  from python.helpers import messages, tokens, settings, call_llm
9
  from enum import Enum
10
+ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, AIMessage
11
+
 
 
12
  BULK_MERGE_COUNT = 3
13
  TOPICS_KEEP_COUNT = 3
14
  CURRENT_TOPIC_RATIO = 0.5
 
16
  HISTORY_BULK_RATIO = 0.2
17
  TOPIC_COMPRESS_RATIO = 0.65
18
  LARGE_MESSAGE_TO_TOPIC_RATIO = 0.25
19
+ RAW_MESSAGE_OUTPUT_TEXT_TRIM = 100
20
+
21
+
22
+ class RawMessage(TypedDict):
23
+ raw_content: "MessageContent"
24
+ preview: str | None
25
+
26
 
27
+ MessageContent = Union[
28
+ List["MessageContent"],
29
+ Dict[str, "MessageContent"],
30
+ List[Dict[str, "MessageContent"]],
31
+ str,
32
+ List[str],
33
+ RawMessage,
34
+ ]
35
 
36
 
37
  class OutputMessage(TypedDict):
 
43
  def __init__(self):
44
  pass
45
 
46
+ @abstractmethod
47
  def get_tokens(self) -> int:
48
+ pass
 
49
 
50
  @abstractmethod
51
  async def compress(self) -> bool:
 
76
 
77
 
78
  class Message(Record):
79
+ def __init__(self, ai: bool, content: MessageContent, tokens: int = 0):
80
  self.ai = ai
81
  self.content = content
82
+ self.summary: str = ""
83
+ self.tokens: int = tokens or self.calculate_tokens()
84
+
85
+ @override
86
+ def get_tokens(self) -> int:
87
+ if not self.tokens:
88
+ self.tokens = self.calculate_tokens()
89
+ return self.tokens
90
+
91
+ def calculate_tokens(self):
92
+ text = self.output_text()
93
+ return tokens.approximate_tokens(text)
94
+
95
+ def set_summary(self, summary: str):
96
+ self.summary = summary
97
+ self.tokens = self.calculate_tokens()
98
 
99
  async def compress(self):
100
  return False
 
114
  "ai": self.ai,
115
  "content": self.content,
116
  "summary": self.summary,
117
+ "tokens": self.tokens,
118
  }
119
 
120
  @staticmethod
121
  def from_dict(data: dict, history: "History"):
122
+ content = data.get("content", "Content lost")
123
+ msg = Message(ai=data["ai"], content=content)
124
  msg.summary = data.get("summary", "")
125
+ msg.tokens = data.get("tokens", 0)
126
  return msg
127
 
128
 
 
132
  self.summary: str = ""
133
  self.messages: list[Message] = []
134
 
135
+ def get_tokens(self):
136
+ if self.summary:
137
+ return tokens.approximate_tokens(self.summary)
138
+ else:
139
+ return sum(msg.get_tokens() for msg in self.messages)
140
+
141
+ def add_message(
142
+ self, ai: bool, content: MessageContent, tokens: int = 0
143
+ ) -> Message:
144
+ msg = Message(ai=ai, content=content, tokens=tokens)
145
  self.messages.append(msg)
146
  return msg
147
 
 
150
  return [OutputMessage(ai=False, content=self.summary)]
151
  else:
152
  msgs = [m for r in self.messages for m in r.output()]
153
+ return msgs
154
 
155
  async def summarize(self):
156
  self.summary = await self.summarize_messages(self.messages)
 
166
  )
167
  large_msgs = []
168
  for m in (m for m in self.messages if not m.summary):
169
+ # TODO refactor this
170
  out = m.output()
171
  text = output_text(out)
172
+ tok = m.get_tokens()
173
  leng = len(text)
174
  if tok > msg_max_size:
175
  large_msgs.append((m, tok, leng, out))
176
  large_msgs.sort(key=lambda x: x[1], reverse=True)
177
  for msg, tok, leng, out in large_msgs:
178
  trim_to_chars = leng * (msg_max_size / tok)
179
+ # raw messages will be replaced as a whole, they would become invalid when truncated
180
+ if _is_raw_message(out[0]["content"]):
181
+ msg.set_summary(
182
+ "Message content replaced to save space in context window"
183
+ )
184
+
185
+ # regular messages will be truncated
186
+ else:
187
+ trunc = messages.truncate_dict_by_ratio(
188
+ self.history.agent,
189
+ out[0]["content"],
190
+ trim_to_chars * 1.15,
191
+ trim_to_chars * 0.85,
192
+ )
193
+ msg.set_summary(_json_dumps(trunc))
194
 
195
  return True
196
  return False
 
216
  return False
217
 
218
  async def summarize_messages(self, messages: list[Message]):
219
+ # FIXME: vision bytes are sent to utility LLM, send summary instead
220
  msg_txt = [m.output_text() for m in messages]
221
  summary = await self.history.agent.call_utility_model(
222
  system=self.history.agent.read_prompt("fw.topic_summary.sys.md"),
 
236
  @staticmethod
237
  def from_dict(data: dict, history: "History"):
238
  topic = Topic(history=history)
239
+ topic.summary = data.get("summary", "")
240
  topic.messages = [
241
+ Message.from_dict(m, history=history) for m in data.get("messages", [])
242
  ]
243
  return topic
244
 
 
256
  return [OutputMessage(ai=False, content=self.summary)]
257
  else:
258
  msgs = [m for r in self.records for m in r.output()]
259
+ return msgs
260
 
261
  async def compress(self):
262
  return False
 
295
  self.current = Topic(history=self)
296
  self.agent: Agent = agent
297
 
298
+ def get_tokens(self) -> int:
299
+ return (
300
+ self.get_bulks_tokens()
301
+ + self.get_topics_tokens()
302
+ + self.get_current_topic_tokens()
303
+ )
304
+
305
  def is_over_limit(self):
306
+ limit = _get_ctx_size_for_history()
307
  total = self.get_tokens()
308
  return total > limit
309
 
 
316
  def get_current_topic_tokens(self) -> int:
317
  return self.current.get_tokens()
318
 
319
+ def add_message(
320
+ self, ai: bool, content: MessageContent, tokens: int = 0
321
+ ) -> Message:
322
+ return self.current.add_message(ai, content=content, tokens=tokens)
 
 
 
 
 
323
 
324
  def new_topic(self):
325
  if self.current.messages:
 
331
  result += [m for b in self.bulks for m in b.output()]
332
  result += [m for t in self.topics for m in t.output()]
333
  result += self.current.output()
 
334
  return result
335
 
336
  @staticmethod
 
350
 
351
  def serialize(self):
352
  data = self.to_dict()
353
+ return _json_dumps(data)
354
 
355
  async def compress(self):
356
  compressed = False
 
360
  self.get_topics_tokens(),
361
  self.get_bulks_tokens(),
362
  )
363
+ total = _get_ctx_size_for_history()
364
  ratios = [
365
  (curr, CURRENT_TOPIC_RATIO, "current_topic"),
366
  (hist, HISTORY_TOPIC_RATIO, "history_topic"),
 
435
  def deserialize_history(json_data: str, agent) -> History:
436
  history = History(agent=agent)
437
  if json_data:
438
+ data = _json_loads(json_data)
439
  history = History.from_dict(data, history=history)
440
  return history
441
 
442
 
443
+ def _get_ctx_size_for_history() -> int:
444
  set = settings.get_settings()
445
  return int(set["chat_model_ctx_length"] * set["chat_model_ctx_history"])
446
 
447
 
448
+ def _stringify_output(output: OutputMessage, ai_label="ai", human_label="human"):
449
+ return f'{ai_label if output["ai"] else human_label}: {_stringify_content(output["content"])}'
450
 
451
 
452
+ def _stringify_content(content: MessageContent) -> str:
453
+ # already a string
454
  if isinstance(content, str):
455
  return content
456
+
457
+ # raw messages return preview or trimmed json
458
+ if _is_raw_message(content):
459
+ preview: str = content.get("preview", "") # type: ignore
460
+ if preview:
461
+ return preview
462
+ text = _json_dumps(content)
463
+ if len(text) > RAW_MESSAGE_OUTPUT_TEXT_TRIM:
464
+ return text[:RAW_MESSAGE_OUTPUT_TEXT_TRIM] + "... TRIMMED"
465
+ return text
466
+
467
+ # regular messages of non-string are dumped as json
468
+ return _json_dumps(content)
469
+
470
+
471
+ def _output_content_langchain(content: MessageContent):
472
+ if isinstance(content, str):
473
+ return content
474
+ if _is_raw_message(content):
475
+ return content["raw_content"] # type: ignore
476
  try:
477
+ return _json_dumps(content)
478
  except Exception as e:
479
  raise e
480
 
 
485
  if result and result[-1]["ai"] == out["ai"]:
486
  result[-1] = OutputMessage(
487
  ai=result[-1]["ai"],
488
+ content=_merge_outputs(result[-1]["content"], out["content"]),
489
  )
490
  else:
491
  result.append(out)
492
  return result
493
 
494
 
495
+ def group_messages_abab(messages: list[BaseMessage]) -> list[BaseMessage]:
496
  result = []
497
+ for msg in messages:
498
+ if result and isinstance(result[-1], type(msg)):
499
+ # create new instance of the same type with merged content
500
+ result[-1] = type(result[-1])(content=_merge_outputs(result[-1].content, msg.content)) # type: ignore
501
  else:
502
+ result.append(msg)
503
+ return result
 
 
 
504
 
 
 
 
505
 
506
+ def output_langchain(messages: list[OutputMessage]):
507
+ result = []
508
+ for m in messages:
509
+ if m["ai"]:
510
+ # result.append(AIMessage(content=serialize_content(m["content"])))
511
+ result.append(AIMessage(_output_content_langchain(content=m["content"]))) # type: ignore
512
+ else:
513
+ # result.append(HumanMessage(content=serialize_content(m["content"])))
514
+ result.append(HumanMessage(_output_content_langchain(content=m["content"]))) # type: ignore
515
+ # ensure message type alternation
516
+ result = group_messages_abab(result)
 
 
517
  return result
518
 
519
 
520
  def output_text(messages: list[OutputMessage], ai_label="ai", human_label="human"):
521
+ return "\n".join(_stringify_output(o, ai_label, human_label) for o in messages)
522
+
523
 
524
+ def _merge_outputs(a: MessageContent, b: MessageContent) -> MessageContent:
525
+ if isinstance(a, str) and isinstance(b, str):
526
+ return a + b
527
 
 
528
  if not isinstance(a, list):
529
  a = [a]
530
  if not isinstance(b, list):
531
  b = [b]
532
+
533
+ return cast(MessageContent, a + b)
534
 
535
 
536
+ def _merge_properties(
537
+ a: Dict[str, MessageContent], b: Dict[str, MessageContent]
538
+ ) -> Dict[str, MessageContent]:
539
+ result = a.copy()
540
+ for k, v in b.items():
541
+ if k in result:
542
+ result[k] = _merge_outputs(result[k], v)
543
  else:
544
+ result[k] = v
545
+ return result
546
+
547
+
548
+ def _is_raw_message(obj: object) -> bool:
549
+ return isinstance(obj, Mapping) and "raw_content" in obj
550
+
551
+
552
+ def _json_dumps(obj):
553
+ return json.dumps(obj, ensure_ascii=False)
554
+
555
+
556
+ def _json_loads(obj):
557
+ return json.loads(obj)
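One structural point worth calling out in the rework above: role alternation is now enforced on the LangChain messages themselves (`group_messages_abab`) rather than on the raw outputs. A dependency-free sketch of that merging idea, with `(role, content)` tuples standing in for `HumanMessage`/`AIMessage`:

```python
# Sketch of ABAB grouping: consecutive messages with the same role are merged
# so the model sees strictly alternating human/ai turns.

def group_abab(messages: list[tuple[str, str]]) -> list[tuple[str, str]]:
    result: list[tuple[str, str]] = []
    for role, content in messages:
        if result and result[-1][0] == role:
            # same role as the previous message -> merge instead of appending
            result[-1] = (role, result[-1][1] + "\n" + content)
        else:
            result.append((role, content))
    return result

if __name__ == "__main__":
    msgs = [("human", "hi"), ("human", "attached a file"), ("ai", "got it")]
    print(group_abab(msgs))
    # [('human', 'hi\nattached a file'), ('ai', 'got it')]
```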
python/helpers/images.py ADDED
@@ -0,0 +1,35 @@
 
1
+ from PIL import Image
2
+ import io
3
+ import math
4
+
5
+
6
+ def compress_image(image_data: bytes, *, max_pixels: int = 256_000, quality: int = 50) -> bytes:
7
+ """Compress an image by scaling it down and converting to JPEG with quality settings.
8
+
9
+ Args:
10
+ image_data: Raw image bytes
11
+ max_pixels: Maximum number of pixels in the output image (width * height)
12
+ quality: JPEG quality setting (1-100)
13
+
14
+ Returns:
15
+ Compressed image as bytes
16
+ """
17
+ # load image from bytes
18
+ img = Image.open(io.BytesIO(image_data))
19
+
20
+ # calculate scaling factor to get to max_pixels
21
+ current_pixels = img.width * img.height
22
+ if current_pixels > max_pixels:
23
+ scale = math.sqrt(max_pixels / current_pixels)
24
+ new_width = int(img.width * scale)
25
+ new_height = int(img.height * scale)
26
+ img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
27
+
28
+ # convert to RGB if needed (for JPEG)
29
+ if img.mode in ('RGBA', 'P'):
30
+ img = img.convert('RGB')
31
+
32
+ # save as JPEG with compression
33
+ output = io.BytesIO()
34
+ img.save(output, format='JPEG', quality=quality, optimize=True)
35
+ return output.getvalue()
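A quick usage check for the new `compress_image` helper, generating a throwaway image in memory so there are no file dependencies; this assumes it is run from the repository root with Pillow installed:

```python
import io

from PIL import Image

from python.helpers.images import compress_image  # helper added in this commit

# build a large dummy image entirely in memory
img = Image.new("RGB", (2000, 1500), color=(40, 90, 160))
buf = io.BytesIO()
img.save(buf, format="PNG")
raw = buf.getvalue()

small = compress_image(raw, max_pixels=256_000, quality=50)
print(f"{len(raw)} bytes PNG -> {len(small)} bytes JPEG")

# the result stays within the requested pixel budget
out = Image.open(io.BytesIO(small))
assert out.width * out.height <= 256_000
```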
python/helpers/runtime.py CHANGED
@@ -2,6 +2,9 @@ import argparse
2
  import inspect
3
  from typing import TypeVar, Callable, Awaitable, Union, overload, cast
4
  from python.helpers import dotenv, rfc, settings
 
 
 
5
 
6
  T = TypeVar('T')
7
  R = TypeVar('R')
@@ -102,3 +105,22 @@ def _get_rfc_url() -> str:
102
  url = url+":"+str(set["rfc_port_http"])
103
  url += "/rfc"
104
  return url
 
2
  import inspect
3
  from typing import TypeVar, Callable, Awaitable, Union, overload, cast
4
  from python.helpers import dotenv, rfc, settings
5
+ import asyncio
6
+ import threading
7
+ import queue
8
 
9
  T = TypeVar('T')
10
  R = TypeVar('R')
 
105
  url = url+":"+str(set["rfc_port_http"])
106
  url += "/rfc"
107
  return url
108
+
109
+
110
+ def call_development_function_sync(func: Union[Callable[..., T], Callable[..., Awaitable[T]]], *args, **kwargs) -> T:
111
+ # run async function in sync manner
112
+ result_queue = queue.Queue()
113
+
114
+ def run_in_thread():
115
+ result = asyncio.run(call_development_function(func, *args, **kwargs))
116
+ result_queue.put(result)
117
+
118
+ thread = threading.Thread(target=run_in_thread)
119
+ thread.start()
120
+ thread.join(timeout=30) # wait for thread with timeout
121
+
122
+ if thread.is_alive():
123
+ raise TimeoutError("Function call timed out after 30 seconds")
124
+
125
+ result = result_queue.get_nowait()
126
+ return cast(T, result)
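`call_development_function_sync` above is the usual pattern for calling an async helper from synchronous code: run it on its own event loop in a worker thread and hand the result back through a queue. A dependency-free sketch of the same pattern:

```python
import asyncio
import queue
import threading

async def slow_add(a: int, b: int) -> int:
    await asyncio.sleep(0.1)
    return a + b

def run_sync(coro_fn, *args, timeout: float = 30.0, **kwargs):
    # run the coroutine on a fresh event loop in a worker thread,
    # pass the result back through a queue, and enforce a timeout
    result_queue: queue.Queue = queue.Queue()

    def worker():
        result_queue.put(asyncio.run(coro_fn(*args, **kwargs)))

    t = threading.Thread(target=worker)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError(f"call timed out after {timeout} seconds")
    return result_queue.get_nowait()

print(run_sync(slow_add, 2, 3))  # 5
```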
python/helpers/tool.py CHANGED
@@ -1,5 +1,6 @@
1
  from abc import abstractmethod
2
- from dataclasses import dataclass, field
 
3
  from agent import Agent
4
  from python.helpers.print_style import PrintStyle
5
 
@@ -8,8 +9,6 @@ from python.helpers.print_style import PrintStyle
8
  class Response:
9
  message:str
10
  break_loop: bool
11
- attachments: list[str] = field(default_factory=list[str])
12
-
13
 
14
  class Tool:
15
 
@@ -34,7 +33,7 @@ class Tool:
34
 
35
  async def after_execution(self, response: Response, **kwargs):
36
  text = response.message.strip()
37
- await self.agent.hist_add_tool_result(self.name, text, response.attachments)
38
  PrintStyle(font_color="#1B4F72", background_color="white", padding=True, bold=True).print(f"{self.agent.agent_name}: Response from tool '{self.name}'")
39
  PrintStyle(font_color="#85C1E9").print(response.message)
40
  self.log.update(content=response.message)
 
1
  from abc import abstractmethod
2
+ from dataclasses import dataclass
3
+
4
  from agent import Agent
5
  from python.helpers.print_style import PrintStyle
6
 
 
9
  class Response:
10
  message:str
11
  break_loop: bool
 
 
12
 
13
  class Tool:
14
 
 
33
 
34
  async def after_execution(self, response: Response, **kwargs):
35
  text = response.message.strip()
36
+ self.agent.hist_add_tool_result(self.name, text)
37
  PrintStyle(font_color="#1B4F72", background_color="white", padding=True, bold=True).print(f"{self.agent.agent_name}: Response from tool '{self.name}'")
38
  PrintStyle(font_color="#85C1E9").print(response.message)
39
  self.log.update(content=response.message)
python/helpers/whisper.py CHANGED
@@ -30,7 +30,7 @@ async def _preload(model_name:str):
30
  is_updating_model = True
31
  if not _model or _model_name != model_name:
32
  PrintStyle.standard(f"Loading Whisper model: {model_name}")
33
- _model = whisper.load_model(model_name)
34
  _model_name = model_name
35
  finally:
36
  is_updating_model = False
 
30
  is_updating_model = True
31
  if not _model or _model_name != model_name:
32
  PrintStyle.standard(f"Loading Whisper model: {model_name}")
33
+ _model = whisper.load_model(name=model_name) # type: ignore
34
  _model_name = model_name
35
  finally:
36
  is_updating_model = False
python/tools/call_subordinate.py CHANGED
@@ -18,7 +18,7 @@ class Delegation(Tool):
18
 
19
  # add user message to subordinate agent
20
  subordinate: Agent = self.agent.get_data(Agent.DATA_NAME_SUBORDINATE)
21
- await subordinate.hist_add_user_message(UserMessage(message=message, attachments=[]))
22
  # run subordinate monologue
23
  result = await subordinate.monologue()
24
  # result
 
18
 
19
  # add user message to subordinate agent
20
  subordinate: Agent = self.agent.get_data(Agent.DATA_NAME_SUBORDINATE)
21
+ subordinate.hist_add_user_message(UserMessage(message=message, attachments=[]))
22
  # run subordinate monologue
23
  result = await subordinate.monologue()
24
  # result
python/tools/code_execution_tool.py CHANGED
@@ -12,7 +12,7 @@ from python.helpers.docker import DockerContainerManager
12
 
13
  @dataclass
14
  class State:
15
- shell: LocalInteractiveSession | SSHInteractiveSession
16
  docker: DockerContainerManager | None
17
 
18
 
@@ -27,19 +27,26 @@ class CodeExecution(Tool):
27
  # os.chdir(files.get_abs_path("./work_dir")) #change CWD to work_dir
28
 
29
  runtime = self.args.get("runtime", "").lower().strip()
 
30
 
31
  if runtime == "python":
32
- response = await self.execute_python_code(self.args["code"])
 
 
33
  elif runtime == "nodejs":
34
- response = await self.execute_nodejs_code(self.args["code"])
 
 
35
  elif runtime == "terminal":
36
- response = await self.execute_terminal_command(self.args["code"])
 
 
37
  elif runtime == "output":
38
  response = await self.get_terminal_output(
39
- wait_with_output=5, wait_without_output=60
40
  )
41
  elif runtime == "reset":
42
- response = await self.reset_terminal()
43
  else:
44
  response = self.agent.read_prompt(
45
  "fw.code_runtime_wrong.md", runtime=runtime
@@ -72,11 +79,15 @@ class CodeExecution(Tool):
72
  # PrintStyle().print()
73
 
74
  def get_log_object(self):
75
- return self.agent.context.log.log(type="code_exe", heading=f"{self.agent.agent_name}: Using tool '{self.name}'", content="", kvps=self.args)
76
-
 
 
 
 
77
 
78
  async def after_execution(self, response, **kwargs):
79
- await self.agent.hist_add_tool_result(self.name, response.message)
80
 
81
  async def prepare_state(self, reset=False):
82
  self.state = self.agent.get_data("_cot_state")
@@ -97,7 +108,11 @@ class CodeExecution(Tool):
97
 
98
  # initialize local or remote interactive shell interface
99
  if self.agent.config.code_exec_ssh_enabled:
100
- pswd = self.agent.config.code_exec_ssh_pass if self.agent.config.code_exec_ssh_pass else await rfc_exchange.get_root_password()
 
101
  shell = SSHInteractiveSession(
102
  self.agent.context.log,
103
  self.agent.config.code_exec_ssh_addr,
@@ -108,42 +123,63 @@ class CodeExecution(Tool):
108
  else:
109
  shell = LocalInteractiveSession()
110
 
111
- self.state = State(shell=shell, docker=docker)
112
  await shell.connect()
113
  self.agent.set_data("_cot_state", self.state)
114
 
115
- async def execute_python_code(self, code: str, reset: bool = False):
116
  escaped_code = shlex.quote(code)
117
  command = f"ipython -c {escaped_code}"
118
- return await self.terminal_session(command, reset)
119
 
120
- async def execute_nodejs_code(self, code: str, reset: bool = False):
121
  escaped_code = shlex.quote(code)
122
  command = f"node /exe/node_eval.js {escaped_code}"
123
- return await self.terminal_session(command, reset)
124
 
125
- async def execute_terminal_command(self, command: str, reset: bool = False):
126
- return await self.terminal_session(command, reset)
 
 
127
 
128
- async def terminal_session(self, command: str, reset: bool = False):
129
 
130
  await self.agent.handle_intervention() # wait for intervention and handle it, if paused
131
  # try again on lost connection
132
  for i in range(2):
133
  try:
134
-
135
  if reset:
136
  await self.reset_terminal()
137
 
138
- self.state.shell.send_command(command)
139
-
140
- PrintStyle(background_color="white", font_color="#1B4F72", bold=True).print(
141
- f"{self.agent.agent_name} code execution output"
142
- )
143
- return await self.get_terminal_output()
 
144
 
145
  except Exception as e:
146
- if i==1:
147
  # try again on lost connection
148
  PrintStyle.error(str(e))
149
  await self.prepare_state(reset=True)
@@ -153,6 +189,7 @@ class CodeExecution(Tool):
153
 
154
  async def get_terminal_output(
155
  self,
 
156
  reset_full_output=True,
157
  wait_with_output=3,
158
  wait_without_output=10,
@@ -165,10 +202,10 @@ class CodeExecution(Tool):
165
 
166
  while max_exec_time <= 0 or time.time() - start_time < max_exec_time:
167
  await asyncio.sleep(SLEEP_TIME) # Wait for some output to be generated
168
- full_output, partial_output = await self.state.shell.read_output(
169
  timeout=max_exec_time, reset_full_output=reset_full_output
170
  )
171
- reset_full_output = False # only reset once
172
 
173
  await self.agent.handle_intervention() # wait for intervention and handle it, if paused
174
 
@@ -184,8 +221,10 @@ class CodeExecution(Tool):
184
  break
185
  return full_output
186
 
187
- async def reset_terminal(self):
188
- self.state.shell.close()
 
 
189
  await self.prepare_state(reset=True)
190
  response = self.agent.read_prompt("fw.code_reset.md")
191
  self.log.update(content=response)
 
12
 
13
  @dataclass
14
  class State:
15
+ shells: dict[int, LocalInteractiveSession | SSHInteractiveSession]
16
  docker: DockerContainerManager | None
17
 
18
 
 
27
  # os.chdir(files.get_abs_path("./work_dir")) #change CWD to work_dir
28
 
29
  runtime = self.args.get("runtime", "").lower().strip()
30
+ session = int(self.args.get("session", 0))
31
 
32
  if runtime == "python":
33
+ response = await self.execute_python_code(
34
+ code=self.args["code"], session=session
35
+ )
36
  elif runtime == "nodejs":
37
+ response = await self.execute_nodejs_code(
38
+ code=self.args["code"], session=session
39
+ )
40
  elif runtime == "terminal":
41
+ response = await self.execute_terminal_command(
42
+ command=self.args["code"], session=session
43
+ )
44
  elif runtime == "output":
45
  response = await self.get_terminal_output(
46
+ session=session, wait_with_output=5, wait_without_output=60
47
  )
48
  elif runtime == "reset":
49
+ response = await self.reset_terminal(session=session)
50
  else:
51
  response = self.agent.read_prompt(
52
  "fw.code_runtime_wrong.md", runtime=runtime
 
79
  # PrintStyle().print()
80
 
81
  def get_log_object(self):
82
+ return self.agent.context.log.log(
83
+ type="code_exe",
84
+ heading=f"{self.agent.agent_name}: Using tool '{self.name}'",
85
+ content="",
86
+ kvps=self.args,
87
+ )
88
 
89
  async def after_execution(self, response, **kwargs):
90
+ self.agent.hist_add_tool_result(self.name, response.message)
91
 
92
  async def prepare_state(self, reset=False):
93
  self.state = self.agent.get_data("_cot_state")
 
108
 
109
  # initialize local or remote interactive shell interface
110
  if self.agent.config.code_exec_ssh_enabled:
111
+ pswd = (
112
+ self.agent.config.code_exec_ssh_pass
113
+ if self.agent.config.code_exec_ssh_pass
114
+ else await rfc_exchange.get_root_password()
115
+ )
116
  shell = SSHInteractiveSession(
117
  self.agent.context.log,
118
  self.agent.config.code_exec_ssh_addr,
 
123
  else:
124
  shell = LocalInteractiveSession()
125
 
126
+ self.state = State(shells={0: shell}, docker=docker)
127
  await shell.connect()
128
  self.agent.set_data("_cot_state", self.state)
129
 
130
+ async def execute_python_code(self, session: int, code: str, reset: bool = False):
131
  escaped_code = shlex.quote(code)
132
  command = f"ipython -c {escaped_code}"
133
+ return await self.terminal_session(session, command, reset)
134
 
135
+ async def execute_nodejs_code(self, session: int, code: str, reset: bool = False):
136
  escaped_code = shlex.quote(code)
137
  command = f"node /exe/node_eval.js {escaped_code}"
138
+ return await self.terminal_session(session, command, reset)
139
 
140
+ async def execute_terminal_command(
141
+ self, session: int, command: str, reset: bool = False
142
+ ):
143
+ return await self.terminal_session(session, command, reset)
144
 
145
+ async def terminal_session(self, session: int, command: str, reset: bool = False):
146
 
147
  await self.agent.handle_intervention() # wait for intervention and handle it, if paused
148
  # try again on lost connection
149
  for i in range(2):
150
  try:
151
+
152
  if reset:
153
  await self.reset_terminal()
154
 
155
+ if session not in self.state.shells:
156
+ if self.agent.config.code_exec_ssh_enabled:
157
+ pswd = (
158
+ self.agent.config.code_exec_ssh_pass
159
+ if self.agent.config.code_exec_ssh_pass
160
+ else await rfc_exchange.get_root_password()
161
+ )
162
+ shell = SSHInteractiveSession(
163
+ self.agent.context.log,
164
+ self.agent.config.code_exec_ssh_addr,
165
+ self.agent.config.code_exec_ssh_port,
166
+ self.agent.config.code_exec_ssh_user,
167
+ pswd,
168
+ )
169
+ else:
170
+ shell = LocalInteractiveSession()
171
+ self.state.shells[session] = shell
172
+ await shell.connect()
173
+
174
+ self.state.shells[session].send_command(command)
175
+
176
+ PrintStyle(
177
+ background_color="white", font_color="#1B4F72", bold=True
178
+ ).print(f"{self.agent.agent_name} code execution output")
179
+ return await self.get_terminal_output(session)
180
 
181
  except Exception as e:
182
+ if i == 1:
183
  # try again on lost connection
184
  PrintStyle.error(str(e))
185
  await self.prepare_state(reset=True)
 
189
 
190
  async def get_terminal_output(
191
  self,
192
+ session=0,
193
  reset_full_output=True,
194
  wait_with_output=3,
195
  wait_without_output=10,
 
202
 
203
  while max_exec_time <= 0 or time.time() - start_time < max_exec_time:
204
  await asyncio.sleep(SLEEP_TIME) # Wait for some output to be generated
205
+ full_output, partial_output = await self.state.shells[session].read_output(
206
  timeout=max_exec_time, reset_full_output=reset_full_output
207
  )
208
+ reset_full_output = False # only reset once
209
 
210
  await self.agent.handle_intervention() # wait for intervention and handle it, if paused
211
 
 
221
  break
222
  return full_output
223
 
224
+ async def reset_terminal(self, session=0):
225
+ if session in self.state.shells:
226
+ self.state.shells[session].close()
227
+ del self.state.shells[session]
228
  await self.prepare_state(reset=True)
229
  response = self.agent.read_prompt("fw.code_reset.md")
230
  self.log.update(content=response)
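The multi-session support above reduces to a lazily populated dict of shells keyed by an integer, with `reset` simply dropping one entry. A stripped-down sketch of that lifecycle, with a dummy `Session` class standing in for the local/SSH shells:

```python
from dataclasses import dataclass, field

@dataclass
class Session:
    sid: int
    log: list[str] = field(default_factory=list)

    def send(self, command: str) -> str:
        self.log.append(command)
        return f"[session {self.sid}] ran: {command}"

@dataclass
class State:
    shells: dict[int, Session] = field(default_factory=dict)

    def run(self, command: str, session: int = 0) -> str:
        # create the shell on first use, reuse it afterwards
        if session not in self.shells:
            self.shells[session] = Session(sid=session)
        return self.shells[session].send(command)

    def reset(self, session: int = 0) -> None:
        # dropping the entry is enough; the next run() recreates it
        self.shells.pop(session, None)

state = State()
print(state.run("apt-get install zip"))             # default session 0
print(state.run("python long_task.py", session=1))  # parallel session 1
state.reset(1)
```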
python/tools/input.py CHANGED
@@ -20,4 +20,4 @@ class Input(Tool):
20
  return self.agent.context.log.log(type="code_exe", heading=f"{self.agent.agent_name}: Using tool '{self.name}'", content="", kvps=self.args)
21
 
22
  async def after_execution(self, response, **kwargs):
23
- await self.agent.hist_add_tool_result(self.name, response.message)
 
20
  return self.agent.context.log.log(type="code_exe", heading=f"{self.agent.agent_name}: Using tool '{self.name}'", content="", kvps=self.args)
21
 
22
  async def after_execution(self, response, **kwargs):
23
+ self.agent.hist_add_tool_result(self.name, response.message)
python/tools/vision_load.py ADDED
@@ -0,0 +1,79 @@
 
1
+ import base64
2
+ from python.helpers.print_style import PrintStyle
3
+ from python.helpers.tool import Tool, Response
4
+ from python.helpers import runtime, files, images
5
+ from mimetypes import guess_type
6
+ from python.helpers import history
7
+
8
+ # image optimization and token estimation for context window
9
+ MAX_PIXELS = 768_000
10
+ QUALITY = 75
11
+ TOKENS_ESTIMATE = 1500
12
+
13
+
14
+ class VisionLoad(Tool):
15
+ async def execute(self, paths: list[str] = [], **kwargs) -> Response:
16
+
17
+ self.images_dict = {}
18
+ template: list[dict[str, str]] = [] # type: ignore
19
+
20
+ for path in paths:
21
+ if not await runtime.call_development_function(files.exists, str(path)):
22
+ continue
23
+
24
+ if path not in self.images_dict:
25
+ mime_type, _ = guess_type(str(path))
26
+ if mime_type and mime_type.startswith("image/"):
27
+ # Read binary file
28
+ file_content = await runtime.call_development_function(
29
+ files.read_file_base64, str(path)
30
+ )
31
+ file_content = base64.b64decode(file_content)
32
+ # Compress and convert to JPEG
33
+ compressed = images.compress_image(
34
+ file_content, max_pixels=MAX_PIXELS, quality=QUALITY
35
+ )
36
+ # Encode as base64
37
+ file_content_b64 = base64.b64encode(compressed).decode("utf-8")
38
+
39
+ # DEBUG: Save compressed image
40
+ # await runtime.call_development_function(
41
+ # files.write_file_base64, str(path), file_content_b64
42
+ # )
43
+
44
+ # Construct the data URL (always JPEG after compression)
45
+ self.images_dict[path] = file_content_b64
46
+
47
+ return Response(message="dummy", break_loop=False)
48
+
49
+ async def after_execution(self, response: Response, **kwargs):
50
+
51
+ # build image data messages for LLMs, or error message
52
+ content = []
53
+ if self.images_dict:
54
+ for _, image in self.images_dict.items():
55
+ content.append(
56
+ {
57
+ "type": "image_url",
58
+ "image_url": {"url": f"data:image/jpeg;base64,{image}"},
59
+ }
60
+ )
61
+ # append as raw message content for LLMs with vision tokens estimate
62
+ msg = history.RawMessage(raw_content=content, preview="<Base64 encoded image data>")
63
+ self.agent.hist_add_message(
64
+ False, content=msg, tokens=TOKENS_ESTIMATE * len(content)
65
+ )
66
+ else:
67
+ self.agent.hist_add_tool_result(self.name, "No images processed")
68
+
69
+ # print and log short version
70
+ message = (
71
+ "No images processed"
72
+ if not self.images_dict
73
+ else f"{len(self.images_dict)} images processed"
74
+ )
75
+ PrintStyle(
76
+ font_color="#1B4F72", background_color="white", padding=True, bold=True
77
+ ).print(f"{self.agent.agent_name}: Response from tool '{self.name}'")
78
+ PrintStyle(font_color="#85C1E9").print(message)
79
+ self.log.update(result=message)
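For reference, the raw content that `vision_load` stores is the usual multimodal `image_url` content-part shape, wrapped in a `RawMessage` so that history compression only ever shows the short preview string. A small sketch of the stored payload for one image (the base64 string is a placeholder):

```python
import json

# placeholder payload; in the tool this is the compressed JPEG, base64-encoded
fake_b64 = "aGVsbG8="

raw_content = [
    {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{fake_b64}"},
    }
]

# wrapped so history compression keeps only the preview instead of the image bytes
raw_message = {"raw_content": raw_content, "preview": "<Base64 encoded image data>"}
print(json.dumps(raw_message, indent=2))
```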
webui/js/history.js CHANGED
@@ -3,9 +3,10 @@ import { getContext } from "../index.js";
3
  export async function openHistoryModal() {
4
  try {
5
  const hist = await window.sendJsonData("/history_get", { context: getContext() });
6
- const data = JSON.stringify(hist.history, null, 4);
 
7
  const size = hist.tokens
8
- await showEditorModal(data, "json", `History ~${size} tokens`, "Conversation history visible to the LLM. History is compressed to fit into the context window over time.");
9
  } catch (e) {
10
  window.toastFetchError("Error fetching history", e)
11
  return
 
3
  export async function openHistoryModal() {
4
  try {
5
  const hist = await window.sendJsonData("/history_get", { context: getContext() });
6
+ // const data = JSON.stringify(hist.history, null, 4);
7
+ const data = hist.history
8
  const size = hist.tokens
9
+ await showEditorModal(data, "markdown", `History ~${size} tokens`, "Conversation history visible to the LLM. History is compressed to fit into the context window over time.");
10
  } catch (e) {
11
  window.toastFetchError("Error fetching history", e)
12
  return