frdel committed on
Commit
c65967d
·
1 Parent(s): c69afa0

vision prototype, terminal sessions


Vision functionality, vision_load tool
History.py re-work to support attachments and optimize token usage
Code execution tool support for multiple sessions in parallel

agent.py CHANGED
@@ -10,7 +10,12 @@ import models
10
  from langchain_core.prompt_values import ChatPromptValue
11
  from python.helpers import extract_tools, rate_limiter, files, errors, history, tokens
12
  from python.helpers.print_style import PrintStyle
13
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate, StringPromptTemplate
14
  from langchain_core.prompts.image import ImagePromptTemplate
15
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, BaseMessage
16
  from langchain_core.language_models.chat_models import BaseChatModel
@@ -91,7 +96,7 @@ class AgentContext:
91
  else:
92
  current_agent = self.agent0
93
 
94
- self.task =self.run_task(current_agent.monologue)
95
  return self.task
96
 
97
  def communicate(self, msg: "UserMessage", broadcast_level: int = 1):
@@ -130,9 +135,9 @@ class AgentContext:
130
  async def _process_chain(self, agent: "Agent", msg: "UserMessage|str", user=True):
131
  try:
132
  msg_template = (
133
- await agent.hist_add_user_message(msg) # type: ignore
134
  if user
135
- else await agent.hist_add_tool_result(
136
  tool_name="call_subordinate", tool_result=msg # type: ignore
137
  )
138
  )
@@ -283,9 +288,6 @@ class Agent:
283
  printer.stream(chunk)
284
  self.log_from_stream(full, log)
285
 
286
- # store as last context window content
287
- self.set_data(Agent.DATA_NAME_CTX_WINDOW, prompt.format())
288
-
289
  agent_response = await self.call_chat_model(
290
  prompt, callback=stream_callback
291
  )
@@ -296,10 +298,10 @@ class Agent:
296
  self.loop_data.last_response == agent_response
297
  ): # if assistant_response is the same as last message in history, let him know
298
  # Append the assistant's response to the history
299
- await self.hist_add_ai_response(agent_response)
300
  # Append warning message to the history
301
  warning_msg = self.read_prompt("fw.msg_repeat.md")
302
- await self.hist_add_warning(message=warning_msg)
303
  PrintStyle(font_color="orange", padding=True).print(
304
  warning_msg
305
  )
@@ -307,7 +309,7 @@ class Agent:
307
 
308
  else: # otherwise proceed with tool
309
  # Append the assistant's response to the history
310
- await self.hist_add_ai_response(agent_response)
311
  # process tools requested in agent message
312
  tools_result = await self.process_tools(agent_response)
313
  if tools_result: # final response of message loop available
@@ -319,7 +321,7 @@ class Agent:
319
  except RepairableException as e:
320
  # Forward repairable errors to the LLM, maybe it can fix them
321
  error_message = errors.format_error(e)
322
- await self.hist_add_warning(error_message)
323
  PrintStyle(font_color="red", padding=True).print(error_message)
324
  self.context.log.log(type="error", content=error_message)
325
  except Exception as e:
@@ -358,23 +360,31 @@ class Agent:
358
  extras += history.Message(False, content=extra).output()
359
  loop_data.extras_temporary.clear()
360
 
361
- # combine history and extras
362
- history_combined: list[OutputMessage] = history.group_outputs_abab(loop_data.history_output + extras)
363
-
364
- # convert history to LLM format
365
- history_langchain: list[BaseMessage] = history.output_langchain(history_combined)
366
-
367
- PrintStyle(font_color="grey", background_color="black", bold=True, padding=True).print(
368
- f"History Langchain: {history_langchain}"
369
  )
370
 
371
  # build chain from system prompt, message history and model
 
372
  prompt = ChatPromptTemplate.from_messages(
373
  [
374
- SystemMessage(content="\n\n".join(loop_data.system)),
375
  *history_langchain,
376
  ]
377
  )
 
378
  return prompt
379
 
380
  def handle_critical_exception(self, exception: Exception):
@@ -441,12 +451,12 @@ class Agent:
441
  def set_data(self, field: str, value):
442
  self.data[field] = value
443
 
444
- def hist_add_message(self, ai: bool, content: history.MessageContent):
445
- return self.history.add_message(ai=ai, content=content)
446
-
447
- async def hist_add_user_message(
448
- self, message: UserMessage, intervention: bool = False
449
  ):
 
 
 
450
  self.history.new_topic() # user message starts a new topic in history
451
 
452
  # load message template based on intervention
@@ -476,19 +486,18 @@ class Agent:
476
  self.last_user_message = msg
477
  return msg
478
 
479
- async def hist_add_ai_response(self, message: str):
480
  self.loop_data.last_response = message
481
  content = self.parse_prompt("fw.ai_response.md", message=message)
482
  return self.hist_add_message(True, content=content)
483
 
484
- async def hist_add_warning(self, message: history.MessageContent):
485
  content = self.parse_prompt("fw.warning.md", message=message)
486
  return self.hist_add_message(False, content=content)
487
 
488
- async def hist_add_tool_result(self, tool_name: str, tool_result: str, attachments: list[str] = []):
489
- attachments_str = json.dumps(attachments).replace("\n", "")
490
  content = self.parse_prompt(
491
- "fw.tool_result.md", tool_name=tool_name, tool_result=tool_result, attachments=attachments_str
492
  )
493
  return self.hist_add_message(False, content=content)
494
 
@@ -620,9 +629,9 @@ class Agent:
620
  msg = self.intervention
621
  self.intervention = None # reset the intervention message
622
  if progress.strip():
623
- await self.hist_add_ai_response(progress)
624
  # append the intervention message
625
- await self.hist_add_user_message(msg, intervention=True)
626
  raise InterventionException(msg)
627
 
628
  async def wait_if_paused(self):
@@ -649,7 +658,7 @@ class Agent:
649
  return response.message
650
  else:
651
  msg = self.read_prompt("fw.msg_misformat.md")
652
- await self.hist_add_warning(msg)
653
  PrintStyle(font_color="red", padding=True).print(msg)
654
  self.context.log.log(
655
  type="error", content=f"{self.agent_name}: Message misformat"
 
10
  from langchain_core.prompt_values import ChatPromptValue
11
  from python.helpers import extract_tools, rate_limiter, files, errors, history, tokens
12
  from python.helpers.print_style import PrintStyle
13
+ from langchain_core.prompts import (
14
+ ChatPromptTemplate,
15
+ MessagesPlaceholder,
16
+ HumanMessagePromptTemplate,
17
+ StringPromptTemplate,
18
+ )
19
  from langchain_core.prompts.image import ImagePromptTemplate
20
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, BaseMessage
21
  from langchain_core.language_models.chat_models import BaseChatModel
 
96
  else:
97
  current_agent = self.agent0
98
 
99
+ self.task = self.run_task(current_agent.monologue)
100
  return self.task
101
 
102
  def communicate(self, msg: "UserMessage", broadcast_level: int = 1):
 
135
  async def _process_chain(self, agent: "Agent", msg: "UserMessage|str", user=True):
136
  try:
137
  msg_template = (
138
+ agent.hist_add_user_message(msg) # type: ignore
139
  if user
140
+ else agent.hist_add_tool_result(
141
  tool_name="call_subordinate", tool_result=msg # type: ignore
142
  )
143
  )
 
288
  printer.stream(chunk)
289
  self.log_from_stream(full, log)
290
 
 
 
 
291
  agent_response = await self.call_chat_model(
292
  prompt, callback=stream_callback
293
  )
 
298
  self.loop_data.last_response == agent_response
299
  ): # if assistant_response is the same as last message in history, let him know
300
  # Append the assistant's response to the history
301
+ self.hist_add_ai_response(agent_response)
302
  # Append warning message to the history
303
  warning_msg = self.read_prompt("fw.msg_repeat.md")
304
+ self.hist_add_warning(message=warning_msg)
305
  PrintStyle(font_color="orange", padding=True).print(
306
  warning_msg
307
  )
 
309
 
310
  else: # otherwise proceed with tool
311
  # Append the assistant's response to the history
312
+ self.hist_add_ai_response(agent_response)
313
  # process tools requested in agent message
314
  tools_result = await self.process_tools(agent_response)
315
  if tools_result: # final response of message loop available
 
321
  except RepairableException as e:
322
  # Forward repairable errors to the LLM, maybe it can fix them
323
  error_message = errors.format_error(e)
324
+ self.hist_add_warning(error_message)
325
  PrintStyle(font_color="red", padding=True).print(error_message)
326
  self.context.log.log(type="error", content=error_message)
327
  except Exception as e:
 
360
  extras += history.Message(False, content=extra).output()
361
  loop_data.extras_temporary.clear()
362
 
363
+ # convert history + extras to LLM format
364
+ history_langchain: list[BaseMessage] = history.output_langchain(
365
+ loop_data.history_output + extras
366
  )
367
 
368
  # build chain from system prompt, message history and model
369
+ system_text = "\n\n".join(loop_data.system)
370
  prompt = ChatPromptTemplate.from_messages(
371
  [
372
+ SystemMessage(content=system_text),
373
  *history_langchain,
374
  ]
375
  )
376
+
377
+ # store as last context window content
378
+ self.set_data(
379
+ Agent.DATA_NAME_CTX_WINDOW,
380
+ {
381
+ "text": prompt.format(),
382
+ "tokens": self.history.get_tokens()
383
+ + tokens.approximate_tokens(system_text)
384
+ + tokens.approximate_tokens(history.output_text(extras)),
385
+ },
386
+ )
387
+
388
  return prompt
389
 
390
  def handle_critical_exception(self, exception: Exception):
 
451
  def set_data(self, field: str, value):
452
  self.data[field] = value
453
 
454
+ def hist_add_message(
455
+ self, ai: bool, content: history.MessageContent, tokens: int = 0
 
 
 
456
  ):
457
+ return self.history.add_message(ai=ai, content=content, tokens=tokens)
458
+
459
+ def hist_add_user_message(self, message: UserMessage, intervention: bool = False):
460
  self.history.new_topic() # user message starts a new topic in history
461
 
462
  # load message template based on intervention
 
486
  self.last_user_message = msg
487
  return msg
488
 
489
+ def hist_add_ai_response(self, message: str):
490
  self.loop_data.last_response = message
491
  content = self.parse_prompt("fw.ai_response.md", message=message)
492
  return self.hist_add_message(True, content=content)
493
 
494
+ def hist_add_warning(self, message: history.MessageContent):
495
  content = self.parse_prompt("fw.warning.md", message=message)
496
  return self.hist_add_message(False, content=content)
497
 
498
+ def hist_add_tool_result(self, tool_name: str, tool_result: str):
 
499
  content = self.parse_prompt(
500
+ "fw.tool_result.md", tool_name=tool_name, tool_result=tool_result
501
  )
502
  return self.hist_add_message(False, content=content)
503
 
 
629
  msg = self.intervention
630
  self.intervention = None # reset the intervention message
631
  if progress.strip():
632
+ self.hist_add_ai_response(progress)
633
  # append the intervention message
634
+ self.hist_add_user_message(msg, intervention=True)
635
  raise InterventionException(msg)
636
 
637
  async def wait_if_paused(self):
 
658
  return response.message
659
  else:
660
  msg = self.read_prompt("fw.msg_misformat.md")
661
+ self.hist_add_warning(msg)
662
  PrintStyle(font_color="red", padding=True).print(msg)
663
  self.context.log.log(
664
  type="error", content=f"{self.agent_name}: Message misformat"
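Note on the context-window change above: instead of re-counting tokens from the full rendered prompt, the agent now caches a dict with the prompt text and a token estimate built from the history's cached count plus estimates for the system prompt and extras. A minimal sketch of that bookkeeping, assuming a simple characters-per-token heuristic (the real `tokens.approximate_tokens` implementation is not shown in this diff):

```python
# Hypothetical sketch of the context-window bookkeeping; the 4-chars-per-token
# ratio is an assumption, not the project's actual heuristic.

def approximate_tokens(text: str, chars_per_token: float = 4.0) -> int:
    return int(len(text) / chars_per_token)

def build_ctx_window(prompt_text: str, system_text: str, extras_text: str, history_tokens: int) -> dict:
    # history tokens are cached per message; only the per-call parts are re-estimated
    return {
        "text": prompt_text,
        "tokens": history_tokens
        + approximate_tokens(system_text)
        + approximate_tokens(extras_text),
    }

if __name__ == "__main__":
    window = build_ctx_window("...rendered prompt...", "You are a helpful agent.", "", 1200)
    print(window)  # {'text': '...rendered prompt...', 'tokens': 1206}
```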
prompts/default/agent.system.instruments.md CHANGED
@@ -1,4 +1,5 @@
1
  # Instruments
2
- - following are instruments at disposal:
 
3
 
4
- {{instruments}}
 
1
  # Instruments
2
+ - following are instruments at disposal
3
+ - do not overly rely on them they might not be relevant
4
 
5
+ {{instruments}}
prompts/default/agent.system.memories.md CHANGED
@@ -1,4 +1,5 @@
1
  # Memories on the topic
2
- - following are your memories about current topic:
 
3
 
4
  {{memories}}
 
1
  # Memories on the topic
2
+ - following are memories about current topic
3
+ - do not overly rely on them they might not be relevant
4
 
5
  {{memories}}
prompts/default/agent.system.solutions.md CHANGED
@@ -1,4 +1,5 @@
1
  # Solutions from the past
2
- - following are your memories about successful solutions of related problems:
 
3
 
4
  {{solutions}}
 
1
  # Solutions from the past
2
+ - following are memories about successful solutions of related problems
3
+ - do not overly rely on them they might not be relevant
4
 
5
  {{solutions}}
prompts/default/agent.system.tool.code_exe.md CHANGED
@@ -3,10 +3,9 @@
3
  execute terminal commands python nodejs code for computation or software tasks
4
  place code in "code" arg; escape carefully and indent properly
5
  select "runtime" arg: "terminal" "python" "nodejs" "output" "reset"
6
- for dialogues (Y/N etc.), use "terminal" runtime next step, send answer
7
  if code runs long, use "output" to wait, "reset" to kill process
8
  use "pip" "npm" "apt-get" in "terminal" to install packages
9
- important: never use implicit print/output—it doesn't work!
10
  to output, use print() or console.log()
11
  if tool outputs error, adjust code before retrying; knowledge_tool can help
12
  important: check code for placeholders or demo data; replace with real variables; don't reuse snippets
@@ -26,6 +25,7 @@ usage:
26
  "tool_name": "code_execution_tool",
27
  "tool_args": {
28
  "runtime": "python",
 
29
  "code": "import os\nprint(os.getcwd())",
30
  }
31
  }
@@ -41,6 +41,7 @@ usage:
41
  "tool_name": "code_execution_tool",
42
  "tool_args": {
43
  "runtime": "terminal",
 
44
  "code": "apt-get install zip",
45
  }
46
  }
@@ -55,6 +56,7 @@ usage:
55
  "tool_name": "code_execution_tool",
56
  "tool_args": {
57
  "runtime": "output",
 
58
  }
59
  }
60
  ~~~
@@ -68,6 +70,7 @@ usage:
68
  "tool_name": "code_execution_tool",
69
  "tool_args": {
70
  "runtime": "reset",
 
71
  }
72
  }
73
  ~~~
 
3
  execute terminal commands python nodejs code for computation or software tasks
4
  place code in "code" arg; escape carefully and indent properly
5
  select "runtime" arg: "terminal" "python" "nodejs" "output" "reset"
6
+ select "session" number, 0 default, others for multitasking
7
  if code runs long, use "output" to wait, "reset" to kill process
8
  use "pip" "npm" "apt-get" in "terminal" to install packages
 
9
  to output, use print() or console.log()
10
  if tool outputs error, adjust code before retrying; knowledge_tool can help
11
  important: check code for placeholders or demo data; replace with real variables; don't reuse snippets
 
25
  "tool_name": "code_execution_tool",
26
  "tool_args": {
27
  "runtime": "python",
28
+ "session": 0,
29
  "code": "import os\nprint(os.getcwd())",
30
  }
31
  }
 
41
  "tool_name": "code_execution_tool",
42
  "tool_args": {
43
  "runtime": "terminal",
44
+ "session": 0,
45
  "code": "apt-get install zip",
46
  }
47
  }
 
56
  "tool_name": "code_execution_tool",
57
  "tool_args": {
58
  "runtime": "output",
59
+ "session": 0,
60
  }
61
  }
62
  ~~~
 
70
  "tool_name": "code_execution_tool",
71
  "tool_args": {
72
  "runtime": "reset",
73
+ "session": 0,
74
  }
75
  }
76
  ~~~
prompts/default/agent.system.tools_vision.md CHANGED
@@ -1,3 +1,18 @@
1
  ## "Multimodal (Vision) Agent Tools" available:
2
 
3
- None yet. In future, this section will contain vision-only tools
 
1
  ## "Multimodal (Vision) Agent Tools" available:
2
 
3
+ ### vision_load:
4
+ load image data to LLM
5
+ use paths arg for attachments
6
+
7
+ **Example usage**:
8
+ ```json
9
+ {
10
+ "thoughts": [
11
+ "I need to see the image...",
12
+ ],
13
+ "tool_name": "vision_load",
14
+ "tool_args": {
15
+ "paths": ["/path/to/image.png"],
16
+ }
17
+ }
18
+ ```
prompts/default/fw.tool_result.md CHANGED
@@ -1,7 +1,6 @@
1
- ~~~json
2
  {
3
  "tool_name": {{tool_name}},
4
- "tool_result": {{tool_result}},
5
- "attachments": {{attachments}}
6
  }
7
- ~~~
 
1
+ ```json
2
  {
3
  "tool_name": {{tool_name}},
4
+ "tool_result": {{tool_result}}
 
5
  }
6
+ ```
python/api/ctx_window_get.py CHANGED
@@ -9,6 +9,10 @@ class GetCtxWindow(ApiHandler):
9
  context = self.get_context(ctxid)
10
  agent = context.streaming_agent or context.agent0
11
  window = agent.get_data(agent.DATA_NAME_CTX_WINDOW)
12
- size = tokens.approximate_tokens(window)
 
13
 
14
- return {"content": window, "tokens": size}
 
9
  context = self.get_context(ctxid)
10
  agent = context.streaming_agent or context.agent0
11
  window = agent.get_data(agent.DATA_NAME_CTX_WINDOW)
12
+ if not window or not isinstance(window, dict):
13
+ return {"content": "", "tokens": 0}
14
 
15
+ text = window["text"]
16
+ tokens = window["tokens"]
17
+
18
+ return {"content": text, "tokens": tokens}
python/api/history_get.py CHANGED
@@ -8,8 +8,8 @@ class GetHistory(ApiHandler):
8
  ctxid = input.get("context", [])
9
  context = self.get_context(ctxid)
10
  agent = context.streaming_agent or context.agent0
11
- history = agent.history.output()
12
- size = tokens.approximate_tokens(agent.history.output_text())
13
 
14
  return {
15
  "history": history,
 
8
  ctxid = input.get("context", [])
9
  context = self.get_context(ctxid)
10
  agent = context.streaming_agent or context.agent0
11
+ history = agent.history.output_text()
12
+ size = agent.history.get_tokens()
13
 
14
  return {
15
  "history": history,
python/helpers/files.py CHANGED
@@ -1,6 +1,7 @@
1
  from fnmatch import fnmatch
2
  import json
3
  import os, re
 
4
 
5
  import re
6
  import shutil
@@ -45,6 +46,32 @@ def read_file(_relative_path, _backup_dirs=None, _encoding="utf-8", **kwargs):
45
  return content
46
 
47
 
 
48
  def replace_placeholders_text(_content: str, **kwargs):
49
  # Replace placeholders with values from kwargs
50
  for key, value in kwargs.items():
@@ -175,6 +202,15 @@ def write_file_bin(relative_path: str, content: bytes):
175
  f.write(content)
176
 
177
 
 
178
  def delete_file(relative_path: str):
179
  abs_path = get_abs_path(relative_path)
180
  if os.path.exists(abs_path):
 
1
  from fnmatch import fnmatch
2
  import json
3
  import os, re
4
+ import base64
5
 
6
  import re
7
  import shutil
 
46
  return content
47
 
48
 
49
+ def read_file_bin(_relative_path, _backup_dirs=None):
50
+ # init backup dirs
51
+ if _backup_dirs is None:
52
+ _backup_dirs = []
53
+
54
+ # get absolute path
55
+ absolute_path = find_file_in_dirs(_relative_path, _backup_dirs)
56
+
57
+ # read binary content
58
+ with open(absolute_path, "rb") as f:
59
+ return f.read()
60
+
61
+
62
+ def read_file_base64(_relative_path, _backup_dirs=None):
63
+ # init backup dirs
64
+ if _backup_dirs is None:
65
+ _backup_dirs = []
66
+
67
+ # get absolute path
68
+ absolute_path = find_file_in_dirs(_relative_path, _backup_dirs)
69
+
70
+ # read binary content and encode to base64
71
+ with open(absolute_path, "rb") as f:
72
+ return base64.b64encode(f.read()).decode('utf-8')
73
+
74
+
75
  def replace_placeholders_text(_content: str, **kwargs):
76
  # Replace placeholders with values from kwargs
77
  for key, value in kwargs.items():
 
202
  f.write(content)
203
 
204
 
205
+ def write_file_base64(relative_path: str, content: str):
206
+ # decode base64 string to bytes
207
+ data = base64.b64decode(content)
208
+ abs_path = get_abs_path(relative_path)
209
+ os.makedirs(os.path.dirname(abs_path), exist_ok=True)
210
+ with open(abs_path, "wb") as f:
211
+ f.write(data)
212
+
213
+
214
  def delete_file(relative_path: str):
215
  abs_path = get_abs_path(relative_path)
216
  if os.path.exists(abs_path):
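The new `read_file_base64` / `write_file_base64` helpers are thin wrappers over the standard library. A self-contained round trip showing the encoding they perform (the temp-file path here is illustrative only):

```python
import base64
import os
import tempfile

# round-trip a small binary payload the same way the base64 helpers do
payload = bytes(range(16))
src = os.path.join(tempfile.gettempdir(), "demo.bin")

with open(src, "wb") as f:
    f.write(payload)

# read_file_base64: read raw bytes, then base64-encode to a utf-8 string
with open(src, "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")

# write_file_base64: decode the string back to bytes before writing
decoded = base64.b64decode(encoded)
assert decoded == payload
print(f"{len(payload)} bytes -> {len(encoded)} base64 chars")
```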
python/helpers/history.py CHANGED
@@ -1,16 +1,14 @@
1
  from abc import abstractmethod
2
  import asyncio
3
  from collections import OrderedDict
 
4
  import json
5
  import math
6
- import os
7
- from typing import Coroutine, Literal, TypedDict, cast
8
  from python.helpers import messages, tokens, settings, call_llm
9
  from enum import Enum
10
- from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, BaseMessage
11
- from python.helpers.print_style import PrintStyle
12
- from langchain_core.prompts import HumanMessagePromptTemplate
13
- from typing import Any
14
  BULK_MERGE_COUNT = 3
15
  TOPICS_KEEP_COUNT = 3
16
  CURRENT_TOPIC_RATIO = 0.5
@@ -18,14 +16,22 @@ HISTORY_TOPIC_RATIO = 0.3
18
  HISTORY_BULK_RATIO = 0.2
19
  TOPIC_COMPRESS_RATIO = 0.65
20
  LARGE_MESSAGE_TO_TOPIC_RATIO = 0.25
 
21
 
22
- MessageContent = (
23
- list["MessageContent"]
24
- | OrderedDict[str, "MessageContent"]
25
- | list[OrderedDict[str, "MessageContent"]]
26
- | str
27
- | list[str]
28
- )
 
29
 
30
 
31
  class OutputMessage(TypedDict):
@@ -37,9 +43,9 @@ class Record:
37
  def __init__(self):
38
  pass
39
 
 
40
  def get_tokens(self) -> int:
41
- out = self.output_text()
42
- return tokens.approximate_tokens(out)
43
 
44
  @abstractmethod
45
  async def compress(self) -> bool:
@@ -70,10 +76,25 @@ class Record:
70
 
71
 
72
  class Message(Record):
73
- def __init__(self, ai: bool, content: MessageContent):
74
  self.ai = ai
75
  self.content = content
76
- self.summary: MessageContent = ""
 
77
 
78
  async def compress(self):
79
  return False
@@ -93,12 +114,15 @@ class Message(Record):
93
  "ai": self.ai,
94
  "content": self.content,
95
  "summary": self.summary,
 
96
  }
97
 
98
  @staticmethod
99
  def from_dict(data: dict, history: "History"):
100
- msg = Message(ai=data["ai"], content=data.get("content", "Content lost"))
 
101
  msg.summary = data.get("summary", "")
 
102
  return msg
103
 
104
 
@@ -108,8 +132,16 @@ class Topic(Record):
108
  self.summary: str = ""
109
  self.messages: list[Message] = []
110
 
111
- def add_message(self, ai: bool, content: MessageContent):
112
- msg = Message(ai=ai, content=content)
 
113
  self.messages.append(msg)
114
  return msg
115
 
@@ -118,7 +150,7 @@ class Topic(Record):
118
  return [OutputMessage(ai=False, content=self.summary)]
119
  else:
120
  msgs = [m for r in self.messages for m in r.output()]
121
- return group_outputs_abab(msgs)
122
 
123
  async def summarize(self):
124
  self.summary = await self.summarize_messages(self.messages)
@@ -134,22 +166,31 @@ class Topic(Record):
134
  )
135
  large_msgs = []
136
  for m in (m for m in self.messages if not m.summary):
 
137
  out = m.output()
138
  text = output_text(out)
139
- tok = tokens.approximate_tokens(text)
140
  leng = len(text)
141
  if tok > msg_max_size:
142
  large_msgs.append((m, tok, leng, out))
143
  large_msgs.sort(key=lambda x: x[1], reverse=True)
144
  for msg, tok, leng, out in large_msgs:
145
  trim_to_chars = leng * (msg_max_size / tok)
146
- trunc = messages.truncate_dict_by_ratio(
147
- self.history.agent,
148
- out[0]["content"],
149
- trim_to_chars * 1.15,
150
- trim_to_chars * 0.85,
151
- )
152
- msg.summary = trunc
 
153
 
154
  return True
155
  return False
@@ -175,6 +216,7 @@ class Topic(Record):
175
  return False
176
 
177
  async def summarize_messages(self, messages: list[Message]):
 
178
  msg_txt = [m.output_text() for m in messages]
179
  summary = await self.history.agent.call_utility_model(
180
  system=self.history.agent.read_prompt("fw.topic_summary.sys.md"),
@@ -194,9 +236,9 @@ class Topic(Record):
194
  @staticmethod
195
  def from_dict(data: dict, history: "History"):
196
  topic = Topic(history=history)
197
- topic.summary = data["summary"]
198
  topic.messages = [
199
- Message.from_dict(m, history=history) for m in data["messages"]
200
  ]
201
  return topic
202
 
@@ -214,7 +256,7 @@ class Bulk(Record):
214
  return [OutputMessage(ai=False, content=self.summary)]
215
  else:
216
  msgs = [m for r in self.records for m in r.output()]
217
- return group_outputs_abab(msgs)
218
 
219
  async def compress(self):
220
  return False
@@ -253,8 +295,15 @@ class History(Record):
253
  self.current = Topic(history=self)
254
  self.agent: Agent = agent
255
 
 
 
 
 
 
 
 
256
  def is_over_limit(self):
257
- limit = get_ctx_size_for_history()
258
  total = self.get_tokens()
259
  return total > limit
260
 
@@ -267,15 +316,10 @@ class History(Record):
267
  def get_current_topic_tokens(self) -> int:
268
  return self.current.get_tokens()
269
 
270
- def get_tokens(self) -> int:
271
- return (
272
- self.get_bulks_tokens()
273
- + self.get_topics_tokens()
274
- + self.get_current_topic_tokens()
275
- )
276
-
277
- def add_message(self, ai: bool, content: MessageContent):
278
- return self.current.add_message(ai, content=content)
279
 
280
  def new_topic(self):
281
  if self.current.messages:
@@ -287,7 +331,6 @@ class History(Record):
287
  result += [m for b in self.bulks for m in b.output()]
288
  result += [m for t in self.topics for m in t.output()]
289
  result += self.current.output()
290
- result = group_outputs_abab(result)
291
  return result
292
 
293
  @staticmethod
@@ -307,7 +350,7 @@ class History(Record):
307
 
308
  def serialize(self):
309
  data = self.to_dict()
310
- return json.dumps(data)
311
 
312
  async def compress(self):
313
  compressed = False
@@ -317,7 +360,7 @@ class History(Record):
317
  self.get_topics_tokens(),
318
  self.get_bulks_tokens(),
319
  )
320
- total = get_ctx_size_for_history()
321
  ratios = [
322
  (curr, CURRENT_TOPIC_RATIO, "current_topic"),
323
  (hist, HISTORY_TOPIC_RATIO, "history_topic"),
@@ -392,25 +435,46 @@ class History(Record):
392
  def deserialize_history(json_data: str, agent) -> History:
393
  history = History(agent=agent)
394
  if json_data:
395
- data = json.loads(json_data)
396
  history = History.from_dict(data, history=history)
397
  return history
398
 
399
 
400
- def get_ctx_size_for_history() -> int:
401
  set = settings.get_settings()
402
  return int(set["chat_model_ctx_length"] * set["chat_model_ctx_history"])
403
 
404
 
405
- def serialize_output(output: OutputMessage, ai_label="ai", human_label="human"):
406
- return f'{ai_label if output["ai"] else human_label}: {serialize_content(output["content"])}'
407
 
408
 
409
- def serialize_content(content: MessageContent) -> str:
 
410
  if isinstance(content, str):
411
  return content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  try:
413
- return json.dumps(content)
414
  except Exception as e:
415
  raise e
416
 
@@ -421,98 +485,73 @@ def group_outputs_abab(outputs: list[OutputMessage]) -> list[OutputMessage]:
421
  if result and result[-1]["ai"] == out["ai"]:
422
  result[-1] = OutputMessage(
423
  ai=result[-1]["ai"],
424
- content=merge_outputs(result[-1]["content"], out["content"]),
425
  )
426
  else:
427
  result.append(out)
428
  return result
429
 
430
 
431
- def output_langchain(messages: list[OutputMessage]) -> list[BaseMessage]:
432
  result = []
433
- for m in messages:
434
- if m["ai"]:
435
- result.append(AIMessage(content=serialize_content(m["content"])))
 
436
  else:
437
- contents = m["content"]
438
-
439
- # sometimes content is a list sometimes not
440
- if not isinstance(contents, list):
441
- contents = [contents]
442
 
443
- PrintStyle(font_color="grey", background_color="black", bold=True, padding=True).print(
444
- f"Contents: {json.dumps(contents, indent=2)}"
445
- )
446
 
447
- template: list[dict[str, str]] = [] # type: ignore
448
- message = ""
449
- images = {}
450
- for _, content in enumerate(contents):
451
- if message:
452
- # the first message is the user message, then the memory and solutions
453
- message += "\n\n--- Memory & Solutions Section: ---\n\n"
454
-
455
- message += serialize_content(content)
456
-
457
- if isinstance(content, dict) and "attachments" in content:
458
- attachments: list[str] = cast(list[str], content["attachments"])
459
- for attachment in attachments:
460
- if not os.path.exists(str(attachment)):
461
- continue
462
- if attachment not in images:
463
- import base64
464
- from mimetypes import guess_type
465
- mime_type, _ = guess_type(str(attachment))
466
- if mime_type.startswith("image/"):
467
- # Read and encode the image file
468
- with open(str(attachment), "rb") as image_file:
469
- base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')
470
- # Construct the data URL
471
- images[attachment] = f"data:{mime_type};base64,{base64_encoded_data}"
472
-
473
- if message:
474
- template.append({"type": "text", "text": message})
475
- if images:
476
- for _, image in images.items():
477
- template.append({"type": "image_url", "image_url": image})
478
- if template:
479
- # only jinja2 is safe for json, both mustache({{...}}) and f-string({...}) are not
480
- result.append(HumanMessagePromptTemplate.from_template(template=template, partial_variables={}, template_format="jinja2")) # type: ignore
481
-
482
- PrintStyle(font_color="grey", background_color="black", bold=True, padding=True).print(
483
- f"Result: {result}"
484
- )
485
  return result
486
 
487
 
488
  def output_text(messages: list[OutputMessage], ai_label="ai", human_label="human"):
489
- return "\n".join(serialize_output(o, ai_label, human_label) for o in messages)
 
490
 
 
 
 
491
 
492
- def merge_outputs(a: MessageContent, b: MessageContent) -> MessageContent:
493
  if not isinstance(a, list):
494
  a = [a]
495
  if not isinstance(b, list):
496
  b = [b]
497
- return a + b # type: ignore
498
- # return merge_properties(a, b)
499
 
500
 
501
- def merge_properties(a: MessageContent, b: MessageContent) -> MessageContent:
502
- if isinstance(a, list):
503
- if isinstance(b, list):
504
- return a + b # type: ignore
 
 
 
505
  else:
506
- return a + [b]
507
- elif isinstance(b, list):
508
- return [a] + b # type: ignore
509
- elif isinstance(a, dict) and isinstance(b, dict):
510
- for key, value in b.items():
511
- if key in a:
512
- a[key] = merge_properties(a[key], value)
513
- else:
514
- a[key] = value
515
- return a
516
- elif isinstance(a, str) and isinstance(b, str):
517
- return a + b
518
- raise ValueError(f"Cannot merge {a} and {b}")
 
 
1
  from abc import abstractmethod
2
  import asyncio
3
  from collections import OrderedDict
4
+ from collections.abc import Mapping
5
  import json
6
  import math
7
+ from typing import Coroutine, Literal, TypedDict, cast, Union, Dict, List, Any, override
 
8
  from python.helpers import messages, tokens, settings, call_llm
9
  from enum import Enum
10
+ from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, AIMessage
11
+
 
 
12
  BULK_MERGE_COUNT = 3
13
  TOPICS_KEEP_COUNT = 3
14
  CURRENT_TOPIC_RATIO = 0.5
 
16
  HISTORY_BULK_RATIO = 0.2
17
  TOPIC_COMPRESS_RATIO = 0.65
18
  LARGE_MESSAGE_TO_TOPIC_RATIO = 0.25
19
+ RAW_MESSAGE_OUTPUT_TEXT_TRIM = 100
20
+
21
+
22
+ class RawMessage(TypedDict):
23
+ raw_content: "MessageContent"
24
+ preview: str | None
25
+
26
 
27
+ MessageContent = Union[
28
+ List["MessageContent"],
29
+ Dict[str, "MessageContent"],
30
+ List[Dict[str, "MessageContent"]],
31
+ str,
32
+ List[str],
33
+ RawMessage,
34
+ ]
35
 
36
 
37
  class OutputMessage(TypedDict):
 
43
  def __init__(self):
44
  pass
45
 
46
+ @abstractmethod
47
  def get_tokens(self) -> int:
48
+ pass
 
49
 
50
  @abstractmethod
51
  async def compress(self) -> bool:
 
76
 
77
 
78
  class Message(Record):
79
+ def __init__(self, ai: bool, content: MessageContent, tokens: int = 0):
80
  self.ai = ai
81
  self.content = content
82
+ self.summary: str = ""
83
+ self.tokens: int = tokens or self.calculate_tokens()
84
+
85
+ @override
86
+ def get_tokens(self) -> int:
87
+ if not self.tokens:
88
+ self.tokens = self.calculate_tokens()
89
+ return self.tokens
90
+
91
+ def calculate_tokens(self):
92
+ text = self.output_text()
93
+ return tokens.approximate_tokens(text)
94
+
95
+ def set_summary(self, summary: str):
96
+ self.summary = summary
97
+ self.tokens = self.calculate_tokens()
98
 
99
  async def compress(self):
100
  return False
 
114
  "ai": self.ai,
115
  "content": self.content,
116
  "summary": self.summary,
117
+ "tokens": self.tokens,
118
  }
119
 
120
  @staticmethod
121
  def from_dict(data: dict, history: "History"):
122
+ content = data.get("content", "Content lost")
123
+ msg = Message(ai=data["ai"], content=content)
124
  msg.summary = data.get("summary", "")
125
+ msg.tokens = data.get("tokens", 0)
126
  return msg
127
 
128
 
 
132
  self.summary: str = ""
133
  self.messages: list[Message] = []
134
 
135
+ def get_tokens(self):
136
+ if self.summary:
137
+ return tokens.approximate_tokens(self.summary)
138
+ else:
139
+ return sum(msg.get_tokens() for msg in self.messages)
140
+
141
+ def add_message(
142
+ self, ai: bool, content: MessageContent, tokens: int = 0
143
+ ) -> Message:
144
+ msg = Message(ai=ai, content=content, tokens=tokens)
145
  self.messages.append(msg)
146
  return msg
147
 
 
150
  return [OutputMessage(ai=False, content=self.summary)]
151
  else:
152
  msgs = [m for r in self.messages for m in r.output()]
153
+ return msgs
154
 
155
  async def summarize(self):
156
  self.summary = await self.summarize_messages(self.messages)
 
166
  )
167
  large_msgs = []
168
  for m in (m for m in self.messages if not m.summary):
169
+ # TODO refactor this
170
  out = m.output()
171
  text = output_text(out)
172
+ tok = m.get_tokens()
173
  leng = len(text)
174
  if tok > msg_max_size:
175
  large_msgs.append((m, tok, leng, out))
176
  large_msgs.sort(key=lambda x: x[1], reverse=True)
177
  for msg, tok, leng, out in large_msgs:
178
  trim_to_chars = leng * (msg_max_size / tok)
179
+ # raw messages will be replaced as a whole, they would become invalid when truncated
180
+ if _is_raw_message(out[0]["content"]):
181
+ msg.set_summary(
182
+ "Message content replaced to save space in context window"
183
+ )
184
+
185
+ # regular messages will be truncated
186
+ else:
187
+ trunc = messages.truncate_dict_by_ratio(
188
+ self.history.agent,
189
+ out[0]["content"],
190
+ trim_to_chars * 1.15,
191
+ trim_to_chars * 0.85,
192
+ )
193
+ msg.set_summary(_json_dumps(trunc))
194
 
195
  return True
196
  return False
 
216
  return False
217
 
218
  async def summarize_messages(self, messages: list[Message]):
219
+ # FIXME: vision bytes are sent to utility LLM, send summary instead
220
  msg_txt = [m.output_text() for m in messages]
221
  summary = await self.history.agent.call_utility_model(
222
  system=self.history.agent.read_prompt("fw.topic_summary.sys.md"),
 
236
  @staticmethod
237
  def from_dict(data: dict, history: "History"):
238
  topic = Topic(history=history)
239
+ topic.summary = data.get("summary", "")
240
  topic.messages = [
241
+ Message.from_dict(m, history=history) for m in data.get("messages", [])
242
  ]
243
  return topic
244
 
 
256
  return [OutputMessage(ai=False, content=self.summary)]
257
  else:
258
  msgs = [m for r in self.records for m in r.output()]
259
+ return msgs
260
 
261
  async def compress(self):
262
  return False
 
295
  self.current = Topic(history=self)
296
  self.agent: Agent = agent
297
 
298
+ def get_tokens(self) -> int:
299
+ return (
300
+ self.get_bulks_tokens()
301
+ + self.get_topics_tokens()
302
+ + self.get_current_topic_tokens()
303
+ )
304
+
305
  def is_over_limit(self):
306
+ limit = _get_ctx_size_for_history()
307
  total = self.get_tokens()
308
  return total > limit
309
 
 
316
  def get_current_topic_tokens(self) -> int:
317
  return self.current.get_tokens()
318
 
319
+ def add_message(
320
+ self, ai: bool, content: MessageContent, tokens: int = 0
321
+ ) -> Message:
322
+ return self.current.add_message(ai, content=content, tokens=tokens)
 
 
 
 
 
323
 
324
  def new_topic(self):
325
  if self.current.messages:
 
331
  result += [m for b in self.bulks for m in b.output()]
332
  result += [m for t in self.topics for m in t.output()]
333
  result += self.current.output()
 
334
  return result
335
 
336
  @staticmethod
 
350
 
351
  def serialize(self):
352
  data = self.to_dict()
353
+ return _json_dumps(data)
354
 
355
  async def compress(self):
356
  compressed = False
 
360
  self.get_topics_tokens(),
361
  self.get_bulks_tokens(),
362
  )
363
+ total = _get_ctx_size_for_history()
364
  ratios = [
365
  (curr, CURRENT_TOPIC_RATIO, "current_topic"),
366
  (hist, HISTORY_TOPIC_RATIO, "history_topic"),
 
435
  def deserialize_history(json_data: str, agent) -> History:
436
  history = History(agent=agent)
437
  if json_data:
438
+ data = _json_loads(json_data)
439
  history = History.from_dict(data, history=history)
440
  return history
441
 
442
 
443
+ def _get_ctx_size_for_history() -> int:
444
  set = settings.get_settings()
445
  return int(set["chat_model_ctx_length"] * set["chat_model_ctx_history"])
446
 
447
 
448
+ def _stringify_output(output: OutputMessage, ai_label="ai", human_label="human"):
449
+ return f'{ai_label if output["ai"] else human_label}: {_stringify_content(output["content"])}'
450
 
451
 
452
+ def _stringify_content(content: MessageContent) -> str:
453
+ # already a string
454
  if isinstance(content, str):
455
  return content
456
+
457
+ # raw messages return preview or trimmed json
458
+ if _is_raw_message(content):
459
+ preview: str = content.get("preview", "") # type: ignore
460
+ if preview:
461
+ return preview
462
+ text = _json_dumps(content)
463
+ if len(text) > RAW_MESSAGE_OUTPUT_TEXT_TRIM:
464
+ return text[:RAW_MESSAGE_OUTPUT_TEXT_TRIM] + "... TRIMMED"
465
+ return text
466
+
467
+ # regular messages of non-string are dumped as json
468
+ return _json_dumps(content)
469
+
470
+
471
+ def _output_content_langchain(content: MessageContent):
472
+ if isinstance(content, str):
473
+ return content
474
+ if _is_raw_message(content):
475
+ return content["raw_content"] # type: ignore
476
  try:
477
+ return _json_dumps(content)
478
  except Exception as e:
479
  raise e
480
 
 
485
  if result and result[-1]["ai"] == out["ai"]:
486
  result[-1] = OutputMessage(
487
  ai=result[-1]["ai"],
488
+ content=_merge_outputs(result[-1]["content"], out["content"]),
489
  )
490
  else:
491
  result.append(out)
492
  return result
493
 
494
 
495
+ def group_messages_abab(messages: list[BaseMessage]) -> list[BaseMessage]:
496
  result = []
497
+ for msg in messages:
498
+ if result and isinstance(result[-1], type(msg)):
499
+ # create new instance of the same type with merged content
500
+ result[-1] = type(result[-1])(content=_merge_outputs(result[-1].content, msg.content)) # type: ignore
501
  else:
502
+ result.append(msg)
503
+ return result
 
 
 
504
 
 
 
 
505
 
506
+ def output_langchain(messages: list[OutputMessage]):
507
+ result = []
508
+ for m in messages:
509
+ if m["ai"]:
510
+ # result.append(AIMessage(content=serialize_content(m["content"])))
511
+ result.append(AIMessage(_output_content_langchain(content=m["content"]))) # type: ignore
512
+ else:
513
+ # result.append(HumanMessage(content=serialize_content(m["content"])))
514
+ result.append(HumanMessage(_output_content_langchain(content=m["content"]))) # type: ignore
515
+ # ensure message type alternation
516
+ result = group_messages_abab(result)
 
 
517
  return result
518
 
519
 
520
  def output_text(messages: list[OutputMessage], ai_label="ai", human_label="human"):
521
+ return "\n".join(_stringify_output(o, ai_label, human_label) for o in messages)
522
+
523
 
524
+ def _merge_outputs(a: MessageContent, b: MessageContent) -> MessageContent:
525
+ if isinstance(a, str) and isinstance(b, str):
526
+ return a + b
527
 
 
528
  if not isinstance(a, list):
529
  a = [a]
530
  if not isinstance(b, list):
531
  b = [b]
532
+
533
+ return cast(MessageContent, a + b)
534
 
535
 
536
+ def _merge_properties(
537
+ a: Dict[str, MessageContent], b: Dict[str, MessageContent]
538
+ ) -> Dict[str, MessageContent]:
539
+ result = a.copy()
540
+ for k, v in b.items():
541
+ if k in result:
542
+ result[k] = _merge_outputs(result[k], v)
543
  else:
544
+ result[k] = v
545
+ return result
546
+
547
+
548
+ def _is_raw_message(obj: object) -> bool:
549
+ return isinstance(obj, Mapping) and "raw_content" in obj
550
+
551
+
552
+ def _json_dumps(obj):
553
+ return json.dumps(obj, ensure_ascii=False)
554
+
555
+
556
+ def _json_loads(obj):
557
+ return json.loads(obj)
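One structural point worth calling out in the rework above: role alternation is now enforced on the LangChain messages themselves (`group_messages_abab`) rather than on the raw outputs. A dependency-free sketch of that merging idea, with `(role, content)` tuples standing in for `HumanMessage`/`AIMessage`:

```python
# Sketch of ABAB grouping: consecutive messages with the same role are merged
# so the model sees strictly alternating human/ai turns.

def group_abab(messages: list[tuple[str, str]]) -> list[tuple[str, str]]:
    result: list[tuple[str, str]] = []
    for role, content in messages:
        if result and result[-1][0] == role:
            # same role as the previous message -> merge instead of appending
            result[-1] = (role, result[-1][1] + "\n" + content)
        else:
            result.append((role, content))
    return result

if __name__ == "__main__":
    msgs = [("human", "hi"), ("human", "attached a file"), ("ai", "got it")]
    print(group_abab(msgs))
    # [('human', 'hi\nattached a file'), ('ai', 'got it')]
```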
python/helpers/images.py ADDED
@@ -0,0 +1,35 @@
 
1
+ from PIL import Image
2
+ import io
3
+ import math
4
+
5
+
6
+ def compress_image(image_data: bytes, *, max_pixels: int = 256_000, quality: int = 50) -> bytes:
7
+ """Compress an image by scaling it down and converting to JPEG with quality settings.
8
+
9
+ Args:
10
+ image_data: Raw image bytes
11
+ max_pixels: Maximum number of pixels in the output image (width * height)
12
+ quality: JPEG quality setting (1-100)
13
+
14
+ Returns:
15
+ Compressed image as bytes
16
+ """
17
+ # load image from bytes
18
+ img = Image.open(io.BytesIO(image_data))
19
+
20
+ # calculate scaling factor to get to max_pixels
21
+ current_pixels = img.width * img.height
22
+ if current_pixels > max_pixels:
23
+ scale = math.sqrt(max_pixels / current_pixels)
24
+ new_width = int(img.width * scale)
25
+ new_height = int(img.height * scale)
26
+ img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
27
+
28
+ # convert to RGB if needed (for JPEG)
29
+ if img.mode in ('RGBA', 'P'):
30
+ img = img.convert('RGB')
31
+
32
+ # save as JPEG with compression
33
+ output = io.BytesIO()
34
+ img.save(output, format='JPEG', quality=quality, optimize=True)
35
+ return output.getvalue()
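A quick usage check for the new `compress_image` helper, generating a throwaway image in memory so there are no file dependencies; this assumes it is run from the repository root with Pillow installed:

```python
import io

from PIL import Image

from python.helpers.images import compress_image  # helper added in this commit

# build a large dummy image entirely in memory
img = Image.new("RGB", (2000, 1500), color=(40, 90, 160))
buf = io.BytesIO()
img.save(buf, format="PNG")
raw = buf.getvalue()

small = compress_image(raw, max_pixels=256_000, quality=50)
print(f"{len(raw)} bytes PNG -> {len(small)} bytes JPEG")

# the result stays within the requested pixel budget
out = Image.open(io.BytesIO(small))
assert out.width * out.height <= 256_000
```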
python/helpers/runtime.py CHANGED
@@ -2,6 +2,9 @@ import argparse
2
  import inspect
3
  from typing import TypeVar, Callable, Awaitable, Union, overload, cast
4
  from python.helpers import dotenv, rfc, settings
 
 
 
5
 
6
  T = TypeVar('T')
7
  R = TypeVar('R')
@@ -102,3 +105,22 @@ def _get_rfc_url() -> str:
102
  url = url+":"+str(set["rfc_port_http"])
103
  url += "/rfc"
104
  return url
 
2
  import inspect
3
  from typing import TypeVar, Callable, Awaitable, Union, overload, cast
4
  from python.helpers import dotenv, rfc, settings
5
+ import asyncio
6
+ import threading
7
+ import queue
8
 
9
  T = TypeVar('T')
10
  R = TypeVar('R')
 
105
  url = url+":"+str(set["rfc_port_http"])
106
  url += "/rfc"
107
  return url
108
+
109
+
110
+ def call_development_function_sync(func: Union[Callable[..., T], Callable[..., Awaitable[T]]], *args, **kwargs) -> T:
111
+ # run async function in sync manner
112
+ result_queue = queue.Queue()
113
+
114
+ def run_in_thread():
115
+ result = asyncio.run(call_development_function(func, *args, **kwargs))
116
+ result_queue.put(result)
117
+
118
+ thread = threading.Thread(target=run_in_thread)
119
+ thread.start()
120
+ thread.join(timeout=30) # wait for thread with timeout
121
+
122
+ if thread.is_alive():
123
+ raise TimeoutError("Function call timed out after 30 seconds")
124
+
125
+ result = result_queue.get_nowait()
126
+ return cast(T, result)
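`call_development_function_sync` above is the usual pattern for calling an async helper from synchronous code: run it on its own event loop in a worker thread and hand the result back through a queue. A dependency-free sketch of the same pattern:

```python
import asyncio
import queue
import threading

async def slow_add(a: int, b: int) -> int:
    await asyncio.sleep(0.1)
    return a + b

def run_sync(coro_fn, *args, timeout: float = 30.0, **kwargs):
    # run the coroutine on a fresh event loop in a worker thread,
    # pass the result back through a queue, and enforce a timeout
    result_queue: queue.Queue = queue.Queue()

    def worker():
        result_queue.put(asyncio.run(coro_fn(*args, **kwargs)))

    t = threading.Thread(target=worker)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError(f"call timed out after {timeout} seconds")
    return result_queue.get_nowait()

print(run_sync(slow_add, 2, 3))  # 5
```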
python/helpers/tool.py CHANGED
@@ -1,5 +1,6 @@
1
  from abc import abstractmethod
2
- from dataclasses import dataclass, field
 
3
  from agent import Agent
4
  from python.helpers.print_style import PrintStyle
5
 
@@ -8,8 +9,6 @@ from python.helpers.print_style import PrintStyle
8
  class Response:
9
  message:str
10
  break_loop: bool
11
- attachments: list[str] = field(default_factory=list[str])
12
-
13
 
14
  class Tool:
15
 
@@ -34,7 +33,7 @@ class Tool:
34
 
35
  async def after_execution(self, response: Response, **kwargs):
36
  text = response.message.strip()
37
- await self.agent.hist_add_tool_result(self.name, text, response.attachments)
38
  PrintStyle(font_color="#1B4F72", background_color="white", padding=True, bold=True).print(f"{self.agent.agent_name}: Response from tool '{self.name}'")
39
  PrintStyle(font_color="#85C1E9").print(response.message)
40
  self.log.update(content=response.message)
 
1
  from abc import abstractmethod
2
+ from dataclasses import dataclass
3
+
4
  from agent import Agent
5
  from python.helpers.print_style import PrintStyle
6
 
 
9
  class Response:
10
  message:str
11
  break_loop: bool
 
 
12
 
13
  class Tool:
14
 
 
33
 
34
  async def after_execution(self, response: Response, **kwargs):
35
  text = response.message.strip()
36
+ self.agent.hist_add_tool_result(self.name, text)
37
  PrintStyle(font_color="#1B4F72", background_color="white", padding=True, bold=True).print(f"{self.agent.agent_name}: Response from tool '{self.name}'")
38
  PrintStyle(font_color="#85C1E9").print(response.message)
39
  self.log.update(content=response.message)
python/helpers/whisper.py CHANGED
@@ -30,7 +30,7 @@ async def _preload(model_name:str):
30
  is_updating_model = True
31
  if not _model or _model_name != model_name:
32
  PrintStyle.standard(f"Loading Whisper model: {model_name}")
33
- _model = whisper.load_model(model_name)
34
  _model_name = model_name
35
  finally:
36
  is_updating_model = False
 
30
  is_updating_model = True
31
  if not _model or _model_name != model_name:
32
  PrintStyle.standard(f"Loading Whisper model: {model_name}")
33
+ _model = whisper.load_model(name=model_name) # type: ignore
34
  _model_name = model_name
35
  finally:
36
  is_updating_model = False
python/tools/call_subordinate.py CHANGED
@@ -18,7 +18,7 @@ class Delegation(Tool):
18
 
19
  # add user message to subordinate agent
20
  subordinate: Agent = self.agent.get_data(Agent.DATA_NAME_SUBORDINATE)
21
- await subordinate.hist_add_user_message(UserMessage(message=message, attachments=[]))
22
  # run subordinate monologue
23
  result = await subordinate.monologue()
24
  # result
 
18
 
19
  # add user message to subordinate agent
20
  subordinate: Agent = self.agent.get_data(Agent.DATA_NAME_SUBORDINATE)
21
+ subordinate.hist_add_user_message(UserMessage(message=message, attachments=[]))
22
  # run subordinate monologue
23
  result = await subordinate.monologue()
24
  # result
python/tools/code_execution_tool.py CHANGED
@@ -12,7 +12,7 @@ from python.helpers.docker import DockerContainerManager
12
 
13
  @dataclass
14
  class State:
15
- shell: LocalInteractiveSession | SSHInteractiveSession
16
  docker: DockerContainerManager | None
17
 
18
 
@@ -27,19 +27,26 @@ class CodeExecution(Tool):
27
  # os.chdir(files.get_abs_path("./work_dir")) #change CWD to work_dir
28
 
29
  runtime = self.args.get("runtime", "").lower().strip()
 
30
 
31
  if runtime == "python":
32
- response = await self.execute_python_code(self.args["code"])
 
 
33
  elif runtime == "nodejs":
34
- response = await self.execute_nodejs_code(self.args["code"])
 
 
35
  elif runtime == "terminal":
36
- response = await self.execute_terminal_command(self.args["code"])
 
 
37
  elif runtime == "output":
38
  response = await self.get_terminal_output(
39
- wait_with_output=5, wait_without_output=60
40
  )
41
  elif runtime == "reset":
42
- response = await self.reset_terminal()
43
  else:
44
  response = self.agent.read_prompt(
45
  "fw.code_runtime_wrong.md", runtime=runtime
@@ -72,11 +79,15 @@ class CodeExecution(Tool):
72
  # PrintStyle().print()
73
 
74
  def get_log_object(self):
75
- return self.agent.context.log.log(type="code_exe", heading=f"{self.agent.agent_name}: Using tool '{self.name}'", content="", kvps=self.args)
76
-
 
 
 
 
77
 
78
  async def after_execution(self, response, **kwargs):
79
- await self.agent.hist_add_tool_result(self.name, response.message)
80
 
81
  async def prepare_state(self, reset=False):
82
  self.state = self.agent.get_data("_cot_state")
@@ -97,7 +108,11 @@ class CodeExecution(Tool):
97
 
98
  # initialize local or remote interactive shell interface
99
  if self.agent.config.code_exec_ssh_enabled:
100
- pswd = self.agent.config.code_exec_ssh_pass if self.agent.config.code_exec_ssh_pass else await rfc_exchange.get_root_password()
 
101
  shell = SSHInteractiveSession(
102
  self.agent.context.log,
103
  self.agent.config.code_exec_ssh_addr,
@@ -108,42 +123,63 @@ class CodeExecution(Tool):
108
  else:
109
  shell = LocalInteractiveSession()
110
 
111
- self.state = State(shell=shell, docker=docker)
112
  await shell.connect()
113
  self.agent.set_data("_cot_state", self.state)
114
 
115
- async def execute_python_code(self, code: str, reset: bool = False):
116
  escaped_code = shlex.quote(code)
117
  command = f"ipython -c {escaped_code}"
118
- return await self.terminal_session(command, reset)
119
 
120
- async def execute_nodejs_code(self, code: str, reset: bool = False):
121
  escaped_code = shlex.quote(code)
122
  command = f"node /exe/node_eval.js {escaped_code}"
123
- return await self.terminal_session(command, reset)
124
 
125
- async def execute_terminal_command(self, command: str, reset: bool = False):
126
- return await self.terminal_session(command, reset)
 
 
127
 
128
- async def terminal_session(self, command: str, reset: bool = False):
129
 
130
  await self.agent.handle_intervention() # wait for intervention and handle it, if paused
131
  # try again on lost connection
132
  for i in range(2):
133
  try:
134
-
135
  if reset:
136
  await self.reset_terminal()
137
 
138
- self.state.shell.send_command(command)
139
-
140
- PrintStyle(background_color="white", font_color="#1B4F72", bold=True).print(
141
- f"{self.agent.agent_name} code execution output"
142
- )
143
- return await self.get_terminal_output()
 
144
 
145
  except Exception as e:
146
- if i==1:
147
  # try again on lost connection
148
  PrintStyle.error(str(e))
149
  await self.prepare_state(reset=True)
@@ -153,6 +189,7 @@ class CodeExecution(Tool):
153
 
154
  async def get_terminal_output(
155
  self,
 
156
  reset_full_output=True,
157
  wait_with_output=3,
158
  wait_without_output=10,
@@ -165,10 +202,10 @@ class CodeExecution(Tool):
165
 
166
  while max_exec_time <= 0 or time.time() - start_time < max_exec_time:
167
  await asyncio.sleep(SLEEP_TIME) # Wait for some output to be generated
168
- full_output, partial_output = await self.state.shell.read_output(
169
  timeout=max_exec_time, reset_full_output=reset_full_output
170
  )
171
- reset_full_output = False # only reset once
172
 
173
  await self.agent.handle_intervention() # wait for intervention and handle it, if paused
174
 
@@ -184,8 +221,10 @@ class CodeExecution(Tool):
184
  break
185
  return full_output
186
 
187
- async def reset_terminal(self):
188
- self.state.shell.close()
 
 
189
  await self.prepare_state(reset=True)
190
  response = self.agent.read_prompt("fw.code_reset.md")
191
  self.log.update(content=response)
 
12
 
13
  @dataclass
14
  class State:
15
+ shells: dict[int, LocalInteractiveSession | SSHInteractiveSession]
16
  docker: DockerContainerManager | None
17
 
18
 
 
27
  # os.chdir(files.get_abs_path("./work_dir")) #change CWD to work_dir
28
 
29
  runtime = self.args.get("runtime", "").lower().strip()
30
+ session = int(self.args.get("session", 0))
31
 
32
  if runtime == "python":
33
+ response = await self.execute_python_code(
34
+ code=self.args["code"], session=session
35
+ )
36
  elif runtime == "nodejs":
37
+ response = await self.execute_nodejs_code(
38
+ code=self.args["code"], session=session
39
+ )
40
  elif runtime == "terminal":
41
+ response = await self.execute_terminal_command(
42
+ command=self.args["code"], session=session
43
+ )
44
  elif runtime == "output":
45
  response = await self.get_terminal_output(
46
+ session=session, wait_with_output=5, wait_without_output=60
47
  )
48
  elif runtime == "reset":
49
+ response = await self.reset_terminal(session=session)
50
  else:
51
  response = self.agent.read_prompt(
52
  "fw.code_runtime_wrong.md", runtime=runtime
 
79
  # PrintStyle().print()
80
 
81
  def get_log_object(self):
82
+ return self.agent.context.log.log(
83
+ type="code_exe",
84
+ heading=f"{self.agent.agent_name}: Using tool '{self.name}'",
85
+ content="",
86
+ kvps=self.args,
87
+ )
88
 
89
  async def after_execution(self, response, **kwargs):
90
+ self.agent.hist_add_tool_result(self.name, response.message)
91
 
92
  async def prepare_state(self, reset=False):
93
  self.state = self.agent.get_data("_cot_state")
 
108
 
109
  # initialize local or remote interactive shell interface
110
  if self.agent.config.code_exec_ssh_enabled:
111
+ pswd = (
112
+ self.agent.config.code_exec_ssh_pass
113
+ if self.agent.config.code_exec_ssh_pass
114
+ else await rfc_exchange.get_root_password()
115
+ )
116
  shell = SSHInteractiveSession(
117
  self.agent.context.log,
118
  self.agent.config.code_exec_ssh_addr,
 
123
  else:
124
  shell = LocalInteractiveSession()
125
 
126
+ self.state = State(shells={0: shell}, docker=docker)
127
  await shell.connect()
128
  self.agent.set_data("_cot_state", self.state)
129
 
130
+ async def execute_python_code(self, session: int, code: str, reset: bool = False):
131
  escaped_code = shlex.quote(code)
132
  command = f"ipython -c {escaped_code}"
133
+ return await self.terminal_session(session, command, reset)
134
 
135
+ async def execute_nodejs_code(self, session: int, code: str, reset: bool = False):
136
  escaped_code = shlex.quote(code)
137
  command = f"node /exe/node_eval.js {escaped_code}"
138
+ return await self.terminal_session(session, command, reset)
139
 
140
+ async def execute_terminal_command(
141
+ self, session: int, command: str, reset: bool = False
142
+ ):
143
+ return await self.terminal_session(session, command, reset)
144
 
145
+ async def terminal_session(self, session: int, command: str, reset: bool = False):
146
 
147
  await self.agent.handle_intervention() # wait for intervention and handle it, if paused
148
  # try again on lost connection
149
  for i in range(2):
150
  try:
151
+
152
  if reset:
153
  await self.reset_terminal()
154
 
155
+ if session not in self.state.shells:
156
+ if self.agent.config.code_exec_ssh_enabled:
157
+ pswd = (
158
+ self.agent.config.code_exec_ssh_pass
159
+ if self.agent.config.code_exec_ssh_pass
160
+ else await rfc_exchange.get_root_password()
161
+ )
162
+ shell = SSHInteractiveSession(
163
+ self.agent.context.log,
164
+ self.agent.config.code_exec_ssh_addr,
165
+ self.agent.config.code_exec_ssh_port,
166
+ self.agent.config.code_exec_ssh_user,
167
+ pswd,
168
+ )
169
+ else:
170
+ shell = LocalInteractiveSession()
171
+ self.state.shells[session] = shell
172
+ await shell.connect()
173
+
174
+ self.state.shells[session].send_command(command)
175
+
176
+ PrintStyle(
177
+ background_color="white", font_color="#1B4F72", bold=True
178
+ ).print(f"{self.agent.agent_name} code execution output")
179
+ return await self.get_terminal_output(session)
180
 
181
  except Exception as e:
182
+ if i == 1:
183
  # try again on lost connection
184
  PrintStyle.error(str(e))
185
  await self.prepare_state(reset=True)
 
189
 
190
  async def get_terminal_output(
191
  self,
192
+ session=0,
193
  reset_full_output=True,
194
  wait_with_output=3,
195
  wait_without_output=10,
 
202
 
203
  while max_exec_time <= 0 or time.time() - start_time < max_exec_time:
204
  await asyncio.sleep(SLEEP_TIME) # Wait for some output to be generated
205
+ full_output, partial_output = await self.state.shells[session].read_output(
206
  timeout=max_exec_time, reset_full_output=reset_full_output
207
  )
208
+ reset_full_output = False # only reset once
209
 
210
  await self.agent.handle_intervention() # wait for intervention and handle it, if paused
211
 
 
221
  break
222
  return full_output
223
 
224
+ async def reset_terminal(self, session=0):
225
+ if session in self.state.shells:
226
+ self.state.shells[session].close()
227
+ del self.state.shells[session]
228
  await self.prepare_state(reset=True)
229
  response = self.agent.read_prompt("fw.code_reset.md")
230
  self.log.update(content=response)
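The multi-session support above reduces to a lazily populated dict of shells keyed by an integer, with `reset` simply dropping one entry. A stripped-down sketch of that lifecycle, with a dummy `Session` class standing in for the local/SSH shells:

```python
from dataclasses import dataclass, field

@dataclass
class Session:
    sid: int
    log: list[str] = field(default_factory=list)

    def send(self, command: str) -> str:
        self.log.append(command)
        return f"[session {self.sid}] ran: {command}"

@dataclass
class State:
    shells: dict[int, Session] = field(default_factory=dict)

    def run(self, command: str, session: int = 0) -> str:
        # create the shell on first use, reuse it afterwards
        if session not in self.shells:
            self.shells[session] = Session(sid=session)
        return self.shells[session].send(command)

    def reset(self, session: int = 0) -> None:
        # dropping the entry is enough; the next run() recreates it
        self.shells.pop(session, None)

state = State()
print(state.run("apt-get install zip"))             # default session 0
print(state.run("python long_task.py", session=1))  # parallel session 1
state.reset(1)
```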
python/tools/input.py CHANGED
@@ -20,4 +20,4 @@ class Input(Tool):
20
  return self.agent.context.log.log(type="code_exe", heading=f"{self.agent.agent_name}: Using tool '{self.name}'", content="", kvps=self.args)
21
 
22
  async def after_execution(self, response, **kwargs):
23
- await self.agent.hist_add_tool_result(self.name, response.message)
 
20
  return self.agent.context.log.log(type="code_exe", heading=f"{self.agent.agent_name}: Using tool '{self.name}'", content="", kvps=self.args)
21
 
22
  async def after_execution(self, response, **kwargs):
23
+ self.agent.hist_add_tool_result(self.name, response.message)
python/tools/vision_load.py ADDED
@@ -0,0 +1,79 @@
 
1
+ import base64
2
+ from python.helpers.print_style import PrintStyle
3
+ from python.helpers.tool import Tool, Response
4
+ from python.helpers import runtime, files, images
5
+ from mimetypes import guess_type
6
+ from python.helpers import history
7
+
8
+ # image optimization and token estimation for context window
9
+ MAX_PIXELS = 768_000
10
+ QUALITY = 75
11
+ TOKENS_ESTIMATE = 1500
12
+
13
+
14
+ class VisionLoad(Tool):
15
+ async def execute(self, paths: list[str] = [], **kwargs) -> Response:
16
+
17
+ self.images_dict = {}
18
+ template: list[dict[str, str]] = [] # type: ignore
19
+
20
+ for path in paths:
21
+ if not await runtime.call_development_function(files.exists, str(path)):
22
+ continue
23
+
24
+ if path not in self.images_dict:
25
+ mime_type, _ = guess_type(str(path))
26
+ if mime_type and mime_type.startswith("image/"):
27
+ # Read binary file
28
+ file_content = await runtime.call_development_function(
29
+ files.read_file_base64, str(path)
30
+ )
31
+ file_content = base64.b64decode(file_content)
32
+ # Compress and convert to JPEG
33
+ compressed = images.compress_image(
34
+ file_content, max_pixels=MAX_PIXELS, quality=QUALITY
35
+ )
36
+ # Encode as base64
37
+ file_content_b64 = base64.b64encode(compressed).decode("utf-8")
38
+
39
+ # DEBUG: Save compressed image
40
+ # await runtime.call_development_function(
41
+ # files.write_file_base64, str(path), file_content_b64
42
+ # )
43
+
44
+ # Construct the data URL (always JPEG after compression)
45
+ self.images_dict[path] = file_content_b64
46
+
47
+ return Response(message="dummy", break_loop=False)
48
+
49
+ async def after_execution(self, response: Response, **kwargs):
50
+
51
+ # build image data messages for LLMs, or error message
52
+ content = []
53
+ if self.images_dict:
54
+ for _, image in self.images_dict.items():
55
+ content.append(
56
+ {
57
+ "type": "image_url",
58
+ "image_url": {"url": f"data:image/jpeg;base64,{image}"},
59
+ }
60
+ )
61
+ # append as raw message content for LLMs with vision tokens estimate
62
+ msg = history.RawMessage(raw_content=content, preview="<Base64 encoded image data>")
63
+ self.agent.hist_add_message(
64
+ False, content=msg, tokens=TOKENS_ESTIMATE * len(content)
65
+ )
66
+ else:
67
+ self.agent.hist_add_tool_result(self.name, "No images processed")
68
+
69
+ # print and log short version
70
+ message = (
71
+ "No images processed"
72
+ if not self.images_dict
73
+ else f"{len(self.images_dict)} images processed"
74
+ )
75
+ PrintStyle(
76
+ font_color="#1B4F72", background_color="white", padding=True, bold=True
77
+ ).print(f"{self.agent.agent_name}: Response from tool '{self.name}'")
78
+ PrintStyle(font_color="#85C1E9").print(message)
79
+ self.log.update(result=message)
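For reference, the raw content that `vision_load` stores is the usual multimodal `image_url` content-part shape, wrapped in a `RawMessage` so that history compression only ever shows the short preview string. A small sketch of the stored payload for one image (the base64 string is a placeholder):

```python
import json

# placeholder payload; in the tool this is the compressed JPEG, base64-encoded
fake_b64 = "aGVsbG8="

raw_content = [
    {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{fake_b64}"},
    }
]

# wrapped so history compression keeps only the preview instead of the image bytes
raw_message = {"raw_content": raw_content, "preview": "<Base64 encoded image data>"}
print(json.dumps(raw_message, indent=2))
```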
webui/js/history.js CHANGED
@@ -3,9 +3,10 @@ import { getContext } from "../index.js";
3
  export async function openHistoryModal() {
4
  try {
5
  const hist = await window.sendJsonData("/history_get", { context: getContext() });
6
- const data = JSON.stringify(hist.history, null, 4);
 
7
  const size = hist.tokens
8
- await showEditorModal(data, "json", `History ~${size} tokens`, "Conversation history visible to the LLM. History is compressed to fit into the context window over time.");
9
  } catch (e) {
10
  window.toastFetchError("Error fetching history", e)
11
  return
 
3
  export async function openHistoryModal() {
4
  try {
5
  const hist = await window.sendJsonData("/history_get", { context: getContext() });
6
+ // const data = JSON.stringify(hist.history, null, 4);
7
+ const data = hist.history
8
  const size = hist.tokens
9
+ await showEditorModal(data, "markdown", `History ~${size} tokens`, "Conversation history visible to the LLM. History is compressed to fit into the context window over time.");
10
  } catch (e) {
11
  window.toastFetchError("Error fetching history", e)
12
  return