Spaces:
Paused
Paused
frdel committed on
Commit ·
2cb62e9
1
Parent(s): 5bbfdd3
Instruments
Browse files
Added instruments - loading to memory, injecting into system prompt, example yt_download
- .gitignore +15 -1
- agent.py +2 -1
- initialize.py +5 -1
- instruments/custom/.gitkeep +0 -0
- instruments/default/.gitkeep +0 -0
- instruments/default/yt_download/yt_download.md +6 -0
- instruments/default/yt_download/yt_download.sh +7 -0
- knowledge/default/solutions/get_current_time.md +13 -0
- knowledge/default/solutions/yt_download.md +0 -6
- prompts/default/agent.system.instruments.md +4 -0
- prompts/default/agent.system.main.solving.md +1 -1
- prompts/default/agent.system.main.tips.md +4 -0
- python/extensions/message_loop_prompts/_50_recall_memories.py +2 -2
- python/extensions/message_loop_prompts/_51_recall_solutions.py +65 -60
- python/helpers/knowledge_import.py +59 -51
- python/helpers/memory.py +37 -7
- python/tools/code_execution_tool.py +2 -2
.gitignore
CHANGED
|
@@ -38,4 +38,18 @@ knowledge/**/*.*
|
|
| 38 |
|
| 39 |
# Explicitly allow the default folder and its contents
|
| 40 |
!knowledge/default/
|
| 41 |
-
!knowledge/default/**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Explicitly allow the default folder and its contents
|
| 40 |
!knowledge/default/
|
| 41 |
+
!knowledge/default/**
|
| 42 |
+
|
| 43 |
+
# Ignore everything in the "instruments" directory
|
| 44 |
+
instruments/*
|
| 45 |
+
|
| 46 |
+
# Do not ignore subdirectories (so we can track .gitkeep)
|
| 47 |
+
!instruments/*/
|
| 48 |
+
|
| 49 |
+
# Ignore all files within subdirectories (except .gitkeep)
|
| 50 |
+
instruments/**/*.*
|
| 51 |
+
!instruments/**/.gitkeep
|
| 52 |
+
|
| 53 |
+
# Explicitly allow the default folder and its contents
|
| 54 |
+
!instruments/default/
|
| 55 |
+
!instruments/default/**
|
agent.py
CHANGED
|
@@ -111,7 +111,8 @@ class AgentConfig:
|
|
| 111 |
)
|
| 112 |
code_exec_docker_volumes: dict[str, dict[str, str]] = field(
|
| 113 |
default_factory=lambda: {
|
| 114 |
-
files.get_abs_path("work_dir"): {"bind": "/root", "mode": "rw"}
|
|
|
|
| 115 |
}
|
| 116 |
)
|
| 117 |
code_exec_ssh_enabled: bool = True
|
|
|
|
| 111 |
)
|
| 112 |
code_exec_docker_volumes: dict[str, dict[str, str]] = field(
|
| 113 |
default_factory=lambda: {
|
| 114 |
+
files.get_abs_path("work_dir"): {"bind": "/root", "mode": "rw"},
|
| 115 |
+
files.get_abs_path("instruments"): {"bind": "/instruments", "mode": "rw"},
|
| 116 |
}
|
| 117 |
)
|
| 118 |
code_exec_ssh_enabled: bool = True
|
initialize.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import models
|
| 2 |
from agent import AgentConfig
|
|
|
|
| 3 |
|
| 4 |
def initialize():
|
| 5 |
|
|
@@ -47,7 +48,10 @@ def initialize():
|
|
| 47 |
# code_exec_docker_name = "agent-zero-exe",
|
| 48 |
# code_exec_docker_image = "frdel/agent-zero-exe:latest",
|
| 49 |
# code_exec_docker_ports = { "22/tcp": 50022 }
|
| 50 |
-
# code_exec_docker_volumes = {
|
|
|
|
|
|
|
|
|
|
| 51 |
code_exec_ssh_enabled = True,
|
| 52 |
# code_exec_ssh_addr = "localhost",
|
| 53 |
# code_exec_ssh_port = 50022,
|
|
|
|
| 1 |
import models
|
| 2 |
from agent import AgentConfig
|
| 3 |
+
from python.helpers import files
|
| 4 |
|
| 5 |
def initialize():
|
| 6 |
|
|
|
|
| 48 |
# code_exec_docker_name = "agent-zero-exe",
|
| 49 |
# code_exec_docker_image = "frdel/agent-zero-exe:latest",
|
| 50 |
# code_exec_docker_ports = { "22/tcp": 50022 }
|
| 51 |
+
# code_exec_docker_volumes = {
|
| 52 |
+
# files.get_abs_path("work_dir"): {"bind": "/root", "mode": "rw"},
|
| 53 |
+
# files.get_abs_path("instruments"): {"bind": "/instruments", "mode": "rw"},
|
| 54 |
+
# },
|
| 55 |
code_exec_ssh_enabled = True,
|
| 56 |
# code_exec_ssh_addr = "localhost",
|
| 57 |
# code_exec_ssh_port = 50022,
|
instruments/custom/.gitkeep
ADDED
|
File without changes
|
instruments/default/.gitkeep
ADDED
|
File without changes
|
instruments/default/yt_download/yt_download.md
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Problem
|
| 2 |
+
Download a YouTube video
|
| 3 |
+
# Solution
|
| 4 |
+
1. cd to the desired location to download
|
| 5 |
+
2. Run instrument "bash /instruments/default/yt_download/yt_download.sh <url>" with your video URL
|
| 6 |
+
3. Wait for the terminal to finish
|
instruments/default/yt_download/yt_download.sh
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# Install yt-dlp and ffmpeg
|
| 4 |
+
sudo apt-get update && sudo apt-get install -y yt-dlp ffmpeg
|
| 5 |
+
|
| 6 |
+
# Download the best video and audio, and merge them
|
| 7 |
+
yt-dlp -f bestvideo+bestaudio --merge-output-format mp4 "$1"
|
knowledge/default/solutions/get_current_time.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Problem
|
| 2 |
+
User asked for current time in timezone
|
| 3 |
+
# Solution
|
| 4 |
+
Use code_execution_tool with following python code adjusted for your timezone
|
| 5 |
+
~~~python
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
import pytz
|
| 8 |
+
|
| 9 |
+
timezone = pytz.timezone('America/New_York')
|
| 10 |
+
current_time = datetime.now(timezone)
|
| 11 |
+
|
| 12 |
+
print("Current time in New York:", current_time)
|
| 13 |
+
~~~
|
knowledge/default/solutions/yt_download.md
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
# Problem
|
| 2 |
-
Download a YouTube video
|
| 3 |
-
# Solution
|
| 4 |
-
1. If you don't have exact URL, use knowledge_tool to get it
|
| 5 |
-
2. Pip install yt-dlp and ffmpeg
|
| 6 |
-
3. Download the video using yt-dlp command: 'yt-dlp YT_URL', replace YT_URL with your video URL.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prompts/default/agent.system.instruments.md
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Instruments
|
| 2 |
+
- following are instruments that could possibly be used:
|
| 3 |
+
|
| 4 |
+
{{instruments}}
|
prompts/default/agent.system.main.solving.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
- Explain each step using your thoughts argument.
|
| 4 |
|
| 5 |
0. Outline the plan by repeating these instructions.
|
| 6 |
-
1. Check
|
| 7 |
2. Check the online sources output of your knowledge_tool.
|
| 8 |
- Look for straightforward solutions compatible with your available tools.
|
| 9 |
- Always look for opensource python/nodejs/terminal tools and packages first.
|
|
|
|
| 3 |
- Explain each step using your thoughts argument.
|
| 4 |
|
| 5 |
0. Outline the plan by repeating these instructions.
|
| 6 |
+
1. Check your memories, solutions and instruments. Prefer using instruments when possible.
|
| 7 |
2. Check the online sources output of your knowledge_tool.
|
| 8 |
- Look for straightforward solutions compatible with your available tools.
|
| 9 |
- Always look for opensource python/nodejs/terminal tools and packages first.
|
prompts/default/agent.system.main.tips.md
CHANGED
|
@@ -14,6 +14,10 @@
|
|
| 14 |
- Communication is the key to succesfull solution.
|
| 15 |
- NEVER delegate your whole task, only parts of it.
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
## Tips and tricks
|
| 18 |
- Focus on python/nodejs/linux libraries when searching for solutions. You can use them with your tools and make solutions easy.
|
| 19 |
- Sometimes you don't need tools, some things can be determined.
|
|
|
|
| 14 |
- Communication is the key to succesfull solution.
|
| 15 |
- NEVER delegate your whole task, only parts of it.
|
| 16 |
|
| 17 |
+
## Instruments
|
| 18 |
+
- Instruments are programs you can utilize to solve tasks
|
| 19 |
+
- Instrument descriptions are injected into the prompt and can be executed with code_execution_tool
|
| 20 |
+
|
| 21 |
## Tips and tricks
|
| 22 |
- Focus on python/nodejs/linux libraries when searching for solutions. You can use them with your tools and make solutions easy.
|
| 23 |
- Sometimes you don't need tools, some things can be determined.
|
python/extensions/message_loop_prompts/_50_recall_memories.py
CHANGED
|
@@ -60,12 +60,12 @@ class RecallMemories(Extension):
|
|
| 60 |
# log the short result
|
| 61 |
if not isinstance(memories, list) or len(memories) == 0:
|
| 62 |
log_item.update(
|
| 63 |
-
heading="No useful memories found
|
| 64 |
)
|
| 65 |
return
|
| 66 |
else:
|
| 67 |
log_item.update(
|
| 68 |
-
heading=f"
|
| 69 |
)
|
| 70 |
|
| 71 |
# concatenate memory.page_content in memories:
|
|
|
|
| 60 |
# log the short result
|
| 61 |
if not isinstance(memories, list) or len(memories) == 0:
|
| 62 |
log_item.update(
|
| 63 |
+
heading="No useful memories found",
|
| 64 |
)
|
| 65 |
return
|
| 66 |
else:
|
| 67 |
log_item.update(
|
| 68 |
+
heading=f"{len(memories)} memories found",
|
| 69 |
)
|
| 70 |
|
| 71 |
# concatenate memory.page_content in memories:
|
python/extensions/message_loop_prompts/_51_recall_solutions.py
CHANGED
|
@@ -7,7 +7,8 @@ class RecallSolutions(Extension):
|
|
| 7 |
|
| 8 |
INTERVAL = 3
|
| 9 |
HISTORY = 5
|
| 10 |
-
|
|
|
|
| 11 |
THRESHOLD = 0.1
|
| 12 |
|
| 13 |
async def execute(self, loop_data: LoopData = LoopData(), **kwargs):
|
|
@@ -19,74 +20,78 @@ class RecallSolutions(Extension):
|
|
| 19 |
|
| 20 |
async def search_solutions(self, loop_data: LoopData, **kwargs):
|
| 21 |
# try:
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
)
|
|
|
|
| 26 |
|
| 27 |
-
|
| 28 |
-
log_item = self.agent.context.log.log(
|
| 29 |
-
type="util",
|
| 30 |
-
heading="Searching memory for solutions...",
|
| 31 |
-
)
|
| 32 |
-
|
| 33 |
-
# get system message and chat history for util llm
|
| 34 |
-
msgs_text = self.agent.concat_messages(
|
| 35 |
-
self.agent.history[-RecallSolutions.HISTORY :]
|
| 36 |
-
) # only last X messages
|
| 37 |
-
system = self.agent.read_prompt(
|
| 38 |
-
"memory.solutions_query.sys.md", history=msgs_text
|
| 39 |
-
)
|
| 40 |
-
|
| 41 |
-
# log query streamed by LLM
|
| 42 |
-
def log_callback(content):
|
| 43 |
-
log_item.stream(query=content)
|
| 44 |
-
|
| 45 |
-
# call util llm to summarize conversation
|
| 46 |
-
query = await self.agent.call_utility_llm(
|
| 47 |
-
system=system, msg=loop_data.message, callback=log_callback
|
| 48 |
-
)
|
| 49 |
-
|
| 50 |
-
# get solutions database
|
| 51 |
-
db = await Memory.get(self.agent)
|
| 52 |
-
|
| 53 |
-
solutions = await db.search_similarity_threshold(
|
| 54 |
-
query=query,
|
| 55 |
-
limit=RecallSolutions.RESULTS,
|
| 56 |
-
threshold=RecallSolutions.THRESHOLD,
|
| 57 |
-
filter=f"area == '{Memory.Area.SOLUTIONS.value}'"
|
| 58 |
-
)
|
| 59 |
-
|
| 60 |
-
# log the short result
|
| 61 |
-
if not isinstance(solutions, list) or len(solutions) == 0:
|
| 62 |
-
log_item.update(
|
| 63 |
-
heading="No successful solution memories found.",
|
| 64 |
-
)
|
| 65 |
-
return
|
| 66 |
-
else:
|
| 67 |
-
log_item.update(
|
| 68 |
-
heading=f"\n\n{len(solutions)} successful solution memories found.",
|
| 69 |
-
)
|
| 70 |
-
|
| 71 |
-
# concatenate solution.page_content in solutions:
|
| 72 |
solutions_text = ""
|
| 73 |
for solution in solutions:
|
| 74 |
solutions_text += solution.page_content + "\n\n"
|
| 75 |
solutions_text = solutions_text.strip()
|
| 76 |
-
|
| 77 |
-
# log the full results
|
| 78 |
log_item.update(solutions=solutions_text)
|
| 79 |
-
|
| 80 |
-
# place to prompt
|
| 81 |
solutions_prompt = self.agent.read_prompt(
|
| 82 |
"agent.system.solutions.md", solutions=solutions_text
|
| 83 |
)
|
| 84 |
-
|
| 85 |
-
# append to system message
|
| 86 |
loop_data.system.append(solutions_prompt)
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
| 7 |
|
| 8 |
INTERVAL = 3
|
| 9 |
HISTORY = 5
|
| 10 |
+
SOLUTIONS_COUNT = 2
|
| 11 |
+
INSTRUMENTS_COUNT = 2
|
| 12 |
THRESHOLD = 0.1
|
| 13 |
|
| 14 |
async def execute(self, loop_data: LoopData = LoopData(), **kwargs):
|
|
|
|
| 20 |
|
| 21 |
async def search_solutions(self, loop_data: LoopData, **kwargs):
|
| 22 |
# try:
|
| 23 |
+
# show temp info message
|
| 24 |
+
self.agent.context.log.log(
|
| 25 |
+
type="info", content="Searching memory for solutions...", temp=True
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# show full util message, this will hide temp message immediately if turned on
|
| 29 |
+
log_item = self.agent.context.log.log(
|
| 30 |
+
type="util",
|
| 31 |
+
heading="Searching memory for solutions...",
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
# get system message and chat history for util llm
|
| 35 |
+
msgs_text = self.agent.concat_messages(
|
| 36 |
+
self.agent.history[-RecallSolutions.HISTORY :]
|
| 37 |
+
) # only last X messages
|
| 38 |
+
system = self.agent.read_prompt(
|
| 39 |
+
"memory.solutions_query.sys.md", history=msgs_text
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
# log query streamed by LLM
|
| 43 |
+
def log_callback(content):
|
| 44 |
+
log_item.stream(query=content)
|
| 45 |
+
|
| 46 |
+
# call util llm to summarize conversation
|
| 47 |
+
query = await self.agent.call_utility_llm(
|
| 48 |
+
system=system, msg=loop_data.message, callback=log_callback
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
# get solutions database
|
| 52 |
+
db = await Memory.get(self.agent)
|
| 53 |
+
|
| 54 |
+
solutions = await db.search_similarity_threshold(
|
| 55 |
+
query=query,
|
| 56 |
+
limit=RecallSolutions.SOLUTIONS_COUNT,
|
| 57 |
+
threshold=RecallSolutions.THRESHOLD,
|
| 58 |
+
filter=f"area == '{Memory.Area.SOLUTIONS.value}'",
|
| 59 |
+
)
|
| 60 |
+
instruments = await db.search_similarity_threshold(
|
| 61 |
+
query=query,
|
| 62 |
+
limit=RecallSolutions.INSTRUMENTS_COUNT,
|
| 63 |
+
threshold=RecallSolutions.THRESHOLD,
|
| 64 |
+
filter=f"area == '{Memory.Area.INSTRUMENTS.value}'",
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
log_item.update(
|
| 68 |
+
heading=f"{len(instruments)} instruments, {len(solutions)} solutions found",
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
if instruments:
|
| 72 |
+
instruments_text = ""
|
| 73 |
+
for instrument in instruments:
|
| 74 |
+
instruments_text += instrument.page_content + "\n\n"
|
| 75 |
+
instruments_text = instruments_text.strip()
|
| 76 |
+
log_item.update(instruments=instruments_text)
|
| 77 |
+
instruments_prompt = self.agent.read_prompt(
|
| 78 |
+
"agent.system.instruments.md", instruments=instruments_text
|
| 79 |
)
|
| 80 |
+
loop_data.system.append(instruments_prompt)
|
| 81 |
|
| 82 |
+
if solutions:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
solutions_text = ""
|
| 84 |
for solution in solutions:
|
| 85 |
solutions_text += solution.page_content + "\n\n"
|
| 86 |
solutions_text = solutions_text.strip()
|
|
|
|
|
|
|
| 87 |
log_item.update(solutions=solutions_text)
|
|
|
|
|
|
|
| 88 |
solutions_prompt = self.agent.read_prompt(
|
| 89 |
"agent.system.solutions.md", solutions=solutions_text
|
| 90 |
)
|
|
|
|
|
|
|
| 91 |
loop_data.system.append(solutions_prompt)
|
| 92 |
|
| 93 |
+
# except Exception as e:
|
| 94 |
+
# err = errors.format_error(e)
|
| 95 |
+
# self.agent.context.log.log(
|
| 96 |
+
# type="error", heading="Recall solutions extension error:", content=err
|
| 97 |
+
# )
|
python/helpers/knowledge_import.py
CHANGED
|
@@ -34,11 +34,14 @@ def calculate_checksum(file_path: str) -> str:
|
|
| 34 |
|
| 35 |
|
| 36 |
def load_knowledge(
|
| 37 |
-
log_item: LogItem | None,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
) -> Dict[str, KnowledgeImport]:
|
| 39 |
-
knowledge_dir = files.get_abs_path("knowledge",knowledge_dir)
|
| 40 |
|
| 41 |
-
from python.helpers.memory import Memory
|
| 42 |
|
| 43 |
# Mapping file extensions to corresponding loader classes
|
| 44 |
file_types_loaders = {
|
|
@@ -47,62 +50,67 @@ def load_knowledge(
|
|
| 47 |
"csv": CSVLoader,
|
| 48 |
"html": UnstructuredHTMLLoader,
|
| 49 |
"json": JSONLoader,
|
| 50 |
-
"md": UnstructuredMarkdownLoader,
|
|
|
|
| 51 |
}
|
| 52 |
|
| 53 |
cnt_files = 0
|
| 54 |
cnt_docs = 0
|
| 55 |
|
| 56 |
-
for area in Memory.Area:
|
| 57 |
-
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
print(f"Found {len(kn_files)} knowledge files in {subdir}, processing...")
|
| 67 |
-
if log_item:
|
| 68 |
-
log_item.stream(
|
| 69 |
-
progress=f"\nFound {len(kn_files)} knowledge files in {subdir}, processing...",
|
| 70 |
-
)
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
# loop index where state is not set and mark it as removed
|
| 108 |
for file_key, file_data in index.items():
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
def load_knowledge(
|
| 37 |
+
log_item: LogItem | None,
|
| 38 |
+
knowledge_dir: str,
|
| 39 |
+
index: Dict[str, KnowledgeImport],
|
| 40 |
+
metadata: dict[str, Any] = {},
|
| 41 |
+
filename_pattern: str = "**/*",
|
| 42 |
) -> Dict[str, KnowledgeImport]:
|
|
|
|
| 43 |
|
| 44 |
+
# from python.helpers.memory import Memory
|
| 45 |
|
| 46 |
# Mapping file extensions to corresponding loader classes
|
| 47 |
file_types_loaders = {
|
|
|
|
| 50 |
"csv": CSVLoader,
|
| 51 |
"html": UnstructuredHTMLLoader,
|
| 52 |
"json": JSONLoader,
|
| 53 |
+
# "md": UnstructuredMarkdownLoader,
|
| 54 |
+
"md": TextLoader,
|
| 55 |
}
|
| 56 |
|
| 57 |
cnt_files = 0
|
| 58 |
cnt_docs = 0
|
| 59 |
|
| 60 |
+
# for area in Memory.Area:
|
| 61 |
+
# subdir = files.get_abs_path(knowledge_dir, area.value)
|
| 62 |
|
| 63 |
+
# if not os.path.exists(knowledge_dir):
|
| 64 |
+
# os.makedirs(knowledge_dir)
|
| 65 |
+
# continue
|
| 66 |
|
| 67 |
+
# Fetch all files in the directory with specified extensions
|
| 68 |
+
kn_files = glob.glob(knowledge_dir + "/" + filename_pattern, recursive=True)
|
| 69 |
+
kn_files = [f for f in kn_files if os.path.isfile(f)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
+
if kn_files:
|
| 72 |
+
print(
|
| 73 |
+
f"Found {len(kn_files)} knowledge files in {knowledge_dir}, processing..."
|
| 74 |
+
)
|
| 75 |
+
if log_item:
|
| 76 |
+
log_item.stream(
|
| 77 |
+
progress=f"\nFound {len(kn_files)} knowledge files in {knowledge_dir}, processing...",
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
for file_path in kn_files:
|
| 81 |
+
ext = file_path.split(".")[-1].lower()
|
| 82 |
+
if ext in file_types_loaders:
|
| 83 |
+
checksum = calculate_checksum(file_path)
|
| 84 |
+
file_key = file_path # os.path.relpath(file_path, knowledge_dir)
|
| 85 |
+
|
| 86 |
+
# Load existing data from the index or create a new entry
|
| 87 |
+
file_data = index.get(file_key, {})
|
| 88 |
+
|
| 89 |
+
if file_data.get("checksum") == checksum:
|
| 90 |
+
file_data["state"] = "original"
|
| 91 |
+
else:
|
| 92 |
+
file_data["state"] = "changed"
|
| 93 |
+
|
| 94 |
+
if file_data["state"] == "changed":
|
| 95 |
+
file_data["checksum"] = checksum
|
| 96 |
+
loader_cls = file_types_loaders[ext]
|
| 97 |
+
loader = loader_cls(
|
| 98 |
+
file_path,
|
| 99 |
+
**(
|
| 100 |
+
text_loader_kwargs
|
| 101 |
+
if ext in ["txt", "csv", "html", "md"]
|
| 102 |
+
else {}
|
| 103 |
+
),
|
| 104 |
+
)
|
| 105 |
+
file_data["documents"] = loader.load_and_split()
|
| 106 |
+
for doc in file_data["documents"]:
|
| 107 |
+
doc.metadata = {**doc.metadata, **metadata}
|
| 108 |
+
cnt_files += 1
|
| 109 |
+
cnt_docs += len(file_data["documents"])
|
| 110 |
+
# print(f"Imported {len(file_data['documents'])} documents from {file_path}")
|
| 111 |
+
|
| 112 |
+
# Update the index
|
| 113 |
+
index[file_key] = file_data # type: ignore
|
| 114 |
|
| 115 |
# loop index where state is not set and mark it as removed
|
| 116 |
for file_key, file_data in index.items():
|
python/helpers/memory.py
CHANGED
|
@@ -21,20 +21,23 @@ from python.helpers.log import Log, LogItem
|
|
| 21 |
from enum import Enum
|
| 22 |
from agent import Agent
|
| 23 |
|
|
|
|
| 24 |
class MyFaiss(FAISS):
|
| 25 |
-
#override aget_by_ids
|
| 26 |
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
| 27 |
# return all self.docstore._dict[id] in ids
|
| 28 |
-
return [self.docstore._dict[id] for id in ids if id in self.docstore._dict]
|
| 29 |
|
| 30 |
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
| 31 |
return self.get_by_ids(ids)
|
| 32 |
|
|
|
|
| 33 |
class Memory:
|
| 34 |
|
| 35 |
class Area(Enum):
|
| 36 |
MAIN = "main"
|
| 37 |
SOLUTIONS = "solutions"
|
|
|
|
| 38 |
|
| 39 |
index: dict[str, "MyFaiss"] = {}
|
| 40 |
|
|
@@ -130,7 +133,7 @@ class Memory:
|
|
| 130 |
# normalize_L2=True,
|
| 131 |
relevance_score_fn=Memory._cosine_normalizer,
|
| 132 |
)
|
| 133 |
-
return db
|
| 134 |
|
| 135 |
def __init__(
|
| 136 |
self,
|
|
@@ -160,8 +163,8 @@ class Memory:
|
|
| 160 |
with open(index_path, "r") as f:
|
| 161 |
index = json.load(f)
|
| 162 |
|
| 163 |
-
|
| 164 |
-
|
| 165 |
|
| 166 |
for file in index:
|
| 167 |
if index[file]["state"] in ["changed", "removed"] and index[file].get(
|
|
@@ -187,6 +190,33 @@ class Memory:
|
|
| 187 |
with open(index_path, "w") as f:
|
| 188 |
json.dump(index, f)
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
async def search_similarity_threshold(
|
| 191 |
self, query: str, limit: int, threshold: float, filter: str = ""
|
| 192 |
):
|
|
@@ -235,9 +265,9 @@ class Memory:
|
|
| 235 |
|
| 236 |
async def delete_documents_by_ids(self, ids: list[str]):
|
| 237 |
# aget_by_ids is not yet implemented in faiss, need to do a workaround
|
| 238 |
-
rem_docs =self.db.get_by_ids(ids)
|
| 239 |
if rem_docs:
|
| 240 |
-
rem_ids = [doc.metadata["id"] for doc in rem_docs]
|
| 241 |
await self.db.adelete(ids=rem_ids)
|
| 242 |
|
| 243 |
if rem_docs:
|
|
|
|
| 21 |
from enum import Enum
|
| 22 |
from agent import Agent
|
| 23 |
|
| 24 |
+
|
| 25 |
class MyFaiss(FAISS):
|
| 26 |
+
# override aget_by_ids
|
| 27 |
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
| 28 |
# return all self.docstore._dict[id] in ids
|
| 29 |
+
return [self.docstore._dict[id] for id in ids if id in self.docstore._dict] # type: ignore
|
| 30 |
|
| 31 |
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
|
| 32 |
return self.get_by_ids(ids)
|
| 33 |
|
| 34 |
+
|
| 35 |
class Memory:
|
| 36 |
|
| 37 |
class Area(Enum):
|
| 38 |
MAIN = "main"
|
| 39 |
SOLUTIONS = "solutions"
|
| 40 |
+
INSTRUMENTS = "instruments"
|
| 41 |
|
| 42 |
index: dict[str, "MyFaiss"] = {}
|
| 43 |
|
|
|
|
| 133 |
# normalize_L2=True,
|
| 134 |
relevance_score_fn=Memory._cosine_normalizer,
|
| 135 |
)
|
| 136 |
+
return db # type: ignore
|
| 137 |
|
| 138 |
def __init__(
|
| 139 |
self,
|
|
|
|
| 163 |
with open(index_path, "r") as f:
|
| 164 |
index = json.load(f)
|
| 165 |
|
| 166 |
+
# preload knowledge folders
|
| 167 |
+
index = self._preload_knowledge_folders(log_item, kn_dirs, index)
|
| 168 |
|
| 169 |
for file in index:
|
| 170 |
if index[file]["state"] in ["changed", "removed"] and index[file].get(
|
|
|
|
| 190 |
with open(index_path, "w") as f:
|
| 191 |
json.dump(index, f)
|
| 192 |
|
| 193 |
+
def _preload_knowledge_folders(
|
| 194 |
+
self,
|
| 195 |
+
log_item: LogItem | None,
|
| 196 |
+
kn_dirs: list[str],
|
| 197 |
+
index: dict[str, knowledge_import.KnowledgeImport],
|
| 198 |
+
):
|
| 199 |
+
# load knowledge folders, subfolders by area
|
| 200 |
+
for kn_dir in kn_dirs:
|
| 201 |
+
for area in Memory.Area:
|
| 202 |
+
index = knowledge_import.load_knowledge(
|
| 203 |
+
log_item,
|
| 204 |
+
files.get_abs_path("knowledge", kn_dir, area.value),
|
| 205 |
+
index,
|
| 206 |
+
{"area": area.value},
|
| 207 |
+
)
|
| 208 |
+
|
| 209 |
+
# load instruments descriptions
|
| 210 |
+
index = knowledge_import.load_knowledge(
|
| 211 |
+
log_item,
|
| 212 |
+
files.get_abs_path("instruments"),
|
| 213 |
+
index,
|
| 214 |
+
{"area": Memory.Area.INSTRUMENTS.value},
|
| 215 |
+
filename_pattern="**/*.md",
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
+
return index
|
| 219 |
+
|
| 220 |
async def search_similarity_threshold(
|
| 221 |
self, query: str, limit: int, threshold: float, filter: str = ""
|
| 222 |
):
|
|
|
|
| 265 |
|
| 266 |
async def delete_documents_by_ids(self, ids: list[str]):
|
| 267 |
# aget_by_ids is not yet implemented in faiss, need to do a workaround
|
| 268 |
+
rem_docs = self.db.get_by_ids(ids) # existing docs to remove (prevents error)
|
| 269 |
if rem_docs:
|
| 270 |
+
rem_ids = [doc.metadata["id"] for doc in rem_docs] # ids to remove
|
| 271 |
await self.db.adelete(ids=rem_ids)
|
| 272 |
|
| 273 |
if rem_docs:
|
python/tools/code_execution_tool.py
CHANGED
|
@@ -36,7 +36,7 @@ class CodeExecution(Tool):
|
|
| 36 |
response = await self.execute_terminal_command(self.args["code"])
|
| 37 |
elif runtime == "output":
|
| 38 |
response = await self.get_terminal_output(
|
| 39 |
-
wait_with_output=5, wait_without_output=
|
| 40 |
)
|
| 41 |
elif runtime == "reset":
|
| 42 |
response = await self.reset_terminal()
|
|
@@ -137,7 +137,7 @@ class CodeExecution(Tool):
|
|
| 137 |
return await self.get_terminal_output()
|
| 138 |
|
| 139 |
async def get_terminal_output(
|
| 140 |
-
self, wait_with_output=3, wait_without_output=10, max_exec_time=
|
| 141 |
):
|
| 142 |
idle = 0
|
| 143 |
SLEEP_TIME = 0.1
|
|
|
|
| 36 |
response = await self.execute_terminal_command(self.args["code"])
|
| 37 |
elif runtime == "output":
|
| 38 |
response = await self.get_terminal_output(
|
| 39 |
+
wait_with_output=5, wait_without_output=60
|
| 40 |
)
|
| 41 |
elif runtime == "reset":
|
| 42 |
response = await self.reset_terminal()
|
|
|
|
| 137 |
return await self.get_terminal_output()
|
| 138 |
|
| 139 |
async def get_terminal_output(
|
| 140 |
+
self, wait_with_output=3, wait_without_output=10, max_exec_time=60
|
| 141 |
):
|
| 142 |
idle = 0
|
| 143 |
SLEEP_TIME = 0.1
|