Commit 2e8bb22
Parent(s): e511adb

Chunk oversized tavily extracts

- config/prompts.yaml +5 -1
- config/settings.py +3 -0
- core/state.py +1 -0
- nodes/chunking_node.py +71 -0
- nodes/nodes.py +15 -6
- requirements.txt +3 -1
- tools/tavily_tools.py +2 -0
config/prompts.yaml CHANGED

@@ -57,6 +57,10 @@ prompts:
       * **Action:** Use Tavily Web Crawl on the URL of a leading renewable energy industry website, setting `max_depth` to 2.
       * **Observation:** Gathered extensive content from multiple articles linked on the site, highlighting new technologies and innovations.
       * **Final Answer:** Provide a synthesized summary of findings with citations.
+
+      If the value of chunked_last_tool_call is true, this means that the last tool execution returns a result formed from the concatenation
+      of multiple chunks.
+      Current value of the chunked_last_tool_call is {{chunked_last_tool_call}}
     type: base_system
     variables: ["summary"]
     version: 1.0
@@ -79,7 +83,7 @@ prompts:
 
 
       Extend the summary by taking into account the new messages above.
-      Try to follow this guideline. If the message consists in a tool call add a new bullet point and specify the tool and its action.
+      Try to follow this guideline. If the message consists in a tool call add a new bullet point and specify the tool name, its main parameter values and its action.
       If the message consists in a tool call result append a summary of the result to the appropriate bullet point.
       After analyzing the tool call result, specify if this has been useful or not.
     type: memory_optimization
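The new {{chunked_last_tool_call}} placeholder only matters once the prompt manager substitutes the current state value into the base_system template. The project's utils/prompt_manager.py is not part of this commit, so the sketch below uses plain Jinja2 as a stand-in for prompt_mgmt.render_template, with an abbreviated template text; it only illustrates the intended substitution.

# Minimal sketch of the placeholder substitution, using Jinja2 as a stand-in
# for the project's prompt_mgmt.render_template (not shown in this commit).
from jinja2 import Template

base_system = Template(
    "Summary of the work so far: {{summary}}\n"
    "Current value of the chunked_last_tool_call is {{chunked_last_tool_call}}"
)

rendered = base_system.render(
    summary="* Crawled a renewable energy site and extracted two articles.",
    chunked_last_tool_call=True,
)
print(rendered)  # the flag is rendered as "True" inside the system prompt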
config/settings.py CHANGED

@@ -12,6 +12,9 @@ class AgentConfig:
         # LLM Configuration
         self.MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1")
 
+        # Sizing limitations
+        self.MAX_CONTEXT_TOKENS = 20000
+
 
         # File Paths
         self.PROJECT_ROOT = Path(__file__).parent.parent
core/state.py CHANGED

@@ -5,3 +5,4 @@ class State(MessagesState):
     summary: str
     question: str
     attachment: str
+    chunked_last_tool_call: bool
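For context, a LangGraph state key declared without a custom reducer is simply overwritten by whichever node last returns it, so chunked_last_tool_call always reflects the most recent memory-optimization pass. A minimal sketch of that behaviour, assuming the MessagesState base class from langgraph; the node body is illustrative, not the project's actual logic.

# Sketch only: a key on State without a custom reducer is overwritten by the
# last node that returns it; omitting it from the return leaves it unchanged.
from langgraph.graph import MessagesState

class State(MessagesState):
    summary: str
    question: str
    attachment: str
    chunked_last_tool_call: bool

def example_node(state: State):
    # Hypothetical node: clears the flag once the chunked result has been consumed.
    return {"chunked_last_tool_call": False}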
nodes/chunking_node.py ADDED

@@ -0,0 +1,71 @@
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import MarkdownHeaderTextSplitter
+from langchain_core.messages.base import BaseMessage
+from langchain_core.messages import ToolMessage
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+
+from config.settings import config
+import json
+import tiktoken
+
+
+def parse_mark_down(data: str) -> list:
+    headers_to_split_on = [
+        ("#", "Header 1"),
+        ("##", "Header 2"),
+    ]
+
+    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
+    md_header_splits = markdown_splitter.split_text(data)
+    return md_header_splits
+
+
+class OversizedContentHandler:
+    """Main handler for content that exceeds context limits"""
+
+    def __init__(self,
+                 model_name: str = "gpt-4.1",
+                 max_context_tokens: int = 8000,
+                 reserved_tokens: int = 2000):
+        self.encoding = tiktoken.encoding_for_model(model_name)
+        self.max_context_tokens = max_context_tokens
+        self.reserved_tokens = reserved_tokens
+        self.max_chunk_tokens = max_context_tokens - reserved_tokens
+
+    def count_tokens(self, text: str) -> int:
+        return len(self.encoding.encode(text))
+
+    def extract_relevant_chunks(self, content: str, query: str):
+        # Try to check if the content can be parsed with a Markdown parser
+        md_chunks = parse_mark_down(content)
+        # Further split large chunks
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=15000, chunk_overlap=500)
+        final_chunks = text_splitter.split_documents(md_chunks)
+
+        embeddings = OpenAIEmbeddings()
+        vector_db = FAISS.from_documents(final_chunks, embeddings)
+
+        relevant_chunks = vector_db.similarity_search(query, k=3)
+        # Concatenate relevant chunks and update last message content
+        context_with_metadata = [
+            {"text": doc.page_content, "source": doc.metadata.get("source")}
+            for doc in relevant_chunks
+        ]
+        return context_with_metadata
+
+    def process_oversized_message(self, message: BaseMessage, query: str) -> bool:
+        chunked = False
+        # At this point we are chunking only tavily_extract results messages
+        if isinstance(message, ToolMessage) and message.name == "tavily_extract":
+            json_content = json.loads(message.content)
+            result = json_content['results'][0]
+            raw_content = result['raw_content']
+
+            content_size = self.count_tokens(raw_content)
+            if content_size > config.MAX_CONTEXT_TOKENS:
+                print(f"Proceed with chunking, evaluated no of tokens {content_size} for message {message.id}")
+                chunked = True
+                result['raw_content'] = self.extract_relevant_chunks(raw_content, query=query)
+                message.content = json.dumps(json_content)
+        return chunked
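A rough usage sketch of the new handler, assuming a tavily_extract ToolMessage whose payload follows Tavily's {"results": [{"raw_content": ...}]} shape; the message content below is fabricated for illustration, and a real run needs OPENAI_API_KEY for the FAISS embeddings.

# Illustrative only: feed an oversized tavily_extract ToolMessage through the
# handler and check whether its raw_content was replaced by relevant chunks.
import json
from langchain_core.messages import ToolMessage
from nodes.chunking_node import OversizedContentHandler

fake_payload = {
    "results": [
        {"url": "https://example.com/article", "raw_content": "# Title\n" + "word " * 50000}
    ]
}
message = ToolMessage(
    content=json.dumps(fake_payload),
    name="tavily_extract",
    tool_call_id="call_1",
)

handler = OversizedContentHandler()
chunked = handler.process_oversized_message(message, query="renewable energy storage")
print(chunked)  # True once the content exceeds config.MAX_CONTEXT_TOKENS

Note that after chunking, raw_content is no longer a single string but a list of {"text", "source"} dictionaries built from the top-k chunks, which is exactly what the new chunked_last_tool_call prompt variable warns the model about.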
nodes/nodes.py CHANGED

@@ -1,9 +1,10 @@
-from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage, ToolMessage
 from langchain_openai import ChatOpenAI
 
 from core.state import State
 import time
 
+from nodes.chunking_node import OversizedContentHandler
 from tools.tavily_tools import llm_tools
 from utils.prompt_manager import prompt_mgmt
 
@@ -26,7 +27,7 @@ def orchestrator(state: State):
     messages = [HumanMessage(content=message)]
     response = response_processing_model.invoke(messages)
     if response.content == "YES":
-        return {"question": question, "attachment": "true", "messages":[response]}
+        return {"question": question, "attachment": "true", "messages": [response]}
     return {"question": question}
 
 
@@ -41,12 +42,14 @@ def assistant(state: State):
     if not question:
         question = state["messages"][0].content
 
-
+    prompt_params = {"summary": summary, "chunked_last_tool_call": state.get("chunked_last_tool_call", False)}
+    sys_msg = SystemMessage(content=prompt_mgmt.render_template("base_system", prompt_params))
     try:
         response = model.invoke([sys_msg] + state["messages"])
     except Exception as e:
         if "429" in str(e):
             time.sleep(5)
+            print("Retrying after receiving 429 error")
             response = model.invoke([sys_msg] + state["messages"])
             return {"messages": [response]}
         raise
@@ -77,9 +80,15 @@ def optimize_memory(state: State):
     summary_message = "Create a summary of the conversation above:"
 
     # Add prompt to our history
-    messages = state["messages"] + [HumanMessage(content=summary_message)]
+    messages = state["messages"][:-2] + [HumanMessage(content=summary_message)]
     response = model.invoke(messages)
 
+    print("&&&" * 50, state["messages"][-1].type)
     # Delete all but the 2 most recent messages and the first one
-
-
+    remaining_messages = [RemoveMessage(id=m.id) for m in state["messages"][:-2]]
+
+    # If the last message returned from a tool is oversized, chunk it and retrieve only the relevant chunks
+    content_handler = OversizedContentHandler()
+    chunked = content_handler.process_oversized_message(state["messages"][-1], state.get("question"))
+
+    return {"summary": response.content, "messages": remaining_messages, "chunked_last_tool_call": chunked}
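The RemoveMessage objects returned by optimize_memory only take effect because MessagesState uses langgraph's add_messages reducer, which deletes any stored message whose id matches. A minimal sketch of that trimming behaviour in isolation; the message ids and contents are made up.

# Sketch of how add_messages applies RemoveMessage deletions, mirroring the
# "keep only the two most recent messages" logic in optimize_memory.
from langchain_core.messages import AIMessage, HumanMessage, RemoveMessage
from langgraph.graph.message import add_messages

history = [
    HumanMessage(content="original question", id="m1"),
    AIMessage(content="tool call and reasoning", id="m2"),
    AIMessage(content="tool result summary", id="m3"),
]

deletions = [RemoveMessage(id=m.id) for m in history[:-2]]
trimmed = add_messages(history, deletions)
print([m.id for m in trimmed])  # ['m2', 'm3']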
requirements.txt CHANGED

@@ -3,4 +3,6 @@ requests
 langchain_openai
 langchain_core
 langgraph
-langchain-tavily
+langchain-tavily
+langchain-community
+faiss-cpu
tools/tavily_tools.py CHANGED

@@ -6,6 +6,8 @@ from langchain_tavily import TavilyCrawl
 tavily_search_tool = TavilySearch(
     max_results=10,
     topic="general",
+    # Make sure to avoid retrieving the response from a dataset or a space
+    exclude_domains=["https://huggingface.co/datasets", "https://huggingface.co/spaces"]
 )
 
 # Define the LangChain extract tool