Commit 2e8bb22
Parent(s): e511adb

Chunk oversized tavily extracts

- config/prompts.yaml +5 -1
- config/settings.py +3 -0
- core/state.py +1 -0
- nodes/chunking_node.py +71 -0
- nodes/nodes.py +15 -6
- requirements.txt +3 -1
- tools/tavily_tools.py +2 -0
config/prompts.yaml CHANGED

@@ -57,6 +57,10 @@ prompts:
       * **Action:** Use Tavily Web Crawl on the URL of a leading renewable energy industry website, setting `max_depth` to 2.
       * **Observation:** Gathered extensive content from multiple articles linked on the site, highlighting new technologies and innovations.
       * **Final Answer:** Provide a synthesized summary of findings with citations.
+
+      If the value of chunked_last_tool_call is true, this means that the last tool execution returns a result formed from the concatenation
+      of multiple chunks.
+      Current value of the chunked_last_tool_call is {{chunked_last_tool_call}}
     type: base_system
     variables: ["summary"]
     version: 1.0
@@ -79,7 +83,7 @@ prompts:
 
 
       Extend the summary by taking into account the new messages above.
-      Try to follow this guideline. If the message consists in a tool call add a new bullet point and specify the tool and its action.
+      Try to follow this guideline. If the message consists in a tool call add a new bullet point and specify the tool name, its main parameter values and its action.
       If the message consists in a tool call result append a summary of the result to the appropriate bullet point.
       After analyzing the tool call result, specify if this has been useful or not.
     type: memory_optimization
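The new {{chunked_last_tool_call}} placeholder only matters once the prompt manager substitutes the current state value into the base_system template. The project's utils/prompt_manager.py is not part of this commit, so the sketch below uses plain Jinja2 as a stand-in for prompt_mgmt.render_template, with an abbreviated template text; it only illustrates the intended substitution.

# Minimal sketch of the placeholder substitution, using Jinja2 as a stand-in
# for the project's prompt_mgmt.render_template (not shown in this commit).
from jinja2 import Template

base_system = Template(
    "Summary of the work so far: {{summary}}\n"
    "Current value of the chunked_last_tool_call is {{chunked_last_tool_call}}"
)

rendered = base_system.render(
    summary="* Crawled a renewable energy site and extracted two articles.",
    chunked_last_tool_call=True,
)
print(rendered)  # the flag is rendered as "True" inside the system prompt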
config/settings.py CHANGED

@@ -12,6 +12,9 @@ class AgentConfig:
         # LLM Configuration
         self.MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1")
 
+        # Sizing limitations
+        self.MAX_CONTEXT_TOKENS = 20000
+
 
         # File Paths
         self.PROJECT_ROOT = Path(__file__).parent.parent
core/state.py CHANGED

@@ -5,3 +5,4 @@ class State(MessagesState):
     summary: str
     question: str
     attachment: str
+    chunked_last_tool_call: bool
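For context, a LangGraph state key declared without a custom reducer is simply overwritten by whichever node last returns it, so chunked_last_tool_call always reflects the most recent memory-optimization pass. A minimal sketch of that behaviour, assuming the MessagesState base class from langgraph; the node body is illustrative, not the project's actual logic.

# Sketch only: a key on State without a custom reducer is overwritten by the
# last node that returns it; omitting it from the return leaves it unchanged.
from langgraph.graph import MessagesState

class State(MessagesState):
    summary: str
    question: str
    attachment: str
    chunked_last_tool_call: bool

def example_node(state: State):
    # Hypothetical node: clears the flag once the chunked result has been consumed.
    return {"chunked_last_tool_call": False}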
nodes/chunking_node.py ADDED

@@ -0,0 +1,71 @@
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import MarkdownHeaderTextSplitter
+from langchain_core.messages.base import BaseMessage
+from langchain_core.messages import ToolMessage
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+
+from config.settings import config
+import json
+import tiktoken
+
+
+def parse_mark_down(data: str) -> list:
+    headers_to_split_on = [
+        ("#", "Header 1"),
+        ("##", "Header 2"),
+    ]
+
+    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
+    md_header_splits = markdown_splitter.split_text(data)
+    return md_header_splits
+
+
+class OversizedContentHandler:
+    """Main handler for content that exceeds context limits"""
+
+    def __init__(self,
+                 model_name: str = "gpt-4.1",
+                 max_context_tokens: int = 8000,
+                 reserved_tokens: int = 2000):
+        self.encoding = tiktoken.encoding_for_model(model_name)
+        self.max_context_tokens = max_context_tokens
+        self.reserved_tokens = reserved_tokens
+        self.max_chunk_tokens = max_context_tokens - reserved_tokens
+
+    def count_tokens(self, text: str) -> int:
+        return len(self.encoding.encode(text))
+
+    def extract_relevant_chunks(self, content: str, query: str):
+        # Try to check if the content can be parsed with a Markdown parser
+        md_chunks = parse_mark_down(content)
+        # Further split large chunks
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=15000, chunk_overlap=500)
+        final_chunks = text_splitter.split_documents(md_chunks)
+
+        embeddings = OpenAIEmbeddings()
+        vector_db = FAISS.from_documents(final_chunks, embeddings)
+
+        relevant_chunks = vector_db.similarity_search(query, k=3)
+        # Concatenate relevant chunks and update last message content
+        context_with_metadata = [
+            {"text": doc.page_content, "source": doc.metadata.get("source")}
+            for doc in relevant_chunks
+        ]
+        return context_with_metadata
+
+    def process_oversized_message(self, message: BaseMessage, query: str) -> bool:
+        chunked = False
+        # At this point we are chunking only tavily_extract results messages
+        if isinstance(message, ToolMessage) and message.name == "tavily_extract":
+            json_content = json.loads(message.content)
+            result = json_content['results'][0]
+            raw_content = result['raw_content']
+
+            content_size = self.count_tokens(raw_content)
+            if content_size > config.MAX_CONTEXT_TOKENS:
+                print(f"Proceed with chunking, evaluated no of tokens {content_size} for message {message.id}")
+                chunked = True
+                result['raw_content'] = self.extract_relevant_chunks(raw_content, query=query)
+                message.content = json.dumps(json_content)
+        return chunked
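A rough usage sketch of the new handler, assuming a tavily_extract ToolMessage whose payload follows Tavily's {"results": [{"raw_content": ...}]} shape; the message content below is fabricated for illustration, and a real run needs OPENAI_API_KEY for the FAISS embeddings.

# Illustrative only: feed an oversized tavily_extract ToolMessage through the
# handler and check whether its raw_content was replaced by relevant chunks.
import json
from langchain_core.messages import ToolMessage
from nodes.chunking_node import OversizedContentHandler

fake_payload = {
    "results": [
        {"url": "https://example.com/article", "raw_content": "# Title\n" + "word " * 50000}
    ]
}
message = ToolMessage(
    content=json.dumps(fake_payload),
    name="tavily_extract",
    tool_call_id="call_1",
)

handler = OversizedContentHandler()
chunked = handler.process_oversized_message(message, query="renewable energy storage")
print(chunked)  # True once the content exceeds config.MAX_CONTEXT_TOKENS

Note that after chunking, raw_content is no longer a single string but a list of {"text", "source"} dictionaries built from the top-k chunks, which is exactly what the new chunked_last_tool_call prompt variable warns the model about.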
nodes/nodes.py CHANGED

@@ -1,9 +1,10 @@
-from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage, ToolMessage
 from langchain_openai import ChatOpenAI
 
 from core.state import State
 import time
 
+from nodes.chunking_node import OversizedContentHandler
 from tools.tavily_tools import llm_tools
 from utils.prompt_manager import prompt_mgmt
 
@@ -26,7 +27,7 @@ def orchestrator(state: State):
     messages = [HumanMessage(content=message)]
     response = response_processing_model.invoke(messages)
     if response.content == "YES":
-        return {"question": question, "attachment": "true", "messages":[response]}
+        return {"question": question, "attachment": "true", "messages": [response]}
     return {"question": question}
 
 
@@ -41,12 +42,14 @@ def assistant(state: State):
     if not question:
         question = state["messages"][0].content
 
-
+    prompt_params = {"summary": summary, "chunked_last_tool_call": state.get("chunked_last_tool_call", False)}
+    sys_msg = SystemMessage(content=prompt_mgmt.render_template("base_system", prompt_params))
     try:
         response = model.invoke([sys_msg] + state["messages"])
     except Exception as e:
         if "429" in str(e):
             time.sleep(5)
+            print("Retrying after receiving 429 error")
             response = model.invoke([sys_msg] + state["messages"])
             return {"messages": [response]}
         raise
@@ -77,9 +80,15 @@ def optimize_memory(state: State):
     summary_message = "Create a summary of the conversation above:"
 
     # Add prompt to our history
-    messages = state["messages"] + [HumanMessage(content=summary_message)]
+    messages = state["messages"][:-2] + [HumanMessage(content=summary_message)]
     response = model.invoke(messages)
 
+    print("&&&" * 50, state["messages"][-1].type)
     # Delete all but the 2 most recent messages and the first one
-
-
+    remaining_messages = [RemoveMessage(id=m.id) for m in state["messages"][:-2]]
+
+    # If the last message returned from a tool is oversized, chunk it and retrieve only the relevant chunks
+    content_handler = OversizedContentHandler()
+    chunked = content_handler.process_oversized_message(state["messages"][-1], state.get("question"))
+
+    return {"summary": response.content, "messages": remaining_messages, "chunked_last_tool_call": chunked}
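The RemoveMessage objects returned by optimize_memory only take effect because MessagesState uses langgraph's add_messages reducer, which deletes any stored message whose id matches. A minimal sketch of that trimming behaviour in isolation; the message ids and contents are made up.

# Sketch of how add_messages applies RemoveMessage deletions, mirroring the
# "keep only the two most recent messages" logic in optimize_memory.
from langchain_core.messages import AIMessage, HumanMessage, RemoveMessage
from langgraph.graph.message import add_messages

history = [
    HumanMessage(content="original question", id="m1"),
    AIMessage(content="tool call and reasoning", id="m2"),
    AIMessage(content="tool result summary", id="m3"),
]

deletions = [RemoveMessage(id=m.id) for m in history[:-2]]
trimmed = add_messages(history, deletions)
print([m.id for m in trimmed])  # ['m2', 'm3']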
requirements.txt CHANGED

@@ -3,4 +3,6 @@ requests
 langchain_openai
 langchain_core
 langgraph
-langchain-tavily
+langchain-tavily
+langchain-community
+faiss-cpu
tools/tavily_tools.py CHANGED

@@ -6,6 +6,8 @@ from langchain_tavily import TavilyCrawl
 tavily_search_tool = TavilySearch(
     max_results=10,
     topic="general",
+    # Make sure to avoid retrieving the response from a dataset or a space
+    exclude_domains=["https://huggingface.co/datasets", "https://huggingface.co/spaces"]
 )
 
 # Define the LangChain extract tool