carolinacon committed on
Commit
8073bab
·
1 Parent(s): f22eb38

Added basic web search agent functionality with langgraph

Browse files
config/__init__.py ADDED
File without changes
config/prompts.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ prompts:
2
+ base_system:
3
+ content: |
4
+ You are a general AI assistant tasked with answering complex questions.
5
+
6
+
7
+ Make sure you think step by step in order to answer the given question.
8
+
9
+ Here is a summary of the steps you took so far:
10
+ <summary>
11
+ {{summary}}
12
+ </summary>
13
+
14
+ Include citations for all the information you retrieve, ensuring you know exactly where the data comes from.
15
+ If you have the information inside your knowledge, still call a tool in order to confirm it.
16
+
17
+ **Guidelines for Conducting Research:**
18
+
19
+ * **Citations:** Always support findings with source URLs, clearly provided as in-text citations.
20
+ * **Accuracy:** Rely solely on data obtained via provided tools—never fabricate information.
21
+ * **Methodology:** Follow a structured approach:
22
+
23
+ * **Thought:** Consider necessary information and next steps.
24
+ * **Action:** Select and execute appropriate tools.
25
+ * **Observation:** Analyze obtained results.
26
+ * Repeat Thought/Action/Observation cycles as needed.
27
+ * **Final Answer:** Synthesize and present findings with citations in markdown format.
28
+
29
+ **Example Workflows:**
30
+ **Workflow 1: Search Only**
31
+
32
+ **Question:** What are recent news headlines about artificial intelligence?
33
+
34
+ * **Thought:** I need quick, recent articles about AI.
35
+ * **Action:** Use Tavily Web Search with the query "recent artificial intelligence news" and set `time_range` to "week".
36
+ * **Observation:** Retrieved 10 relevant articles from reputable news sources.
37
+ * **Final Answer:** Summarize key headlines with citations.
38
+
39
+ **Workflow 2: Search and Extract**
40
+
41
+ **Question:** Provide detailed insights into recent advancements in quantum computing.
42
+ * **Thought:** I should find recent detailed articles first.
43
+ * **Action:** Use Tavily Web Search with the query "recent advancements in quantum computing" and set `time_range` to "month".
44
+ * **Observation:** Retrieved 10 relevant results.
45
+ * **Thought:** I should extract content from the most comprehensive article.
46
+ * **Action:** Use Tavily Web Extract on the most relevant URL from search results.
47
+ * **Observation:** Extracted detailed information about quantum computing advancements.
48
+ * **Final Answer:** Provide detailed insights summarized from extracted content with citations.
49
+ **Workflow 3: Search and Crawl**
50
+
51
+ **Question:** What are the latest advancements in renewable energy technologies?
52
+
53
+ * **Thought:** I need recent articles about advancements in renewable energy.
54
+ * **Action:** Use Tavily Web Search with the query "latest advancements in renewable energy technologies" and set `time_range` to "month".
55
+ * **Observation:** Retrieved 10 articles discussing recent developments in solar panels, wind turbines, and energy storage.
56
+ * **Thought:** To gain deeper insights, I'll crawl a relevant industry-leading renewable energy site.
57
+ * **Action:** Use Tavily Web Crawl on the URL of a leading renewable energy industry website, setting `max_depth` to 2.
58
+ * **Observation:** Gathered extensive content from multiple articles linked on the site, highlighting new technologies and innovations.
59
+ * **Final Answer:** Provide a synthesized summary of findings with citations.
60
+ type: base_system
61
+ variables: ["summary"]
62
+ version: 1.0
63
+ description: "Core system prompt for all interactions"
64
+ final_answer_processor:
65
+ content: |
66
+ You are a general AI assistant. You are given a question and an answer to that question.
67
+ Process the answer such that it contains only YOUR FINAL ANSWER and respects the following guidelines.
68
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
69
+ If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
70
+ If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
71
+ If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
72
+ type: answer_refinement
73
+ variables: []
74
+ version: 1.0
75
+ description: "Prompt for processing the final answer in order to make it compliant with gaia answers submission rules"
76
+ summarization:
77
+ content: |
78
+ This is a summary of the conversation to date: {{summary}}
79
+
80
+
81
+ Extend the summary by taking into account the new messages above.
82
+ Try to follow this guideline. If the message consists of a tool call, add a new bullet point and specify the tool and its action.
83
+ If the message consists of a tool call result, append a summary of the result to the appropriate bullet point.
84
+ After analyzing the tool call result, specify if this has been useful or not.
85
+ type: memory_optimization
86
+ variables: ["summary"]
87
+ version: 1.0
88
+ description: "Prompt for summarization and memory optimization"
config/settings.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration management
2
+
3
+ import os
4
+ from typing import Dict, Any
5
+ from pathlib import Path
6
+
7
+
8
class AgentConfig:
    """Centralized configuration for the agent.

    Reads overridable values from environment variables and derives
    project-relative file paths.
    """

    def __init__(self):
        # LLM configuration: model name can be overridden via the
        # MODEL_NAME environment variable.
        self.MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1")

        # File paths, resolved relative to the project root
        # (two levels above this file: <root>/config/settings.py).
        self.PROJECT_ROOT = Path(__file__).parent.parent
        self.PROMPTS_PATH = self.PROJECT_ROOT / "config" / "prompts.yaml"

    def to_dict(self) -> Dict[str, Any]:
        """Return all public configuration attributes as a dictionary."""
        public_items: Dict[str, Any] = {}
        for key, value in self.__dict__.items():
            if key.startswith('_'):
                continue
            public_items[key] = value
        return public_items


# Global config instance shared across the application
config = AgentConfig()
core/__init__.py ADDED
File without changes
core/agent.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.messages import HumanMessage
2
+ from langgraph.graph.state import CompiledStateGraph
3
+
4
+
5
+ from core.state import State
6
+ from nodes.nodes import assistant, optimize_memory, response_processing
7
+ from tools.tavily_tools import llm_tools
8
+
9
+ from langgraph.graph import START, StateGraph, END
10
+ from langgraph.prebuilt import tools_condition
11
+ from langgraph.prebuilt import ToolNode
12
+
13
+
14
class GaiaAgent:
    """ReAct-style agent built on a LangGraph state graph.

    Flow: assistant -> (tools -> optimize_memory -> assistant)* ->
    response_processing -> END.
    """

    react_graph: CompiledStateGraph

    def __init__(self):
        # Graph
        builder = StateGraph(State)

        # Define nodes: these do the work
        builder.add_node("assistant", assistant)
        builder.add_node("tools", ToolNode(llm_tools))
        builder.add_node("optimize_memory", optimize_memory)
        builder.add_node("response_processing", response_processing)

        # Define edges: these determine how the control flow moves
        builder.add_edge(START, "assistant")
        builder.add_conditional_edges(
            "assistant",
            # If the latest message (result) from assistant is a tool call,
            # tools_condition routes to "tools"; otherwise it routes to
            # final answer post-processing.
            tools_condition,
            {"tools": "tools", "__end__": "response_processing"},
        )
        builder.add_edge("tools", "optimize_memory")
        builder.add_edge("optimize_memory", "assistant")
        builder.add_edge("response_processing", END)
        self.react_graph = builder.compile()

    def __call__(self, question: str) -> str:
        """Run the graph on *question* and return the final answer text.

        Bug fix: the original returned the last message object (and left
        the loop variable unbound for an empty history) despite the
        declared ``-> str`` return type; we now return the content of
        the final message.
        """
        result = self.react_graph.invoke(
            {"messages": [HumanMessage(content=question)]}
        )
        for message in result["messages"]:
            message.pretty_print()
        return result["messages"][-1].content

    def __streamed_call__(self, question: str) -> str:
        """Stream the graph's execution, printing each intermediate
        message, and return the content of the final message.

        NOTE(review): despite the dunder-style name this is not a Python
        protocol method; it must be called explicitly.
        """
        inputs = {
            "messages": [
                HumanMessage(content=question)
            ]
        }

        # stream_mode="values" yields the full state after each step;
        # we print the newest message each time.
        message = None
        for step in self.react_graph.stream(inputs, stream_mode="values"):
            message = step["messages"][-1]
            if isinstance(message, tuple):
                print(message)
            else:
                message.pretty_print()
        return message.content
core/state.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from langgraph.graph import MessagesState
2
+
3
+
4
class State(MessagesState):
    """Graph state: message history plus conversation-level extras."""
    # Rolling summary of the conversation, maintained by the
    # optimize_memory node; absent/empty until the first summarization.
    summary: str
    # The original user question, captured on the first assistant turn.
    # NOTE(review): the assistant node appears to store a message object
    # here rather than a plain str — confirm against nodes/nodes.py.
    question: str
nodes/__init__.py ADDED
File without changes
nodes/nodes.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, RemoveMessage
2
+ from langchain_openai import ChatOpenAI
3
+
4
+ from core.state import State
5
+ import time
6
+
7
+ from tools.tavily_tools import llm_tools
8
+ from utils.prompt_manager import prompt_mgmt
9
+
10
# Main reasoning model. NOTE(review): hard-coded here even though
# config/settings.py exposes a MODEL_NAME setting — consider reading it
# from config for consistency.
model = ChatOpenAI(model="gpt-4.1")
# Lighter model used only to post-process the final answer.
response_processing_model = ChatOpenAI(model="gpt-4.1-mini")

# Bind the Tavily tools; parallel tool calls are disabled so the graph
# handles one tool invocation per assistant turn.
model = model.bind_tools(llm_tools, parallel_tool_calls=False)
14
+
15
+
16
+ # Node
17
+ def assistant(state: State):
18
+ # set up the question
19
+ # Get summary if it exists
20
+ summary = state.get("summary", "")
21
+
22
+ # Get original question if it exists
23
+ question = state.get("question", "")
24
+ if not question:
25
+ question = state["messages"][0]
26
+
27
+ sys_msg = SystemMessage(content=prompt_mgmt.render_template("base_system", {"summary": summary}))
28
+ try:
29
+ response = model.invoke([sys_msg] + state["messages"])
30
+ except Exception as e:
31
+ if "429" in str(e):
32
+ time.sleep(5)
33
+ response = model.invoke([sys_msg] + state["messages"])
34
+ return {"messages": [response]}
35
+ raise
36
+ return {"question": question, "messages": [response]}
37
+
38
+
39
def response_processing(state: State):
    """Refine the agent's last answer into its final, compliant form.

    Builds a minimal question/answer exchange and asks the lighter
    post-processing model to reduce the answer according to the
    "final_answer_processor" prompt rules (GAIA submission format).
    """
    question = state.get("question", "")
    answer = state["messages"][-1]

    # Robustness fix: State.question is annotated as str, but the
    # assistant node stores a message object — accept either form
    # instead of unconditionally dereferencing `.content`.
    question_text = getattr(question, "content", question)
    print("Question:", question_text)
    print("Answer:", answer.content)

    gaia_messages = [
        HumanMessage(content=question_text),
        AIMessage(content=answer.content),
    ]
    gaia_sys_msg = SystemMessage(
        content=prompt_mgmt.render_template("final_answer_processor", {})
    )
    response = response_processing_model.invoke([gaia_sys_msg] + gaia_messages)

    return {"messages": [response]}
49
+
50
+
51
def optimize_memory(state: State):
    """Compress older history into a rolling summary.

    Asks the model to create (or extend) a summary of the conversation,
    then marks every message except the two most recent for removal.
    """
    prior_summary = state.get("summary", "")

    # Extend the existing summary when there is one; otherwise ask for a
    # fresh summary of the conversation so far.
    if prior_summary:
        prompt_text = prompt_mgmt.render_template(
            "summarization", {"summary": prior_summary}
        )
    else:
        prompt_text = "Create a summary of the conversation above:"

    # Append the summarization request to the history and run the model.
    response = model.invoke(
        state["messages"] + [HumanMessage(content=prompt_text)]
    )

    # Prune: keep only the two most recent messages in state.
    pruned = [RemoveMessage(id=old.id) for old in state["messages"][:-2]]
    return {"summary": response.content, "messages": pruned}
tools/__init__.py ADDED
File without changes
tools/tavily_tools.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_tavily import TavilySearch
2
+ from langchain_tavily import TavilyExtract
3
+ from langchain_tavily import TavilyCrawl
4
+
5
# Web search tool: up to 10 general-topic results per query.
tavily_search_tool = TavilySearch(
    max_results=10,
    topic="general",
)

# Page-content extraction tool ("basic" depth trades detail for speed).
tavily_extract_tool = TavilyExtract(extract_depth="basic")

# Site-crawling tool (default configuration).
tavily_crawl_tool = TavilyCrawl()

# Tool set bound to the LLM in nodes/nodes.py and wrapped by the
# graph's ToolNode in core/agent.py.
llm_tools = [
    tavily_search_tool, tavily_extract_tool, tavily_crawl_tool
]
utils/__init__.py ADDED
File without changes
utils/prompt_manager.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+ from dataclasses import dataclass, field
3
+ from typing import Any, Dict, List, Optional
4
+ from jinja2 import Environment, BaseLoader
5
+ import tiktoken
6
+ import yaml
7
+ from pathlib import Path
8
+
9
+
10
class PromptType(Enum):
    """Categories of prompt templates; values match the `type` field
    of each entry in config/prompts.yaml."""
    BASE_SYSTEM = "base_system"
    ANSWER_REFINEMENT = "answer_refinement"
    MEMORY_OPTIMIZATION = "memory_optimization"
14
+
15
+
16
@dataclass
class PromptTemplate:
    """Structured prompt template with metadata"""
    name: str            # template key from the YAML `prompts` mapping
    content: str         # Jinja2 template text
    prompt_type: PromptType  # category of the prompt (see PromptType)
    variables: List[str] = field(default_factory=list)  # variable names the template expects
    token_estimate: int = 0  # filled in after loading via the tokenizer
    version: str = "1.0"
    description: str = ""
26
+
27
+
28
class PromptManager:
    """Centralized management for Agent's prompts.

    Loads Jinja2 prompt templates (with metadata) from a YAML
    configuration file and renders them with state variables on demand.
    """

    def __init__(self, prompt_config_path: str, model_name: str = "gpt-4.1"):
        """Create a manager and load templates from *prompt_config_path*.

        *model_name* selects the tiktoken encoding used for token
        estimates.
        """
        self.templates: Dict[str, PromptTemplate] = {}
        self.jinja_env = Environment(loader=BaseLoader())
        # Robustness fix: tiktoken raises KeyError for model names it
        # does not recognize; fall back to a generic encoding so an
        # unknown model name does not break prompt loading entirely.
        try:
            self.token_counter = tiktoken.encoding_for_model(model_name)
        except KeyError:
            self.token_counter = tiktoken.get_encoding("cl100k_base")

        # Load prompts from config
        self.load_prompts_from_config(prompt_config_path)

    def load_prompts_from_config(self, config_path: str):
        """Load prompts from a YAML configuration file.

        Raises:
            ValueError: if *config_path* is not a .yaml/.yml file.
                (The original silently did nothing in that case,
                leaving the manager without any templates.)
        """
        path = Path(config_path)

        if path.suffix.lower() not in ('.yaml', '.yml'):
            raise ValueError(f"Unsupported prompt config format: {path.suffix!r}")

        with open(path, 'r') as f:
            config = yaml.safe_load(f)

        for name, prompt_data in config.get('prompts', {}).items():
            template = PromptTemplate(
                name=name,
                content=prompt_data['content'],
                prompt_type=PromptType(prompt_data.get('type', 'base_system')),
                variables=prompt_data.get('variables', []),
                version=prompt_data.get('version', '1.0'),
                description=prompt_data.get('description', '')
            )
            template.token_estimate = self._estimate_tokens(template.content)
            self.templates[name] = template

    def _estimate_tokens(self, text: str) -> int:
        """Estimate token count for *text* using the configured encoding."""
        return len(self.token_counter.encode(text))

    def render_template(self, name: str, state: Dict[str, Any]) -> str:
        """Render the named template with variables taken from *state*.

        Raises KeyError if *name* is not a loaded template.
        """
        # All state entries are exposed to the template as variables.
        template_vars: Dict[str, Any] = {}
        template_vars.update(state)

        # Create and render the Jinja2 template from the stored content.
        template = self.templates[name]
        jinja_template = self.jinja_env.from_string(template.content)
        return jinja_template.render(**template_vars)
76
+
77
+
78
# Global instance
# Bug fix: the original passed "config\prompts.yaml" — a literal
# backslash path that fails on POSIX systems and whenever the process
# CWD is not the project root. Resolve the path relative to the
# project root instead (Path is already imported at the top of this
# file), matching AgentConfig.PROMPTS_PATH in config/settings.py.
prompt_mgmt = PromptManager(
    str(Path(__file__).parent.parent / "config" / "prompts.yaml")
)