Spaces:
Sleeping
Sleeping
José Enrique committed on
Commit ·
d4bb43c
1
Parent(s): 9ccff9e
langgraph agent
Browse files- evaluation_langgraph.py +31 -18
- langgraph_agent.py +14 -12
- requirements.txt +2 -0
- responses_GAIA_Evaluation_DatasetSingle Langraph agent.json +7 -0
- tests/search_tools_tests.py +77 -0
- tools/searchTools_lg.py +47 -0
evaluation_langgraph.py
CHANGED
|
@@ -14,9 +14,13 @@ from langgraph_agent import build_agents
|
|
| 14 |
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
|
| 15 |
|
| 16 |
from langfuse import Langfuse
|
|
|
|
| 17 |
# Load environment variables
|
| 18 |
load_dotenv()
|
|
|
|
|
|
|
| 19 |
langfuse = Langfuse()
|
|
|
|
| 20 |
# Initialize OpenTelemetry Tracer
|
| 21 |
#trace_provider = TracerProvider()
|
| 22 |
#trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
|
|
@@ -50,20 +54,26 @@ def add_image(metadata)->list:
|
|
| 50 |
|
| 51 |
#@observe()
|
| 52 |
def run_agent(agent, question,trace_name,metadata):
|
|
|
|
| 53 |
# with tracer.start_as_current_span(trace_name) as span:
|
| 54 |
# span.set_attribute("langfuse.tag", "dataset-run")
|
| 55 |
# span.set_attribute("langfuse.input", question)
|
| 56 |
# if the question has attachments:
|
| 57 |
# find file under /attachments with the same task_id
|
| 58 |
-
images = add_image(metadata)
|
| 59 |
-
|
| 60 |
question = question + " The task_id is: " + metadata["task_id"]
|
| 61 |
messages = [HumanMessage(content=question )]
|
| 62 |
|
| 63 |
-
try:
|
|
|
|
| 64 |
messages = agent.invoke(
|
| 65 |
-
{"messages": messages}
|
| 66 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
except Exception as e:
|
| 68 |
print(f"Error running agent: {e}")
|
| 69 |
output = f"Error running agent: {e}"
|
|
@@ -81,7 +91,7 @@ def run_agent(agent, question,trace_name,metadata):
|
|
| 81 |
# input=question,
|
| 82 |
# output=output
|
| 83 |
# )
|
| 84 |
-
return
|
| 85 |
def simple_evaluation(output, expected_output):
|
| 86 |
|
| 87 |
trimmed_output = str(output).strip().strip('"').strip("$")
|
|
@@ -117,7 +127,7 @@ def simple_evaluation(output, expected_output):
|
|
| 117 |
return common_items_count
|
| 118 |
|
| 119 |
|
| 120 |
-
def run_evaluation(agent,langfuse_dataset,run_name,model_id,trace_name):
|
| 121 |
dataset = langfuse.get_dataset(langfuse_dataset)
|
| 122 |
responses = []
|
| 123 |
# Run our agent against each dataset item (limited to first 10 above)
|
|
@@ -126,18 +136,23 @@ def run_evaluation(agent,langfuse_dataset,run_name,model_id,trace_name):
|
|
| 126 |
with item.run(
|
| 127 |
run_name = run_name
|
| 128 |
) as root_span:
|
| 129 |
-
|
|
|
|
| 130 |
task_id = item.metadata["task_id"]
|
| 131 |
-
if task_id == "
|
| 132 |
try:
|
|
|
|
| 133 |
output = run_agent(agent,item.input,trace_name,item.metadata)
|
| 134 |
responses.append({"task_id": task_id, "submitted_answer": output})
|
| 135 |
-
|
|
|
|
| 136 |
except Exception as e:
|
|
|
|
| 137 |
output = f"Error running agent: {e}"
|
| 138 |
|
| 139 |
# score the result against the expected output
|
| 140 |
-
|
|
|
|
| 141 |
|
| 142 |
# Link the trace to the dataset item for analysis
|
| 143 |
# item.link(
|
|
@@ -154,7 +169,8 @@ def run_evaluation(agent,langfuse_dataset,run_name,model_id,trace_name):
|
|
| 154 |
# )
|
| 155 |
|
| 156 |
# Flush data to ensure all telemetry is sent
|
| 157 |
-
|
|
|
|
| 158 |
|
| 159 |
# Save the responses to a JSON lines file
|
| 160 |
print("Saving responses to file...")
|
|
@@ -168,16 +184,13 @@ def run_evaluation(agent,langfuse_dataset,run_name,model_id,trace_name):
|
|
| 168 |
|
| 169 |
|
| 170 |
def evaluate():
|
|
|
|
|
|
|
| 171 |
print("Starting agent...")
|
| 172 |
agent = build_agents()
|
| 173 |
print("Agent built successfully.")
|
| 174 |
-
run_evaluation(agent,"GAIA_Evaluation_Dataset","Single Langraph agent","OpenAI gpt4o","langraph-trace")
|
| 175 |
-
|
| 176 |
-
# print("comparison", simple_evaluation("Dimitry","Clasu"))
|
| 177 |
-
# print("sain", simple_evaluation('"Saint Petersburg"',"Saint Petersburg"))
|
| 178 |
-
# print("pages", simple_evaluation('"132,133,136,195,245"',"132, 133, 134, 197, 245"))
|
| 179 |
-
# print("veg", simple_evaluation('"cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries"',"cornstarch, freshly squeezed lemon juice, granulated sugar, pure vanilla extract, ripe strawberries"))
|
| 180 |
-
# print("right", simple_evaluation('"right"',"Right"))
|
| 181 |
|
| 182 |
if __name__ == "__main__":
|
| 183 |
evaluate()
|
|
|
|
| 14 |
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
|
| 15 |
|
| 16 |
from langfuse import Langfuse
|
| 17 |
+
|
| 18 |
# Load environment variables
|
| 19 |
load_dotenv()
|
| 20 |
+
print("Environment variables loaded.")
|
| 21 |
+
# initialize langfuse
|
| 22 |
langfuse = Langfuse()
|
| 23 |
+
|
| 24 |
# Initialize OpenTelemetry Tracer
|
| 25 |
#trace_provider = TracerProvider()
|
| 26 |
#trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
|
|
|
|
| 54 |
|
| 55 |
#@observe()
|
| 56 |
def run_agent(agent, question,trace_name,metadata):
|
| 57 |
+
print("Running agent with question:", question)
|
| 58 |
# with tracer.start_as_current_span(trace_name) as span:
|
| 59 |
# span.set_attribute("langfuse.tag", "dataset-run")
|
| 60 |
# span.set_attribute("langfuse.input", question)
|
| 61 |
# if the question has attachments:
|
| 62 |
# find file under /attachments with the same task_id
|
| 63 |
+
#images = add_image(metadata)
|
| 64 |
+
print("Running agent with question:")
|
| 65 |
question = question + " The task_id is: " + metadata["task_id"]
|
| 66 |
messages = [HumanMessage(content=question )]
|
| 67 |
|
| 68 |
+
try:
|
| 69 |
+
print("Invoking agent with question:", question)
|
| 70 |
messages = agent.invoke(
|
| 71 |
+
{"messages": messages, "input_file": None}
|
| 72 |
)
|
| 73 |
+
print("Agent messages")
|
| 74 |
+
# Show the messages
|
| 75 |
+
for m in messages['messages']:
|
| 76 |
+
m.pretty_print()
|
| 77 |
except Exception as e:
|
| 78 |
print(f"Error running agent: {e}")
|
| 79 |
output = f"Error running agent: {e}"
|
|
|
|
| 91 |
# input=question,
|
| 92 |
# output=output
|
| 93 |
# )
|
| 94 |
+
return messages
|
| 95 |
def simple_evaluation(output, expected_output):
|
| 96 |
|
| 97 |
trimmed_output = str(output).strip().strip('"').strip("$")
|
|
|
|
| 127 |
return common_items_count
|
| 128 |
|
| 129 |
|
| 130 |
+
def run_evaluation(agent,langfuse_dataset,run_name,model_id,trace_name,update_dataset=True):
|
| 131 |
dataset = langfuse.get_dataset(langfuse_dataset)
|
| 132 |
responses = []
|
| 133 |
# Run our agent against each dataset item (limited to first 10 above)
|
|
|
|
| 136 |
with item.run(
|
| 137 |
run_name = run_name
|
| 138 |
) as root_span:
|
| 139 |
+
if update_dataset:
|
| 140 |
+
root_span.update(input=item.input)
|
| 141 |
task_id = item.metadata["task_id"]
|
| 142 |
+
if task_id == "5a0c1adf-205e-4841-a666-7c3ef95def9d":
|
| 143 |
try:
|
| 144 |
+
print("Running agent")
|
| 145 |
output = run_agent(agent,item.input,trace_name,item.metadata)
|
| 146 |
responses.append({"task_id": task_id, "submitted_answer": output})
|
| 147 |
+
if update_dataset:
|
| 148 |
+
root_span.update(output=output)
|
| 149 |
except Exception as e:
|
| 150 |
+
print(f"Error running agent: {e}")
|
| 151 |
output = f"Error running agent: {e}"
|
| 152 |
|
| 153 |
# score the result against the expected output
|
| 154 |
+
if update_dataset:
|
| 155 |
+
root_span.score_trace(name="exact_match", value = simple_evaluation(output, item.expected_output))
|
| 156 |
|
| 157 |
# Link the trace to the dataset item for analysis
|
| 158 |
# item.link(
|
|
|
|
| 169 |
# )
|
| 170 |
|
| 171 |
# Flush data to ensure all telemetry is sent
|
| 172 |
+
if update_dataset:
|
| 173 |
+
langfuse.flush()
|
| 174 |
|
| 175 |
# Save the responses to a JSON lines file
|
| 176 |
print("Saving responses to file...")
|
|
|
|
| 184 |
|
| 185 |
|
| 186 |
def evaluate():
|
| 187 |
+
|
| 188 |
+
|
| 189 |
print("Starting agent...")
|
| 190 |
agent = build_agents()
|
| 191 |
print("Agent built successfully.")
|
| 192 |
+
run_evaluation(agent,"GAIA_Evaluation_Dataset","Single Langraph agent","OpenAI gpt4o","langraph-trace",update_dataset=False)
|
| 193 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
if __name__ == "__main__":
|
| 196 |
evaluate()
|
langgraph_agent.py
CHANGED
|
@@ -5,26 +5,20 @@ from langchain_openai import ChatOpenAI
|
|
| 5 |
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
|
| 6 |
from langgraph.graph.message import add_messages
|
| 7 |
from langgraph.prebuilt import ToolNode, tools_condition
|
| 8 |
-
from tools.
|
| 9 |
|
| 10 |
class AgentState(TypedDict):
|
| 11 |
input_file: Optional[str]
|
| 12 |
messages: Annotated[list[AnyMessage], add_messages]
|
| 13 |
|
| 14 |
|
| 15 |
-
# add tools:
|
| 16 |
tools = [
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
|
| 21 |
-
]
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
# LLM model and tools
|
| 25 |
-
vision_llm = ChatOpenAI(model="gpt-4o")
|
| 26 |
-
llm = ChatOpenAI(model="gpt-4o")
|
| 27 |
-
llm_withtools = llm.bind_tools(tools, parallel_tool_calls = False)
|
| 28 |
|
| 29 |
def agent(state:AgentState):
|
| 30 |
tools_description = """
|
|
@@ -56,6 +50,14 @@ def agent(state:AgentState):
|
|
| 56 |
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
| 57 |
""")
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
return {
|
| 60 |
"input_file": state["input_file"],
|
| 61 |
"messages": [llm_withtools.invoke([sys_message]+ state["messages"])]
|
|
|
|
| 5 |
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
|
| 6 |
from langgraph.graph.message import add_messages
|
| 7 |
from langgraph.prebuilt import ToolNode, tools_condition
|
| 8 |
+
from tools.searchTools_lg import wiki_search, mini_web_search, arvix_search
|
| 9 |
|
| 10 |
class AgentState(TypedDict):
|
| 11 |
input_file: Optional[str]
|
| 12 |
messages: Annotated[list[AnyMessage], add_messages]
|
| 13 |
|
| 14 |
|
| 15 |
+
# add tools:
|
| 16 |
tools = [
|
| 17 |
+
wiki_search,
|
| 18 |
+
mini_web_search,
|
| 19 |
+
arvix_search,
|
| 20 |
|
| 21 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def agent(state:AgentState):
|
| 24 |
tools_description = """
|
|
|
|
| 50 |
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
| 51 |
""")
|
| 52 |
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# LLM model and tools
|
| 57 |
+
vision_llm = ChatOpenAI(model="gpt-4o")
|
| 58 |
+
llm = ChatOpenAI(model="gpt-4o")
|
| 59 |
+
llm_withtools = llm.bind_tools(tools, parallel_tool_calls = False)
|
| 60 |
+
|
| 61 |
return {
|
| 62 |
"input_file": state["input_file"],
|
| 63 |
"messages": [llm_withtools.invoke([sys_message]+ state["messages"])]
|
requirements.txt
CHANGED
|
@@ -18,6 +18,8 @@ langchain-huggingface
|
|
| 18 |
langchain-groq
|
| 19 |
langchain-tavily
|
| 20 |
langchain-chroma
|
|
|
|
|
|
|
| 21 |
arxiv
|
| 22 |
pymupdf
|
| 23 |
wikipedia
|
|
|
|
| 18 |
langchain-groq
|
| 19 |
langchain-tavily
|
| 20 |
langchain-chroma
|
| 21 |
+
langchain_openai
|
| 22 |
+
langgraph
|
| 23 |
arxiv
|
| 24 |
pymupdf
|
| 25 |
wikipedia
|
responses_GAIA_Evaluation_DatasetSingle Langraph agent.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"task_id": "5a0c1adf-205e-4841-a666-7c3ef95def9d",
|
| 4 |
+
"submitted_answer": {
|
| 5 |
+
"input_file": null,
|
| 6 |
+
"messages": [
|
| 7 |
+
|
tests/search_tools_tests.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
import unittest
|
| 7 |
+
from unittest.mock import patch, MagicMock
|
| 8 |
+
from tools.searchTools_lg import wiki_search, mini_web_search, arvix_search
|
| 9 |
+
|
| 10 |
+
class TestSearchToolsLG(unittest.TestCase):
    """Tests for the search tools exported by ``tools.searchTools_lg``.

    The first three tests patch the loader/search class used by each tool, so
    they exercise only the tool's own code. The ``*_integration`` tests call
    the tools without any patching.
    """

    @patch("tools.searchTools_lg.WikipediaLoader")
    def test_wiki_search(self, mock_loader):
        # Fake a single loaded document carrying the metadata keys and the
        # page content that wiki_search formats into its result string.
        mock_doc = MagicMock()
        mock_doc.metadata = {"source": "Wikipedia", "page": "1"}
        mock_doc.page_content = "Test Wikipedia content"
        mock_loader.return_value.load.return_value = [mock_doc]
        result = wiki_search("test query")
        # The tool wraps the formatted text under the "wiki_results" key.
        self.assertIn("wiki_results", result)
        self.assertIn("Test Wikipedia content", result["wiki_results"])

    @patch("tools.searchTools_lg.TavilySearch")
    def test_mini_web_search(self, mock_tavily):
        mock_doc = MagicMock()
        # Sample search hits attached to the mock as a plain attribute.
        mock_doc.result = [
            {'url': 'https://www.python.org/',
             'title': 'Welcome to Python.org',
             'content': '**Notice:** While JavaScript is not essential for this website, your interaction with the content will be limited. # Python 3: Fibonacci series up to n More about defining functions in Python\xa03 # Python 3: List comprehensions Lists (known as arrays in other languages) are one of the compound data types that Python understands. More about lists in Python\xa03 # Python 3: Simple arithmetic More about simple math functions in Python\xa03. >>> print("Hello, I\'m Python!") Hello, I\'m Python! Python Hi, Python. Experienced programmers in any other language can pick up Python very quickly, and beginners find the clean syntax and indentation structure easy to learn. Latest: Python 3.13.7 docs.python.org jobs.python.org * Python 3.14.0rc2 and 3.13.7 are go!',
             'score': 0.98583,
             'raw_content': None},
            {'url': 'https://www.w3schools.com/python/python_intro.asp',
             'title': 'Introduction to Python - W3Schools',
             'content': 'PYTHON # Python Introduction ## What is Python? Python is a popular programming language. ### What can Python do? ## Why Python? * Python has syntax that allows developers to write programs with fewer lines than some other programming languages. * In this tutorial Python will be written in a text editor. ### Python Syntax compared to other programming languages ##### Top Tutorials HTML Tutorial CSS Tutorial How To Tutorial Python Tutorial W3.CSS Tutorial ##### Top References HTML Reference CSS Reference Python Reference W3.CSS Reference ##### Top Examples HTML Examples CSS Examples How To Examples Python Examples W3.CSS Examples CSS Certificate Python Certificate Tutorials, references, and examples are constantly reviewed to avoid errors, but we cannot warrant full correctness ',
             'score': 0.98365,
             'raw_content': None},
            {'url': 'https://en.wikipedia.org/wiki/Python_(programming_language)',
             'title': 'Python (programming language) - Wikipedia', 'content': 'Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.',
             'score': 0.97793,
             'raw_content': None}]

        # The patched search object's invoke() returns a one-element list, and
        # mini_web_search returns that value as-is, so the assertion below only
        # checks that the returned list is non-empty.
        mock_tavily.return_value.invoke.return_value = [mock_doc]
        result = mini_web_search("test query")
        self.assertTrue(len(result) > 0)

    @patch("tools.searchTools_lg.ArxivLoader")
    def test_arvix_search(self, mock_arxiv):
        # Fake one loaded paper; the metadata keys here are Published, Title,
        # Authors and Summary.
        mock_doc = MagicMock()
        mock_doc.metadata = {'Published': '2024-04-05', 'Title': 'Egglog Python: A Pythonic Library for E-graphs', 'Authors': 'Saul Shanabrook', 'Summary': 'E-graphs have emerged as a versatile data structure with applications in\nsynthesis, optimization, and verification through techniques such as equality\nsaturation. This paper introduces Python bindings for the experimental egglog\nlibrary (previously called egg-smol), which aims to bring the benefits of\ne-graphs to the Python ecosystem. The bindings offer a high-level, Pythonic API\nproviding an accessible and familiar interface for Python users. By integrating\ne-graph techniques with Python, we hope to enable collaboration and innovation\nacross various domains in the scientific computing and machine learning\ncommunities. We discuss the advantages of using Python bindings for both Python\nand existing egg-smol users, as well as possible future directions for\ndevelopment.'}
        mock_doc.page_content = "'Egg-smol Python: A Pythonic Library for E-graphs\nSaul Shanabrook\ns.shanabrook@gmail.com\nAbstract\nE-graphs have emerged as a versatile data structure with ap-\nplications in synthesis, optimization, and verification through\ntechniques such as equality saturation."
        mock_arxiv.return_value.load.return_value = [mock_doc]
        result = arvix_search("Python programming")
        # arvix_search returns the loader output unchanged, so this checks the
        # one-element list configured above.
        self.assertTrue(len(result) > 0)

    # The tests below are not patched.
    # NOTE(review): presumably they need network access (and Tavily
    # credentials for the web search) -- confirm before running them in CI.

    def test_wiki_search_integration(self):
        result = wiki_search("Python programming")
        self.assertIn("wiki_results", result)
        self.assertTrue(len(result["wiki_results"]) > 0)
        #print("Wiki search result:", result["wiki_results"][:200])

    def test_mini_web_search_integration(self):
        result = mini_web_search("Python programming")
        # Unlike the mocked test, this expects a mapping with a "results" key.
        self.assertIn("results", result)
        self.assertTrue(len(result["results"]) > 0)

    def test_arvix_search_integration(self):
        result = arvix_search("Python programming")
        self.assertTrue(len(result) > 0)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Script entry point: only runs when this file is executed directly. When the
# tests are collected by another runner, this block is skipped, so this file
# does not load the .env file in that case.
if __name__ == "__main__":
    # load environment variables from .env file
    load_dotenv()
    unittest.main()
|
tools/searchTools_lg.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_core.tools import tool
|
| 2 |
+
from langchain_tavily import TavilySearch
|
| 3 |
+
from langchain_community.document_loaders import WikipediaLoader
|
| 4 |
+
from langchain_community.document_loaders import ArxivLoader
|
| 5 |
+
from langchain_community.vectorstores import SupabaseVectorStore
|
| 6 |
+
|
| 7 |
+
@tool
def wiki_search(query: str) -> dict[str, str]:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    # The docstring above is kept verbatim: the @tool decorator exposes it as
    # the tool description, so editing it would change runtime behaviour.
    #
    # Returns a dict with the single key "wiki_results". Its value is one
    # string holding every loaded page, each wrapped in a <Document ...> header
    # and separated by a "---" rule. The return annotation previously claimed
    # `str`, which did not match the dict that is actually returned.
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
|
| 20 |
+
|
| 21 |
+
@tool
def mini_web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query."""
    # The docstring above is kept verbatim: the @tool decorator exposes it as
    # the tool description, so editing it would change runtime behaviour.
    #
    # The search response is returned untouched. The return annotation
    # previously claimed `str`.
    # NOTE(review): assumes the response is a mapping with a "results" entry,
    # as the integration test expects -- confirm against the langchain-tavily
    # version in use.
    #
    # A commented-out Document-style formatter was removed here: it read
    # `doc.metadata` / `doc.page_content`, which does not match the dict-shaped
    # hits this search returns.
    return TavilySearch(max_results=3, topic="general").invoke({"query": query})
|
| 35 |
+
@tool
def arvix_search(query: str) -> list:
    """Search for scientific papers in Arxiv for a query and return maximum 3 result.

    Args:
        query: The search query."""
    # The docstring above is kept verbatim: the @tool decorator exposes it as
    # the tool description, so editing it would change runtime behaviour.
    #
    # Returns the list produced by ArxivLoader.load() unchanged (at most three
    # entries are requested). The return annotation previously claimed `str`.
    #
    # A commented-out formatter was removed here: it indexed
    # doc.metadata["source"].
    # NOTE(review): the arXiv metadata used in this project's tests has
    # Published/Title/Authors/Summary keys and no "source" key, so that
    # formatter would presumably have raised KeyError -- confirm before
    # reintroducing any formatting.
    return ArxivLoader(query=query, load_max_docs=3).load()
|