Spaces:
Sleeping
Sleeping
Added more tools
Browse files- agent.py +339 -123
- requirements.txt +1 -5
agent.py
CHANGED
|
@@ -21,102 +21,14 @@ from urllib.parse import quote, urlparse
|
|
| 21 |
import sys
|
| 22 |
from bs4 import BeautifulSoup
|
| 23 |
import html2text
|
| 24 |
-
import base64
|
| 25 |
-
from io import BytesIO
|
| 26 |
-
from PIL import Image
|
| 27 |
-
from typing import Optional, Union
|
| 28 |
-
import torch
|
| 29 |
-
from transformers import AutoProcessor, AutoModelForVision2Seq
|
| 30 |
|
| 31 |
from apify_client import ApifyClient
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
load_dotenv()
|
| 34 |
|
| 35 |
-
# Initialize vision model and processor globally for reuse
|
| 36 |
-
# This avoids reloading the model on each call
|
| 37 |
-
try:
|
| 38 |
-
# Load model and processor once
|
| 39 |
-
vision_processor = AutoProcessor.from_pretrained("google/paligemma-3b-mix-224")
|
| 40 |
-
vision_model = AutoModelForVision2Seq.from_pretrained("google/paligemma-3b-mix-224", torch_dtype=torch.float16)
|
| 41 |
-
|
| 42 |
-
# Move model to GPU if available
|
| 43 |
-
if torch.cuda.is_available():
|
| 44 |
-
vision_model = vision_model.to("cuda")
|
| 45 |
-
|
| 46 |
-
VISION_MODEL_LOADED = True
|
| 47 |
-
print("Vision model loaded successfully")
|
| 48 |
-
except Exception as e:
|
| 49 |
-
VISION_MODEL_LOADED = False
|
| 50 |
-
print(f"Error loading vision model: {e}")
|
| 51 |
-
|
| 52 |
-
def describe_image(image_url: str) -> str:
|
| 53 |
-
"""
|
| 54 |
-
Process an image from a URL and generate a description using a vision model.
|
| 55 |
-
|
| 56 |
-
Args:
|
| 57 |
-
image_url: URL of the image to describe
|
| 58 |
-
|
| 59 |
-
Returns:
|
| 60 |
-
Text description of the image
|
| 61 |
-
"""
|
| 62 |
-
if not VISION_MODEL_LOADED:
|
| 63 |
-
return "Error: Vision model is not available. Could not load the model."
|
| 64 |
-
|
| 65 |
-
try:
|
| 66 |
-
# Check if URL is valid
|
| 67 |
-
if not image_url or not isinstance(image_url, str):
|
| 68 |
-
return "Error: Invalid image URL. Please provide a valid URL."
|
| 69 |
-
|
| 70 |
-
# Standardize prompt for the vision model
|
| 71 |
-
prompt = "Describe this image in detail."
|
| 72 |
-
|
| 73 |
-
# Download the image
|
| 74 |
-
print(f"Downloading image from: {image_url}")
|
| 75 |
-
response = requests.get(image_url, timeout=10)
|
| 76 |
-
|
| 77 |
-
if response.status_code != 200:
|
| 78 |
-
return f"Error: Failed to download image (Status code: {response.status_code})"
|
| 79 |
-
|
| 80 |
-
# Open the image from bytes
|
| 81 |
-
image = Image.open(BytesIO(response.content))
|
| 82 |
-
|
| 83 |
-
# Ensure the image is in RGB format (some images might be in RGBA or other formats)
|
| 84 |
-
if image.mode != "RGB":
|
| 85 |
-
image = image.convert("RGB")
|
| 86 |
-
|
| 87 |
-
# Process image and text inputs
|
| 88 |
-
inputs = vision_processor(text=prompt, images=image, return_tensors="pt")
|
| 89 |
-
|
| 90 |
-
# Move inputs to the same device as the model
|
| 91 |
-
if torch.cuda.is_available():
|
| 92 |
-
inputs = {name: tensor.to("cuda") for name, tensor in inputs.items()}
|
| 93 |
-
|
| 94 |
-
# Generate description
|
| 95 |
-
with torch.no_grad():
|
| 96 |
-
output = vision_model.generate(
|
| 97 |
-
**inputs,
|
| 98 |
-
max_new_tokens=256,
|
| 99 |
-
temperature=0.1,
|
| 100 |
-
do_sample=True
|
| 101 |
-
)
|
| 102 |
-
|
| 103 |
-
# Decode the output
|
| 104 |
-
generated_text = vision_processor.decode(output[0], skip_special_tokens=True)
|
| 105 |
-
|
| 106 |
-
# Trim any leading/trailing whitespace and remove the prompt if it's included in the output
|
| 107 |
-
description = generated_text.strip()
|
| 108 |
-
if description.startswith(prompt):
|
| 109 |
-
description = description[len(prompt):].strip()
|
| 110 |
-
|
| 111 |
-
return f"Image description: {description}"
|
| 112 |
-
|
| 113 |
-
except requests.exceptions.Timeout:
|
| 114 |
-
return f"Error: Request timed out while trying to download the image."
|
| 115 |
-
except requests.exceptions.ConnectionError:
|
| 116 |
-
return f"Error: Failed to connect to the image URL. The site might be down or the URL might be incorrect."
|
| 117 |
-
except Exception as e:
|
| 118 |
-
return f"Error processing image: {str(e)}"
|
| 119 |
-
|
| 120 |
def run_python_code(code: str):
|
| 121 |
"""Execute Python code in a temporary file and return the output."""
|
| 122 |
# Check for potentially dangerous operations
|
|
@@ -258,6 +170,7 @@ def apify_google_search(query: str, limit: int = 10) -> str:
|
|
| 258 |
except Exception as e:
|
| 259 |
print(f"Error using Apify: {str(e)}")
|
| 260 |
return fallback_search(query)
|
|
|
|
| 261 |
def scrape_webpage(url: str) -> str:
|
| 262 |
"""
|
| 263 |
Safely scrape content from a specified URL.
|
|
@@ -356,6 +269,7 @@ def scrape_webpage(url: str) -> str:
|
|
| 356 |
return f"Error requesting {url}: {str(e)}"
|
| 357 |
except Exception as e:
|
| 358 |
return f"Error scraping webpage {url}: {str(e)}"
|
|
|
|
| 359 |
def format_search_results(results: List[Dict], query: str) -> str:
|
| 360 |
"""Format the search results into a readable string"""
|
| 361 |
if not results or len(results) == 0:
|
|
@@ -454,13 +368,182 @@ def safe_web_search(query: str) -> str:
|
|
| 454 |
|
| 455 |
return f"Failed to search for '{query}' after multiple attempts due to rate limiting."
|
| 456 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
# System prompt to guide the model's behavior
|
| 458 |
SYSTEM_PROMPT = """Answer the following questions as best you can. DO NOT rely on your internal knowledge unless web searches are rate-limited or you're specifically instructed to. You have access to the following tools:
|
| 459 |
|
| 460 |
-
web_search: Search the
|
| 461 |
python_code: Execute Python code. Provide the complete Python code as a string. Use this tool to calculate math problems.
|
| 462 |
webpage_scrape: Scrape content from a specific webpage URL. Provide a valid URL to extract information from a particular web page.
|
| 463 |
-
|
|
|
|
|
|
|
| 464 |
|
| 465 |
IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
|
| 466 |
1. First reason about the problem in the "Thought" section
|
|
@@ -478,7 +561,9 @@ The only values that should be in the "action" field are:
|
|
| 478 |
web_search: Search the web for current information, args: {"query": {"type": "string"}}
|
| 479 |
python_code: Execute Python code, args: {"code": {"type": "string"}}
|
| 480 |
webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
|
| 481 |
-
|
|
|
|
|
|
|
| 482 |
|
| 483 |
IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
|
| 484 |
|
|
@@ -500,12 +585,12 @@ Or for scraping a webpage:
|
|
| 500 |
}
|
| 501 |
```
|
| 502 |
|
| 503 |
-
Or for
|
| 504 |
|
| 505 |
```json
|
| 506 |
{
|
| 507 |
-
"action": "
|
| 508 |
-
"action_input": {"
|
| 509 |
}
|
| 510 |
```
|
| 511 |
|
|
@@ -553,7 +638,6 @@ Thought: I now know the final answer
|
|
| 553 |
Final Answer: Directly answer the question in the shortest possible way. For example, if the question is "What is the capital of France?", the answer should be "Paris" without any additional text. If the question is "What is the population of New York City?", the answer should be "8.4 million" without any additional text.
|
| 554 |
|
| 555 |
Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer."""
|
| 556 |
-
#Your response will be evaluated for accuracy and completeness. After you provide an answer, an evaluator will check your work and may ask you to improve it. The evaluation process has a maximum of 3 attempts.
|
| 557 |
|
| 558 |
# Generate the chat interface, including the tools
|
| 559 |
llm = ChatOpenAI(
|
|
@@ -580,9 +664,19 @@ tools_config = [
|
|
| 580 |
"func": scrape_webpage
|
| 581 |
},
|
| 582 |
{
|
| 583 |
-
"name": "
|
| 584 |
-
"description": "
|
| 585 |
-
"func":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 586 |
}
|
| 587 |
]
|
| 588 |
|
|
@@ -596,6 +690,9 @@ class ActionInput(TypedDict, total=False):
|
|
| 596 |
code: Optional[str]
|
| 597 |
url: Optional[str]
|
| 598 |
image_url: Optional[str]
|
|
|
|
|
|
|
|
|
|
| 599 |
|
| 600 |
class AgentState(TypedDict, total=False):
|
| 601 |
messages: Annotated[list[AnyMessage], add_messages]
|
|
@@ -857,34 +954,143 @@ def webpage_scrape_node(state: AgentState) -> Dict[str, Any]:
|
|
| 857 |
"action_input": None # Clear the action input
|
| 858 |
}
|
| 859 |
|
| 860 |
-
def
|
| 861 |
-
"""Node that processes
|
| 862 |
-
print("
|
| 863 |
|
| 864 |
# Extract tool arguments
|
| 865 |
action_input = state.get("action_input", {})
|
| 866 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
|
| 868 |
-
# Try different ways to extract the image URL
|
| 869 |
-
image_url = ""
|
| 870 |
if isinstance(action_input, dict):
|
| 871 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 872 |
elif isinstance(action_input, str):
|
| 873 |
-
|
| 874 |
|
| 875 |
-
print(f"
|
| 876 |
|
| 877 |
-
# Safety check - don't run with empty
|
| 878 |
-
if not
|
| 879 |
-
result = "Error: No
|
| 880 |
else:
|
| 881 |
-
# Call the
|
| 882 |
-
result =
|
| 883 |
|
| 884 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 885 |
|
| 886 |
# Format the observation to continue the ReAct cycle
|
| 887 |
-
# Always prefix with "Observation:" for consistency in the ReAct cycle
|
| 888 |
tool_message = AIMessage(
|
| 889 |
content=f"Observation: {result.strip()}"
|
| 890 |
)
|
|
@@ -916,8 +1122,12 @@ def router(state: AgentState) -> str:
|
|
| 916 |
return "python_code"
|
| 917 |
elif tool == "webpage_scrape":
|
| 918 |
return "webpage_scrape"
|
| 919 |
-
elif tool == "
|
| 920 |
-
return "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 921 |
else:
|
| 922 |
return "end"
|
| 923 |
|
|
@@ -931,7 +1141,9 @@ def create_agent_graph() -> StateGraph:
|
|
| 931 |
builder.add_node("web_search", web_search_node)
|
| 932 |
builder.add_node("python_code", python_code_node)
|
| 933 |
builder.add_node("webpage_scrape", webpage_scrape_node)
|
| 934 |
-
builder.add_node("
|
|
|
|
|
|
|
| 935 |
|
| 936 |
# Define edges: these determine how the control flow moves
|
| 937 |
builder.add_edge(START, "assistant")
|
|
@@ -959,7 +1171,9 @@ def create_agent_graph() -> StateGraph:
|
|
| 959 |
"web_search": "web_search",
|
| 960 |
"python_code": "python_code",
|
| 961 |
"webpage_scrape": "webpage_scrape",
|
| 962 |
-
"
|
|
|
|
|
|
|
| 963 |
"end": END
|
| 964 |
}
|
| 965 |
)
|
|
@@ -968,7 +1182,9 @@ def create_agent_graph() -> StateGraph:
|
|
| 968 |
builder.add_edge("web_search", "assistant")
|
| 969 |
builder.add_edge("python_code", "assistant")
|
| 970 |
builder.add_edge("webpage_scrape", "assistant")
|
| 971 |
-
builder.add_edge("
|
|
|
|
|
|
|
| 972 |
|
| 973 |
# Compile with a reasonable recursion limit to prevent infinite loops
|
| 974 |
return builder.compile()
|
|
@@ -1024,7 +1240,7 @@ class TurboNerd:
|
|
| 1024 |
# Example usage:
|
| 1025 |
if __name__ == "__main__":
|
| 1026 |
agent = TurboNerd(max_execution_time=60)
|
| 1027 |
-
response = agent("
|
| 1028 |
print("\nFinal Response:")
|
| 1029 |
print(response)
|
| 1030 |
|
|
|
|
| 21 |
import sys
|
| 22 |
from bs4 import BeautifulSoup
|
| 23 |
import html2text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
from apify_client import ApifyClient
|
| 26 |
+
from langchain_community.document_loaders import WikipediaLoader
|
| 27 |
+
from langchain_community.document_loaders import ArxivLoader
|
| 28 |
+
from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper # For Tavily search
|
| 29 |
|
| 30 |
load_dotenv()
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
def run_python_code(code: str):
|
| 33 |
"""Execute Python code in a temporary file and return the output."""
|
| 34 |
# Check for potentially dangerous operations
|
|
|
|
| 170 |
except Exception as e:
|
| 171 |
print(f"Error using Apify: {str(e)}")
|
| 172 |
return fallback_search(query)
|
| 173 |
+
|
| 174 |
def scrape_webpage(url: str) -> str:
|
| 175 |
"""
|
| 176 |
Safely scrape content from a specified URL.
|
|
|
|
| 269 |
return f"Error requesting {url}: {str(e)}"
|
| 270 |
except Exception as e:
|
| 271 |
return f"Error scraping webpage {url}: {str(e)}"
|
| 272 |
+
|
| 273 |
def format_search_results(results: List[Dict], query: str) -> str:
|
| 274 |
"""Format the search results into a readable string"""
|
| 275 |
if not results or len(results) == 0:
|
|
|
|
| 368 |
|
| 369 |
return f"Failed to search for '{query}' after multiple attempts due to rate limiting."
|
| 370 |
|
| 371 |
+
def wikipedia_search(query: str, num_results: int = 3) -> str:
    """
    Search Wikipedia for information about a specific query.

    Args:
        query: Search query
        num_results: Number of search results to return (default: 3)

    Returns:
        Formatted Wikipedia search results, or a human-readable error string
        (this function never raises — errors are returned as text so the
        agent loop can surface them as an Observation).
    """
    try:
        # Validate input
        if not query or not isinstance(query, str):
            return "Error: Please provide a valid search query."

        # Ensure num_results is a positive int; fall back to the default of 3.
        try:
            num_results = int(num_results)
            if num_results <= 0:
                num_results = 3  # Default to 3 if invalid
        except (TypeError, ValueError):
            # Narrowed from a bare `except:` — only conversion failures
            # should be swallowed here, not KeyboardInterrupt etc.
            num_results = 3  # Default to 3 if conversion fails

        print(f"Searching Wikipedia for: {query}")

        # Use WikipediaLoader from LangChain
        loader = WikipediaLoader(query=query, load_max_docs=num_results)
        docs = loader.load()

        if not docs:
            return f"No Wikipedia results found for '{query}'. Try refining your search."

        # Format the results as a numbered list of title / URL / excerpt.
        formatted_results = f"Wikipedia search results for '{query}':\n\n"

        for i, doc in enumerate(docs, 1):
            title = doc.metadata.get('title', 'Unknown Title')
            source = doc.metadata.get('source', 'No URL')
            content = doc.page_content

            # Truncate content if too long
            if len(content) > 500:
                content = content[:500] + "..."

            formatted_results += f"{i}. {title}\n"
            formatted_results += f" URL: {source}\n"
            formatted_results += f" {content}\n\n"

        return formatted_results

    except Exception as e:
        # Catch-all boundary: return the error as text for the ReAct loop.
        return f"Error searching Wikipedia: {str(e)}"
|
| 424 |
+
|
| 425 |
+
def tavily_search(query: str, search_depth: str = "basic") -> str:
    """
    Search the web using the Tavily Search API.

    Args:
        query: Search query
        search_depth: Depth of search ('basic' or 'comprehensive')

    Returns:
        Formatted search results from Tavily, or an error message string.
    """
    try:
        # An API key is required before anything else is attempted.
        if not os.environ.get("TAVILY_API_KEY"):
            return "Error: Tavily API key not found. Please set the TAVILY_API_KEY environment variable."

        # Reject empty / non-string queries up front.
        if not query or not isinstance(query, str):
            return "Error: Please provide a valid search query."

        # Fall back to 'basic' for any unrecognized depth value.
        # NOTE(review): the Tavily API documents 'basic'/'advanced' depths —
        # confirm that the wrapper accepts 'comprehensive' as used here.
        if search_depth not in ("basic", "comprehensive"):
            search_depth = "basic"

        print(f"Searching Tavily for: {query} (depth: {search_depth})")

        # Run the query through the LangChain Tavily wrapper.
        wrapper = TavilySearchAPIWrapper()
        hits = wrapper.results(query, search_depth=search_depth)

        if not hits:
            return f"No Tavily search results found for '{query}'. Try refining your search."

        # Assemble one numbered entry (title / URL / content) per hit.
        parts = [f"Tavily search results for '{query}':\n\n"]
        for idx, hit in enumerate(hits, 1):
            parts.append(f"{idx}. {hit.get('title', 'No title')}\n")
            parts.append(f" URL: {hit.get('url', 'No URL')}\n")
            parts.append(f" {hit.get('content', 'No content')}\n\n")
        return "".join(parts)

    except Exception as e:
        return f"Error searching with Tavily: {str(e)}"
|
| 473 |
+
|
| 474 |
+
def arxiv_search(query: str, max_results: int = 5) -> str:
    """
    Search ArXiv for scientific papers matching the query.

    Args:
        query: Search query for ArXiv
        max_results: Maximum number of results to return (clamped to 1-10)

    Returns:
        Formatted ArXiv search results, or a human-readable error string
        (this function never raises — errors are returned as text so the
        agent loop can surface them as an Observation).
    """
    try:
        # Validate input
        if not query or not isinstance(query, str):
            return "Error: Please provide a valid search query."

        # Ensure max_results is an int in (0, 10]; fall back to the default of 5.
        try:
            max_results = int(max_results)
            if max_results <= 0 or max_results > 10:
                max_results = 5  # Default to 5 if invalid or too large
        except (TypeError, ValueError):
            # Narrowed from a bare `except:` — only conversion failures
            # should be swallowed here.
            max_results = 5  # Default to 5 if conversion fails

        print(f"Searching ArXiv for: {query}")

        # Use ArxivLoader from LangChain
        loader = ArxivLoader(
            query=query,
            load_max_docs=max_results,
            load_all_available_meta=True
        )

        docs = loader.load()

        if not docs:
            return f"No ArXiv papers found for '{query}'. Try refining your search."

        # Format the results: numbered entries with title, URL, authors,
        # publication date, and a truncated abstract.
        formatted_results = f"ArXiv papers for '{query}':\n\n"

        for i, doc in enumerate(docs, 1):
            meta = doc.metadata
            title = meta.get('Title', 'Unknown Title')
            url = meta.get('Entry ID', 'No URL')
            authors = meta.get('Authors', 'Unknown Authors')
            published = meta.get('Published', 'Unknown Date')

            formatted_results += f"{i}. {title}\n"
            formatted_results += f" URL: {url}\n"
            formatted_results += f" Authors: {authors}\n"
            formatted_results += f" Published: {published}\n"

            # Add abstract, truncated if too long
            abstract = doc.page_content.replace('\n', ' ')
            if len(abstract) > 300:
                abstract = abstract[:300] + "..."
            formatted_results += f" Abstract: {abstract}\n\n"

        return formatted_results

    except Exception as e:
        # Catch-all boundary: return the error as text for the ReAct loop.
        return f"Error searching ArXiv: {str(e)}"
|
| 537 |
+
|
| 538 |
# System prompt to guide the model's behavior
|
| 539 |
SYSTEM_PROMPT = """Answer the following questions as best you can. DO NOT rely on your internal knowledge unless web searches are rate-limited or you're specifically instructed to. You have access to the following tools:
|
| 540 |
|
| 541 |
+
web_search: Search the google search engine for current information. Provide a specific search query.
|
| 542 |
python_code: Execute Python code. Provide the complete Python code as a string. Use this tool to calculate math problems.
|
| 543 |
webpage_scrape: Scrape content from a specific webpage URL. Provide a valid URL to extract information from a particular web page.
|
| 544 |
+
wikipedia_search: Search Wikipedia for information about a specific topic. Optionally specify the number of results to return.
|
| 545 |
+
tavily_search: Search the web using Tavily for more comprehensive results. Optionally specify search_depth as 'basic' or 'comprehensive'.
|
| 546 |
+
arxiv_search: Search ArXiv for scientific papers on a specific topic. Optionally specify max_results to control the number of papers returned.
|
| 547 |
|
| 548 |
IMPORTANT: You MUST strictly follow the ReAct pattern (Reasoning, Action, Observation):
|
| 549 |
1. First reason about the problem in the "Thought" section
|
|
|
|
| 561 |
web_search: Search the web for current information, args: {"query": {"type": "string"}}
|
| 562 |
python_code: Execute Python code, args: {"code": {"type": "string"}}
|
| 563 |
webpage_scrape: Scrape a specific webpage, args: {"url": {"type": "string"}}
|
| 564 |
+
wikipedia_search: Search Wikipedia, args: {"query": {"type": "string"}, "num_results": {"type": "integer", "optional": true}}
|
| 565 |
+
tavily_search: Search with Tavily, args: {"query": {"type": "string"}, "search_depth": {"type": "string", "optional": true}}
|
| 566 |
+
arxiv_search: Search ArXiv papers, args: {"query": {"type": "string"}, "max_results": {"type": "integer", "optional": true}}
|
| 567 |
|
| 568 |
IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
|
| 569 |
|
|
|
|
| 585 |
}
|
| 586 |
```
|
| 587 |
|
| 588 |
+
Or for searching Wikipedia:
|
| 589 |
|
| 590 |
```json
|
| 591 |
{
|
| 592 |
+
"action": "wikipedia_search",
|
| 593 |
+
"action_input": {"query": "quantum physics", "num_results": 3}
|
| 594 |
}
|
| 595 |
```
|
| 596 |
|
|
|
|
| 638 |
Final Answer: Directly answer the question in the shortest possible way. For example, if the question is "What is the capital of France?", the answer should be "Paris" without any additional text. If the question is "What is the population of New York City?", the answer should be "8.4 million" without any additional text.
|
| 639 |
|
| 640 |
Now begin! Reminder to ALWAYS use the exact characters `Final Answer:` when you provide a definitive answer."""
|
|
|
|
| 641 |
|
| 642 |
# Generate the chat interface, including the tools
|
| 643 |
llm = ChatOpenAI(
|
|
|
|
| 664 |
"func": scrape_webpage
|
| 665 |
},
|
| 666 |
{
|
| 667 |
+
"name": "wikipedia_search",
|
| 668 |
+
"description": "Search Wikipedia for information about a specific topic. Provide a query in the format: {\"query\": \"your topic\", \"num_results\": 3}",
|
| 669 |
+
"func": wikipedia_search
|
| 670 |
+
},
|
| 671 |
+
{
|
| 672 |
+
"name": "tavily_search",
|
| 673 |
+
"description": "Search the web using Tavily for more comprehensive results. Provide a query in the format: {\"query\": \"your search query\", \"search_depth\": \"basic\"}",
|
| 674 |
+
"func": tavily_search
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"name": "arxiv_search",
|
| 678 |
+
"description": "Search ArXiv for scientific papers. Provide a query in the format: {\"query\": \"your research topic\", \"max_results\": 5}",
|
| 679 |
+
"func": arxiv_search
|
| 680 |
}
|
| 681 |
]
|
| 682 |
|
|
|
|
| 690 |
code: Optional[str]
|
| 691 |
url: Optional[str]
|
| 692 |
image_url: Optional[str]
|
| 693 |
+
num_results: Optional[int]
|
| 694 |
+
search_depth: Optional[str]
|
| 695 |
+
max_results: Optional[int]
|
| 696 |
|
| 697 |
class AgentState(TypedDict, total=False):
|
| 698 |
messages: Annotated[list[AnyMessage], add_messages]
|
|
|
|
| 954 |
"action_input": None # Clear the action input
|
| 955 |
}
|
| 956 |
|
| 957 |
+
def wikipedia_search_node(state: AgentState) -> Dict[str, Any]:
    """Node that processes Wikipedia search requests.

    Reads ``action_input`` from the agent state — either a dict with
    ``query`` and optional ``num_results``, or a bare query string — runs
    ``wikipedia_search``, and appends the result to the message list as a
    ReAct "Observation:" message. Resets ``current_tool`` and
    ``action_input`` so the graph returns control to the assistant.
    """
    print("Wikipedia Search Tool Called...\n\n")

    # Extract tool arguments
    action_input = state.get("action_input", {})
    print(f"Wikipedia search action_input: {action_input}")

    # Extract query and num_results
    query = ""
    num_results = 3  # Default

    if isinstance(action_input, dict):
        query = action_input.get("query", "")
        if "num_results" in action_input:
            try:
                num_results = int(action_input["num_results"])
            except (TypeError, ValueError):
                # Narrowed from a bare `except:` — only conversion
                # failures should fall back to the default.
                print("Invalid num_results, using default")
    elif isinstance(action_input, str):
        query = action_input

    print(f"Searching Wikipedia for: '{query}' (max results: {num_results})")

    # Safety check - don't run with empty query
    if not query:
        result = "Error: No search query provided. Please provide a valid query for Wikipedia search."
    else:
        # Call the Wikipedia search function
        result = wikipedia_search(query, num_results)

    print(f"Wikipedia search result length: {len(result)}")

    # Format the observation to continue the ReAct cycle
    tool_message = AIMessage(
        content=f"Observation: {result.strip()}"
    )

    # Print the observation that will be sent back to the assistant
    print("\n=== TOOL OBSERVATION ===")
    content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
    print(content_preview)
    print("=== END OBSERVATION ===\n")

    # Return the updated state
    return {
        "messages": state["messages"] + [tool_message],
        "current_tool": None,  # Reset the current tool
        "action_input": None  # Clear the action input
    }
|
| 1007 |
+
|
| 1008 |
+
def tavily_search_node(state: AgentState) -> Dict[str, Any]:
    """Node that processes Tavily search requests.

    Pulls ``action_input`` from the agent state (a dict with ``query`` and
    optional ``search_depth``, or a bare query string), invokes
    ``tavily_search``, and returns the state update that feeds the result
    back to the assistant as a ReAct observation.
    """
    print("Tavily Search Tool Called...\n\n")

    # Pull the tool arguments out of the state.
    action_input = state.get("action_input", {})
    print(f"Tavily search action_input: {action_input}")

    query = ""
    search_depth = "basic"  # default depth

    if isinstance(action_input, str):
        query = action_input
    elif isinstance(action_input, dict):
        query = action_input.get("query", "")
        requested_depth = action_input.get("search_depth")
        # Only honor a depth value we recognize; otherwise keep the default.
        if requested_depth in ("basic", "comprehensive"):
            search_depth = requested_depth

    print(f"Searching Tavily for: '{query}' (depth: {search_depth})")

    # Guard against an empty query before calling the search function.
    result = (
        tavily_search(query, search_depth)
        if query
        else "Error: No search query provided. Please provide a valid query for Tavily search."
    )

    print(f"Tavily search result length: {len(result)}")

    # Wrap the result as a ReAct "Observation:" message.
    tool_message = AIMessage(content=f"Observation: {result.strip()}")

    # Echo (a preview of) the observation that goes back to the assistant.
    print("\n=== TOOL OBSERVATION ===")
    preview = tool_message.content
    if len(preview) > 500:
        preview = preview[:500] + "..."
    print(preview)
    print("=== END OBSERVATION ===\n")

    # Hand the updated state back to the graph.
    return {
        "messages": state["messages"] + [tool_message],
        "current_tool": None,  # reset the current tool
        "action_input": None,  # clear the action input
    }
|
| 1057 |
+
|
| 1058 |
+
def arxiv_search_node(state: AgentState) -> Dict[str, Any]:
|
| 1059 |
+
"""Node that processes ArXiv search requests."""
|
| 1060 |
+
print("ArXiv Search Tool Called...\n\n")
|
| 1061 |
+
|
| 1062 |
+
# Extract tool arguments
|
| 1063 |
+
action_input = state.get("action_input", {})
|
| 1064 |
+
print(f"ArXiv search action_input: {action_input}")
|
| 1065 |
+
|
| 1066 |
+
# Extract query and max_results
|
| 1067 |
+
query = ""
|
| 1068 |
+
max_results = 5 # Default
|
| 1069 |
+
|
| 1070 |
+
if isinstance(action_input, dict):
|
| 1071 |
+
query = action_input.get("query", "")
|
| 1072 |
+
if "max_results" in action_input:
|
| 1073 |
+
try:
|
| 1074 |
+
max_results = int(action_input["max_results"])
|
| 1075 |
+
if max_results <= 0 or max_results > 10:
|
| 1076 |
+
max_results = 5 # Reset to default if out of range
|
| 1077 |
+
except:
|
| 1078 |
+
print("Invalid max_results, using default")
|
| 1079 |
+
elif isinstance(action_input, str):
|
| 1080 |
+
query = action_input
|
| 1081 |
+
|
| 1082 |
+
print(f"Searching ArXiv for: '{query}' (max results: {max_results})")
|
| 1083 |
+
|
| 1084 |
+
# Safety check - don't run with empty query
|
| 1085 |
+
if not query:
|
| 1086 |
+
result = "Error: No search query provided. Please provide a valid query for ArXiv search."
|
| 1087 |
+
else:
|
| 1088 |
+
# Call the ArXiv search function
|
| 1089 |
+
result = arxiv_search(query, max_results)
|
| 1090 |
+
|
| 1091 |
+
print(f"ArXiv search result length: {len(result)}")
|
| 1092 |
|
| 1093 |
# Format the observation to continue the ReAct cycle
|
|
|
|
| 1094 |
tool_message = AIMessage(
|
| 1095 |
content=f"Observation: {result.strip()}"
|
| 1096 |
)
|
|
|
|
| 1122 |
return "python_code"
|
| 1123 |
elif tool == "webpage_scrape":
|
| 1124 |
return "webpage_scrape"
|
| 1125 |
+
elif tool == "wikipedia_search":
|
| 1126 |
+
return "wikipedia_search"
|
| 1127 |
+
elif tool == "tavily_search":
|
| 1128 |
+
return "tavily_search"
|
| 1129 |
+
elif tool == "arxiv_search":
|
| 1130 |
+
return "arxiv_search"
|
| 1131 |
else:
|
| 1132 |
return "end"
|
| 1133 |
|
|
|
|
| 1141 |
builder.add_node("web_search", web_search_node)
|
| 1142 |
builder.add_node("python_code", python_code_node)
|
| 1143 |
builder.add_node("webpage_scrape", webpage_scrape_node)
|
| 1144 |
+
builder.add_node("wikipedia_search", wikipedia_search_node)
|
| 1145 |
+
builder.add_node("tavily_search", tavily_search_node)
|
| 1146 |
+
builder.add_node("arxiv_search", arxiv_search_node)
|
| 1147 |
|
| 1148 |
# Define edges: these determine how the control flow moves
|
| 1149 |
builder.add_edge(START, "assistant")
|
|
|
|
| 1171 |
"web_search": "web_search",
|
| 1172 |
"python_code": "python_code",
|
| 1173 |
"webpage_scrape": "webpage_scrape",
|
| 1174 |
+
"wikipedia_search": "wikipedia_search",
|
| 1175 |
+
"tavily_search": "tavily_search",
|
| 1176 |
+
"arxiv_search": "arxiv_search",
|
| 1177 |
"end": END
|
| 1178 |
}
|
| 1179 |
)
|
|
|
|
| 1182 |
builder.add_edge("web_search", "assistant")
|
| 1183 |
builder.add_edge("python_code", "assistant")
|
| 1184 |
builder.add_edge("webpage_scrape", "assistant")
|
| 1185 |
+
builder.add_edge("wikipedia_search", "assistant")
|
| 1186 |
+
builder.add_edge("tavily_search", "assistant")
|
| 1187 |
+
builder.add_edge("arxiv_search", "assistant")
|
| 1188 |
|
| 1189 |
# Compile with a reasonable recursion limit to prevent infinite loops
|
| 1190 |
return builder.compile()
|
|
|
|
| 1240 |
# Example usage:
|
| 1241 |
if __name__ == "__main__":
|
| 1242 |
agent = TurboNerd(max_execution_time=60)
|
| 1243 |
+
response = agent("When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect? Use Tavily Search")
|
| 1244 |
print("\nFinal Response:")
|
| 1245 |
print(response)
|
| 1246 |
|
requirements.txt
CHANGED
|
@@ -7,8 +7,4 @@ duckduckgo-search
|
|
| 7 |
langchain-community
|
| 8 |
apify-client
|
| 9 |
beautifulsoup4
|
| 10 |
-
html2text
|
| 11 |
-
transformers
|
| 12 |
-
pillow
|
| 13 |
-
torch
|
| 14 |
-
accelerate
|
|
|
|
| 7 |
langchain-community
|
| 8 |
apify-client
|
| 9 |
beautifulsoup4
|
| 10 |
+
html2text
|
|
|
|
|
|
|
|
|
|
|
|