Spaces:
Sleeping
Sleeping
Update tools/WebSearchTool.py
Browse files- tools/WebSearchTool.py +48 -62
tools/WebSearchTool.py
CHANGED
|
@@ -1,80 +1,66 @@
|
|
| 1 |
-
import
|
| 2 |
from smolagents import Tool
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
class DuckDuckGoSearchTool(Tool):
|
| 6 |
"""
|
| 7 |
-
A
|
| 8 |
-
|
| 9 |
-
It primarily returns a single, instant answer or the top result snippet.
|
| 10 |
"""
|
| 11 |
-
name
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
| 14 |
inputs = {
|
| 15 |
"query": {"type": "string", "description": "The search term to look up."}
|
| 16 |
}
|
| 17 |
output_type = "string"
|
| 18 |
-
|
| 19 |
def __init__(self, **kwargs):
|
| 20 |
super().__init__(**kwargs)
|
| 21 |
-
#
|
| 22 |
-
self.
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def forward(self, query: str) -> str:
|
| 25 |
"""
|
| 26 |
-
Executes a
|
| 27 |
-
|
|
|
|
| 28 |
Args:
|
| 29 |
query: The search term provided by the agent.
|
| 30 |
-
|
| 31 |
Returns:
|
| 32 |
-
A formatted string
|
| 33 |
"""
|
| 34 |
-
print(f"Executing
|
| 35 |
-
|
| 36 |
-
params = {
|
| 37 |
-
"q": query,
|
| 38 |
-
"format": "json",
|
| 39 |
-
"no_html": "1", # Ensure content is plain text
|
| 40 |
-
"skip_disambig": "1" # Skip pages that offer multiple results
|
| 41 |
-
}
|
| 42 |
-
|
| 43 |
try:
|
| 44 |
-
#
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
)
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
related_topics = data.get('RelatedTopics', [])
|
| 62 |
-
if related_topics and isinstance(related_topics[0], dict):
|
| 63 |
-
# We expect the first related topic to be the main search result snippet
|
| 64 |
-
top_topic = related_topics[0]
|
| 65 |
-
text = top_topic.get('Text', 'No snippet available.')
|
| 66 |
-
url = top_topic.get('FirstURL', 'N/A')
|
| 67 |
-
|
| 68 |
-
return (
|
| 69 |
-
f"RESULT 1 (Top Snippet): '{query}'\n"
|
| 70 |
-
f"CONTENT: {text}\n"
|
| 71 |
-
f"SOURCE: {url}"
|
| 72 |
-
)
|
| 73 |
-
|
| 74 |
-
# If nothing useful is found:
|
| 75 |
-
return "XX record info: No results found."
|
| 76 |
-
|
| 77 |
-
except requests.exceptions.RequestException as e:
|
| 78 |
-
return f"Error during DuckDuckGo Search API Request: {e}"
|
| 79 |
except Exception as e:
|
| 80 |
-
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
from smolagents import Tool
|
| 3 |
+
from googleapiclient.discovery import build
|
| 4 |
+
class GoogleSearchTool(Tool):
    """
    Web search tool backed by the Google Custom Search Engine (CSE) API.

    Use this tool first to get the necessary information for calculation or further tools.
    """

    # Identifier and description exposed to the agent framework.
    name = "google_search"
    description = "Use Google to find current information and general knowledge. Returns a snippet and URL."
    # Input/output schema declared for the agent framework.
    inputs = {
        "query": {"type": "string", "description": "The search term to look up."}
    }
    output_type = "string"

    def __init__(self, **kwargs):
        """
        Read the API credentials from the environment and build the CSE client.

        Args:
            **kwargs: Forwarded unchanged to the base ``Tool`` initializer.

        Raises:
            ValueError: If ``GOOGLE_API_KEY`` or ``GOOGLE_CSE_ID`` is unset or empty.
        """
        super().__init__(**kwargs)

        # Credentials come from environment variables so they never live in the source.
        self.api_key = os.getenv("GOOGLE_API_KEY")
        self.cse_id = os.getenv("GOOGLE_CSE_ID")
        if not self.api_key or not self.cse_id:
            raise ValueError("GOOGLE_API_KEY or GOOGLE_CSE_ID secret not found. Check environment variables.")

        # 'customsearch' is the API name, 'v1' is the version.
        self.service = build("customsearch", "v1", developerKey=self.api_key)

    def forward(self, query: str) -> str:
        """
        Run a Google search and format the top results (up to 3).

        Each result is rendered as a title line, then the snippet labelled
        ``CONTENT``, then the URL labelled ``SOURCE``, so the informational
        text precedes the link.

        Args:
            query: The search term provided by the agent.

        Returns:
            A formatted string of search results, the fixed
            "no results" message, or an error message. Failures are reported
            in the returned string rather than raised.
        """
        # Reject blank queries up front: the request cannot yield results and
        # would only spend API quota. The usual error prefix is kept so callers
        # matching on it still recognise the failure.
        if query is None or not str(query).strip():
            return "Error during Google Search API call: query must be a non-empty string."

        print(f"Executing Google search for: '{query}'")

        try:
            # Ask the CSE endpoint for at most 3 results.
            response = self.service.cse().list(
                q=query,
                cx=self.cse_id,
                num=3,
            ).execute()

            items = response.get('items', [])
            if not items:
                # Return the specific failure message expected by the agent.
                return "XX record info: No results found."

            search_results = []
            for position, item in enumerate(items, start=1):
                # Fall back to explicit placeholders so a missing field never
                # shows up as the literal text "None" in the agent's context.
                title = item.get('title') or 'No title available.'
                snippet = item.get('snippet') or 'No snippet available.'
                link = item.get('link') or 'N/A'
                search_results.append(
                    f"RESULT {position}: '{title}'\n"
                    f"CONTENT: {snippet}\n"
                    f"SOURCE: {link}"
                )

            # Join the results with a clear separator.
            return "\n\n---SEPARATOR---\n\n".join(search_results)

        except Exception as e:
            # Tool boundary: report any failure to the agent as text instead of raising.
            return f"Error during Google Search API call: {e}"
|