Spaces:
Sleeping
Sleeping
File size: 10,322 Bytes
d12a6df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 |
import os
import json
import requests
from dotenv import load_dotenv
load_dotenv()
from google import genai
from google.genai import types
from agentflow.tools.base import BaseTool
# For formatting the response
import requests
from typing import List
import re
# Tool name mapping - this defines the external name for this tool.
# Passed to BaseTool as `tool_name` by Google_Search_Tool.__init__.
TOOL_NAME = "Ground_Google_Search_Tool"

# Free-text caveats exposed through the tool's `user_metadata["limitations"]`.
# NOTE(review): the text below contains typos ("This tools is"); it is left
# byte-identical here because it is emitted at runtime as tool metadata.
LIMITATIONS = """
1. This tool is only suitable for general information search.
2. This tool contains less domain specific information.
3. This tools is not suitable for searching and analyzing videos at YouTube or other video platforms.
"""

# Free-text usage guidance exposed through `user_metadata["best_practices"]`.
# NOTE(review): item 2 repeats the same example question twice and item 4 has
# typos ("suiable", "defination"); unchanged because this is runtime text.
BEST_PRACTICES = """
1. Choose this tool when you want to search general information about a topic.
2. Choose this tool for question type of query, such as "What is the capital of France?" or "What is the capital of France?"
3. The tool will return a summarized information.
4. This tool is more suiable for defination, world knowledge, and general information search.
"""
class Google_Search_Tool(BaseTool):
    """Web search tool backed by Gemini with Google Search grounding.

    A query is sent to a Gemini model with the ``google_search`` tool enabled.
    The answer text can optionally be annotated with Markdown citations
    (``[n](url)``) built from the response's grounding metadata, and the
    citation URLs are resolved through their HTTP redirects.
    """

    # Matches Markdown citations of the form ``[3](https://example.com/page)``;
    # group 1 is the URL. Compiled once instead of on every call.
    _CITATION_PATTERN = re.compile(r'\[\d+\]\((https?://[^\s)]+)\)')

    def __init__(self, model_string="gemini-2.5-flash", max_retries=5):
        """
        Register the tool metadata and create the Gemini client.

        Parameters:
            model_string (str): Name of the Gemini model used for searching.
            max_retries (int): How many times a failing API call is attempted
                before giving up. Defaults to 5.

        Raises:
            ValueError: If the GOOGLE_API_KEY environment variable is unset or empty.
        """
        super().__init__(
            tool_name=TOOL_NAME,
            tool_description="A web search tool powered by Google's Gemini AI that provides real-time information from the internet with citation support.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query to find information on the web.",
                "add_citations": "bool - Whether to add citations to the results. If True, the results will be formatted with citations. By default, it is True.",
            },
            output_type="str - The search results of the query.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="What is the capital of France?")',
                    "description": "Search for general information about the capital of France with default citations enabled."
                },
                {
                    "command": 'execution = tool.execute(query="Who won the euro 2024?", add_citations=False)',
                    "description": "Search for information about Euro 2024 winner without citations."
                },
                {
                    "command": 'execution = tool.execute(query="Physics and Society article arXiv August 11, 2016", add_citations=True)',
                    "description": "Search for specific academic articles with citations enabled."
                }
            ],
            user_metadata={
                "limitations": LIMITATIONS,
                "best_practices": BEST_PRACTICES,
            }
        )
        self.max_retries = max_retries
        self.search_model = model_string

        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            # ValueError is still an Exception, so callers that caught the
            # previous generic Exception keep working.
            raise ValueError("Google API key not found. Please set the GOOGLE_API_KEY environment variable.")
        self.client = genai.Client(api_key=api_key)

    @staticmethod
    def get_real_url(url):
        """
        Resolve a redirect URL to the final URL, best-effort.

        The request:
        1. Sends a browser-like User-Agent to avoid being blocked or throttled.
        2. Uses a timeout so a slow host cannot stall the tool indefinitely.
        3. Follows HTTP redirects (requests' default, stated explicitly).
        4. Streams the response so the final page body is not downloaded;
           only the resolved URL is needed.

        Parameters:
            url (str): The (possibly redirecting) URL.

        Returns:
            str: The final URL after redirects, or ``url`` unchanged if the
            request fails or the URL is malformed.
        """
        # Headers to mimic a real browser visit
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        try:
            # The context manager closes the streamed response and releases
            # the connection as soon as the final URL has been read.
            with requests.get(
                url,
                headers=headers,
                timeout=8,
                allow_redirects=True,
                stream=True,
            ) as response:
                # After all redirects, response.url contains the final URL.
                return response.url
        except (requests.RequestException, ValueError):
            # Network failure, timeout or malformed URL: keep the original link.
            return url

    @staticmethod
    def extract_urls(text: str) -> List[str]:
        """
        Extract all URLs from Markdown-style citations [number](url) in the given text.

        Parameters:
            text (str): A string containing Markdown citations.

        Returns:
            List[str]: The URLs in order of appearance (duplicates included).
        """
        return Google_Search_Tool._CITATION_PATTERN.findall(text)

    def reformat_response(self, response: str) -> str:
        """
        Replace the URL of every Markdown citation with its resolved target.

        Each distinct URL is resolved only once per call, and only the URL
        inside a ``[n](url)`` citation is rewritten, so a URL that happens to
        be a prefix of another cited URL cannot corrupt it.

        Parameters:
            response (str): Text that may contain ``[n](url)`` citations.

        Returns:
            str: The text with citation URLs replaced by their final URLs.
        """
        resolved = {}

        def _swap(match):
            url = match.group(1)
            if url not in resolved:
                resolved[url] = self.get_real_url(url)
            cited = match.group(0)
            start = match.start()
            # Keep the "[n](" prefix and ")" suffix, swap only the URL span.
            return cited[:match.start(1) - start] + resolved[url] + cited[match.end(1) - start:]

        return self._CITATION_PATTERN.sub(_swap, response)

    @staticmethod
    def add_citations(response):
        """
        Insert Markdown citations into the response text using its grounding metadata.

        For every grounding support, links of the form ``[1](uri), [2](uri)``
        are inserted right after the text segment the support refers to.

        Parameters:
            response: A Gemini ``generate_content`` response exposing ``text``
                and ``candidates[0].grounding_metadata``.

        Returns:
            str: The text with citations inserted, or the unchanged text when
            the response carries no usable grounding metadata.

        Raises:
            UnicodeDecodeError: If a reported offset falls inside a multi-byte
                character, so the annotated text cannot be decoded.
        """
        text = response.text
        if not text or not response.candidates:
            return text
        metadata = response.candidates[0].grounding_metadata
        if metadata is None:
            return text

        supports = metadata.grounding_supports or []
        chunks = metadata.grounding_chunks or []
        anchored = [
            s for s in supports
            if s.segment is not None and s.segment.end_index is not None
        ]

        # The API documents segment indices as byte offsets, so the insertion
        # is done on the UTF-8 encoding; indexing the str directly misplaces
        # citations as soon as the text contains non-ASCII characters.
        # NOTE(review): offsets are relative to a single Part; this assumes the
        # whole answer text is one Part - confirm for multi-part responses.
        raw = text.encode("utf-8")

        # Process from the end of the text backwards so that earlier offsets
        # stay valid while citations are being inserted.
        for support in sorted(anchored, key=lambda s: s.segment.end_index, reverse=True):
            citation_links = []
            for i in support.grounding_chunk_indices or []:
                if 0 <= i < len(chunks):
                    web = chunks[i].web
                    if web is not None and web.uri:
                        citation_links.append(f"[{i + 1}]({web.uri})")
            if not citation_links:
                continue
            end_index = support.segment.end_index
            citation_string = ", ".join(citation_links)
            raw = raw[:end_index] + citation_string.encode("utf-8") + raw[end_index:]
        return raw.decode("utf-8")

    def _execute_search(self, query: str, add_citations_flag: bool):
        """
        Run a grounded Gemini search and post-process the answer.

        See https://ai.google.dev/gemini-api/docs/google-search

        Parameters:
            query (str): The search query.
            add_citations_flag (bool): Whether to insert citations into the answer.

        Returns:
            str: The (optionally cited) answer, or a human-readable failure
            message when the API call keeps failing or returns no text.
        """
        # Define the grounding tool and the generation settings.
        grounding_tool = types.Tool(google_search=types.GoogleSearch())
        config = types.GenerateContentConfig(tools=[grounding_tool])

        response = None
        response_text = None
        for attempt in range(self.max_retries):
            try:
                response = self.client.models.generate_content(
                    model=self.search_model,
                    contents=query,
                    config=config,
                )
                response_text = response.text
                # The API call succeeded, so leave the retry loop.
                break
            except Exception as e:
                if attempt == self.max_retries - 1:  # Last attempt
                    print(f"Google Search failed after {self.max_retries} attempts. Last error: {str(e)}")
                    return f"Google Search tried {self.max_retries} times but failed. Last error: {str(e)}"
                print(f"Google Search attempt {attempt + 1} failed: {str(e)}. Retrying...")

        # A response without text (or no attempt at all) cannot be processed.
        if response is None or response_text is None:
            return "Google Search failed to get a valid response"

        # Citations and URL resolution are best-effort: on failure the text
        # obtained so far is kept and the problem is reported, not raised.
        if add_citations_flag:
            try:
                response_text = self.add_citations(response)
            except Exception as e:
                print(f"Error adding citations: {str(e)}")

        try:
            response_text = self.reformat_response(response_text)
        except Exception as e:
            print(f"Error reformatting response: {str(e)}")

        return response_text

    def execute(self, query: str, add_citations: bool = True):
        """
        Execute the Google search tool.

        Parameters:
            query (str): The search query to find information on the web.
            add_citations (bool): Whether to add citations to the results. Default is True.

        Returns:
            str: The search results of the query.
        """
        return self._execute_search(query, add_citations)

    def get_metadata(self):
        """
        Returns the metadata for the Google_Search tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        return super().get_metadata()
if __name__ == "__main__":
    # Manual smoke test: prints the tool metadata, then runs a few sample
    # queries against the live API (GOOGLE_API_KEY must be set).
    #
    #   cd agentflow/tools/google_search
    #   python tool.py

    def print_json(result):
        """Pretty-print *result* as JSON indented by four spaces."""
        print(json.dumps(result, indent=4))

    google_search = Google_Search_Tool()

    # Get tool metadata
    metadata = google_search.get_metadata()
    print("Tool Metadata:")
    print_json(metadata)

    examples = [
        {'query': 'What is the capital of France?', 'add_citations': True},
        {'query': 'Who won the euro 2024?', 'add_citations': False},
        {'query': 'Physics and Society article arXiv August 11, 2016', 'add_citations': True},
    ]

    for example in examples:
        print(f"\nExecuting search: {example['query']}")
        try:
            result = google_search.execute(**example)
            print("Search Result:")
            print(result)
        except Exception as e:
            # Keep going so one failing query does not hide the others.
            print(f"Error: {str(e)}")
        print("-" * 50)

    print("Done!")