IsaacGHX
update
d12a6df
raw
history blame
10.3 kB
import os
import json
import requests
from dotenv import load_dotenv
load_dotenv()
from google import genai
from google.genai import types
from agentflow.tools.base import BaseTool
# For formatting the response
import requests
from typing import List
import re
# Tool name mapping - this defines the external name for this tool.
# It is passed as `tool_name` to the base-class constructor in
# Google_Search_Tool.__init__.
TOOL_NAME = "Ground_Google_Search_Tool"
# Free-text description of what the tool is NOT good at. It is published under
# the "limitations" key of the `user_metadata` dict given to the base class.
# NOTE(review): the text contains a typo ("This tools"). It is a runtime
# string, so it is deliberately left untouched here - fix it in a behavior
# change if the wording matters to consumers of the metadata.
LIMITATIONS = """
1. This tool is only suitable for general information search.
2. This tool contains less domain specific information.
3. This tools is not suitable for searching and analyzing videos at YouTube or other video platforms.
"""
# Free-text guidance on when to pick this tool. It is published under the
# "best_practices" key of the `user_metadata` dict given to the base class.
# NOTE(review): item 2 repeats the same example question twice and item 4
# contains typos ("suiable", "defination"); left untouched for the same
# reason as above.
BEST_PRACTICES = """
1. Choose this tool when you want to search general information about a topic.
2. Choose this tool for question type of query, such as "What is the capital of France?" or "What is the capital of France?"
3. The tool will return a summarized information.
4. This tool is more suiable for defination, world knowledge, and general information search.
"""
class Google_Search_Tool(BaseTool):
    """Web search tool backed by Gemini with Google Search grounding.

    A query is sent to a Gemini model configured with the Google Search
    grounding tool. The model's answer can optionally be annotated with
    Markdown citations (``[n](url)``) built from the grounding metadata, and
    redirect URLs inside those citations are resolved to their final targets.
    """

    # Markdown citation of the form ``[3](https://example.com/page)``.
    # Group 1 is the ``[n](`` prefix, group 2 is the URL.
    _CITATION_PATTERN = re.compile(r'(\[\d+\]\()(https?://[^\s)]+)\)')

    def __init__(self, model_string="gemini-2.5-flash"):
        """
        Register the tool metadata and create the Gemini client.

        Parameters:
            model_string (str): Name of the Gemini model used for searching.

        Raises:
            ValueError: If the GOOGLE_API_KEY environment variable is not set
                or is empty. (ValueError is a subclass of Exception, so
                handlers written for the previous generic Exception still
                catch it.)
        """
        super().__init__(
            tool_name=TOOL_NAME,
            tool_description="A web search tool powered by Google's Gemini AI that provides real-time information from the internet with citation support.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query to find information on the web.",
                "add_citations": "bool - Whether to add citations to the results. If True, the results will be formatted with citations. By default, it is True.",
            },
            output_type="str - The search results of the query.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="What is the capital of France?")',
                    "description": "Search for general information about the capital of France with default citations enabled."
                },
                {
                    "command": 'execution = tool.execute(query="Who won the euro 2024?", add_citations=False)',
                    "description": "Search for information about Euro 2024 winner without citations."
                },
                {
                    "command": 'execution = tool.execute(query="Physics and Society article arXiv August 11, 2016", add_citations=True)',
                    "description": "Search for specific academic articles with citations enabled."
                }
            ],
            user_metadata={
                "limitations": LIMITATIONS,
                "best_practices": BEST_PRACTICES,
            }
        )
        self.max_retries = 5
        self.search_model = model_string

        # Fail fast with a clear message instead of letting the client fail
        # later with a less obvious authentication error.
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("Google API key not found. Please set the GOOGLE_API_KEY environment variable.")
        self.client = genai.Client(api_key=api_key)

    @staticmethod
    def get_real_url(url):
        """
        Convert a redirect URL to the final real URL in a stable manner.

        This function handles redirects by:
        1. Setting a browser-like User-Agent to avoid being blocked or throttled.
        2. Using a timeout to prevent getting stuck indefinitely.
        3. Following HTTP redirects automatically (default requests behavior).
        4. Falling back to the input URL on any failure (best effort).

        Parameters:
            url (str): The (possibly redirecting) URL to resolve.

        Returns:
            str: The URL reached after following redirects, or `url` itself
            if the request failed for any reason.
        """
        # Headers to mimic a real browser visit
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        try:
            # stream=True: only the final URL is needed, so do not download
            # the response body. The `with` block releases the connection.
            # allow_redirects=True is the default, but is stated for clarity.
            with requests.get(
                url,
                headers=headers,
                timeout=8,
                allow_redirects=True,
                stream=True,
            ) as response:
                # After all redirects, response.url contains the final URL.
                return response.url
        except Exception:
            # Deliberately broad: resolving the link is a nicety, and the
            # unresolved URL is still a usable citation.
            return url

    @classmethod
    def extract_urls(cls, text: str) -> List[str]:
        """
        Extract all URLs from Markdown-style citations [number](url) in the given text.

        Parameters:
            text (str): A string containing Markdown citations.

        Returns:
            List[str]: The cited URLs, in order of appearance (duplicates kept).
        """
        return [match.group(2) for match in cls._CITATION_PATTERN.finditer(text)]

    def reformat_response(self, response: str) -> str:
        """
        Replace redirect URLs inside Markdown citations with their final URLs.

        Each distinct URL is resolved once, and the text is rewritten in a
        single pass, so a URL that happens to be a prefix of another cited
        URL cannot corrupt it. Only URLs inside ``[n](url)`` citations are
        rewritten.

        Parameters:
            response (str): Text that may contain ``[n](url)`` citations.

        Returns:
            str: The text with every cited URL replaced by the result of
            `get_real_url`.
        """
        resolved = {}  # cited URL -> final URL, so duplicates cost one request

        def _swap(match):
            prefix, url = match.group(1), match.group(2)
            if url not in resolved:
                resolved[url] = self.get_real_url(url)
            return f"{prefix}{resolved[url]})"

        # A callable replacement is used so that backslashes in a resolved
        # URL are never interpreted as regex escapes.
        return self._CITATION_PATTERN.sub(_swap, response)

    @staticmethod
    def add_citations(response):
        """
        Insert Markdown citations into the response text using grounding metadata.

        For every grounding support, a string such as ``[1](link1), [2](link2)``
        is inserted right after the text segment it supports.

        Parameters:
            response: A generate_content response exposing `.text` and
                `.candidates[0].grounding_metadata`.

        Returns:
            str: The response text with citations inserted. The text is
            returned unchanged when the response carries no grounding
            supports or chunks (e.g. the model answered without searching).
        """
        text = response.text
        if not text:
            return text

        candidates = getattr(response, "candidates", None) or []
        if not candidates:
            return text
        metadata = getattr(candidates[0], "grounding_metadata", None)
        supports = getattr(metadata, "grounding_supports", None) or []
        chunks = getattr(metadata, "grounding_chunks", None) or []
        if not supports or not chunks:
            return text

        # The API documents segment indices as BYTE offsets, so the insertion
        # is done on the UTF-8 encoding; slicing the str directly misplaces
        # citations as soon as the text contains non-ASCII characters.
        # NOTE(review): offsets are relative to a single Part; this assumes
        # response.text corresponds to one text part - confirm for multi-part
        # responses.
        raw = text.encode("utf-8")

        insertions = []
        for support in supports:
            end_index = getattr(getattr(support, "segment", None), "end_index", None)
            chunk_indices = getattr(support, "grounding_chunk_indices", None)
            if end_index is None or not chunk_indices:
                continue

            # Create citation string like [1](link1), [2](link2)
            citation_links = []
            for i in chunk_indices:
                if 0 <= i < len(chunks):
                    uri = getattr(getattr(chunks[i], "web", None), "uri", None)
                    if uri:
                        citation_links.append(f"[{i + 1}]({uri})")
            if not citation_links:
                continue

            offset = max(0, min(end_index, len(raw)))
            # Never split a multi-byte character: advance past UTF-8
            # continuation bytes (0b10xxxxxx) to the next character boundary.
            while offset < len(raw) and (raw[offset] & 0xC0) == 0x80:
                offset += 1
            insertions.append((offset, ", ".join(citation_links).encode("utf-8")))

        # Insert from the end of the text backwards so earlier offsets are
        # not shifted by later insertions.
        insertions.sort(key=lambda item: item[0], reverse=True)
        for offset, citation in insertions:
            raw = raw[:offset] + citation + raw[offset:]
        return raw.decode("utf-8")

    def _execute_search(self, query: str, add_citations_flag: bool):
        """
        Run one grounded search, retrying on API errors.

        See https://ai.google.dev/gemini-api/docs/google-search

        Parameters:
            query (str): The search query.
            add_citations_flag (bool): Whether to insert citations into the answer.

        Returns:
            str: The (optionally cited) answer text, or a human-readable
            failure message if no valid response could be obtained.
        """
        # Define the grounding tool
        grounding_tool = types.Tool(
            google_search=types.GoogleSearch()
        )
        # Configure generation settings
        config = types.GenerateContentConfig(
            tools=[grounding_tool]
        )

        response = None
        response_text = None
        for attempt in range(self.max_retries):
            try:
                response = self.client.models.generate_content(
                    model=self.search_model,
                    contents=query,
                    config=config,
                )
                response_text = response.text
                # The API call was successful, so leave the retry loop.
                break
            except Exception as e:
                if attempt == self.max_retries - 1:  # Last attempt: give up
                    print(f"Google Search failed after {self.max_retries} attempts. Last error: {str(e)}")
                    return f"Google Search tried {self.max_retries} times but failed. Last error: {str(e)}"
                print(f"Google Search attempt {attempt + 1} failed: {str(e)}. Retrying...")

        # Check if we have a valid response before proceeding
        if response is None or response_text is None:
            return "Google Search failed to get a valid response"

        # Citations and URL resolution are best effort: on failure the text
        # obtained so far is still returned, but the failure is reported
        # instead of being swallowed silently.
        if add_citations_flag:
            try:
                response_text = self.add_citations(response)
            except Exception as e:
                print(f"Google Search could not add citations: {str(e)}")

        try:
            response_text = self.reformat_response(response_text)
        except Exception as e:
            print(f"Google Search could not reformat the response: {str(e)}")

        return response_text

    def execute(self, query: str, add_citations: bool = True):
        """
        Execute the Google search tool.

        Parameters:
            query (str): The search query to find information on the web.
            add_citations (bool): Whether to add citations to the results. Default is True.

        Returns:
            str: The search results of the query.
        """
        return self._execute_search(query, add_citations)

    def get_metadata(self):
        """
        Returns the metadata for the Google_Search tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        return super().get_metadata()
if __name__ == "__main__":
    # Manual smoke test:
    #   cd agentflow/tools/google_search
    #   python tool.py

    def print_json(result):
        """Pretty-print `result` as JSON with a 4-space indent."""
        print(json.dumps(result, indent=4))

    search_tool = Google_Search_Tool()

    # Show the metadata the tool registers with the framework.
    print("Tool Metadata:")
    print_json(search_tool.get_metadata())

    # Keyword arguments for each demo call to `execute`.
    demo_calls = [
        {'query': 'What is the capital of France?', 'add_citations': True},
        {'query': 'Who won the euro 2024?', 'add_citations': False},
        {'query': 'Physics and Society article arXiv August 11, 2016', 'add_citations': True},
    ]

    for call_kwargs in demo_calls:
        print(f"\nExecuting search: {call_kwargs['query']}")
        try:
            answer = search_tool.execute(**call_kwargs)
        except Exception as exc:
            print(f"Error: {str(exc)}")
        else:
            print("Search Result:")
            print(answer)
        print("-" * 50)

    print("Done!")