agentflow2

Sleeping

agentflow2 / agentflow /tools /google_search /tool.py

IsaacGHX

update

d12a6df 2 months ago

10.3 kB

	import os
	import json
	import requests
	from dotenv import load_dotenv
	load_dotenv()

	from google import genai
	from google.genai import types

	from agentflow.tools.base import BaseTool

	# For formatting the response
	import requests
	from typing import List
	import re

	# Tool name mapping - this defines the external name for this tool.
	# It is passed as `tool_name` to the base-class constructor in Google_Search_Tool.__init__.
	TOOL_NAME = "Ground_Google_Search_Tool"

	# Free-text caveats about the tool; forwarded verbatim as user_metadata["limitations"].
	# NOTE(review): this is runtime text, so it is left untouched here, but item 3
	# contains a typo ("This tools is") that may be worth fixing deliberately.
	LIMITATIONS = """
	1. This tool is only suitable for general information search.
	2. This tool contains less domain specific information.
	3. This tools is not suitable for searching and analyzing videos at YouTube or other video platforms.
	"""

	# Usage guidance for the tool; forwarded verbatim as user_metadata["best_practices"].
	# NOTE(review): runtime text left untouched, but item 2 repeats the same example
	# twice and item 4 contains typos ("suiable", "defination").
	BEST_PRACTICES = """
	1. Choose this tool when you want to search general information about a topic.
	2. Choose this tool for question type of query, such as "What is the capital of France?" or "What is the capital of France?"
	3. The tool will return a summarized information.
	4. This tool is more suiable for defination, world knowledge, and general information search.
	"""

	class Google_Search_Tool(BaseTool):
	    """Web search tool that queries a Gemini model with Google Search grounding.

	    The tool sends the query to the configured Gemini model with the
	    ``google_search`` grounding tool enabled, optionally inserts Markdown
	    citations (``[n](url)``) built from the grounding metadata, and finally
	    tries to replace the cited redirect URLs with the URLs they resolve to.

	    Attributes:
	        max_retries: Number of attempts made for the model call.
	        search_model: Name of the Gemini model used for the search.
	        client: ``genai.Client`` created from ``GOOGLE_API_KEY``.
	    """

	    # Matches Markdown citations of the form ``[<number>](<http(s) URL>)``.
	    # Group 1 captures the URL. Compiled once instead of on every call.
	    _CITATION_PATTERN = re.compile(r'\[\d+\]\((https?://[^\s)]+)\)')

	    def __init__(self, model_string="gemini-2.5-flash"):
	        """Register the tool metadata and create the Gemini client.

	        Args:
	            model_string: Name of the Gemini model to query.

	        Raises:
	            ValueError: If the ``GOOGLE_API_KEY`` environment variable is
	                unset or empty. ``ValueError`` is a subclass of ``Exception``,
	                so callers catching ``Exception`` keep working.
	        """
	        super().__init__(
	            tool_name=TOOL_NAME,
	            tool_description="A web search tool powered by Google's Gemini AI that provides real-time information from the internet with citation support.",
	            tool_version="1.0.0",
	            input_types={
	                "query": "str - The search query to find information on the web.",
	                "add_citations": "bool - Whether to add citations to the results. If True, the results will be formatted with citations. By default, it is True.",
	            },
	            output_type="str - The search results of the query.",
	            demo_commands=[
	                {
	                    "command": 'execution = tool.execute(query="What is the capital of France?")',
	                    "description": "Search for general information about the capital of France with default citations enabled."
	                },
	                {
	                    "command": 'execution = tool.execute(query="Who won the euro 2024?", add_citations=False)',
	                    "description": "Search for information about Euro 2024 winner without citations."
	                },
	                {
	                    "command": 'execution = tool.execute(query="Physics and Society article arXiv August 11, 2016", add_citations=True)',
	                    "description": "Search for specific academic articles with citations enabled."
	                }
	            ],
	            user_metadata={
	                "limitations": LIMITATIONS,
	                "best_practices": BEST_PRACTICES,
	            }
	        )
	        self.max_retries = 5
	        self.search_model = model_string

	        # Fail fast with one clear error instead of raising, catching and
	        # re-raising the same message.
	        api_key = os.getenv("GOOGLE_API_KEY")
	        if not api_key:
	            raise ValueError("Google API key not found. Please set the GOOGLE_API_KEY environment variable.")

	        self.client = genai.Client(api_key=api_key)

	    @staticmethod
	    def get_real_url(url):
	        """Resolve a (possibly redirecting) URL to the final URL it lands on.

	        The request uses a browser-like User-Agent, an 8 second timeout and
	        follows redirects. The body is not downloaded (``stream=True``) because
	        only the final URL is needed, and the response is closed on exit.

	        Args:
	            url: The URL to resolve.

	        Returns:
	            The URL after all redirects, or ``url`` unchanged if the request
	            fails.
	        """
	        # Headers to mimic a real browser visit.
	        headers = {
	            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
	        }
	        try:
	            with requests.get(
	                url,
	                headers=headers,
	                timeout=8,
	                allow_redirects=True,  # the default, stated for clarity
	                stream=True,  # do not fetch the body; only the final URL matters
	            ) as response:
	                # After all redirects, response.url contains the final URL.
	                return response.url
	        except (requests.RequestException, ValueError):
	            # Best effort: network failures and malformed URLs fall back to
	            # the unresolved URL.
	            return url

	    @staticmethod
	    def extract_urls(text: str) -> List[str]:
	        """Extract all URLs from Markdown-style citations ``[number](url)``.

	        Args:
	            text: A string containing Markdown citations.

	        Returns:
	            The cited URLs in order of appearance (duplicates included).
	        """
	        return Google_Search_Tool._CITATION_PATTERN.findall(text)

	    def reformat_response(self, response: str) -> str:
	        """Replace the URL of every ``[number](url)`` citation with its resolved URL.

	        Each distinct URL is resolved only once, even when it is cited many
	        times. Only the URL inside a citation is rewritten, so a URL that is a
	        prefix of another cited URL cannot corrupt it.

	        Args:
	            response: Text that may contain Markdown citations.

	        Returns:
	            The text with citation URLs replaced by ``get_real_url`` results.
	        """
	        resolved = {}

	        def _swap(match):
	            redirect_url = match.group(1)
	            if redirect_url not in resolved:
	                resolved[redirect_url] = self.get_real_url(redirect_url)
	            # Keep the "[n](" prefix of the citation and rebuild the rest.
	            prefix = match.group(0)[: match.start(1) - match.start(0)]
	            return f"{prefix}{resolved[redirect_url]})"

	        return self._CITATION_PATTERN.sub(_swap, response)

	    @staticmethod
	    def add_citations(response):
	        """Insert Markdown citations into the response text.

	        For every grounding support, links of the form ``[n](uri)`` (``n`` is
	        the 1-based chunk index) are joined with ``", "`` and inserted at the
	        support segment's ``end_index``.

	        Args:
	            response: A model response exposing ``text`` and
	                ``candidates[0].grounding_metadata``.

	        Returns:
	            The text with citations inserted, or the unmodified text when the
	            response carries no usable grounding metadata.
	        """
	        text = response.text
	        candidates = getattr(response, "candidates", None) or []
	        metadata = getattr(candidates[0], "grounding_metadata", None) if candidates else None
	        supports = getattr(metadata, "grounding_supports", None) or []
	        chunks = getattr(metadata, "grounding_chunks", None) or []
	        if not text or not supports or not chunks:
	            return text

	        # Supports without a segment end cannot be anchored in the text.
	        anchored = [
	            support for support in supports
	            if support.segment is not None and support.segment.end_index is not None
	        ]

	        # Process from the end of the text backwards so earlier insert
	        # positions are not shifted by later insertions.
	        # NOTE(review): the Gemini API reference appears to define segment
	        # indices as byte offsets; for non-ASCII text, slicing the str by
	        # these values may misplace citations - confirm.
	        for support in sorted(anchored, key=lambda s: s.segment.end_index, reverse=True):
	            citation_links = []
	            for i in support.grounding_chunk_indices or []:
	                if 0 <= i < len(chunks):
	                    web = chunks[i].web
	                    # Skip chunks without a web source instead of failing.
	                    if web is not None and web.uri:
	                        citation_links.append(f"[{i + 1}]({web.uri})")

	            if not citation_links:
	                continue

	            # Citation string looks like "[1](link1), [2](link2)".
	            end_index = support.segment.end_index
	            text = text[:end_index] + ", ".join(citation_links) + text[end_index:]

	        return text

	    def _execute_search(self, query: str, add_citations_flag: bool):
	        """Run the grounded search and post-process the answer.

	        See https://ai.google.dev/gemini-api/docs/google-search

	        Args:
	            query: The search query sent to the model.
	            add_citations_flag: Whether to insert citations into the answer.

	        Returns:
	            The answer text, or an error message string if every attempt
	            failed or no text was returned.
	        """
	        # Define the grounding tool and the generation settings.
	        grounding_tool = types.Tool(google_search=types.GoogleSearch())
	        config = types.GenerateContentConfig(tools=[grounding_tool])

	        response = None
	        response_text = None

	        for attempt in range(self.max_retries):
	            try:
	                response = self.client.models.generate_content(
	                    model=self.search_model,
	                    contents=query,
	                    config=config,
	                )
	                response_text = response.text
	                # The API call succeeded, so leave the retry loop.
	                break
	            except Exception as e:
	                if attempt == self.max_retries - 1:  # Last attempt
	                    print(f"Google Search failed after {self.max_retries} attempts. Last error: {str(e)}")
	                    return f"Google Search tried {self.max_retries} times but failed. Last error: {str(e)}"
	                # Only announce a retry when one will actually happen.
	                print(f"Google Search attempt {attempt + 1} failed: {str(e)}. Retrying...")

	        # Check that we have a valid response before proceeding.
	        if response is None or response_text is None:
	            return "Google Search failed to get a valid response"

	        # Citations are best effort: on failure keep the plain answer.
	        if add_citations_flag:
	            try:
	                response_text = self.add_citations(response)
	            except Exception:
	                pass

	        # URL resolution is best effort too: on failure keep the current text.
	        try:
	            response_text = self.reformat_response(response_text)
	        except Exception:
	            pass

	        return response_text

	    def execute(self, query: str, add_citations: bool = True):
	        """Execute the Google search tool.

	        Parameters:
	            query (str): The search query to find information on the web.
	            add_citations (bool): Whether to add citations to the results. Default is True.

	        Returns:
	            str: The search results of the query.
	        """
	        return self._execute_search(query, add_citations)

	    def get_metadata(self):
	        """Return the metadata for the Google_Search tool.

	        Returns:
	            dict: A dictionary containing the tool's metadata.
	        """
	        metadata = super().get_metadata()
	        return metadata


	if __name__ == "__main__":
	    # Manual smoke test:
	    #     cd agentflow/tools/google_search
	    #     python tool.py

	    def print_json(result):
	        """Pretty-print a JSON-serialisable object with 4-space indentation."""
	        import json
	        print(json.dumps(result, indent=4))

	    google_search = Google_Search_Tool()

	    # Show the metadata the tool advertises.
	    tool_metadata = google_search.get_metadata()
	    print("Tool Metadata:")
	    print_json(tool_metadata)

	    # (query, add_citations) pairs to run through the tool.
	    demo_inputs = (
	        ('What is the capital of France?', True),
	        ('Who won the euro 2024?', False),
	        ('Physics and Society article arXiv August 11, 2016', True),
	    )
	    examples = [
	        {'query': question, 'add_citations': with_citations}
	        for question, with_citations in demo_inputs
	    ]

	    separator = "-" * 50
	    for call_kwargs in examples:
	        print(f"\nExecuting search: {call_kwargs['query']}")
	        try:
	            answer = google_search.execute(**call_kwargs)
	            print("Search Result:")
	            print(answer)
	        except Exception as err:
	            print(f"Error: {str(err)}")
	        print(separator)

	    print("Done!")