Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import requests | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from google import genai | |
| from google.genai import types | |
| from agentflow.tools.base import BaseTool | |
| # For formatting the response | |
| import requests | |
| from typing import List | |
| import re | |
# Tool name mapping - this defines the external name for this tool
# (it is passed as ``tool_name`` to the BaseTool constructor in
# Google_Search_Tool.__init__).
TOOL_NAME = "Ground_Google_Search_Tool"

# Free-text caveats exposed through the tool's ``user_metadata["limitations"]``.
# NOTE(review): the text contains a typo ("This tools is"); it is a runtime
# string, so it is left untouched here - fix deliberately if desired.
LIMITATIONS = """
1. This tool is only suitable for general information search.
2. This tool contains less domain specific information.
3. This tools is not suitable for searching and analyzing videos at YouTube or other video platforms.
"""

# Free-text usage guidance exposed through ``user_metadata["best_practices"]``.
# NOTE(review): item 2 repeats the same example question twice, and item 4
# contains typos ("suiable", "defination"); left untouched because this is
# runtime text.
BEST_PRACTICES = """
1. Choose this tool when you want to search general information about a topic.
2. Choose this tool for question type of query, such as "What is the capital of France?" or "What is the capital of France?"
3. The tool will return a summarized information.
4. This tool is more suiable for defination, world knowledge, and general information search.
"""
class Google_Search_Tool(BaseTool):
    """Web search tool that queries Gemini with Google Search grounding.

    The tool sends the query to the configured Gemini model with the
    ``google_search`` grounding tool enabled, optionally inserts Markdown
    citations (``[n](url)``) built from the grounding metadata, and finally
    replaces the cited redirect URLs with the URLs they resolve to.
    """

    def __init__(self, model_string="gemini-2.5-flash"):
        """Register the tool metadata and create the Gemini client.

        Args:
            model_string: Name of the Gemini model used for the search.

        Raises:
            Exception: If the ``GOOGLE_API_KEY`` environment variable is
                missing or empty.
        """
        super().__init__(
            tool_name=TOOL_NAME,
            tool_description="A web search tool powered by Google's Gemini AI that provides real-time information from the internet with citation support.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query to find information on the web.",
                "add_citations": "bool - Whether to add citations to the results. If True, the results will be formatted with citations. By default, it is True.",
            },
            output_type="str - The search results of the query.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="What is the capital of France?")',
                    "description": "Search for general information about the capital of France with default citations enabled."
                },
                {
                    "command": 'execution = tool.execute(query="Who won the euro 2024?", add_citations=False)',
                    "description": "Search for information about Euro 2024 winner without citations."
                },
                {
                    "command": 'execution = tool.execute(query="Physics and Society article arXiv August 11, 2016", add_citations=True)',
                    "description": "Search for specific academic articles with citations enabled."
                }
            ],
            user_metadata={
                "limitations": LIMITATIONS,
                "best_practices": BEST_PRACTICES,
            }
        )
        self.max_retries = 5
        self.search_model = model_string

        # Fail fast with a clear message; the previous try/except merely
        # re-raised the very same error it had just raised.
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise Exception("Google API key not found. Please set the GOOGLE_API_KEY environment variable.")
        self.client = genai.Client(api_key=api_key)

    @staticmethod
    def get_real_url(url):
        """Resolve a redirect URL to the final URL it points to.

        Sends a GET request with a browser-like User-Agent and an 8 second
        timeout, follows the redirect chain, and returns the URL of the final
        response. This is best-effort: on any failure the input URL is
        returned unchanged.

        Args:
            url: The (possibly redirecting) URL to resolve.

        Returns:
            The final URL after redirects, or ``url`` itself if the request
            failed.
        """
        # Headers to mimic a real browser visit
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        try:
            # stream=True: only the final URL is needed, so do not download
            # the response body; the context manager releases the connection.
            with requests.get(
                url,
                headers=headers,
                timeout=8,
                allow_redirects=True,
                stream=True,
            ) as response:
                # After all redirects, response.url contains the final URL.
                return response.url
        except Exception:
            # Deliberately broad: resolving is optional, never fatal.
            return url

    @staticmethod
    def extract_urls(text: str) -> List[str]:
        """Extract all URLs from Markdown-style citations ``[number](url)``.

        Args:
            text: A string containing Markdown citations.

        Returns:
            A list of URL strings in order of appearance (duplicates kept).
        """
        pattern = re.compile(r'\[\d+\]\((https?://[^\s)]+)\)')
        return pattern.findall(text)

    def reformat_response(self, response: str) -> str:
        """Replace cited redirect URLs in ``response`` with their real URLs.

        Args:
            response: Text that may contain ``[number](url)`` citations.

        Returns:
            The text with every cited URL replaced by its resolved target.
        """
        # dict.fromkeys de-duplicates while keeping order, so each distinct
        # URL costs exactly one network round-trip.
        for url in dict.fromkeys(self.extract_urls(response)):
            direct_url = self.get_real_url(url)
            if direct_url == url:
                continue
            # Anchor on the citation syntax "](url)" so that a URL which is a
            # prefix of another cited URL cannot corrupt its neighbour.
            response = response.replace(f"]({url})", f"]({direct_url})")
        return response

    @staticmethod
    def add_citations(response):
        """Insert Markdown citation links into the response text.

        For every grounding support, a string such as ``[1](uri), [2](uri)``
        is inserted at the support segment's ``end_index``.

        Args:
            response: The object returned by ``generate_content``; its
                ``text`` and ``candidates[0].grounding_metadata`` are read.

        Returns:
            The response text with citations inserted, or the plain text when
            no grounding information is available.
        """
        text = response.text
        candidates = response.candidates
        if not text or not candidates:
            return text

        metadata = candidates[0].grounding_metadata
        if metadata is None:
            return text
        supports = metadata.grounding_supports or []
        chunks = metadata.grounding_chunks or []

        # Sort supports by end_index in descending order to avoid shifting issues when inserting.
        # NOTE(review): the API reference describes segment indices as byte
        # offsets; slicing a str by them may drift on non-ASCII text - confirm.
        sorted_supports = sorted(supports, key=lambda s: s.segment.end_index, reverse=True)
        for support in sorted_supports:
            # Skip out-of-range indices and chunks without a web source.
            citation_links = [
                f"[{i + 1}]({chunks[i].web.uri})"
                for i in (support.grounding_chunk_indices or [])
                if i < len(chunks) and chunks[i].web is not None
            ]
            if not citation_links:
                continue
            end_index = support.segment.end_index
            text = text[:end_index] + ", ".join(citation_links) + text[end_index:]
        return text

    def _execute_search(self, query: str, add_citations_flag: bool):
        """Run the grounded search with retries and post-process the answer.

        See https://ai.google.dev/gemini-api/docs/google-search

        Args:
            query: The search query sent to the model.
            add_citations_flag: Whether to insert citations into the text.

        Returns:
            The (optionally cited and URL-resolved) response text, or an
            error message string if every attempt failed or the response
            carried no text.
        """
        # Define the grounding tool
        grounding_tool = types.Tool(
            google_search=types.GoogleSearch()
        )
        # Configure generation settings
        config = types.GenerateContentConfig(
            tools=[grounding_tool]
        )

        response = None
        response_text = None
        for attempt in range(self.max_retries):
            try:
                response = self.client.models.generate_content(
                    model=self.search_model,
                    contents=query,
                    config=config,
                )
                response_text = response.text
                # The API call was successful, so leave the retry loop.
                break
            except Exception as e:
                if attempt == self.max_retries - 1:  # Last attempt
                    print(f"Google Search failed after {self.max_retries} attempts. Last error: {str(e)}")
                    return f"Google Search tried {self.max_retries} times but failed. Last error: {str(e)}"
                print(f"Google Search attempt {attempt + 1} failed: {str(e)}. Retrying...")

        # Check if we have a valid response before proceeding
        if response is None or response_text is None:
            return "Google Search failed to get a valid response"

        # Add citations if needed; on failure keep the plain text.
        if add_citations_flag:
            try:
                response_text = self.add_citations(response)
            except Exception as e:
                print(f"Error adding citations: {str(e)}")

        # Resolve redirect URLs; on failure keep the current text.
        try:
            response_text = self.reformat_response(response_text)
        except Exception as e:
            print(f"Error reformatting response: {str(e)}")

        return response_text

    def execute(self, query: str, add_citations: bool = True):
        """Execute the Google search tool.

        Parameters:
            query (str): The search query to find information on the web.
            add_citations (bool): Whether to add citations to the results. Default is True.

        Returns:
            str: The search results of the query.
        """
        return self._execute_search(query, add_citations)

    def get_metadata(self):
        """Return the metadata for the Google_Search tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        return super().get_metadata()
if __name__ == "__main__":
    # Manual smoke test:
    #   cd agentflow/tools/google_search
    #   python tool.py

    def print_json(payload):
        """Pretty-print a JSON-serialisable object with 4-space indentation."""
        import json
        rendered = json.dumps(payload, indent=4)
        print(rendered)

    google_search = Google_Search_Tool()

    # Show what the tool advertises about itself.
    metadata = google_search.get_metadata()
    print("Tool Metadata:")
    print_json(metadata)

    # Keyword arguments for each demo call to execute().
    examples = [
        {'query': 'What is the capital of France?', 'add_citations': True},
        {'query': 'Who won the euro 2024?', 'add_citations': False},
        {'query': 'Physics and Society article arXiv August 11, 2016', 'add_citations': True},
    ]

    separator = "-" * 50
    for call_kwargs in examples:
        print(f"\nExecuting search: {call_kwargs['query']}")
        try:
            answer = google_search.execute(**call_kwargs)
            print("Search Result:")
            print(answer)
        except Exception as err:
            print(f"Error: {str(err)}")
        print(separator)

    print("Done!")