File size: 10,322 Bytes
d12a6df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import json
import logging
import os
import re
from typing import List

import requests
from dotenv import load_dotenv

# NOTE: load_dotenv() stays ahead of the remaining imports, preserving the original load order.
load_dotenv()

from google import genai
from google.genai import types

from agentflow.tools.base import BaseTool

# Tool name mapping - this defines the external name for this tool.
# Passed to BaseTool as `tool_name` by Google_Search_Tool.__init__.
TOOL_NAME = "Ground_Google_Search_Tool"

# Free-text guidance exposed through the tool's `user_metadata["limitations"]`.
# NOTE(review): this text is emitted at runtime as part of the tool metadata; it
# contains a typo ("This tools is") that should be fixed deliberately, since
# changing the text changes what consumers of the metadata see.
LIMITATIONS = """
1. This tool is only suitable for general information search.
2. This tool contains less domain specific information.
3. This tools is not suitable for searching and analyzing videos at YouTube or other video platforms.
"""

# Free-text guidance exposed through the tool's `user_metadata["best_practices"]`.
# NOTE(review): item 2 repeats the same example question twice and item 4 has
# typos ("suiable", "defination"); same caveat as above applies before editing.
BEST_PRACTICES = """
1. Choose this tool when you want to search general information about a topic.
2. Choose this tool for question type of query, such as "What is the capital of France?" or "What is the capital of France?"
3. The tool will return a summarized information.
4. This tool is more suiable for defination, world knowledge, and general information search.
"""

class Google_Search_Tool(BaseTool):
    """Web search tool that queries a Gemini model with Google Search grounding.

    The tool sends the query to ``client.models.generate_content`` with the
    ``google_search`` grounding tool enabled, optionally inserts Markdown
    citations (``[n](url)``) taken from the grounding metadata, and finally
    replaces each cited URL with the URL it redirects to.
    """

    # Diagnostics for best-effort steps go here (DEBUG level, silent by default).
    _logger = logging.getLogger(__name__)

    # Matches Markdown citations of the form [number](url); group 1 is the URL.
    _CITATION_URL_PATTERN = re.compile(r'\[\d+\]\((https?://[^\s)]+)\)')

    # Browser-like User-Agent sent when resolving redirect URLs.
    _BROWSER_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'

    # Seconds to wait for a redirect target before giving up on it.
    _REDIRECT_TIMEOUT_SECONDS = 8

    def __init__(self, model_string="gemini-2.5-flash"):
        """Register the tool metadata and create the Gemini client.

        Args:
            model_string: Name of the Gemini model used for the search call.

        Raises:
            ValueError: If the ``GOOGLE_API_KEY`` environment variable is
                unset or empty.
        """
        super().__init__(
            tool_name=TOOL_NAME,
            tool_description="A web search tool powered by Google's Gemini AI that provides real-time information from the internet with citation support.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query to find information on the web.",
                "add_citations": "bool - Whether to add citations to the results. If True, the results will be formatted with citations. By default, it is True.",
            },
            output_type="str - The search results of the query.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="What is the capital of France?")',
                    "description": "Search for general information about the capital of France with default citations enabled."
                },
                {
                    "command": 'execution = tool.execute(query="Who won the euro 2024?", add_citations=False)',
                    "description": "Search for information about Euro 2024 winner without citations."
                },
                {
                    "command": 'execution = tool.execute(query="Physics and Society article arXiv August 11, 2016", add_citations=True)',
                    "description": "Search for specific academic articles with citations enabled."
                }
            ],
            user_metadata={
                "limitations": LIMITATIONS,
                "best_practices": BEST_PRACTICES,
            }
        )
        self.max_retries = 5
        self.search_model = model_string

        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("Google API key not found. Please set the GOOGLE_API_KEY environment variable.")

        self.client = genai.Client(api_key=api_key)

    @staticmethod
    def get_real_url(url):
        """Return the final URL reached after following redirects from ``url``.

        The lookup is best-effort: on any failure the original ``url`` is
        returned unchanged, so this method never raises.

        Args:
            url: The (possibly redirecting) URL to resolve.

        Returns:
            The URL of the final response, or ``url`` itself if the request
            failed.
        """
        headers = {'User-Agent': Google_Search_Tool._BROWSER_USER_AGENT}
        try:
            # stream=True defers the body download: only the final URL is
            # needed, so the page content is never fetched. The context
            # manager releases the connection afterwards.
            with requests.get(
                url,
                headers=headers,
                timeout=Google_Search_Tool._REDIRECT_TIMEOUT_SECONDS,
                allow_redirects=True,
                stream=True,
            ) as response:
                # After all redirects, response.url contains the final URL.
                return response.url
        except Exception:
            # Deliberately broad: callers rely on this helper never raising,
            # and an unresolved citation URL is still a usable link.
            Google_Search_Tool._logger.debug(
                "Could not resolve redirect for %s", url, exc_info=True
            )
            return url

    @staticmethod
    def extract_urls(text: str) -> List[str]:
        """Extract all URLs from Markdown-style citations ``[number](url)``.

        Args:
            text: A string containing Markdown citations.

        Returns:
            A list of URL strings, in order of appearance (duplicates kept).
        """
        return Google_Search_Tool._CITATION_URL_PATTERN.findall(text)

    def reformat_response(self, response: str) -> str:
        """Replace every cited URL in ``response`` with its redirect target.

        Each distinct URL is resolved once, and only the URL inside a
        ``[number](url)`` citation is rewritten. A citation whose URL is a
        prefix of another cited URL therefore cannot corrupt the longer one.

        Args:
            response: Text that may contain Markdown citations.

        Returns:
            The text with citation URLs replaced by their resolved targets.
        """
        resolved = {}

        def _swap_url(match):
            cited_url = match.group(1)
            if cited_url not in resolved:
                resolved[cited_url] = self.get_real_url(cited_url)
            citation = match.group(0)
            # Offsets of the URL group relative to the start of the citation.
            url_start = match.start(1) - match.start(0)
            url_end = match.end(1) - match.start(0)
            return citation[:url_start] + resolved[cited_url] + citation[url_end:]

        return self._CITATION_URL_PATTERN.sub(_swap_url, response)

    @staticmethod
    def add_citations(response):
        """Insert Markdown citations into the response text.

        For every grounding support, the citations of its grounding chunks
        (``[n](uri)``, joined by ``", "``) are inserted at the support
        segment's ``end_index``. Missing metadata, segments, or chunk URIs are
        skipped, so a partially populated response still gets the citations
        that are available.

        Args:
            response: The object returned by ``generate_content``; this method
                reads ``response.text`` and
                ``response.candidates[0].grounding_metadata``.

        Returns:
            The response text with citations inserted, or the unchanged text
            when there is nothing to cite.
        """
        text = response.text
        if not text:
            return text

        candidates = getattr(response, "candidates", None) or []
        metadata = getattr(candidates[0], "grounding_metadata", None) if candidates else None
        if metadata is None:
            return text

        supports = getattr(metadata, "grounding_supports", None) or []
        chunks = getattr(metadata, "grounding_chunks", None) or []

        # Only supports that carry an insertion point can be cited.
        anchored = [
            support for support in supports
            if getattr(support, "segment", None) is not None
            and support.segment.end_index is not None
        ]
        # Insert from the end of the text backwards so earlier offsets stay valid.
        anchored.sort(key=lambda support: support.segment.end_index, reverse=True)

        for support in anchored:
            # Build a citation string like "[1](link1), [2](link2)".
            citation_links = []
            for i in support.grounding_chunk_indices or []:
                if 0 <= i < len(chunks):
                    web = getattr(chunks[i], "web", None)
                    uri = getattr(web, "uri", None)
                    if uri:
                        citation_links.append(f"[{i + 1}]({uri})")

            if citation_links:
                # NOTE(review): the Gemini API reference describes segment
                # indices as byte offsets, while this slices by character;
                # confirm placement for non-ASCII text.
                end_index = support.segment.end_index
                text = text[:end_index] + ", ".join(citation_links) + text[end_index:]

        return text

    def _execute_search(self, query: str, add_citations_flag: bool) -> str:
        """Run the grounded search and post-process the answer.

        See https://ai.google.dev/gemini-api/docs/google-search

        The API call is attempted up to ``self.max_retries`` times. Adding
        citations and resolving citation URLs are best-effort: if either step
        fails, the text produced so far is returned.

        Args:
            query: The search query sent to the model.
            add_citations_flag: Whether to insert citations from the grounding
                metadata into the answer.

        Returns:
            The answer text, or an error message string if no valid response
            could be obtained.
        """
        # Enable Google Search grounding for this request.
        grounding_tool = types.Tool(google_search=types.GoogleSearch())
        config = types.GenerateContentConfig(tools=[grounding_tool])

        response = None
        response_text = None

        for attempt in range(1, self.max_retries + 1):
            try:
                response = self.client.models.generate_content(
                    model=self.search_model,
                    contents=query,
                    config=config,
                )
                response_text = response.text
                break
            except Exception as e:
                if attempt == self.max_retries:
                    print(f"Google Search failed after {self.max_retries} attempts. Last error: {str(e)}")
                    return f"Google Search tried {self.max_retries} times but failed. Last error: {str(e)}"
                print(f"Google Search attempt {attempt} failed: {str(e)}. Retrying...")

        # The call can succeed without producing any text.
        if response is None or response_text is None:
            return "Google Search failed to get a valid response"

        if add_citations_flag:
            try:
                response_text = self.add_citations(response)
            except Exception:
                # Best-effort: keep the uncited text if citation insertion fails.
                self._logger.debug("Error adding citations", exc_info=True)

        try:
            response_text = self.reformat_response(response_text)
        except Exception:
            # Best-effort: keep the current text if URL resolution fails.
            self._logger.debug("Error reformatting response", exc_info=True)

        return response_text

    def execute(self, query: str, add_citations: bool = True) -> str:
        """
        Execute the Google search tool.

        Parameters:
            query (str): The search query to find information on the web.
            add_citations (bool): Whether to add citations to the results. Default is True.

        Returns:
            str: The search results of the query.
        """
        return self._execute_search(query, add_citations)

    def get_metadata(self):
        """
        Returns the metadata for the Google_Search tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        return super().get_metadata()


if __name__ == "__main__":
    # Manual smoke test. Run it from the tool's directory:
    #   cd agentflow/tools/google_search
    #   python tool.py
    def print_json(result):
        """Pretty-print ``result`` as JSON with a 4-space indent."""
        import json
        print(json.dumps(result, indent=4))

    tool = Google_Search_Tool()

    # Show the registered tool metadata first.
    print("Tool Metadata:")
    print_json(tool.get_metadata())

    # (query, add_citations) pairs mirroring the tool's demo commands.
    demo_queries = (
        ("What is the capital of France?", True),
        ("Who won the euro 2024?", False),
        ("Physics and Society article arXiv August 11, 2016", True),
    )

    for query_text, with_citations in demo_queries:
        print(f"\nExecuting search: {query_text}")
        try:
            answer = tool.execute(query=query_text, add_citations=with_citations)
            print("Search Result:")
            print(answer)
        except Exception as err:
            print(f"Error: {str(err)}")
        print("-" * 50)

    print("Done!")