| import requests | |
| from bs4 import BeautifulSoup | |
class Tool:
    """Base class for tools.

    Subclasses describe themselves via the ``name``, ``description``,
    ``inputs`` and ``output_type`` class attributes and implement
    :meth:`forward`.
    """

    def __init__(self):
        # Fall back to empty defaults ONLY when the subclass has not
        # already declared the attribute (e.g. ``name = "web_search"``).
        # Previously these were unconditionally reset to None, which
        # clobbered subclass class-attribute metadata as soon as the
        # subclass called ``super().__init__()``.
        self.name = getattr(self, "name", None)
        self.description = getattr(self, "description", None)
        self.inputs = getattr(self, "inputs", {})
        self.output_type = getattr(self, "output_type", None)

    def forward(self, *args, **kwargs):
        """Execute the tool. Subclasses must override this.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError("Subclasses must implement this method.")
class SearchInformationTool(Tool):
    """Tool that runs a web search through the attached browser."""

    name = "web_search"
    description = "Perform a web search query and return the search results."
    # Declared as a single literal; the optional parameter is marked
    # nullable so callers may omit it.
    inputs = {
        "query": {
            "type": "string",
            "description": "The web search query to perform.",
        },
        "filter_year": {
            "type": "string",
            "description": "[Optional parameter]: filter the search results to only include pages from a specific year.",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self, browser):
        super().__init__()
        self.browser = browser

    def forward(self, query: str, filter_year: int | None = None) -> str:
        """Delegate the search to the browser.

        The same year is passed as both the start and the end bound of
        the browser's search window, i.e. results are restricted to that
        single year (or unrestricted when ``filter_year`` is None).
        """
        return self.browser.search_web(query, filter_year, filter_year)
class VisitTool(Tool):
    """Tool that visits a webpage and returns its textual content."""

    name = "visit_page"
    description = "Visit a webpage at a given URL and return its text."
    inputs = {"url": {"type": "string", "description": "The relative or absolute URL of the webpage to visit."}}
    output_type = "string"

    def __init__(self, browser=None):
        super().__init__()
        self.browser = browser

    def forward(self, url: str) -> str:
        """Visit *url* and return ``header\\n=======\\ncontent`` for the page.

        Raises:
            ValueError: if no browser instance was supplied at
                construction time.
        """
        # The constructor accepts browser=None for backward compatibility;
        # fail with a clear message instead of an opaque AttributeError
        # on NoneType.
        if self.browser is None:
            raise ValueError("VisitTool requires a browser instance; none was provided.")
        self.browser.visit_page(url)
        header, content = self.browser._state()
        return header.strip() + "\n=======================\n" + content
class Browser:
    """Minimal web browser built on ``requests`` and BeautifulSoup."""

    # Seconds before an unresponsive HTTP request is abandoned; previously
    # there was no timeout, so a hung server blocked the caller forever.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        # Last page fetched by visit_page(), or None before any visit.
        self.current_page = None

    def search_web(self, query, start_year, end_year):
        """Run a Google search for *query* and return formatted result text.

        Returns the joined result snippets, or an ``"An error occurred"``
        string on any request failure (best-effort, never raises).

        NOTE(review): ``start_year``/``end_year`` are currently unused —
        the date window is not forwarded to Google. TODO: wire them into
        the request (e.g. Google's ``tbs=cdr`` parameters) if date
        filtering matters.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
        try:
            # Let requests URL-encode the query (spaces, unicode, '&', ...);
            # the previous f-string interpolation produced broken URLs for
            # such queries.
            response = requests.get(
                "https://www.google.com/search",
                params={"q": query},
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # 'tF2Cxc' is Google's CSS class for organic results — an
            # unstable, undocumented selector that may break without notice.
            results = soup.find_all('div', class_='tF2Cxc')
            search_results = []
            for idx, result in enumerate(results, start=1):
                result_text = result.get_text()
                search_results.append(f"Result {idx}: {result_text}\n")
                # Surface Wikipedia links explicitly since downstream
                # consumers appear to favor them.
                link = result.find('a', href=True)
                if link and 'wikipedia.org' in link['href']:
                    search_results.append(f"Found Wikipedia link: {link['href']}")
            return "\n".join(search_results)
        except requests.exceptions.RequestException as e:
            return f"An error occurred: {e}"

    def visit_page(self, url: str):
        """Fetch *url*, strip non-visible markup, and record the result.

        On success ``current_page`` holds the url, a synthetic header and
        the extracted text; on failure it holds an error header/content
        pair (best-effort, never raises).
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Drop scripts and stylesheets before extracting visible text.
            for script in soup(["script", "style"]):
                script.decompose()
            text = soup.get_text(separator='\n', strip=True)
            self.current_page = {
                "url": url,
                "header": f"Header for {url}",
                "content": text
            }
        # Normalized to the canonical spelling used in search_web()
        # (requests.RequestException is the same class).
        except requests.exceptions.RequestException as e:
            print(f"An error occurred: {e}")
            self.current_page = {
                "url": url,
                "header": "Error",
                "content": f"Failed to retrieve the page: {e}"
            }

    def _state(self):
        """Return ``(header, content)`` of the current page, or ``("", "")``
        when no page has been visited yet."""
        if self.current_page:
            return self.current_page["header"], self.current_page["content"]
        return "", ""