| import requests |
| from bs4 import BeautifulSoup |
|
|
class Tool:
    """Base class for agent-facing tools.

    Subclasses describe themselves through four attributes -- ``name``,
    ``description``, ``inputs`` and ``output_type`` -- and implement
    ``forward`` with the tool's actual logic.
    """

    def __init__(self):
        # Subclasses declare name/description/inputs/output_type as CLASS
        # attributes.  The original code unconditionally assigned None / {}
        # here, which created instance attributes that shadowed the subclass
        # metadata (e.g. tool.name became None instead of "web_search").
        # getattr() keeps an existing class-level value and only falls back
        # to the default when the subclass declared nothing.
        self.name = getattr(self, "name", None)
        self.description = getattr(self, "description", None)
        self.inputs = getattr(self, "inputs", {})
        self.output_type = getattr(self, "output_type", None)

    def forward(self, *args, **kwargs):
        """Execute the tool; subclasses must override.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError("Subclasses must implement this method.")
|
|
class SearchInformationTool(Tool):
    """Tool wrapper that delegates web searches to a Browser instance."""

    name = "web_search"
    description = "Perform a web search query and return the search results."
    inputs = {
        "query": {
            "type": "string",
            "description": "The web search query to perform.",
        },
        "filter_year": {
            "type": "string",
            "description": "[Optional parameter]: filter the search results to only include pages from a specific year.",
            "nullable": True,
        },
    }
    output_type = "string"

    def __init__(self, browser):
        super().__init__()
        self.browser = browser

    def forward(self, query: str, filter_year: int | None = None) -> str:
        """Search the web for *query*, optionally restricted to one year."""
        # A single-year filter maps to identical lower and upper year bounds.
        results = self.browser.search_web(query, filter_year, filter_year)
        return results
|
|
class VisitTool(Tool):
    """Tool that loads a webpage through a Browser and returns its text."""

    name = "visit_page"
    description = "Visit a webpage at a given URL and return its text."
    inputs = {"url": {"type": "string", "description": "The relative or absolute URL of the webpage to visit."}}
    output_type = "string"

    def __init__(self, browser=None):
        super().__init__()
        self.browser = browser

    def forward(self, url: str) -> str:
        """Visit *url* and return its header followed by the page text.

        Returns:
            The page header, a separator line, then the page content.

        Raises:
            ValueError: if the tool was constructed without a browser.
        """
        # The constructor permits browser=None; fail with a clear message here
        # instead of an opaque AttributeError on the call below.
        if self.browser is None:
            raise ValueError("VisitTool requires a browser instance to visit pages.")
        self.browser.visit_page(url)
        header, content = self.browser._state()
        return header.strip() + "\n=======================\n" + content
|
|
class Browser:
    """Minimal scraping browser: Google search plus plain page fetching."""

    # Hard ceiling so network calls can never hang indefinitely
    # (the original requests.get calls had no timeout at all).
    REQUEST_TIMEOUT = 30  # seconds

    def __init__(self):
        # Snapshot of the most recently visited page as a dict with
        # "url" / "header" / "content" keys, or None before any visit.
        self.current_page = None

    def search_web(self, query, start_year, end_year):
        """Scrape Google search results for *query*.

        Args:
            query: Search terms; URL-quoted before being placed in the URL
                (the original interpolated the raw string, so spaces or '&'
                corrupted the request).
            start_year: Optional inclusive lower year bound.
            end_year: Optional inclusive upper year bound.  When both bounds
                are given they are translated into Google's custom date-range
                ("tbs=cdr") parameter; the original accepted but ignored them.

        Returns:
            A newline-joined listing of result snippets (plus any Wikipedia
            links found), or an error string on request failure.
        """
        url = f"https://www.google.com/search?q={quote_plus(query)}"
        if start_year and end_year:
            url += f"&tbs=cdr:1,cd_min:1/1/{start_year},cd_max:12/31/{end_year}"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
        try:
            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # 'tF2Cxc' is Google's (unstable, subject to change) CSS class
            # for organic result blocks.
            results = soup.find_all('div', class_='tF2Cxc')
            search_results = []
            for idx, result in enumerate(results, start=1):
                result_text = result.get_text()
                search_results.append(f"Result {idx}: {result_text}\n")
                link = result.find('a', href=True)
                if link and 'wikipedia.org' in link['href']:
                    search_results.append(f"Found Wikipedia link: {link['href']}")
            return "\n".join(search_results)
        except requests.exceptions.RequestException as e:
            return f"An error occurred: {e}"

    def visit_page(self, url: str):
        """Fetch *url*, strip script/style tags, and store it in current_page.

        On failure the error is printed and current_page is set to an error
        placeholder rather than raising, so callers always get a readable state.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            for script in soup(["script", "style"]):
                script.decompose()
            text = soup.get_text(separator='\n', strip=True)
            self.current_page = {
                "url": url,
                "header": f"Header for {url}",
                "content": text
            }
        except requests.RequestException as e:
            print(f"An error occurred: {e}")
            self.current_page = {
                "url": url,
                "header": "Error",
                "content": f"Failed to retrieve the page: {e}"
            }

    def _state(self):
        """Return a (header, content) tuple for the current page.

        The original implicitly returned None when no page had been visited,
        which crashed callers on tuple unpacking; return an empty state instead.
        """
        if self.current_page:
            return self.current_page["header"], self.current_page["content"]
        return "", ""
|
|
|
|