import os import requests from camel.toolkits import BaseToolkit class JinaBrowsingToolkit(BaseToolkit): def get_url_content(self, url: str) -> str: r"""Fetch the content of a URL using the r.jina.ai service. Args: url (str): The URL to fetch content from. Returns: str: The markdown content of the URL. """ # Replace http with https and add https if not present if not url.startswith("https://"): url = "https://" + url.lstrip("https://").lstrip("http://") jina_url = f"https://r.jina.ai/{url}" headers = {} if os.environ.get('JINA_PROXY_URL'): headers['X-Proxy-Url'] = os.environ.get('JINA_PROXY_URL') auth_token = os.environ.get('JINA_AUTH_TOKEN') if auth_token: headers['Authorization'] = f'Bearer {auth_token}' try: response = requests.get(jina_url, headers=headers) response.raise_for_status() return response.text except requests.RequestException as e: return f"Error fetching URL content: {e!s}" def get_url_content_with_context( self, url: str, search_string: str, context_chars: int = 700, max_instances: int = 3, ) -> str: r"""Fetch the content of a URL and return context around all instances of a specific string. Args: url (str): The URL to fetch content from. search_string (str): The string to search for in the content. context_chars (int): Number of characters to return before and after each found string. max_instances (int): Maximum number of instances to return. Returns: str: The context around all found instances of the string, or an error message if not found. If there are no results, try again with a more likely search string. Start with a more likely string and only use a less likely string if the first one has too many results. """ content = self.get_url_content(url) if content.startswith("Error fetching URL content"): return content instances = [] start = 0 while True: index = content.lower().find(search_string.lower(), start) if index == -1 or len(instances) >= max_instances: break context_start = max(0, index - context_chars) context_end = min( len(content), index + len(search_string) + context_chars ) instance_context = content[context_start:context_end] instances.append( f"Instance {len(instances) + 1}:\n{instance_context}\n" ) start = index + len(search_string) if instances: return ( f"Found {len(instances)} instance(s) of '{search_string}':\n\n" + '\n'.join(instances) ) else: return f"Search string '{search_string}' not found in the content."