import ast
import math
import operator
from typing import Optional, Type
from urllib.parse import quote

from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_community.document_transformers import BeautifulSoupTransformer
from langchain_community.tools.wikipedia.tool import WikipediaQueryInput
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.tools import BaseTool
from langchain_tavily import TavilySearch
from pydantic import BaseModel, Field

# Base URL that a Wikipedia page title is appended to.
_WIKIPEDIA_PAGE_URL = "https://en.wikipedia.org/wiki/"

# Operators, functions and constants the calculator is allowed to evaluate.
# Anything not listed here is rejected instead of being executed.
_BINARY_OPERATORS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod,
    ast.Pow: operator.pow,
}
_UNARY_OPERATORS = {
    ast.UAdd: operator.pos,
    ast.USub: operator.neg,
}
_CALCULATOR_FUNCTIONS = {
    "abs": abs,
    "round": round,
    "min": min,
    "max": max,
    "sqrt": math.sqrt,
    "exp": math.exp,
    "log": math.log,
    "log10": math.log10,
    "sin": math.sin,
    "cos": math.cos,
    "tan": math.tan,
    "floor": math.floor,
    "ceil": math.ceil,
}
_CALCULATOR_CONSTANTS = {
    "pi": math.pi,
    "e": math.e,
    "tau": math.tau,
}
# Cheap guard against accidentally requesting an enormous integer power.
_MAX_EXPONENT = 10_000


def _get_web_page(url: str) -> str:
    """Fetch a web page and return its extracted text.

    The page is loaded with ``AsyncChromiumLoader`` and passed through
    ``BeautifulSoupTransformer``. Every resulting document is prefixed with
    a line of 30 ``=`` characters and the documents are joined with newlines.

    Args:
        url: Address of the page to load.

    Returns:
        The transformed page content; an empty string if no documents
        were produced.
    """
    docs = AsyncChromiumLoader([url]).load()
    docs = BeautifulSoupTransformer().transform_documents(docs)
    return '\n'.join('=' * 30 + '\n' + doc.page_content for doc in docs)


def _default_wikipedia_wrapper() -> WikipediaAPIWrapper:
    """Build the Wikipedia API wrapper configuration shared by this module."""
    return WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=20000)


def _evaluate_node(node: ast.AST) -> float:
    """Recursively evaluate a whitelisted arithmetic AST node.

    Args:
        node: A node produced by ``ast.parse(..., mode="eval")``.

    Returns:
        The numeric value of the (sub)expression.

    Raises:
        ValueError: If the node uses syntax, names, functions or operators
            outside the calculator whitelist.
    """
    if isinstance(node, ast.Expression):
        return _evaluate_node(node.body)

    if isinstance(node, ast.Constant):
        value = node.value
        # bool is a subclass of int; reject it together with str/bytes/etc.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError(f"Unsupported constant: {value!r}")
        return value

    if isinstance(node, ast.Name):
        if node.id not in _CALCULATOR_CONSTANTS:
            raise ValueError(f"Unknown name: {node.id!r}")
        return _CALCULATOR_CONSTANTS[node.id]

    if isinstance(node, ast.UnaryOp):
        unary = _UNARY_OPERATORS.get(type(node.op))
        if unary is None:
            raise ValueError(
                f"Unsupported unary operator: {type(node.op).__name__}"
            )
        return unary(_evaluate_node(node.operand))

    if isinstance(node, ast.BinOp):
        binary = _BINARY_OPERATORS.get(type(node.op))
        if binary is None:
            raise ValueError(
                f"Unsupported binary operator: {type(node.op).__name__}"
            )
        left = _evaluate_node(node.left)
        right = _evaluate_node(node.right)
        if isinstance(node.op, ast.Pow) and abs(right) > _MAX_EXPONENT:
            raise ValueError(f"Exponent too large: {right!r}")
        return binary(left, right)

    if isinstance(node, ast.Call):
        func = node.func
        if (
            not isinstance(func, ast.Name)
            or func.id not in _CALCULATOR_FUNCTIONS
            or node.keywords
        ):
            raise ValueError("Unsupported function call")
        arguments = [_evaluate_node(arg) for arg in node.args]
        return _CALCULATOR_FUNCTIONS[func.id](*arguments)

    raise ValueError(
        f"Unsupported expression element: {type(node).__name__}"
    )


class WikipediaQueryLoad(BaseTool):
    """Tool that searches the Wikipedia API and loads the full top page."""

    name: str = "wikipedia"
    description: str = (
        "A wrapper around Wikipedia. "
        "Useful for when you need to answer general questions about "
        "people, places, companies, facts, historical events, or other subjects. "
        "Input should be a search query."
    )
    # Built lazily per instance so importing the module does not construct
    # a wrapper as a side effect.
    api_wrapper: WikipediaAPIWrapper = Field(
        default_factory=_default_wikipedia_wrapper
    )
    args_schema: Type[BaseModel] = WikipediaQueryInput

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the Wikipedia tool.

        Args:
            query: Search query; only its first 300 characters are sent to
                the title search.
            run_manager: Optional callback manager (unused).

        Returns:
            The page URL, the wrapper's summary and the full page content
            joined with newlines. If the title search returns nothing, only
            the wrapper's summary text is returned.
        """
        page_titles = self.api_wrapper.wiki_client.search(
            query[:300], results=1
        )
        summary = self.api_wrapper.run(query)
        if not page_titles:
            # No page to load; previously this raised IndexError.
            return summary

        # Titles may contain spaces, '?', '#', ... which must be escaped to
        # form a valid page URL.
        page_url = _WIKIPEDIA_PAGE_URL + quote(
            page_titles[0].replace(" ", "_"), safe="/:"
        )
        # Wikipedia python package doesn't properly support some wiki syntax
        # (i.e. tables), so the full wiki page is read separately
        full_page = _get_web_page(page_url)
        res = [
            '**Wiki page url**:',
            page_url,
            '**Wiki page summary:**',
            summary,
            '**Full page content:**',
            full_page,
        ]
        return '\n'.join(res)


class WebScrapTool(BaseTool):
    """Tool that loads the full content of a web page."""

    name: str = "webscraper"
    description: str = "Loads full content of the web page."

    def _run(
        self,
        url: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Return the extracted content of the page at ``url``."""
        return _get_web_page(url)


class CalculatorTool(BaseTool):
    """Tool that performs basic calculations."""

    name: str = "calculator"
    description: str = (
        "A calculator. "
        "Useful for when you need to perform basic calculations."
    )

    def _run(
        self,
        expression: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> float:
        """Use the calculator tool.

        SECURITY NOTE: this used to call ``eval(expression)``. The expression
        is produced by a language model (and may be influenced by scraped
        web content), so ``eval`` allowed arbitrary code execution. The
        expression is now parsed with ``ast`` and only numeric literals,
        arithmetic operators and a small whitelist of functions/constants
        are evaluated.

        Args:
            expression: Arithmetic expression, e.g. ``"2 * (3 + 4)"``.
            run_manager: Optional callback manager (unused).

        Returns:
            The numeric result of the expression.

        Raises:
            SyntaxError: If the expression is not valid Python syntax.
            ValueError: If the expression uses anything outside the
                calculator whitelist.
        """
        tree = ast.parse(expression.strip(), mode="eval")
        return _evaluate_node(tree)


def get_all_tools() -> list[BaseTool]:
    """Get all tools.

    Returns:
        Fresh instances of the Wikipedia, web scraper, Tavily search and
        calculator tools, in that order.
    """
    return [
        WikipediaQueryLoad(api_wrapper=_default_wikipedia_wrapper()),
        WebScrapTool(),
        TavilySearch(max_results=5, topic="general"),
        CalculatorTool(),
    ]