Spaces:
Sleeping
Sleeping
import ast
import operator
from typing import Type, Optional
from urllib.parse import quote

from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_community.document_transformers import BeautifulSoupTransformer
from langchain_community.tools.wikipedia.tool import WikipediaQueryInput
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.tools import BaseTool
from langchain_tavily import TavilySearch
from pydantic import BaseModel, Field
def _get_web_page(url: str) -> str:
    """Load a single web page and return its transformed text.

    The page is loaded with ``AsyncChromiumLoader`` and the resulting
    documents are passed through ``BeautifulSoupTransformer``.

    Args:
        url: Address of the page to load.

    Returns:
        The content of every resulting document, each one preceded by a line
        of 30 ``=`` characters, joined with newlines.
    """
    # Each document is introduced by the same 30-character rule.
    header = '=' * 30 + '\n'
    raw_docs = AsyncChromiumLoader([url]).load()
    cleaned_docs = BeautifulSoupTransformer().transform_documents(raw_docs)
    return '\n'.join(header + doc.page_content for doc in cleaned_docs)
class WikipediaQueryLoad(BaseTool):
    """Tool that searches Wikipedia and returns the summary plus the full page text."""

    name: str = "wikipedia"
    description: str = (
        "A wrapper around Wikipedia. "
        "Useful for when you need to answer general questions about "
        "people, places, companies, facts, historical events, or other subjects. "
        "Input should be a search query."
    )
    api_wrapper: WikipediaAPIWrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=20000)
    args_schema: Type[BaseModel] = WikipediaQueryInput

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the Wikipedia tool.

        Args:
            query: Free-text search query; only its first 300 characters are
                sent to the title search.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A newline-joined report containing the page URL, the summary
            produced by ``api_wrapper.run`` and the scraped full page. When
            the title search finds nothing, only the wrapper's own output is
            returned.
        """
        # NOTE(review): 300 presumably mirrors the wrapper's own maximum query
        # length -- confirm against WikipediaAPIWrapper.
        page_titles = self.api_wrapper.wiki_client.search(
            query[:300], results=1
        )
        summary = self.api_wrapper.run(query)

        if not page_titles:
            # No matching article, so there is no page to scrape. Return the
            # wrapper's response instead of failing with IndexError.
            return summary

        # Titles may contain spaces and URL-significant characters ("?", "#",
        # "&", ...). Use underscores for spaces and percent-encode the rest so
        # the request reaches the intended article.
        page_title = page_titles[0]
        page_url = f"https://en.wikipedia.org/wiki/{quote(page_title.replace(' ', '_'))}"

        # Wikipedia python package doesn't properly support some wiki syntax
        # (i.e. tables), so the full wiki page is read separately.
        full_page = _get_web_page(page_url)

        res = [
            '**Wiki page url**:',
            # Emit the actual URL: the label promises a URL, not the bare title.
            page_url,
            '**Wiki page summary:**',
            summary,
            '**Full page content:**',
            full_page,
        ]
        return '\n'.join(res)
class WebScrapTool(BaseTool):
    """Tool that returns the content of the web page at a given URL."""

    name: str = "webscraper"
    description: str = "Loads full content of the web page."

    # Load HTML: the work is delegated to the module-level _get_web_page
    # helper; run_manager is accepted but not used.
    def _run(
        self,
        url: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        page_text = _get_web_page(url)
        return page_text
# Operators and functions the calculator is allowed to evaluate. Anything not
# listed here is rejected instead of being executed.
_CALC_BINARY_OPERATORS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod,
    ast.Pow: operator.pow,
}
_CALC_UNARY_OPERATORS = {
    ast.UAdd: operator.pos,
    ast.USub: operator.neg,
}
_CALC_FUNCTIONS = {
    "abs": abs,
    "round": round,
    "min": min,
    "max": max,
    "pow": pow,
}


def _evaluate_arithmetic(expression: str) -> float:
    """Evaluate a purely arithmetic expression without executing arbitrary code.

    Supports numeric literals, parentheses, the binary operators
    ``+ - * / // % **``, unary ``+``/``-`` and positional calls to ``abs``,
    ``round``, ``min``, ``max`` and ``pow``.

    Args:
        expression: The expression text.

    Returns:
        The numeric result.

    Raises:
        SyntaxError: If the text is not a valid Python expression.
        ValueError: If the expression uses anything outside the supported subset.
        ZeroDivisionError: On division or modulo by zero.
    """

    def visit(node: ast.AST):
        if isinstance(node, ast.Expression):
            return visit(node.body)
        if isinstance(node, ast.Constant):
            # bool is a subclass of int; reject it along with strings, bytes, etc.
            if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
                return node.value
            raise ValueError(f"Unsupported literal: {node.value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in _CALC_BINARY_OPERATORS:
            apply = _CALC_BINARY_OPERATORS[type(node.op)]
            return apply(visit(node.left), visit(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _CALC_UNARY_OPERATORS:
            apply = _CALC_UNARY_OPERATORS[type(node.op)]
            return apply(visit(node.operand))
        if (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id in _CALC_FUNCTIONS
            and not node.keywords
        ):
            func = _CALC_FUNCTIONS[node.func.id]
            return func(*(visit(arg) for arg in node.args))
        raise ValueError(f"Unsupported expression element: {type(node).__name__}")

    # eval() tolerated surrounding whitespace; strip it so parsing in "eval"
    # mode accepts the same inputs.
    return visit(ast.parse(expression.strip(), mode="eval"))


class CalculatorTool(BaseTool):
    """Tool that performs basic calculations."""

    name: str = "calculator"
    description: str = (
        "A calculator. "
        "Useful for when you need to perform basic calculations."
    )

    def _run(
        self,
        expression: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> float:
        """Use the calculator tool.

        Args:
            expression: Arithmetic expression to evaluate, e.g. ``"(2 + 3) * 4"``.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            The numeric value of the expression.

        Raises:
            SyntaxError: If the text is not a valid expression.
            ValueError: If the expression contains anything other than the
                supported arithmetic subset.
        """
        # SECURITY: this previously called eval(expression). The expression is
        # presumably produced by a model or other external input, so eval()
        # allowed arbitrary code execution. Only whitelisted arithmetic is
        # evaluated now; anything else raises ValueError.
        return _evaluate_arithmetic(expression)
def get_all_tools() -> list[BaseTool]:
    """Build one instance of every tool this module offers.

    Returns:
        A list holding, in order: the Wikipedia tool, the web scraper, a
        Tavily search tool (5 results, "general" topic) and the calculator.
    """
    wiki_backend = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=20000)

    # Tools are constructed one at a time, in the order they are returned.
    toolbox: list[BaseTool] = []
    toolbox.append(WikipediaQueryLoad(api_wrapper=wiki_backend))
    toolbox.append(WebScrapTool())
    toolbox.append(TavilySearch(max_results=5, topic="general"))
    toolbox.append(CalculatorTool())
    return toolbox