Spaces:
Sleeping
Sleeping
File size: 3,226 Bytes
cbe419f 21a5794 cbe419f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
from typing import Type, Optional
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_community.document_transformers import BeautifulSoupTransformer
from langchain_community.tools.wikipedia.tool import WikipediaQueryInput
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.tools import BaseTool
from langchain_tavily import TavilySearch
from pydantic import BaseModel, Field
def _get_web_page(url: str) -> str:
    """Load a single web page and return its transformed text.

    The page is loaded with ``AsyncChromiumLoader`` and the resulting
    documents are passed through ``BeautifulSoupTransformer``.

    Args:
        url: Address of the page to load.

    Returns:
        The content of every transformed document, each preceded by a line
        of 30 ``=`` characters, joined with newlines.
    """
    raw_docs = AsyncChromiumLoader([url]).load()
    cleaned_docs = BeautifulSoupTransformer().transform_documents(raw_docs)

    # One separator line in front of each document keeps them apart in the
    # combined output.
    separator = '=' * 30
    return '\n'.join(
        separator + '\n' + document.page_content for document in cleaned_docs
    )
class WikipediaQueryLoad(BaseTool):
    """Tool that searches Wikipedia and returns a summary plus the full page.

    The summary comes from ``api_wrapper``; the full article text is loaded
    separately from the rendered page through ``_get_web_page``.
    """

    name: str = "wikipedia"
    description: str = (
        "A wrapper around Wikipedia. "
        "Useful for when you need to answer general questions about "
        "people, places, companies, facts, historical events, or other subjects. "
        "Input should be a search query."
    )
    api_wrapper: WikipediaAPIWrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=20000)
    args_schema: Type[BaseModel] = WikipediaQueryInput

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Look up ``query`` on Wikipedia.

        Args:
            query: Search query; only its first 300 characters are used for
                the title search.
            run_manager: Unused; accepted for the tool interface.

        Returns:
            A newline-joined report containing the article URL, the summary
            produced by ``api_wrapper`` and the full page content. When the
            title search finds nothing, only the ``api_wrapper`` output is
            returned.
        """
        # Local import keeps this block self-contained (stdlib only).
        from urllib.parse import quote

        page_titles = self.api_wrapper.wiki_client.search(
            query[:300], results=1
        )
        summary = self.api_wrapper.run(query)

        if not page_titles:
            # No matching article: indexing page_titles[0] would raise
            # IndexError. Return what the wrapper produced instead
            # (presumably its own "no result" message -- confirm).
            return summary

        # Article paths use underscores for spaces, and characters such as
        # '?', '#' or '%' in a title must be percent-encoded or the URL is
        # cut short / misread. quote() leaves '/' intact by default, which
        # titles containing a slash rely on.
        page_url = "https://en.wikipedia.org/wiki/" + quote(
            page_titles[0].replace(" ", "_")
        )

        # Wikipedia python package doesn't properly support some wiki syntax
        # (e.g. tables), so the full wiki page is read separately.
        full_page = _get_web_page(page_url)

        res = [
            '**Wiki page url**:',
            page_url,  # the label promises a URL, so emit the URL, not the bare title
            '**Wiki page summary:**',
            summary,
            '**Full page content:**',
            full_page,
        ]
        return '\n'.join(res)
class WebScrapTool(BaseTool):
    """Tool that returns the content of the web page at a given URL."""

    name: str = "webscraper"
    description: str = "Loads full content of the web page."

    def _run(
        self,
        url: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        # Loading and HTML transformation are delegated to the module-level
        # helper; run_manager is accepted for the tool interface but unused.
        page_text = _get_web_page(url)
        return page_text
class CalculatorTool(BaseTool):
    """Tool that performs basic calculations."""

    name: str = "calculator"
    description: str = (
        "A calculator. "
        "Useful for when you need to perform basic calculations."
    )

    def _run(
        self,
        expression: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> float:
        """Evaluate ``expression`` and return its value.

        Args:
            expression: A Python expression built from literals, operators
                and calls to the whitelisted names (a few numeric builtins
                plus the public names of the ``math`` module).
            run_manager: Unused; accepted for the tool interface.

        Returns:
            Whatever the expression evaluates to (normally a number).

        Raises:
            SyntaxError: If ``expression`` is not a valid Python expression.
            ValueError: If it uses attribute access or a name that is not
                whitelisted.
        """
        # Local imports keep this block self-contained (stdlib only).
        import ast
        import math

        # SECURITY NOTE(review): this used to be a bare `eval(expression)`.
        # The expression is supplied by the model, so that allowed arbitrary
        # code execution (e.g. `__import__('os').system(...)`). The input is
        # now parsed and rejected unless it is free of attribute access and
        # refers only to the names below, then evaluated without builtins.
        # This does NOT protect against resource exhaustion (e.g. enormous
        # exponents).
        allowed_names = {
            math_name: getattr(math, math_name)
            for math_name in dir(math)
            if not math_name.startswith("_")
        }
        # Added after the math names so the builtin `pow` (exact for ints)
        # wins over `math.pow`, matching what the plain `eval` resolved.
        allowed_names.update(
            abs=abs,
            round=round,
            min=min,
            max=max,
            sum=sum,
            pow=pow,
            divmod=divmod,
            int=int,
            float=float,
        )

        # compile()/ast.parse do not strip surrounding whitespace the way
        # eval() does for strings, so do it explicitly.
        tree = ast.parse(expression.strip(), mode="eval")
        for node in ast.walk(tree):
            if isinstance(node, ast.Attribute):
                raise ValueError(
                    "Attribute access is not allowed in calculator expressions."
                )
            if isinstance(node, ast.Name) and node.id not in allowed_names:
                raise ValueError(
                    f"Name {node.id!r} is not allowed in calculator expressions."
                )

        return eval(
            compile(tree, "<calculator>", "eval"),
            {"__builtins__": {}},
            allowed_names,
        )
def get_all_tools() -> list[BaseTool]:
    """Build one instance of every tool defined for the agent.

    Returns:
        The Wikipedia, web scraper, Tavily search and calculator tools, in
        that order.
    """
    wiki_backend = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=20000)
    wikipedia_tool = WikipediaQueryLoad(api_wrapper=wiki_backend)
    scraper_tool = WebScrapTool()
    search_tool = TavilySearch(max_results=5, topic="general")
    calculator_tool = CalculatorTool()
    return [wikipedia_tool, scraper_tool, search_tool, calculator_tool]