| from metaphor_python import Metaphor | |
| from langchain.agents import tool | |
| from typing import List, Optional | |
| from langchain.tools.base import ToolException | |
| from langchain.tools import WikipediaQueryRun | |
| from langchain.utilities import WikipediaAPIWrapper | |
| from crawlbase import CrawlingAPI | |
| import streamlit as st | |
| #TODO: replace search with something else | |
| # client = Metaphor(api_key=st.secrets["METAPHOR_API_KEY"]) | |
| # | |
| # @tool | |
| # def search(query: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None): | |
| # """Call search engine with a query """ | |
| # try: | |
| # result = client.search(query, use_autoprompt=True, num_results=num_results, include_domains=include_domains, exclude_domains=exclude_domains) | |
| # if "I'm sorry" in result.autoprompt_string: | |
| # raise Exception(result.autoprompt_string) | |
| # return result | |
| # except Exception as e: | |
| # raise ToolException(e.args[0]) | |
| # | |
| # | |
| # @tool | |
| # def get_contents(ids: List[str]): | |
| # """Get contents of a webpage. May return an empty content, it means you have to use another tool to get the content. | |
| # | |
| # The ids passed in should be a list of ids as fetched from `search`. | |
| # """ | |
| # try: | |
| # return client.get_contents(ids) | |
| # except Exception as e: | |
| # raise ToolException(e.args[0]) | |
| # | |
| # | |
| # @tool | |
| # def find_similar(url: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None): | |
| # """Get search results similar to a given URL. | |
| # | |
| # The url passed in should be a URL returned from `search` | |
| # """ | |
| # try: | |
| # return client.find_similar(url, num_results=num_results, include_domains=include_domains, exclude_domains=exclude_domains) | |
| # except Exception as e: | |
| # raise ToolException(e.args[0]) | |
# Crawlbase token is read from Streamlit secrets (.streamlit/secrets.toml or the
# app's deployed secrets store); raises KeyError at import time if missing.
crawling_api_key = st.secrets["CRAWLING_API_KEY"]
# Module-level Crawlbase client shared by the scraping tools defined below.
api = CrawlingAPI({'token': crawling_api_key})
def scrape_page(url: str):
    """Get content of a given URL to process by an agent. in a json format like: dict_keys(['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'])
    """
    # NOTE(review): this docstring likely doubles as the tool description shown
    # to the agent (see get_tools) — keep its wording stable.
    fetch_options = {'format': 'json', 'autoparse': 'true', 'scroll': 'true'}
    result = api.get(url, options=fetch_options)
    # Crawlbase autoparse response: the parsed page lives under the 'json' key.
    return result['json']
#TODO: list attributes to return directly, e.g. content_type=['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links']
def scrape_page_and_return_the_content_directly(url: str):
    """Use this tool to directly get content of a given URL without processing it. in a json format like: dict_keys(['alert', 'title', 'favicon', 'meta', 'content', 'canonical', 'images', 'grouped_images', 'og_images', 'links'])
    """
    # NOTE(review): behaviorally identical to scrape_page; only the docstring —
    # which the agent likely uses as the tool description — differs.
    opts = {'format': 'json', 'autoparse': 'true', 'scroll': 'true'}
    resp = api.get(url, options=opts)
    return resp['json']
def get_tools():
    """Build the list of tools handed to the agent.

    Returns the two Crawlbase scraping tools plus a Wikipedia query tool whose
    description is extended to bias the agent toward it for factual lookups.
    """
    wiki_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    wiki_tool.description += " Prioritise this tool if you want to learn about facts."
    return [scrape_page, wiki_tool, scrape_page_and_return_the_content_directly]