|
|
import os |
|
|
from typing import Dict, List |
|
|
|
|
|
from dotenv import load_dotenv |
|
|
from langchain_community.document_loaders import ArxivLoader, WikipediaLoader |
|
|
from langchain_community.vectorstores import SupabaseVectorStore |
|
|
from langchain_core.tools import tool |
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
|
from langchain_tavily import TavilySearch |
|
|
from supabase.client import create_client |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
@tool
def create_retriever_from_supabase(query: str) -> str:
    """Search for semantically similar documents in the Supabase vector store.

    Embeds the query with a HuggingFace sentence-transformer model, runs a
    similarity search against the "documents" table via the
    "match_documents_langchain" RPC, and returns the matched documents'
    text joined into a single string.

    Args:
        query (str): The search query to find similar documents.

    Returns:
        str: The page contents of the matching documents, separated by
            blank lines. Empty string if nothing matched.

    Raises:
        KeyError: If SUPABASE_URL or SUPABASE_KEY is not set in the environment.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    # Fail fast with KeyError if the required credentials are missing.
    supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])

    vector_store = SupabaseVectorStore(
        client=supabase,
        embedding=embeddings,
        table_name="documents",
        query_name="match_documents_langchain",
    )

    # similarity_search returns list[Document]; serialize to a plain string
    # so the return value matches the declared `-> str` annotation, which
    # the @tool decorator uses to build the tool's output schema.
    docs = vector_store.similarity_search(query)
    return "\n\n".join(doc.page_content for doc in docs)
|
|
|
|
|
|
|
|
@tool
def internet_search(query: str) -> Dict[str, List[Dict[str, str]]]:
    """Run a web search via the Tavily Search API.

    Queries the web for the given search string and collects the top three
    hits, each reduced to its source URL, page title, and main text.

    Args:
        query (str): The search query to look up on the web.

    Returns:
        Dict[str, List[Dict[str, str]]]: A dictionary with a single key,
        "web_results", mapping to a list of result dictionaries. Each
        result dictionary has the keys:
            - Source: URL of the webpage
            - Title: Title of the webpage
            - Content: Main content/text from the webpage
    """
    search_response = TavilySearch(max_results=3).invoke(query)

    # Reshape each raw Tavily hit into the flat Source/Title/Content form.
    results = []
    for hit in search_response["results"]:
        results.append(
            {
                "Source": hit["url"],
                "Title": hit["title"],
                "Content": hit["content"],
            }
        )

    return {"web_results": results}
|
|
|
|
|
|
|
|
@tool
def wiki_search(query: str) -> Dict[str, List[Dict[str, str]]]:
    """Search Wikipedia for articles matching the query.

    Loads up to three Wikipedia articles for the given query and returns
    each one's source URL, title, and body text.

    Args:
        query (str): The search query to look up on Wikipedia.

    Returns:
        Dict[str, List[Dict[str, str]]]: A dictionary with a single key,
        "wiki_results", mapping to a list of result dictionaries. Each
        result dictionary has the keys:
            - Source: URL of the Wikipedia article
            - Title: Title of the Wikipedia article
            - Content: Main content/text from the article
    """
    loader = WikipediaLoader(query=query, load_max_docs=3)
    articles = loader.load()

    # Flatten each loaded Document into the Source/Title/Content shape.
    results = []
    for article in articles:
        entry = {
            "Source": article.metadata["source"],
            "Title": article.metadata["title"],
            "Content": article.page_content,
        }
        results.append(entry)

    return {"wiki_results": results}
|
|
|
|
|
|
|
|
@tool
def arxiv_search(query: str) -> Dict[str, List[Dict[str, str]]]:
    """Search academic papers on arXiv using the provided query.

    This tool searches arXiv for academic papers matching the query and returns
    up to 3 most relevant results with their publication date, authors, title,
    and content.

    Args:
        query (str): The search query to look up on arXiv.

    Returns:
        Dict[str, List[Dict[str, str]]]: A dictionary with a single key,
        "arxiv_results", mapping to a list of result dictionaries. Each
        result dictionary has the keys:
            - Published: Publication date of the paper
            - Authors: Authors of the paper
            - Title: Title of the academic paper
            - Content: Main content/abstract of the paper
    """
    docs = ArxivLoader(query=query, load_max_docs=3).load()

    # ArxivLoader exposes paper metadata under capitalized keys
    # ("Published", "Authors", "Title"); pass them through alongside
    # the loaded text.
    formatted_answer = [
        {
            "Published": doc.metadata["Published"],
            "Authors": doc.metadata["Authors"],
            "Title": doc.metadata["Title"],
            "Content": doc.page_content,
        }
        for doc in docs
    ]

    return {"arxiv_results": formatted_answer}
|
|
|