# SynapseAI / tools.py
# (uploaded by junaid17 — commit d1f2f58, "Update tools.py")
from langchain_core.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import os
load_dotenv()
# ==============================
# CONFIG
# ==============================
VECTORSTORE_DIR = "data/vectorstore"
os.makedirs(VECTORSTORE_DIR, exist_ok=True)
# ==============================
# VECTOR STORE CREATION
# ==============================
def build_vectorstore(file_path: str, chunk_size: int = 500, chunk_overlap: int = 100):
    """Build a FAISS vectorstore from a PDF and persist it to VECTORSTORE_DIR.

    Args:
        file_path: Path to the PDF document to index.
        chunk_size: Maximum characters per text chunk (default 500).
        chunk_overlap: Characters shared by consecutive chunks (default 100).

    Returns:
        The in-memory FAISS vectorstore (also saved to disk under
        VECTORSTORE_DIR for later reloading by the RAG tool).

    Raises:
        ValueError: If no text chunks could be extracted from the PDF.
    """
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    chunks = splitter.split_documents(documents)
    # FAISS.from_documents fails opaquely on an empty list (e.g. a scanned,
    # image-only PDF) — fail early with a clear message instead.
    if not chunks:
        raise ValueError(f"No text could be extracted from {file_path!r}.")
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = FAISS.from_documents(chunks, embeddings)
    vectorstore.save_local(VECTORSTORE_DIR)
    return vectorstore
def update_retriever(file_path: str):
    """Rebuild the persisted vectorstore when a new document is uploaded.

    Args:
        file_path: Path to the newly uploaded PDF.

    Returns:
        The freshly built FAISS vectorstore. (The original dropped this
        value; returning it is backward-compatible and lets callers use the
        store directly instead of reloading it from disk.)
    """
    return build_vectorstore(file_path)
# ==============================
# RAG TOOL (HF SAFE)
# ==============================
def create_rag_tool():
    """Create and return the RAG search tool for querying uploaded documents."""
    @tool
    def rag_search(query: str) -> str:
        """
        Retrieve relevant information from uploaded documents.
        """
        index_path = os.path.join(VECTORSTORE_DIR, "index.faiss")
        if not os.path.exists(index_path):
            return "No document has been uploaded yet."
        # Reload from disk on every call so the tool picks up any
        # vectorstore rebuilt by update_retriever() since the last query.
        store = FAISS.load_local(
            VECTORSTORE_DIR,
            OpenAIEmbeddings(model="text-embedding-3-small"),
            allow_dangerous_deserialization=True
        )
        hits = store.as_retriever(search_kwargs={"k": 4}).invoke(query)
        if not hits:
            return "No relevant information found in the document."
        return "\n\n".join(doc.page_content for doc in hits)
    return rag_search
# ==============================
# EXTRA TOOLS
# ==============================
@tool
def wikipedia_search(query: str) -> dict:
    """Search Wikipedia."""
    try:
        # Construction stays inside the try so wrapper/setup failures are
        # also reported back to the agent instead of raising.
        wrapper = WikipediaAPIWrapper()
        output = WikipediaQueryRun(api_wrapper=wrapper).run(query)
        return {"results": output}
    except Exception as exc:
        return {"error": str(exc)}
@tool
def arxiv_search(query: str) -> dict:
    """Search academic papers on arXiv."""
    try:
        # Construction stays inside the try so wrapper/setup failures are
        # also reported back to the agent instead of raising.
        wrapper = ArxivAPIWrapper()
        output = ArxivQueryRun(api_wrapper=wrapper).run(query)
        return {"results": output}
    except Exception as exc:
        return {"error": str(exc)}
@tool
def tavily_search(query: str) -> dict:
    """Search the web using Tavily."""
    try:
        # Construction stays inside the try so a missing/invalid Tavily API
        # key is reported back to the agent instead of raising.
        searcher = TavilySearchResults(max_results=5)
        output = searcher.run(query)
        return {"results": output}
    except Exception as exc:
        return {"error": str(exc)}