SynapseAI / tools.py
junaid17's picture
Update tools.py
ef1f54c verified
raw
history blame
3.36 kB
from langchain_core.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import os
import requests
load_dotenv()
# ==============================
# CONFIG
# ==============================
VECTORSTORE_DIR = "data/vectorstore"  # on-disk location of the persisted FAISS index
os.makedirs(VECTORSTORE_DIR, exist_ok=True)  # ensure the directory exists before any save/load
# ==============================
# GLOBAL RETRIEVER
# ==============================
# Module-level retriever shared by the RAG tool; None until populated lazily
# by load_retriever() or eagerly by update_retriever().
retriever = None
def load_retriever():
    """Restore the global retriever from a FAISS index persisted on disk.

    Best-effort: when no index file exists, ``retriever`` is left as-is
    (None); any failure is reported on stdout rather than raised.
    """
    global retriever
    try:
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        index_file = os.path.join(VECTORSTORE_DIR, "index.faiss")
        if not os.path.exists(index_file):
            return  # nothing persisted yet — keep retriever untouched
        store = FAISS.load_local(
            VECTORSTORE_DIR,
            embeddings,
            allow_dangerous_deserialization=True,
        )
        retriever = store.as_retriever(search_kwargs={"k": 3})
        print("✅ Vectorstore loaded from disk")
    except Exception as e:
        print("❌ Failed to load vectorstore:", e)
def build_vectorstore(path: str):
    """Index a PDF into a FAISS vectorstore and persist it under VECTORSTORE_DIR.

    The document is split into overlapping 500-character chunks, embedded
    with OpenAI's text-embedding-3-small model, and saved to disk.
    Returns the in-memory FAISS vectorstore.
    """
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    ).split_documents(PyPDFLoader(path).load())
    store = FAISS.from_documents(
        chunks,
        OpenAIEmbeddings(model="text-embedding-3-small"),
    )
    store.save_local(VECTORSTORE_DIR)
    return store
def update_retriever(pdf_path: str):
    """Rebuild the index from *pdf_path* and point the global retriever at it."""
    global retriever
    retriever = build_vectorstore(pdf_path).as_retriever(search_kwargs={"k": 3})
# ==============================
# RAG TOOL (FIXED)
# ==============================
def create_rag_tool():
    """Build and return a LangChain tool that answers queries from the uploaded document."""
    @tool
    def rag_search(query: str) -> str:
        """
        Retrieve relevant information from uploaded documents.
        """
        global retriever
        if retriever is None:
            load_retriever()  # try restoring a previously persisted index
        if retriever is None:
            return "No document has been uploaded yet."
        matches = retriever.invoke(query)
        if not matches:
            return "No relevant information found in the uploaded document."
        return "\n\n".join(doc.page_content for doc in matches)
    return rag_search
# ---------------- OTHER TOOLS ---------------- #
@tool
def arxiv_search(query: str) -> dict:
    """Search arXiv for papers matching the query.

    Returns {"results": <formatted paper summaries>} on success, or
    {"error": <message>} when the lookup fails.
    """
    # NOTE(review): the original had no docstring — LangChain's @tool raises
    # "Function must have a docstring if description not provided" at import
    # time without one, so this docstring is the actual fix.
    try:
        arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
        return {"results": arxiv.run(query)}
    except Exception as e:
        return {"error": str(e)}
@tool
def wikipedia_search(query: str) -> dict:
    """Search Wikipedia for articles matching the query.

    Returns {"results": <article summary text>} on success, or
    {"error": <message>} when the lookup fails.
    """
    # NOTE(review): docstring added — @tool requires one (or an explicit
    # description) and raises ValueError at import time otherwise.
    try:
        wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        return {"results": wiki.run(query)}
    except Exception as e:
        return {"error": str(e)}
@tool
def tavily_search(query: str) -> dict:
    """Search the web via the Tavily API (up to 5 results).

    Returns {"results": <search results>} on success, or
    {"error": <message>} when the search fails (e.g. missing API key).
    """
    # NOTE(review): docstring added — @tool requires one (or an explicit
    # description) and raises ValueError at import time otherwise.
    try:
        search = TavilySearchResults(max_results=5)
        return {"results": search.run(query)}
    except Exception as e:
        return {"error": str(e)}