# NOTE: "Spaces: Sleeping" below was a Hugging Face Spaces page-status header
# captured during extraction; it is not part of the Python source.
| from langchain_core.tools import tool | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun | |
| from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper | |
| from langchain_community.tools.tavily_search import TavilySearchResults | |
| from dotenv import load_dotenv | |
| import os | |
# Load API credentials (e.g. OPENAI_API_KEY, and presumably TAVILY_API_KEY for
# the web-search tool — confirm against deployment config) from a local .env file.
load_dotenv()
# ==============================
# CONFIG
# ==============================
# Directory where the FAISS index files are persisted between requests.
VECTORSTORE_DIR = "data/vectorstore"
os.makedirs(VECTORSTORE_DIR, exist_ok=True)
| # ============================== | |
| # VECTOR STORE CREATION | |
| # ============================== | |
def build_vectorstore(file_path: str):
    """Ingest a PDF into a FAISS index persisted under ``VECTORSTORE_DIR``.

    The document is loaded page by page, split into overlapping chunks,
    embedded with OpenAI's ``text-embedding-3-small`` model, and saved to
    disk so later queries can reload the index.

    Args:
        file_path: Path to the PDF document to ingest.

    Returns:
        The freshly built FAISS vectorstore.
    """
    pages = PyPDFLoader(file_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )
    pieces = text_splitter.split_documents(pages)
    store = FAISS.from_documents(
        pieces,
        OpenAIEmbeddings(model="text-embedding-3-small"),
    )
    store.save_local(VECTORSTORE_DIR)
    return store
def update_retriever(file_path: str):
    """Refresh the persisted index after a new document upload.

    Simply delegates to :func:`build_vectorstore`, which overwrites the
    on-disk FAISS index; the RAG tool reloads it on the next query.
    """
    build_vectorstore(file_path)
| # ============================== | |
| # RAG TOOL (HF SAFE) | |
| # ============================== | |
def create_rag_tool():
    """Build and return a document-retrieval callable.

    The returned closure reloads the persisted FAISS index on every call,
    which keeps it in sync with indexes rebuilt by ``update_retriever``.
    """

    def rag_search(query: str) -> str:
        """
        Retrieve relevant information from uploaded documents.
        """
        index_file = os.path.join(VECTORSTORE_DIR, "index.faiss")
        # Guard: without a saved index there is nothing to search.
        if not os.path.exists(index_file):
            return "No document has been uploaded yet."
        store = FAISS.load_local(
            VECTORSTORE_DIR,
            OpenAIEmbeddings(model="text-embedding-3-small"),
            allow_dangerous_deserialization=True,
        )
        hits = store.as_retriever(search_kwargs={"k": 4}).invoke(query)
        if not hits:
            return "No relevant information found in the document."
        return "\n\n".join(doc.page_content for doc in hits)

    return rag_search
| # ============================== | |
| # EXTRA TOOLS | |
| # ============================== | |
def wikipedia_search(query: str) -> dict:
    """Look up *query* on Wikipedia.

    Returns:
        ``{"results": <text>}`` on success, ``{"error": <message>}`` when the
        lookup raises for any reason (network, API, missing page).
    """
    try:
        runner = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        return {"results": runner.run(query)}
    except Exception as exc:  # surface failures as data rather than crashing
        return {"error": str(exc)}
def arxiv_search(query: str) -> dict:
    """Search academic papers on arXiv.

    Returns:
        ``{"results": <text>}`` on success, ``{"error": <message>}`` when the
        query raises for any reason.
    """
    try:
        runner = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
        return {"results": runner.run(query)}
    except Exception as exc:  # surface failures as data rather than crashing
        return {"error": str(exc)}
def tavily_search(query: str) -> dict:
    """Search the web using Tavily.

    Returns:
        ``{"results": <payload>}`` (up to 5 hits) on success,
        ``{"error": <message>}`` when the search raises for any reason.
    """
    try:
        searcher = TavilySearchResults(max_results=5)
        return {"results": searcher.run(query)}
    except Exception as exc:  # surface failures as data rather than crashing
        return {"error": str(exc)}