"""Tool definitions for a RAG agent: a FAISS-backed document retriever plus
Arxiv / Wikipedia / Tavily search tools.

The FAISS index is persisted under ``VECTORSTORE_DIR`` so uploaded documents
survive process restarts; the module-level ``retriever`` is lazily (re)loaded.
"""

from langchain_core.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import os
import requests

load_dotenv()

# ==============================
# CONFIG
# ==============================
VECTORSTORE_DIR = "data/vectorstore"
os.makedirs(VECTORSTORE_DIR, exist_ok=True)

# ==============================
# GLOBAL RETRIEVER
# ==============================
# Lazily populated by load_retriever() / update_retriever(); None until a
# document has been indexed.
retriever = None


def load_retriever() -> None:
    """Load a previously persisted FAISS index into the global ``retriever``.

    Best-effort: if no index exists on disk, or deserialization fails, the
    global is left as-is (``None`` on first run) and the error is reported
    rather than raised, so tool calls degrade gracefully.
    """
    global retriever
    try:
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        if os.path.exists(os.path.join(VECTORSTORE_DIR, "index.faiss")):
            # allow_dangerous_deserialization is required for FAISS pickle
            # metadata; safe here because we only load indexes we wrote.
            vectorstore = FAISS.load_local(
                VECTORSTORE_DIR,
                embeddings,
                allow_dangerous_deserialization=True,
            )
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
            print("✅ Vectorstore loaded from disk")
    except Exception as e:
        # Deliberate broad catch: retriever loading must never crash startup.
        print("❌ Failed to load vectorstore:", e)


def build_vectorstore(path: str):
    """Build, persist, and return a FAISS vectorstore from the PDF at *path*.

    Raises:
        ValueError: if the PDF yields no extractable text (e.g. a scanned
            image-only PDF), instead of the opaque FAISS error that would
            otherwise surface.
    """
    loader = PyPDFLoader(path)
    docs = loader.load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    )
    split_docs = splitter.split_documents(docs)
    if not split_docs:
        raise ValueError(
            f"No extractable text found in {path!r}; cannot build vectorstore."
        )

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = FAISS.from_documents(split_docs, embeddings)
    vectorstore.save_local(VECTORSTORE_DIR)
    return vectorstore


def update_retriever(pdf_path: str) -> None:
    """Index the PDF at *pdf_path* and point the global ``retriever`` at it."""
    global retriever
    vectorstore = build_vectorstore(pdf_path)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})


# ==============================
# RAG TOOL (FIXED)
# ==============================
def create_rag_tool():
    """Return a ``rag_search`` tool bound to the module-level retriever.

    The tool resolves the retriever at call time (not at creation time), so
    documents uploaded after agent construction are still searchable.
    """

    @tool
    def rag_search(query: str) -> str:
        """Retrieve relevant information from uploaded documents."""
        global retriever
        if retriever is None:
            # Attempt a lazy load of a previously persisted index.
            load_retriever()
        if retriever is None:
            return "No document has been uploaded yet."
        docs = retriever.invoke(query)
        if not docs:
            return "No relevant information found in the uploaded document."
        return "\n\n".join(d.page_content for d in docs)

    return rag_search


# ---------------- OTHER TOOLS ---------------- #
@tool
def arxiv_search(query: str) -> dict:
    """Search arXiv for academic papers matching *query*."""
    try:
        arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
        return {"results": arxiv.run(query)}
    except Exception as e:
        # Return the error as data so the agent can recover gracefully.
        return {"error": str(e)}


@tool
def wikipedia_search(query: str) -> dict:
    """Search Wikipedia for encyclopedic information about *query*."""
    try:
        wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        return {"results": wiki.run(query)}
    except Exception as e:
        return {"error": str(e)}


@tool
def tavily_search(query: str) -> dict:
    """Run a Tavily web search for *query* (top 5 results)."""
    try:
        search = TavilySearchResults(max_results=5)
        return {"results": search.run(query)}
    except Exception as e:
        return {"error": str(e)}