from langchain_core.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import os
import requests
# Load API keys (e.g. OPENAI_API_KEY, TAVILY_API_KEY) from a local .env file.
load_dotenv()
# ==============================
# CONFIG
# ==============================
# Directory where the FAISS index is persisted between runs.
VECTORSTORE_DIR = "data/vectorstore"
os.makedirs(VECTORSTORE_DIR, exist_ok=True)
# ==============================
# GLOBAL RETRIEVER
# ==============================
# Module-level retriever shared by the RAG tool below; populated lazily by
# load_retriever() / update_retriever(). None means no document indexed yet.
retriever = None
def load_retriever():
    """Load a previously persisted FAISS vectorstore into the global ``retriever``.

    Leaves ``retriever`` untouched when no index file exists on disk or when
    loading fails, so callers can fall back to "no document uploaded" handling.
    """
    global retriever
    index_path = os.path.join(VECTORSTORE_DIR, "index.faiss")
    # Guard clause: don't build an embeddings client when there is nothing to load.
    if not os.path.exists(index_path):
        return
    try:
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        vectorstore = FAISS.load_local(
            VECTORSTORE_DIR,
            embeddings,
            # FAISS local indexes are pickled; deserialization is only safe
            # because we wrote this index ourselves in build_vectorstore().
            allow_dangerous_deserialization=True,
        )
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        print("✅ Vectorstore loaded from disk")
    except Exception as e:
        # Best-effort: a corrupt/incompatible index should not crash startup.
        print("❌ Failed to load vectorstore:", e)
def build_vectorstore(path: str):
    """Index the PDF at *path* into a FAISS vectorstore, persist it, and return it.

    The PDF is split into 500-character chunks with 100-character overlap,
    embedded with OpenAI's ``text-embedding-3-small`` model, and the resulting
    index is saved under ``VECTORSTORE_DIR`` for later reloading.
    """
    pages = PyPDFLoader(path).load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    ).split_documents(pages)
    store = FAISS.from_documents(
        chunks,
        OpenAIEmbeddings(model="text-embedding-3-small"),
    )
    store.save_local(VECTORSTORE_DIR)
    return store
def update_retriever(pdf_path: str):
    """Rebuild the vectorstore from *pdf_path* and refresh the global retriever."""
    global retriever
    retriever = build_vectorstore(pdf_path).as_retriever(search_kwargs={"k": 3})
# ==============================
# RAG TOOL (FIXED)
# ==============================
def create_rag_tool():
    """Build and return the ``rag_search`` tool bound to the global retriever."""

    @tool
    def rag_search(query: str) -> str:
        """
        Retrieve relevant information from uploaded documents.
        """
        global retriever
        # Lazily pick up a previously persisted index on first use.
        if retriever is None:
            load_retriever()
        if retriever is None:
            return "No document has been uploaded yet."
        matches = retriever.invoke(query)
        if not matches:
            return "No relevant information found in the uploaded document."
        return "\n\n".join(doc.page_content for doc in matches)

    return rag_search
# ---------------- OTHER TOOLS ---------------- #
@tool
def arxiv_search(query: str) -> dict:
    """Search arXiv for papers matching the query.

    Returns ``{"results": <text summary>}`` on success, or
    ``{"error": <message>}`` when the lookup fails.
    """
    # NOTE: @tool derives the tool description from this docstring; without
    # one, langchain raises at import time.
    try:
        arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
        return {"results": arxiv.run(query)}
    except Exception as e:
        # Surface the failure to the agent instead of raising mid-run.
        return {"error": str(e)}
@tool
def wikipedia_search(query: str) -> dict:
    """Search Wikipedia for articles matching the query.

    Returns ``{"results": <text summary>}`` on success, or
    ``{"error": <message>}`` when the lookup fails.
    """
    # NOTE: @tool derives the tool description from this docstring; without
    # one, langchain raises at import time.
    try:
        wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        return {"results": wiki.run(query)}
    except Exception as e:
        # Surface the failure to the agent instead of raising mid-run.
        return {"error": str(e)}
@tool
def tavily_search(query: str) -> dict:
    """Search the web via Tavily for pages matching the query.

    Returns ``{"results": <up to 5 search results>}`` on success, or
    ``{"error": <message>}`` when the lookup fails.
    """
    # NOTE: @tool derives the tool description from this docstring; without
    # one, langchain raises at import time.
    try:
        search = TavilySearchResults(max_results=5)
        return {"results": search.run(query)}
    except Exception as e:
        # Surface the failure to the agent instead of raising mid-run.
        return {"error": str(e)}