from langchain_core.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import os
load_dotenv()
# ==============================
# CONFIG
# ==============================
# Directory where the persisted FAISS index lives (index.faiss / index.pkl).
# Created eagerly so save_local/load_local never hit a missing path.
VECTORSTORE_DIR = "data/vectorstore"
os.makedirs(VECTORSTORE_DIR, exist_ok=True)
# ==============================
# VECTOR STORE CREATION
# ==============================
def build_vectorstore(file_path: str, chunk_size: int = 500, chunk_overlap: int = 100):
    """Build and persist a FAISS vectorstore from a PDF file.

    Loads the PDF, splits it into overlapping character chunks, embeds the
    chunks with OpenAI's ``text-embedding-3-small`` model, and saves the
    resulting FAISS index to ``VECTORSTORE_DIR`` (overwriting any previous
    index there).

    Args:
        file_path: Path to the PDF document to index.
        chunk_size: Maximum characters per chunk (default 500, the
            original hard-coded value).
        chunk_overlap: Characters of overlap between adjacent chunks
            (default 100, the original hard-coded value).

    Returns:
        The in-memory FAISS vectorstore (also persisted to disk).
    """
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    chunks = splitter.split_documents(documents)
    # NOTE: must stay in sync with the embedding model used at query time
    # in create_rag_tool(), or similarity search will be meaningless.
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = FAISS.from_documents(chunks, embeddings)
    vectorstore.save_local(VECTORSTORE_DIR)
    return vectorstore
def update_retriever(file_path: str):
    """Rebuild vectorstore when a new document is uploaded."""
    # Delegates to build_vectorstore, which persists the index to disk;
    # the returned in-memory store is intentionally discarded here.
    build_vectorstore(file_path)
# ==============================
# RAG TOOL (HF SAFE)
# ==============================
def create_rag_tool(k: int = 4):
    """Create the RAG retrieval tool (HF-Spaces-safe: reloads the index per call).

    The inner tool re-loads the FAISS index from disk on every query so it
    always sees the most recently uploaded document, at the cost of a load
    per call.

    Args:
        k: Number of chunks to retrieve per query (default 4, the original
            hard-coded value).

    Returns:
        A LangChain tool that answers queries from the uploaded document.
    """
    @tool
    def rag_search(query: str) -> str:
        """
        Retrieve relevant information from uploaded documents.
        """
        # The index only exists after a document has been uploaded/indexed.
        if not os.path.exists(os.path.join(VECTORSTORE_DIR, "index.faiss")):
            return "No document has been uploaded yet."
        # Must match the embedding model used in build_vectorstore().
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        # allow_dangerous_deserialization is acceptable here: we only load
        # an index this application wrote itself, never untrusted files.
        vectorstore = FAISS.load_local(
            VECTORSTORE_DIR,
            embeddings,
            allow_dangerous_deserialization=True
        )
        retriever = vectorstore.as_retriever(search_kwargs={"k": k})
        docs = retriever.invoke(query)
        if not docs:
            return "No relevant information found in the document."
        return "\n\n".join(d.page_content for d in docs)
    return rag_search
# ==============================
# EXTRA TOOLS
# ==============================
@tool
def wikipedia_search(query: str) -> dict:
    """Search Wikipedia."""
    # Any failure (network, API) is surfaced as a structured error so the
    # calling agent can handle it instead of crashing.
    try:
        runner = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        found = runner.run(query)
    except Exception as e:
        return {"error": str(e)}
    return {"results": found}
@tool
def arxiv_search(query: str) -> dict:
    """Search academic papers on arXiv."""
    # Mirror wikipedia_search: wrap failures in a structured error dict.
    try:
        runner = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
        found = runner.run(query)
    except Exception as e:
        return {"error": str(e)}
    return {"results": found}
@tool
def tavily_search(query: str) -> dict:
    """Search the web using Tavily."""
    # Failures (e.g. missing TAVILY_API_KEY) become a structured error dict.
    try:
        searcher = TavilySearchResults(max_results=5)
        found = searcher.run(query)
    except Exception as e:
        return {"error": str(e)}
    return {"results": found}