Spaces:
Sleeping
Sleeping
| from langchain_core.tools import tool | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun | |
| from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper | |
| from langchain_community.tools.tavily_search import TavilySearchResults | |
| from dotenv import load_dotenv | |
| import os | |
| import requests | |
# Load environment variables (e.g. OPENAI_API_KEY, TAVILY_API_KEY) from a .env file.
load_dotenv()

# ==============================
# CONFIG
# ==============================
# Directory where the FAISS index is persisted between runs.
VECTORSTORE_DIR = "data/vectorstore"
os.makedirs(VECTORSTORE_DIR, exist_ok=True)

# ==============================
# GLOBAL RETRIEVER
# ==============================
# Module-level retriever shared by the RAG tool; populated lazily by
# load_retriever() or eagerly by update_retriever().
retriever = None
def load_retriever():
    """Restore the persisted FAISS retriever into the module-level global.

    Does nothing when no saved index exists on disk; prints a diagnostic
    and leaves ``retriever`` unchanged if loading fails.
    """
    global retriever
    try:
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        index_file = os.path.join(VECTORSTORE_DIR, "index.faiss")
        # Guard clause: nothing to restore if no index was ever saved.
        if not os.path.exists(index_file):
            return
        # allow_dangerous_deserialization is required because FAISS persists
        # its docstore via pickle; only load indexes this app itself wrote.
        store = FAISS.load_local(
            VECTORSTORE_DIR,
            embeddings,
            allow_dangerous_deserialization=True
        )
        retriever = store.as_retriever(search_kwargs={"k": 3})
        print("✅ Vectorstore loaded from disk")
    except Exception as e:
        print("❌ Failed to load vectorstore:", e)
def build_vectorstore(path: str):
    """Build a FAISS vectorstore from a PDF and persist it to VECTORSTORE_DIR.

    Args:
        path: Filesystem path to the PDF file to index.

    Returns:
        The newly built FAISS vectorstore.

    Raises:
        ValueError: If the PDF yields no extractable text — e.g. a scanned
            image-only PDF — since FAISS cannot index zero documents.
    """
    loader = PyPDFLoader(path)
    docs = loader.load()

    # Chunk pages so each embedding covers a focused span; the overlap keeps
    # context continuous across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )
    split_docs = splitter.split_documents(docs)

    # FAISS.from_documents fails with an opaque error on an empty list;
    # fail early with a clear message instead.
    if not split_docs:
        raise ValueError(f"No extractable text found in PDF: {path}")

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = FAISS.from_documents(split_docs, embeddings)
    vectorstore.save_local(VECTORSTORE_DIR)
    return vectorstore
def update_retriever(pdf_path: str):
    """Rebuild the vectorstore from a freshly uploaded PDF and refresh the global retriever."""
    global retriever
    retriever = build_vectorstore(pdf_path).as_retriever(search_kwargs={"k": 3})
| # ============================== | |
| # RAG TOOL (FIXED) | |
| # ============================== | |
def create_rag_tool():
    """Build and return the RAG search callable used as a tool by the agent."""

    def rag_search(query: str) -> str:
        """
        Retrieve relevant information from uploaded documents.
        """
        global retriever
        # Lazily restore a previously persisted index on first use.
        if retriever is None:
            load_retriever()
        if retriever is None:
            return "No document has been uploaded yet."
        matches = retriever.invoke(query)
        if not matches:
            return "No relevant information found in the uploaded document."
        return "\n\n".join(doc.page_content for doc in matches)

    return rag_search
| # ---------------- OTHER TOOLS ---------------- # | |
def arxiv_search(query: str) -> dict:
    """Query arXiv; return {"results": ...} on success or {"error": ...} on failure."""
    try:
        runner = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
        response = runner.run(query)
    except Exception as exc:
        return {"error": str(exc)}
    return {"results": response}
def wikipedia_search(query: str) -> dict:
    """Query Wikipedia; return {"results": ...} on success or {"error": ...} on failure."""
    try:
        runner = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        response = runner.run(query)
    except Exception as exc:
        return {"error": str(exc)}
    return {"results": response}
def tavily_search(query: str) -> dict:
    """Run a Tavily web search; return {"results": ...} on success or {"error": ...} on failure."""
    try:
        engine = TavilySearchResults(max_results=5)
        response = engine.run(query)
    except Exception as exc:
        return {"error": str(exc)}
    return {"results": response}