File size: 3,364 Bytes
02073ee
 
 
 
 
 
 
 
 
 
 
 
 
 
ef1f54c
 
 
 
 
02073ee
ef1f54c
aa4ce92
ef1f54c
02073ee
 
 
ef1f54c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa4ce92
 
02073ee
 
 
 
 
 
 
aa4ce92
02073ee
 
ef1f54c
 
 
 
02073ee
 
 
 
 
aa4ce92
02073ee
 
ef1f54c
 
 
02073ee
aa4ce92
2e019bc
02073ee
aa4ce92
 
 
ef1f54c
 
02073ee
ef1f54c
 
 
 
02073ee
 
aa4ce92
02073ee
ef1f54c
02073ee
aa4ce92
02073ee
 
 
ef1f54c
 
 
02073ee
 
 
 
ef1f54c
02073ee
 
 
 
 
 
 
 
ef1f54c
02073ee
 
 
 
 
ef1f54c
02073ee
ef1f54c
 
aa4ce92
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from langchain_core.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
import os
import requests

# Pull API keys (OpenAI, Tavily, ...) from a local .env file into the environment.
load_dotenv()

# ==============================
# CONFIG
# ==============================
# On-disk location of the persisted FAISS index.
VECTORSTORE_DIR = "data/vectorstore"
os.makedirs(VECTORSTORE_DIR, exist_ok=True)

# ==============================
# GLOBAL RETRIEVER
# ==============================
# Module-level retriever shared by the RAG tool; populated lazily by
# load_retriever() / update_retriever(). None until a document is indexed.
retriever = None


def load_retriever():
    """Restore a previously persisted FAISS index into the global retriever.

    Leaves ``retriever`` untouched when no index file exists on disk.
    Any failure is reported to stdout rather than raised, so callers can
    fall back to the "no document uploaded" path.
    """
    global retriever
    try:
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        index_file = os.path.join(VECTORSTORE_DIR, "index.faiss")
        if not os.path.exists(index_file):
            return
        # allow_dangerous_deserialization is required because FAISS persists
        # its docstore via pickle; the index here is one we wrote ourselves.
        vectorstore = FAISS.load_local(
            VECTORSTORE_DIR,
            embeddings,
            allow_dangerous_deserialization=True
        )
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        print("✅ Vectorstore loaded from disk")
    except Exception as e:
        print("❌ Failed to load vectorstore:", e)

def build_vectorstore(path: str):
    """Chunk the PDF at *path*, embed the chunks, and persist a FAISS index.

    The index is saved under ``VECTORSTORE_DIR`` and the in-memory
    vectorstore is returned to the caller.
    """
    # Load the PDF page-by-page.
    pages = PyPDFLoader(path).load()

    # Split into overlapping chunks small enough for embedding.
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )
    chunks = chunker.split_documents(pages)

    # Embed and index.
    store = FAISS.from_documents(
        chunks,
        OpenAIEmbeddings(model="text-embedding-3-small")
    )

    # Persist so load_retriever() can restore it after a restart.
    store.save_local(VECTORSTORE_DIR)
    return store


def update_retriever(pdf_path: str):
    """Index the PDF at *pdf_path* and point the global retriever at it."""
    global retriever
    store = build_vectorstore(pdf_path)
    retriever = store.as_retriever(search_kwargs={"k": 3})


# ==============================
# RAG TOOL (FIXED)
# ==============================
def create_rag_tool():
    """Build and return the RAG tool bound to the module-level retriever."""

    @tool
    def rag_search(query: str) -> str:
        """
        Retrieve relevant information from uploaded documents.
        """
        global retriever

        # Lazily restore a persisted index on first use (e.g. after restart).
        if retriever is None:
            load_retriever()

        # Still nothing: no document has ever been indexed.
        if retriever is None:
            return "No document has been uploaded yet."

        matches = retriever.invoke(query)
        if not matches:
            return "No relevant information found in the uploaded document."

        return "\n\n".join(doc.page_content for doc in matches)

    return rag_search


# ---------------- OTHER TOOLS ---------------- #

@tool
def arxiv_search(query: str) -> dict:
    """Search arXiv for papers matching the query and return their summaries."""
    # NOTE: the docstring above is required — langchain_core's @tool raises
    # ValueError at import time for a function with no docstring, and it
    # doubles as the tool description shown to the LLM.
    try:
        arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
        return {"results": arxiv.run(query)}
    except Exception as e:
        # Return the failure to the agent instead of raising.
        return {"error": str(e)}


@tool
def wikipedia_search(query: str) -> dict:
    """Search Wikipedia for the query and return article summaries."""
    # NOTE: the docstring above is required — langchain_core's @tool raises
    # ValueError at import time for a function with no docstring, and it
    # doubles as the tool description shown to the LLM.
    try:
        wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
        return {"results": wiki.run(query)}
    except Exception as e:
        # Return the failure to the agent instead of raising.
        return {"error": str(e)}


@tool
def tavily_search(query: str) -> dict:
    """Search the web via Tavily and return up to five results for the query."""
    # NOTE: the docstring above is required — langchain_core's @tool raises
    # ValueError at import time for a function with no docstring, and it
    # doubles as the tool description shown to the LLM.
    try:
        search = TavilySearchResults(max_results=5)
        return {"results": search.run(query)}
    except Exception as e:
        # Return the failure to the agent instead of raising.
        return {"error": str(e)}