File size: 2,156 Bytes
ca12430
 
 
df31b03
ca12430
 
df31b03
ca12430
2574102
 
 
df31b03
2574102
ca12430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fcb92d
2574102
ca12430
4fcb92d
 
ca12430
4fcb92d
ca12430
 
4fcb92d
ca12430
 
 
 
4fcb92d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca12430
4fcb92d
ca12430
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from llama_index.core import SimpleDirectoryReader, Settings, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.tools import QueryEngineTool, RetrieverTool


class RAG:
    """Build a vector index over a folder of documents and expose it as tools.

    Typical usage is to call ``load_and_index_folder`` once and then
    ``get_tools`` to obtain the retrieval / query tools backed by that index.
    """

    def __init__(self, streaming=False, llm=None):
        """
        Store the configuration for later indexing and querying.

        Args:
            streaming (bool): Stored on the instance; not consumed by any
                method of this class.
            llm: The LLM assigned to the global ``Settings.llm`` when a folder
                is indexed. May be None.
        """
        self.streaming = streaming
        self.llm = llm
        # Populated by load_and_index_folder(); None means "nothing indexed yet".
        self.index = None

    def load_and_index_folder(self, folder_path, embedding_model="BAAI/bge-small-en-v1.5"):
        """
        Load and index documents from the specified folder path.

        The resulting index is stored on ``self.index``. As a side effect the
        global ``Settings.embed_model`` and ``Settings.llm`` are overwritten.

        Args:
            folder_path (str): The path to the folder containing documents.
            embedding_model (str): The embedding model to use for indexing.
        """
        # load documents
        documents = SimpleDirectoryReader(folder_path).load_data()

        # split the documents into nodes
        nodes = SentenceSplitter().get_nodes_from_documents(documents)

        # configure the global embedding model and LLM
        Settings.embed_model = HuggingFaceEmbedding(model_name=embedding_model)
        Settings.llm = self.llm

        # Build the index directly from the already-parsed nodes.
        # VectorStoreIndex.from_documents() expects Document objects and would
        # run the ingestion transformations on these nodes a second time.
        self.index = VectorStoreIndex(nodes=nodes)

    def get_tools(self):
        """
        Get tools for the RAG agent.

        Returns:
            list: A ``RetrieverTool`` followed by a ``QueryEngineTool``, both
                backed by the loaded index.

        Raises:
            ValueError: If no folder has been loaded and indexed yet.
        """
        # getattr keeps the guard working even for instances created without
        # running __init__ (e.g. via __new__ or unpickling of older objects).
        index = getattr(self, "index", None)
        if index is None:
            raise ValueError("Index not found. Please load and index a folder first.")

        retrieval_tool = RetrieverTool.from_defaults(
            index.as_retriever(),
            name="RAG_Document_Retrieval",
            description="Useful for retrieving relevant documents based on user queries.",
        )
        query_tool = QueryEngineTool.from_defaults(
            index.as_query_engine(),
            name="RAG_Document_Query",
            description="Useful for answering questions by querying the indexed data directly.",
        )

        return [retrieval_tool, query_tool]