from llama_index.core import SimpleDirectoryReader, Settings, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.tools import QueryEngineTool, RetrieverTool


class RAG:
    """Retrieval-augmented generation helper.

    Indexes a folder of documents into a ``VectorStoreIndex`` and exposes
    retriever / query-engine tools for an agent to call.
    """

    def __init__(self, streaming=False, llm=None):
        """
        Args:
            streaming (bool): Whether responses should be streamed.
                NOTE(review): stored but never read in this file — confirm
                downstream callers rely on it.
            llm: LLM instance installed into llama_index ``Settings`` when a
                folder is indexed; may be ``None`` to use the library default.
        """
        self.streaming = streaming
        self.llm = llm

    def load_and_index_folder(self, folder_path, embedding_model="BAAI/bge-small-en-v1.5"):
        """
        Load and index documents from the specified folder path.

        Args:
            folder_path (str): The path to the folder containing documents.
            embedding_model (str): HuggingFace model name used to embed the
                document nodes.
        """
        # Load raw documents from disk.
        reader = SimpleDirectoryReader(folder_path)
        documents = reader.load_data()

        # Split documents into sentence-level nodes.
        parser = SentenceSplitter()
        nodes = parser.get_nodes_from_documents(documents)

        # Configure the embedding model BEFORE building the index so the
        # index embeds with the requested model rather than the default.
        Settings.embed_model = HuggingFaceEmbedding(model_name=embedding_model)

        # BUG FIX: the original called VectorStoreIndex.from_documents(nodes).
        # ``from_documents`` expects Document objects and re-runs the ingestion
        # transformations (re-splitting the already-split nodes). Pre-parsed
        # nodes must be passed to the constructor directly.
        index = VectorStoreIndex(nodes)

        # Install the configured LLM so query engines built from this index
        # use it for synthesis.
        Settings.llm = self.llm
        self.index = index

    def get_tools(self):
        """
        Get tools for the RAG agent.

        Returns:
            list: A ``RetrieverTool`` and a ``QueryEngineTool`` built over the
            current index.

        Raises:
            ValueError: If no folder has been loaded and indexed yet.
        """
        # Guard: tools can only be built after load_and_index_folder() ran.
        if not hasattr(self, 'index'):
            raise ValueError("Index not found. Please load and index a folder first.")

        retriever = self.index.as_retriever()
        query_engine = self.index.as_query_engine()

        return [
            RetrieverTool.from_defaults(
                retriever,
                name="RAG_Document_Retrieval",
                description="Useful for retrieving relevant documents based on user queries.",
            ),
            QueryEngineTool.from_defaults(
                query_engine,
                name="RAG_Document_Query",
                description="Useful for answering questions by querying the indexed data directly.",
            ),
        ]