younus00's picture
refactor: rename class
df31b03
from llama_index.core import SimpleDirectoryReader, Settings, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.tools import QueryEngineTool, RetrieverTool
class RAG:
    """Retrieval-augmented generation helper built on LlamaIndex.

    Typical usage: construct, call ``load_and_index_folder`` once to build a
    vector index over a document folder, then ``get_tools`` to obtain agent
    tools backed by that index.
    """

    def __init__(self, streaming=False, llm=None):
        """
        Args:
            streaming (bool): Whether the query engine should stream responses.
            llm: Optional LLM instance to install as the global LlamaIndex LLM
                when indexing. If None, the existing global setting is left alone.
        """
        self.streaming = streaming
        self.llm = llm
        # Populated by load_and_index_folder(); None means "not indexed yet".
        self.index = None

    def load_and_index_folder(self, folder_path, embedding_model="BAAI/bge-small-en-v1.5"):
        """
        Load and index documents from the specified folder path.

        Args:
            folder_path (str): The path to the folder containing documents.
            embedding_model (str): HuggingFace embedding model name to use for indexing.
        """
        # Load documents from disk.
        documents = SimpleDirectoryReader(folder_path).load_data()
        # Split documents into retrievable nodes.
        parser = SentenceSplitter()
        nodes = parser.get_nodes_from_documents(documents)
        # Configure the embedding model before building the index.
        Settings.embed_model = HuggingFaceEmbedding(model_name=embedding_model)
        # Fix: from_documents() expects Document objects and would re-run the
        # ingestion/chunking pipeline on them; already-parsed nodes go straight
        # to the VectorStoreIndex constructor.
        index = VectorStoreIndex(nodes)
        # Only override the global LLM when one was explicitly provided, so a
        # None value doesn't clobber an LLM configured elsewhere.
        if self.llm is not None:
            Settings.llm = self.llm
        self.index = index

    def get_tools(self):
        """
        Get tools for the RAG agent.

        Returns:
            list: A RetrieverTool and a QueryEngineTool backed by the index.

        Raises:
            ValueError: If no folder has been loaded and indexed yet.
        """
        if self.index is None:
            raise ValueError("Index not found. Please load and index a folder first.")
        retriever = self.index.as_retriever()
        # Fix: honor the streaming flag the caller configured in __init__
        # (it was previously stored but never used).
        query_engine = self.index.as_query_engine(streaming=self.streaming)
        tools = [
            RetrieverTool.from_defaults(
                retriever,
                name="RAG_Document_Retrieval",
                description="Useful for retrieving relevant documents based on user queries."
            ),
            QueryEngineTool.from_defaults(
                query_engine,
                name="RAG_Document_Query",
                description="Useful for answering questions by querying the indexed data directly."
            )
        ]
        return tools