|
|
from llama_index.core import SimpleDirectoryReader, Settings, VectorStoreIndex |
|
|
from llama_index.core.node_parser import SentenceSplitter |
|
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding |
|
|
from llama_index.core.tools import QueryEngineTool, RetrieverTool |
|
|
|
|
|
|
|
|
class RAG:
    """Minimal retrieval-augmented-generation helper built on llama_index.

    Typical usage: construct, call :meth:`load_and_index_folder` once, then
    hand the result of :meth:`get_tools` to an agent.
    """

    def __init__(self, streaming=False, llm=None):
        """Store configuration for later indexing/querying.

        Args:
            streaming (bool): Streaming flag kept for callers; not consumed
                by any code visible in this file — presumably read by an
                agent/query layer elsewhere (TODO confirm).
            llm: The LLM instance to install into ``Settings.llm`` when a
                folder is indexed. May be ``None``.
        """
        self.streaming = streaming
        self.llm = llm
        # Populated by load_and_index_folder(); None until then.
        self.index = None

    def load_and_index_folder(self, folder_path, embedding_model="BAAI/bge-small-en-v1.5"):
        """Load and index documents from the specified folder path.

        Side effects: mutates the global ``Settings.embed_model`` and
        ``Settings.llm``, and stores the built index on ``self.index``.

        Args:
            folder_path (str): The path to the folder containing documents.
            embedding_model (str): The embedding model to use for indexing.
        """
        # Configure global models BEFORE building the index so the index
        # build actually uses the requested embedding model.
        Settings.embed_model = HuggingFaceEmbedding(model_name=embedding_model)
        Settings.llm = self.llm

        reader = SimpleDirectoryReader(folder_path)
        documents = reader.load_data()

        parser = SentenceSplitter()
        nodes = parser.get_nodes_from_documents(documents)

        # BUG FIX: the original called VectorStoreIndex.from_documents(nodes).
        # from_documents() expects Document objects and runs its own node
        # parser over them, so passing pre-split nodes both misuses the API
        # and discards the SentenceSplitter work above. Build directly from
        # the nodes instead.
        self.index = VectorStoreIndex(nodes)

    def get_tools(self):
        """Get tools for the RAG agent.

        Returns:
            list: A RetrieverTool and a QueryEngineTool built from the
            current index.

        Raises:
            ValueError: If no folder has been loaded and indexed yet.
        """
        if getattr(self, 'index', None) is None:
            raise ValueError("Index not found. Please load and index a folder first.")

        retriever = self.index.as_retriever()
        query_engine = self.index.as_query_engine()

        tools = [
            RetrieverTool.from_defaults(
                retriever,
                name="RAG_Document_Retrieval",
                description="Useful for retrieving relevant documents based on user queries."
            ),
            QueryEngineTool.from_defaults(
                query_engine,
                name="RAG_Document_Query",
                description="Useful for answering questions by querying the indexed data directly."
            )
        ]

        return tools
|
|
|
|
|
|
|
|
|
|
|
|