# ERMA / rag_engine.py
# RAG engine: loads Markdown knowledge files, builds a Chroma vector
# store over MiniLM embeddings, and wires a RetrievalQA chain around a
# Falcon-7B-Instruct text-generation pipeline.
# (Uploaded by mfirat007, revision b39ff93.)
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import MarkdownTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
def load_documents(folder_path):
    """Load every Markdown (.md) file in *folder_path* as LangChain documents.

    Args:
        folder_path: Directory to scan (non-recursive); only files whose
            name ends in ".md" are read.

    Returns:
        A flat list of documents produced by ``TextLoader.load()``.
    """
    loaded = []
    for entry in os.listdir(folder_path):
        # Skip anything that is not a Markdown knowledge file.
        if not entry.endswith(".md"):
            continue
        full_path = os.path.join(folder_path, entry)
        loaded.extend(TextLoader(full_path).load())
    return loaded
def split_documents(documents, chunk_size=500, chunk_overlap=100):
    """Split documents into overlapping, Markdown-aware chunks.

    Args:
        documents: Iterable of LangChain documents to split.
        chunk_size: Maximum characters per chunk (default 500, as before).
        chunk_overlap: Characters shared between adjacent chunks
            (default 100, as before).

    Returns:
        The list of chunked documents from ``MarkdownTextSplitter``.
    """
    # Chunking parameters were hard-coded; exposing them keeps the old
    # behavior by default while letting callers tune retrieval granularity.
    splitter = MarkdownTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(documents)
def build_vector_store(docs, embedding_model="sentence-transformers/all-MiniLM-L6-v2", persist_directory="chroma_db"):
    """Embed *docs* and persist them in a Chroma vector store.

    Args:
        docs: Pre-chunked LangChain documents to index.
        embedding_model: HuggingFace sentence-embedding model name
            (default unchanged: all-MiniLM-L6-v2).
        persist_directory: On-disk location for the Chroma database
            (default unchanged: "chroma_db").

    Returns:
        The populated ``Chroma`` vector store.
    """
    # Model and storage path were hard-coded; defaults preserve the
    # original behavior for existing callers.
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    vectorstore = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    return vectorstore
def load_llm(model_id="tiiuae/falcon-7b-instruct"):
    """Build a text-generation LLM wrapped for LangChain.

    Args:
        model_id: HuggingFace model repo to load (default unchanged:
            Falcon-7B-Instruct). Loaded with ``trust_remote_code=True``
            and ``device_map="auto"`` as before.

    Returns:
        A ``HuggingFacePipeline`` around a text-generation pipeline with
        ``max_new_tokens=512`` and ``temperature=0.7``.
    """
    # NOTE(review): trust_remote_code executes repo-provided code — only
    # use model IDs from trusted sources.
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, device_map="auto")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
    )
    return HuggingFacePipeline(pipeline=pipe)
def initialize_rag_chain(knowledge_dir="knowledge", k=3):
    """Assemble the full RAG pipeline: load → split → embed → retrieve → QA.

    Args:
        knowledge_dir: Folder of Markdown knowledge files
            (default unchanged: "knowledge").
        k: Number of chunks the retriever returns per query
            (default unchanged: 3).

    Returns:
        A ``RetrievalQA`` chain (chain_type="stuff") that also returns
        the source documents it retrieved.
    """
    # The knowledge folder and retrieval depth were hard-coded; the
    # defaults keep existing call sites working unchanged.
    raw_docs = load_documents(knowledge_dir)
    split_docs = split_documents(raw_docs)
    vectorstore = build_vector_store(split_docs)
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    llm = load_llm()
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    return qa_chain