import os

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.llms import HuggingFacePipeline
from langchain.text_splitter import MarkdownTextSplitter
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


def load_documents(folder_path):
    """Load every Markdown file in folder_path as a LangChain Document."""
    docs = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".md"):
            path = os.path.join(folder_path, filename)
            loader = TextLoader(path)
            docs.extend(loader.load())
    return docs


def split_documents(documents):
    """Split documents into overlapping chunks along Markdown structure."""
    splitter = MarkdownTextSplitter(chunk_size=500, chunk_overlap=100)
    return splitter.split_documents(documents)


def build_vector_store(docs):
    """Embed the chunks and persist them in a local Chroma database."""
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = Chroma.from_documents(
        docs, embeddings, persist_directory="chroma_db"
    )
    return vectorstore


def load_llm():
    """Load Falcon-7B-Instruct as a text-generation pipeline wrapped for LangChain."""
    model_id = "tiiuae/falcon-7b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, trust_remote_code=True, device_map="auto"
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
    )
    return HuggingFacePipeline(pipeline=pipe)


def initialize_rag_chain():
    """Wire loading, splitting, embedding, retrieval, and generation into one chain."""
    raw_docs = load_documents("knowledge")
    split_docs = split_documents(raw_docs)
    vectorstore = build_vector_store(split_docs)
    # Retrieve the 3 most similar chunks for each query.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    llm = load_llm()
    # "stuff" concatenates all retrieved chunks into a single prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    return qa_chain
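

# Example usage: a minimal sketch, assuming a "knowledge/" folder of Markdown
# files exists next to this script. The query string is a hypothetical
# placeholder; with return_source_documents=True the chain returns a dict
# containing both the answer ("result") and the retrieved chunks
# ("source_documents").
if __name__ == "__main__":
    qa_chain = initialize_rag_chain()
    response = qa_chain.invoke({"query": "What does the knowledge base cover?"})
    print(response["result"])
    # Show which files the answer was grounded in.
    for doc in response["source_documents"]:
        print("source:", doc.metadata.get("source"))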