# rag_chain.py
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub


def setup_rag_chain(docs):
    # Embed the documents with a small sentence-transformers model.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Build an in-memory Chroma index over the documents and expose it as a retriever.
    vectorstore = Chroma.from_documents(docs, embedding=embeddings)
    retriever = vectorstore.as_retriever()

    # Replace this with your own hosted LLaMA 3.1 if needed.
    # The HF Inference API expects "max_new_tokens" for generation length, not "max_tokens".
    llm = HuggingFaceHub(
        repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
        model_kwargs={"temperature": 0.3, "max_new_tokens": 512},
    )

    # Wire the retriever and LLM into a RetrievalQA chain (default "stuff" chain type).
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return qa_chain
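

# Minimal usage sketch: it assumes a plain-text corpus at the hypothetical path
# "data/notes.txt" and a valid HUGGINGFACEHUB_API_TOKEN set in the environment;
# swap in whatever loader, chunking parameters, and query fit your own data.
if __name__ == "__main__":
    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # Load and chunk the source file before indexing it in Chroma.
    raw_docs = TextLoader("data/notes.txt").load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.split_documents(raw_docs)

    chain = setup_rag_chain(docs)
    print(chain.run("What are the key points in the notes?"))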