from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


def load_documents(file_path: str):
    """Loads documents from a specified file path."""
    loader = TextLoader(file_path)
    return loader.load()


def split_documents(documents, chunk_size=500, chunk_overlap=50):
    """Splits documents into overlapping chunks for embedding."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return splitter.split_documents(documents)


def create_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Creates a HuggingFace sentence-embedding model."""
    return HuggingFaceEmbeddings(model_name=model_name)


def setup_vector_store(docs, embeddings, persist_directory="./chroma_db"):
    """Builds a persistent Chroma vector store and returns a retriever over it."""
    db = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    return db.as_retriever()


def create_qa_chain(retriever, model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
    """Creates a RetrievalQA chain backed by a local HuggingFace model."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,  # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=True,  # return the retrieved chunks alongside the answer
    )
    return qa_chain
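

# --- Usage sketch (assumption, not part of the original module) ---
# A minimal example of wiring the helpers above into an end-to-end pipeline.
# The path ./docs/sample.txt is a hypothetical placeholder; point it at any
# plain-text file you want to query.
if __name__ == "__main__":
    documents = load_documents("./docs/sample.txt")  # hypothetical path
    chunks = split_documents(documents)
    embeddings = create_embeddings()
    retriever = setup_vector_store(chunks, embeddings)
    qa_chain = create_qa_chain(retriever)

    # RetrievalQA expects the question under the "query" key and, with
    # return_source_documents=True, returns "result" plus "source_documents".
    response = qa_chain.invoke({"query": "What is this document about?"})
    print(response["result"])
    for doc in response["source_documents"]:
        print(doc.metadata)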