|
|
from langchain.vectorstores import Chroma |
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain.document_loaders import TextLoader |
|
|
from langchain_huggingface import HuggingFacePipeline |
|
|
from langchain.chains import RetrievalQA |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
|
|
|
|
|
def load_documents(file_path: str):
    """Read the file at ``file_path`` through a ``TextLoader``.

    Args:
        file_path: Path of the text file to load.

    Returns:
        Whatever ``TextLoader.load()`` produces for that file.
    """
    return TextLoader(file_path).load()
|
|
|
|
|
def split_documents(documents, chunk_size=500, chunk_overlap=50):
    """Break ``documents`` into smaller chunks.

    Args:
        documents: Documents to be split.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = text_splitter.split_documents(documents)
    return chunks
|
|
|
|
|
def create_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Construct a ``HuggingFaceEmbeddings`` object for ``model_name``.

    Args:
        model_name: Name of the embedding model to use.

    Returns:
        The newly constructed ``HuggingFaceEmbeddings`` instance.
    """
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)
    return embedding_model
|
|
|
|
|
def setup_vector_store(docs, embeddings, persist_directory="./chroma_db"):
    """Build a Chroma store from ``docs`` and expose it as a retriever.

    Args:
        docs: Documents to index.
        embeddings: Embedding object handed to Chroma alongside the documents.
        persist_directory: Directory passed through to Chroma as its
            ``persist_directory``.

    Returns:
        The retriever obtained from the store's ``as_retriever()``.
    """
    vector_store = Chroma.from_documents(
        docs,
        embeddings,
        persist_directory=persist_directory,
    )
    retriever = vector_store.as_retriever()
    return retriever
|
|
|
|
|
def create_qa_chain(retriever, model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
    """Build a RetrievalQA chain backed by a locally loaded causal LM.

    Loads the tokenizer and model named by ``model_name``, wraps them in a
    ``text-generation`` pipeline, and plugs that pipeline into a "stuff"
    RetrievalQA chain configured to return its source documents.

    Args:
        retriever: Retriever the chain uses to fetch context documents.
        model_name: Model identifier passed to ``from_pretrained`` for both
            the tokenizer and the model.

    Returns:
        The configured ``RetrievalQA`` chain.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
    )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        # temperature and top_p are sampling-only settings; without
        # do_sample=True generation defaults to greedy decoding and they
        # are ignored (transformers emits a warning about it).
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=True,
    )
    return qa_chain
|
|
|