# ARGOBot / src/chains/qa_chain_openai.py
# mrmtaeb's picture
# Update src/chains/qa_chain_openai.py
# adcf055 verified
import os
import shutil
import uuid

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma  # Chroma moved here
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings, ChatOpenAI  # OpenAI moved here
from langchain_text_splitters import RecursiveCharacterTextSplitter

from src.agents.prompts import RAG_PROMPT
def build_openai_rag_chain_and_llm(pdf_path: str):
    """Build a retrieval-augmented QA chain over a single PDF.

    The PDF is loaded with ``PyPDFLoader``, split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=100``), embedded with ``OpenAIEmbeddings`` and indexed
    in a Chroma vector store. A retriever returning the top 3 matches is
    combined with a "stuff documents" chain driven by ``RAG_PROMPT``.

    Args:
        pdf_path: Path to the PDF file to index.

    Returns:
        A ``(rag_chain, llm)`` tuple: the retrieval chain and the
        ``ChatOpenAI`` instance that powers it.
    """
    # Load the PDF and split it into overlapping chunks.
    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(pages)

    # Remove any leftover on-disk ".chroma" directory in the working directory.
    if os.path.exists(".chroma"):
        shutil.rmtree(".chroma")

    # Use a fresh collection for every build. Without an explicit name, every
    # call writes to the same default collection of the in-process Chroma
    # client, so chunks from a previously indexed PDF could be retrieved
    # alongside the current one.
    vectorstore = Chroma.from_documents(
        chunks,
        embedding=OpenAIEmbeddings(),
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Build the chain: retrieved chunks are stuffed into RAG_PROMPT for the LLM.
    llm = ChatOpenAI(model_name="gpt-4", temperature=0, streaming=True)
    qa_chain = create_stuff_documents_chain(llm=llm, prompt=RAG_PROMPT)
    rag_chain = create_retrieval_chain(retriever, qa_chain)
    return rag_chain, llm