import os
import shutil

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma  # Chroma moved here
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings, ChatOpenAI  # OpenAI moved here
from langchain_text_splitters import RecursiveCharacterTextSplitter

from src.agents.prompts import RAG_PROMPT


def build_openai_rag_chain_and_llm(
    pdf_path: str,
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 100,
    k: int = 3,
    model_name: str = "gpt-4",
    temperature: float = 0,
) -> tuple:
    """Build a retrieval-augmented QA chain over a single PDF.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks,
    indexed into a Chroma vector store using ``OpenAIEmbeddings``, and wired
    to a streaming ``ChatOpenAI`` model through ``RAG_PROMPT``.

    Side effect: a ``.chroma`` directory in the current working directory is
    deleted if it exists.

    Args:
        pdf_path: Path of the PDF file to index.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to the splitter.
        k: Value of the retriever's ``k`` search argument.
        model_name: Chat model identifier passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.

    Returns:
        A ``(rag_chain, llm)`` tuple: the chain produced by
        ``create_retrieval_chain`` and the ``ChatOpenAI`` instance it uses.
    """
    # Load and split documents
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = splitter.split_documents(documents)

    # Drop any `.chroma` directory left in the working directory before
    # building a new store. NOTE(review): presumably a leftover from earlier
    # persistent runs -- no persist directory is passed to Chroma below;
    # confirm whether this cleanup is still needed.
    if os.path.exists(".chroma"):
        shutil.rmtree(".chroma")

    vectorstore = Chroma.from_documents(texts, embedding=OpenAIEmbeddings())
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})

    # Build chain
    llm = ChatOpenAI(model_name=model_name, temperature=temperature, streaming=True)
    qa_chain = create_stuff_documents_chain(llm=llm, prompt=RAG_PROMPT)
    rag_chain = create_retrieval_chain(retriever, qa_chain)

    # The llm is returned alongside the chain so callers can use the same
    # model instance directly.
    return rag_chain, llm