Spaces:
Sleeping
Sleeping
| from langchain_chroma import Chroma # Chroma moved here | |
| from langchain_openai import OpenAIEmbeddings, ChatOpenAI # OpenAI moved here | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain.chains import create_retrieval_chain | |
| from langchain.chains.combine_documents import create_stuff_documents_chain | |
| from src.agents.prompts import RAG_PROMPT | |
| import os | |
| import shutil | |
def build_openai_rag_chain_and_llm(pdf_path: str):
    """Build a streaming RAG chain over a single PDF using OpenAI models.

    Loads the PDF at ``pdf_path``, splits it into overlapping chunks,
    embeds the chunks into a Chroma vector store, and wires a retrieval
    chain (top-3 chunks per query) around a streaming GPT-4 chat model.

    Args:
        pdf_path: Filesystem path to the PDF to index.

    Returns:
        A ``(rag_chain, llm)`` tuple: the retrieval chain, and the
        underlying ``ChatOpenAI`` instance (exposed separately so callers
        can attach streaming callbacks to it).
    """
    # Load and split the document. 1000-char chunks with 100-char overlap
    # keep sentence continuity across chunk boundaries for retrieval.
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = splitter.split_documents(documents)

    # Remove any stale on-disk Chroma index so each build starts clean
    # (avoids mixing chunks from a previously indexed PDF).
    if os.path.exists(".chroma"):
        shutil.rmtree(".chroma")

    vectorstore = Chroma.from_documents(texts, embedding=OpenAIEmbeddings())
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Build the chain. Note: `model=` is the current ChatOpenAI parameter;
    # `model_name=` is a deprecated alias in langchain_openai.
    llm = ChatOpenAI(model="gpt-4", temperature=0, streaming=True)
    qa_chain = create_stuff_documents_chain(llm=llm, prompt=RAG_PROMPT)
    rag_chain = create_retrieval_chain(retriever, qa_chain)
    return rag_chain, llm