Spaces:
Sleeping
Sleeping
| from langchain_chroma import Chroma | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.chains import create_history_aware_retriever, create_retrieval_chain | |
| from langchain.chains.combine_documents import create_stuff_documents_chain | |
| from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder | |
| from src.agents.prompts import RAG_PROMPT | |
def build_gemini_rag_chain(
    pdf_path: str,
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    embedding_model: str = "models/embedding-001",
    chat_model: str = "gemini-2.0-flash",
):
    """Build a history-aware retrieval-augmented generation chain over a PDF.

    Loads the PDF at ``pdf_path``, splits it into overlapping chunks, embeds
    the chunks into an in-memory Chroma vector store with Gemini embeddings,
    and wires a Gemini chat model into a conversational RAG chain whose
    retriever rewrites follow-up questions into standalone queries using the
    chat history.

    Args:
        pdf_path: Filesystem path to the PDF to index.
        chunk_size: Maximum characters per text chunk (default matches the
            original hard-coded value of 1000).
        chunk_overlap: Characters of overlap between consecutive chunks
            (default matches the original hard-coded value of 200).
        embedding_model: Gemini embedding model identifier.
        chat_model: Gemini chat model identifier.

    Returns:
        A LangChain retrieval chain. Invoke it with
        ``{"input": question, "chat_history": [...]}``; the answer is under
        the ``"answer"`` key of the result.
    """
    # Load and split documents into overlapping chunks so retrieval can
    # return focused passages while preserving context across boundaries.
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    texts = splitter.split_documents(documents)

    # Create an in-memory vector store; NOTE(review): no persist_directory is
    # set, so the index is rebuilt on every call — confirm that is intended.
    vectorstore = Chroma.from_documents(
        texts, embedding=GoogleGenerativeAIEmbeddings(model=embedding_model)
    )
    retriever = vectorstore.as_retriever()

    # Prompt that condenses (chat history + latest question) into a
    # standalone question, so retrieval works for follow-up turns.
    contextualize_q_prompt = ChatPromptTemplate.from_messages([
        ("system", "Given a chat history and the latest user question, rewrite it as a standalone question."),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ])
    # NOTE(review): convert_system_message_to_human is deprecated and ignored
    # by recent langchain-google-genai releases; kept for compatibility with
    # the pinned version — confirm against requirements and drop if unused.
    model = ChatGoogleGenerativeAI(
        model=chat_model, convert_system_message_to_human=True
    )
    history_aware_retriever = create_history_aware_retriever(
        model, retriever, contextualize_q_prompt
    )

    # Stuff retrieved documents into RAG_PROMPT and answer with the model,
    # then chain it behind the history-aware retriever.
    qa_chain = create_stuff_documents_chain(model, RAG_PROMPT)
    return create_retrieval_chain(history_aware_retriever, qa_chain)