# Source: LangGraph-Agent / app/rag/store.py
# Author: Pawan Mane — "Initial Changes" (commit 8986591)
"""
app/rag/store.py
─────────────────
CHECKPOINT 2 — RAG (Retrieval-Augmented Generation)
Builds a FAISS vector store from sample documents and exposes a single
`retrieve_context(query)` function used by the RAG graph node.
How RAG works:
1. Documents are split into chunks and embedded into vectors.
2. At query time the query is also embedded.
3. FAISS finds the chunks whose vectors are closest to the query vector.
4. Those chunks are injected into the LLM prompt as "context".
"""
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document
from app.config import settings
# ── Sample knowledge base ─────────────────────────────────────────────────
# Replace or extend this list with real documents / a document loader.
# In-memory demo corpus: one short fact per concept the agent covers.
# Replace with a real document loader for production use.
# NOTE(review): each Document is embedded whole — there is no chunk-splitting
# step here despite what the module docstring describes; confirm intended.
SAMPLE_DOCS: list[Document] = [
    Document(page_content="LangGraph is a library for building stateful, multi-actor LLM applications using graphs."),
    Document(page_content="RAG stands for Retrieval-Augmented Generation. It combines a retriever with an LLM."),
    Document(page_content="Guardrails are safety checks that prevent harmful or off-topic responses from AI systems."),
    Document(page_content="Human-in-the-Loop (HITL) pauses automation so a human can review or approve an action."),
    Document(page_content="Memory in AI agents allows them to remember past interactions within or across sessions."),
    Document(page_content="Tool calling allows LLMs to invoke external functions like calculators or APIs."),
    Document(page_content="Conditional routing directs a query to the most appropriate processing path."),
]
def build_vector_store(docs: list[Document] | None = None) -> FAISS:
    """
    Embed documents and load them into an in-memory FAISS index.

    Args:
        docs: Optional custom knowledge base. When ``None`` (the default),
            the module-level ``SAMPLE_DOCS`` corpus is used.

    Returns:
        A FAISS vector store ready for similarity search.
    """
    # Explicit `is None` check: the previous `docs or SAMPLE_DOCS` silently
    # substituted the sample corpus when a caller passed an empty list,
    # masking the caller's mistake instead of surfacing it.
    if docs is None:
        docs = SAMPLE_DOCS
    embeddings = HuggingFaceEmbeddings(model_name=settings.EMBEDDING_MODEL)
    return FAISS.from_documents(docs, embeddings)
# Build once at import time — reused across all requests.
# NOTE(review): this triggers embedding-model loading as a module-import side
# effect; confirm the startup cost is acceptable, or make it lazy if not.
_vector_store: FAISS = build_vector_store()
def retrieve_context(query: str, k: int | None = None) -> str:
    """
    Return the top-k most relevant document chunks for `query` as plain text.

    Args:
        query: Natural-language query to embed and search with.
        k: Number of chunks to retrieve. When ``None`` (the default),
            ``settings.RAG_TOP_K`` is used.

    Returns:
        The matching chunks' text, newline-separated (empty string if the
        search yields no results).
    """
    # Explicit `is None` check: the previous `k or settings.RAG_TOP_K`
    # silently ignored an explicit k=0, violating the `int | None` contract.
    top_k = settings.RAG_TOP_K if k is None else k
    results = _vector_store.similarity_search(query, k=top_k)
    return "\n".join(doc.page_content for doc in results)