ninja-code-guard / app /context /retriever.py
NinjainPJs's picture
Fix all ruff lint issues — 0 errors, 92 tests passing
b9da50c
"""
RAG Context Retriever
======================
Retrieves relevant code context from ChromaDB based on the PR diff.
This is the "R" in RAG (Retrieval-Augmented Generation).
How retrieval works:
1. Take the PR diff text as a query
2. Embed the query using the same model used for indexing
3. Search ChromaDB for the most similar code chunks
4. Return the top-k chunks as additional context for the LLM
Why RAG for code review?
The PR diff only shows CHANGED lines. But understanding a change often
requires seeing RELATED code:
- If a function is called from 5 places, changing it affects all callers
- If a variable is validated in another file, the validation matters here
- If the same pattern exists elsewhere, inconsistency is a style issue
RAG gives the agents "peripheral vision" — they see not just the change,
but the surrounding codebase context that makes the change meaningful.
"""
from __future__ import annotations
import structlog
from app.context.embedder import embed_texts
from app.context.indexer import _get_chroma_client
logger = structlog.get_logger()
async def retrieve_context(
    collection_name: str,
    query_text: str,
    top_k: int = 5,
) -> str:
    """
    Retrieve relevant code context from ChromaDB.

    Embeds the query, asks the named collection for its nearest code chunks,
    drops weak matches, and renders the survivors as a Markdown section that
    can be appended to an LLM prompt. This function is best-effort: every
    failure is logged and turned into an empty string rather than raised.

    Args:
        collection_name: The ChromaDB collection to search
        query_text: The PR diff or a specific query. Only the first 5000
            characters are embedded.
        top_k: Maximum number of chunks to request (default: 5). Values
            below 1 yield no context.

    Returns:
        A formatted string of relevant code chunks to include in the LLM prompt.
        Returns empty string if retrieval fails or no results found.
    """
    # NOTE(review): embed_texts and the Chroma client calls below are invoked
    # without await; if they are blocking they will stall the event loop for
    # the duration of the call — confirm, and consider asyncio.to_thread.
    try:
        # Nothing can be returned for a non-positive top_k, and passing it on
        # as n_results would only surface as a misleading "retrieval failed".
        if top_k < 1:
            return ""

        client = _get_chroma_client()

        # Check if collection exists
        try:
            collection = client.get_collection(name=collection_name)
        except Exception:
            logger.debug("Collection not found — no RAG context", collection=collection_name)
            return ""

        # Read the size once: it is needed both for the empty check and to cap
        # n_results, and a second call is an extra round-trip to the store.
        chunk_count = collection.count()
        if chunk_count == 0:
            return ""

        # Embed the query
        query_embeddings = embed_texts([query_text[:5000]])  # Cap query size
        if not query_embeddings:
            return ""

        # Search for similar code chunks
        results = collection.query(
            query_embeddings=query_embeddings,
            n_results=min(top_k, chunk_count),
            include=["documents", "metadatas", "distances"],
        )
        if not results or not results["documents"] or not results["documents"][0]:
            return ""

        # Format results as context for the LLM. Index 0 selects the hits for
        # the single query embedding that was submitted.
        context_parts = ["## Related Code Context (from repository)\n"]
        for doc, metadata, distance in zip(
            results["documents"][0],
            results["metadatas"][0],
            results["distances"][0],
            strict=False,
        ):
            if doc is None:
                continue  # No stored text for this hit — nothing to show

            # Chroma reports None for records stored without metadata; fall
            # back to placeholders instead of failing the whole retrieval.
            metadata = metadata or {}
            filepath = metadata.get("filepath", "unknown")
            start = metadata.get("start_line", "?")
            end = metadata.get("end_line", "?")

            # Lower distance = more similar. Rough conversion to a 0-1 score.
            # NOTE(review): 1 - d/2 equals cosine similarity only if the
            # collection uses squared-L2 distance over unit-normalized
            # embeddings — confirm against the indexer/embedder settings.
            similarity = max(0, 1 - distance / 2)
            if similarity < 0.3:
                continue  # Skip low-relevance results

            context_parts.append(
                f"### {filepath} (lines {start}-{end}, relevance: {similarity:.0%})\n"
                f"```\n{doc}\n```\n"
            )

        if len(context_parts) == 1:  # Only the header, no results
            return ""

        context = "\n".join(context_parts)
        logger.info(
            "Retrieved RAG context",
            collection=collection_name,
            chunks_returned=len(context_parts) - 1,
        )
        return context
    except Exception as e:
        # Deliberate catch-all: missing RAG context must never break a review.
        logger.warning("RAG retrieval failed", collection=collection_name, error=str(e))
        return ""