codebase-agent / src /rag /answerer.py
AishaSurve's picture
Codebase Intelligence Agent: code-aware RAG + test-gen agent + eval
8e72e1f
Raw
History Blame Contribute Delete
2.7 kB
"""
Grounded answerer.
Takes the reranked code chunks and asks the LLM to answer the developer's
question using ONLY those snippets, citing file:line for every claim. The model
phrases the answer; the retrieved chunks ground it. Generation runs at
temperature=0 so answers are faithful and reproducible.
Returns:
{
"answer": "<text with file:line citations>",
"sources": [ {file, name, type, start_line, end_line, code}, ... ]
}
"""
import os
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv()
# System message sent as the first chat message on every request made by
# Answerer.answer. It restricts the model to the supplied snippets, requires a
# file:line-range citation for every claim, and fixes the exact sentence to use
# when the snippets do not contain the answer (the same sentence
# Answerer.answer returns itself when it is given no documents).
SYSTEM_PROMPT = """You are a precise code-analysis assistant for a codebase Q&A tool.
Answer the developer's question using ONLY the code snippets provided.
Rules:
1. Cite the file and line range for every claim, e.g. `app/core/security.py:38-44`.
2. Use ONLY the provided snippets. Never invent files, functions, or behavior.
3. If the snippets do not contain the answer, reply exactly:
"I couldn't find that in the retrieved code."
4. Be concise. Explain what the relevant code does and where it lives.
"""
class Answerer:
    """Answer a developer's question from retrieved code chunks via an LLM.

    The retrieved chunks are rendered into a numbered context block, sent to
    the chat-completions endpoint together with ``SYSTEM_PROMPT`` at
    ``temperature=0``, and the model's reply is returned alongside the chunks
    that were shown to it.
    """

    # Canned reply used whenever there is nothing to answer from. The wording
    # is the same sentence SYSTEM_PROMPT instructs the model to reply with.
    NOT_FOUND_ANSWER = "I couldn't find that in the retrieved code."

    # Chunk fields echoed back to the caller under "sources", in this order.
    _SOURCE_FIELDS = ("file", "name", "type", "start_line", "end_line", "code")

    def __init__(self, model: str = "gpt-4.1-mini") -> None:
        """Create the OpenAI client and remember which model to query.

        Args:
            model: Chat model name passed to every completion request.
        """
        # The key is read from the OPENAI_API_KEY environment variable.
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.model = model

    def _format_context(self, documents) -> str:
        """Render chunks as numbered, citable snippets for the prompt.

        Each chunk becomes a header line of the form
        ``[i] file:start_line-end_line (type name)`` (``i`` starts at 1)
        followed by the chunk's code. Chunks are separated by a blank line.

        Args:
            documents: Iterable of chunk dicts with the keys ``file``,
                ``start_line``, ``end_line``, ``type``, ``name`` and ``code``.

        Returns:
            The joined context string; ``""`` when ``documents`` is empty.

        Raises:
            KeyError: If a chunk is missing one of the keys above.
        """
        blocks = []
        for i, d in enumerate(documents, 1):
            header = (f"[{i}] {d['file']}:{d['start_line']}-{d['end_line']} "
                      f"({d['type']} {d['name']})")
            blocks.append(f"{header}\n{d['code']}")
        return "\n\n".join(blocks)

    @staticmethod
    def _extract_text(response) -> str:
        """Return the first choice's text, or the canned not-found reply.

        ``message.content`` is nullable in the chat-completions response
        (e.g. when the model refuses), and callers of ``answer`` expect the
        "answer" field to be text, so ``None`` is replaced by
        ``NOT_FOUND_ANSWER`` instead of being passed through.
        """
        content = response.choices[0].message.content
        if content is None:
            return Answerer.NOT_FOUND_ANSWER
        return content

    def answer(self, query, results) -> dict:
        """Answer ``query`` using only the given retrieval results.

        Args:
            query: The developer's question, inserted verbatim into the prompt.
            results: Iterable of either reranked results (dicts carrying the
                chunk under the ``"document"`` key) or raw chunk dicts; the
                two shapes may be mixed.

        Returns:
            ``{"answer": <str>, "sources": [<chunk fields>, ...]}``. With no
            results the canned not-found reply and an empty source list are
            returned without calling the model.

        Raises:
            KeyError: If a chunk is missing one of the expected fields.
            Errors raised by the OpenAI client are not caught here.
        """
        # Accept reranked results ({"document": chunk}) OR raw chunk dicts.
        documents = [
            r["document"] if isinstance(r, dict) and "document" in r else r
            for r in results
        ]
        if not documents:
            # Nothing was retrieved: skip the API call entirely.
            return {"answer": self.NOT_FOUND_ANSWER, "sources": []}

        context = self._format_context(documents)
        user_prompt = (f"Code snippets:\n\n{context}\n\n"
                       f"Question: {query}\n\nAnswer:")
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0,
        )

        # Return exactly the chunks the model saw, limited to the known
        # fields so extra keys on a chunk are not leaked to the caller.
        sources = [
            {field: d[field] for field in self._SOURCE_FIELDS}
            for d in documents
        ]
        return {"answer": self._extract_text(response), "sources": sources}