Spaces:

AishaSurve
/

codebase-agent

Running

File size: 2,703 Bytes

8e72e1f

"""
Grounded answerer.

Takes the reranked code chunks and asks the LLM to answer the developer's
question using ONLY those snippets, citing file:line for every claim. The model
phrases the answer; the retrieved chunks ground it. Generation runs at
temperature=0 to keep answers as deterministic as the API allows; grounding
itself comes from the prompt rules and the supplied snippets.

Returns:
    {
      "answer":  "<text with file:line citations>",
      "sources": [ {file, name, type, start_line, end_line, code}, ... ]
    }
"""
import os

from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a .env file (when one is present) into the process
# environment at import time; Answerer.__init__ reads OPENAI_API_KEY from there
# via os.getenv.
load_dotenv()

# Instructions sent as the "system" message of every chat completion request
# made by Answerer.answer. Rule 3 fixes the exact sentence the model must use
# when the snippets do not contain the answer; Answerer.answer returns that
# same sentence itself when it is given no documents at all.
SYSTEM_PROMPT = """You are a precise code-analysis assistant for a codebase Q&A tool.

Answer the developer's question using ONLY the code snippets provided.

Rules:
1. Cite the file and line range for every claim, e.g. `app/core/security.py:38-44`.
2. Use ONLY the provided snippets. Never invent files, functions, or behavior.
3. If the snippets do not contain the answer, reply exactly:
   "I couldn't find that in the retrieved code."
4. Be concise. Explain what the relevant code does and where it lives.
"""


class Answerer:
    """Answer a developer question from retrieved code chunks via a chat model.

    The chunks are rendered into a numbered context block, sent to the OpenAI
    chat completions endpoint together with ``SYSTEM_PROMPT``, and the model's
    reply is returned alongside the chunks that were used as sources.
    """

    # Reply used whenever there is nothing to answer from. The wording is the
    # same sentence SYSTEM_PROMPT instructs the model to use, so callers see a
    # single "not found" message regardless of which side produced it.
    _NOT_FOUND = "I couldn't find that in the retrieved code."

    def __init__(self, model="gpt-4.1-mini"):
        """Create the OpenAI client and remember which model to query.

        Args:
            model: Name of the chat model passed to the completions endpoint.
        """
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.model = model

    def _format_context(self, documents) -> str:
        """Render chunks as numbered ``file:start-end  (type name)`` blocks.

        Args:
            documents: Iterable of chunk dicts, each providing the keys
                ``file``, ``start_line``, ``end_line``, ``type``, ``name``
                and ``code``.

        Returns:
            One block per chunk (header line followed by the chunk's code),
            numbered from 1 and separated by a blank line.

        Raises:
            KeyError: If a chunk is missing one of the required keys.
        """
        blocks = []
        for i, d in enumerate(documents, 1):
            header = (f"[{i}] {d['file']}:{d['start_line']}-{d['end_line']}  "
                      f"({d['type']} {d['name']})")
            blocks.append(f"{header}\n{d['code']}")
        return "\n\n".join(blocks)

    def _answer_text(self, response) -> str:
        """Extract the reply text from a chat completion response.

        The message content of a completion may be absent (``None``) or blank,
        and the list of choices may be empty; in all of those cases the
        not-found sentence is returned so that ``"answer"`` is always a
        non-empty string.

        Args:
            response: Object exposing ``choices[0].message.content``.

        Returns:
            The model's reply unchanged, or the not-found sentence when the
            response carries no usable text.
        """
        choices = response.choices
        if not choices:
            return self._NOT_FOUND
        content = choices[0].message.content
        if content is None or not content.strip():
            return self._NOT_FOUND
        return content

    def answer(self, query, results) -> dict:
        """Answer ``query`` using only the supplied retrieval results.

        Args:
            query: The developer's question.
            results: Iterable of reranked results (dicts holding the chunk
                under the ``"document"`` key) or of raw chunk dicts. ``None``
                is treated the same as an empty collection.

        Returns:
            A dict with ``"answer"`` (the reply text) and ``"sources"`` (one
            dict per chunk with ``file``, ``name``, ``type``, ``start_line``,
            ``end_line`` and ``code``). When there are no chunks, the answer
            is the not-found sentence, the source list is empty, and no
            request is sent.

        Raises:
            KeyError: If a chunk is missing one of the required keys.
        """
        # accept reranked results ({"document": chunk}) OR raw chunk dicts;
        # `results or []` lets a missing (None) result set take the empty path
        documents = [r["document"] if isinstance(r, dict) and "document" in r else r
                     for r in (results or [])]

        if not documents:
            return {"answer": self._NOT_FOUND, "sources": []}

        context = self._format_context(documents)
        user_prompt = (f"Code snippets:\n\n{context}\n\n"
                       f"Question: {query}\n\nAnswer:")

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0,
        )

        # Copy only the documented fields so extra chunk metadata is not
        # passed through to callers.
        sources = [{
            "file": d["file"],
            "name": d["name"],
            "type": d["type"],
            "start_line": d["start_line"],
            "end_line": d["end_line"],
            "code": d["code"],
        } for d in documents]

        return {"answer": self._answer_text(response), "sources": sources}