File size: 2,703 Bytes
8e72e1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
Grounded answerer.

Takes the reranked code chunks and asks the LLM to answer the developer's
question using ONLY those snippets, citing file:line for every claim. The model
phrases the answer; the retrieved chunks ground it. Generation runs at
temperature=0 to keep answers focused and as repeatable as the API allows.

Answerer.answer() returns:
    {
      "answer":  "<text with file:line citations>",
      "sources": [ {file, name, type, start_line, end_line, code}, ... ]
    }
"""
import os

from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file (if one exists) into the process
# environment, so OPENAI_API_KEY can be supplied that way.
load_dotenv()

# System message sent with every chat request. It restricts the model to the
# supplied snippets, requires a file:line citation for each claim, and fixes
# the exact sentence to reply with when the snippets do not hold the answer.
SYSTEM_PROMPT = """You are a precise code-analysis assistant for a codebase Q&A tool.

Answer the developer's question using ONLY the code snippets provided.

Rules:
1. Cite the file and line range for every claim, e.g. `app/core/security.py:38-44`.
2. Use ONLY the provided snippets. Never invent files, functions, or behavior.
3. If the snippets do not contain the answer, reply exactly:
   "I couldn't find that in the retrieved code."
4. Be concise. Explain what the relevant code does and where it lives.
"""


class Answerer:
    """Answer a developer question from retrieved code chunks via a chat model.

    Attributes:
        client: OpenAI client created with the ``OPENAI_API_KEY`` environment
            variable.
        model: Name of the chat model used for generation.
    """

    # Reply used when there is nothing to ground an answer on. The wording is
    # the same sentence the system prompt instructs the model to use.
    _NOT_FOUND = "I couldn't find that in the retrieved code."

    # Chunk fields copied verbatim into every entry of the "sources" list.
    _SOURCE_FIELDS = ("file", "name", "type", "start_line", "end_line", "code")

    def __init__(self, model="gpt-4.1-mini"):
        """Create the OpenAI client and remember which chat model to call.

        Args:
            model: Chat model name passed to the completions endpoint.
        """
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.model = model

    def _format_context(self, documents):
        """Render chunks as numbered, citation-ready text blocks.

        Each block is a header line ``[i] file:start-end  (type name)``
        followed by the chunk's code; numbering starts at 1 and blocks are
        separated by a blank line.

        Args:
            documents: Iterable of chunk dicts with the keys ``file``,
                ``start_line``, ``end_line``, ``type``, ``name`` and ``code``.

        Returns:
            The joined context string (empty when there are no documents).
        """
        blocks = []
        for i, d in enumerate(documents, 1):
            header = (f"[{i}] {d['file']}:{d['start_line']}-{d['end_line']}  "
                      f"({d['type']} {d['name']})")
            blocks.append(f"{header}\n{d['code']}")
        return "\n\n".join(blocks)

    def answer(self, query, results):
        """Answer ``query`` using only the supplied code chunks.

        Args:
            query: The developer's question.
            results: Iterable of reranked results (dicts holding the chunk
                under ``"document"``) or raw chunk dicts; the two shapes may
                be mixed. ``None`` is treated the same as an empty iterable.

        Returns:
            ``{"answer": <text>, "sources": [<chunk fields>, ...]}``. When no
            chunks are supplied, the model is not called and the fixed
            "not found" sentence is returned with an empty source list.

        Raises:
            KeyError: If a chunk dict lacks one of the fields read here.
        """
        # A caller whose retrieval step produced nothing may pass None; route
        # that to the same "nothing retrieved" path as an empty list.
        if results is None:
            results = ()

        # Accept reranked results ({"document": chunk}) OR raw chunk dicts.
        documents = [
            r["document"] if isinstance(r, dict) and "document" in r else r
            for r in results
        ]

        if not documents:
            return {"answer": self._NOT_FOUND, "sources": []}

        context = self._format_context(documents)
        user_prompt = (f"Code snippets:\n\n{context}\n\n"
                       f"Question: {query}\n\nAnswer:")

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0,
        )

        sources = [
            {field: d[field] for field in self._SOURCE_FIELDS}
            for d in documents
        ]

        # The API declares message content as optional; fall back to the
        # fixed sentence so "answer" is always text.
        text = response.choices[0].message.content
        if text is None:
            text = self._NOT_FOUND

        return {"answer": text, "sources": sources}