File size: 3,321 Bytes
8e72e1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""
Agent tools.

Two callable tools the LLM can invoke to gather context before writing tests:
  - search_code(query)      : semantic+keyword search over the repo
  - get_definition(name)    : fetch the full source of a function/class by name

Both are thin wrappers over the same retrieval/index the Q&A side uses. The
CodeTools object holds the live index so the tool functions can reach it.
"""

# Tool schemas in the OpenAI function-calling format. Both tools take exactly
# one required string argument, so each schema is expanded from a
# (tool name, tool description, argument name, argument description) row.
TOOL_SCHEMAS = [
    {
        "type": "function",
        "function": {
            "name": tool_name,
            "description": tool_summary,
            "parameters": {
                "type": "object",
                "properties": {
                    arg_name: {"type": "string", "description": arg_summary},
                },
                "required": [arg_name],
            },
        },
    }
    for tool_name, tool_summary, arg_name, arg_summary in (
        (
            "search_code",
            "Search the repository for code relevant to a query. "
            "Returns matching definitions with their file and line range.",
            "query",
            "what to look for",
        ),
        (
            "get_definition",
            "Return the full source code of a function or class by its name "
            "(e.g. 'login_user' or 'UserService.create_token').",
            "name",
            "function/class name",
        ),
    )
]


class CodeTools:
    """Hold the live index and expose the tool implementations.

    Attributes:
        chunks: Iterable of chunk dicts; each one is read via the keys
            ``name``, ``file``, ``start_line``, ``end_line`` and ``code``.
        embedder: Object providing ``create_embeddings(list_of_texts)``.
        hybrid: Object providing ``search(query, embedding, k=...)``.
        reranker: Object providing ``rerank(query, candidates)``; its result
            is sliced and each item is read via its ``"document"`` key.
    """

    # Lower bound on how many candidates are requested from hybrid search
    # before reranking, so small ``k`` values still rerank a decent pool.
    _MIN_CANDIDATES = 10

    def __init__(self, chunks, embedder, hybrid, reranker):
        self.chunks = chunks
        self.embedder = embedder
        self.hybrid = hybrid
        self.reranker = reranker

    @staticmethod
    def _describe(doc):
        """Project a chunk/document dict onto the fields returned to the LLM."""
        return {
            "name": doc["name"],
            "file": doc["file"],
            "lines": f"{doc['start_line']}-{doc['end_line']}",
            "code": doc["code"],
        }

    def search_code(self, query, k=5):
        """Search the index for *query* and return the top *k* reranked hits.

        Args:
            query: Text to search for.
            k: Maximum number of results to return. ``None`` returns every
                reranked candidate.

        Returns:
            A list of dicts with the keys ``name``, ``file``, ``lines``
            (formatted ``"start-end"``) and ``code``.
        """
        emb = self.embedder.create_embeddings([query])[0]
        # Ask for at least ``k`` candidates; a fixed pool of 10 would silently
        # cap any larger ``k`` at 10 results.
        if k is None:
            pool_size = self._MIN_CANDIDATES
        else:
            pool_size = max(k, self._MIN_CANDIDATES)
        candidates = self.hybrid.search(query, emb, k=pool_size)
        ranked = self.reranker.rerank(query, candidates)
        return [self._describe(hit["document"]) for hit in ranked[:k]]

    def get_definition(self, name):
        """Return the chunk whose name best matches *name* (case-insensitive).

        Matching is tiered so a precise hit always beats a looser one,
        regardless of chunk order:

        1. exact name match;
        2. qualified-suffix match (``create_token`` finds
           ``UserService.create_token``);
        3. substring match anywhere in the name.

        Within a tier, the first chunk in ``self.chunks`` wins.

        Args:
            name: Function or class name; surrounding whitespace is ignored.

        Returns:
            A dict with ``name``, ``file``, ``lines`` and ``code`` for the
            match, or ``{"error": ...}`` when *name* is blank, not a string,
            or matches nothing.
        """
        # An empty needle is a substring of every name and would otherwise
        # return an arbitrary first chunk.
        if not isinstance(name, str) or not name.strip():
            return {"error": "A non-empty definition name is required."}

        target = name.strip().lower()
        suffix = "." + target
        lowered = [(chunk["name"].lower(), chunk) for chunk in self.chunks]

        tiers = (
            lambda candidate: candidate == target,
            lambda candidate: candidate.endswith(suffix),
            lambda candidate: target in candidate,
        )
        for matches in tiers:
            for candidate, chunk in lowered:
                if matches(candidate):
                    return self._describe(chunk)
        return {"error": f"No definition found named '{name}'."}

    def dispatch(self, tool_name, args):
        """Route a tool call from the LLM to the right implementation.

        Args:
            tool_name: Name of the tool to invoke.
            args: Mapping of keyword arguments for the tool.

        Returns:
            The tool's result, or ``{"error": ...}`` when the tool is unknown
            or *args* does not fit the tool's signature (not a mapping,
            missing or unexpected arguments).
        """
        import inspect

        handlers = {
            "search_code": self.search_code,
            "get_definition": self.get_definition,
        }
        handler = handlers.get(tool_name) if isinstance(tool_name, str) else None
        if handler is None:
            return {"error": f"Unknown tool '{tool_name}'."}

        # Validate the arguments against the signature before calling, so only
        # argument-shape problems become error results; a TypeError raised
        # inside the tool itself still propagates.
        try:
            inspect.signature(handler).bind(**args)
        except TypeError as exc:
            return {"error": f"Invalid arguments for tool '{tool_name}': {exc}"}
        return handler(**args)