codebase-agent / src /agent /tools.py
AishaSurve's picture
Codebase Intelligence Agent: code-aware RAG + test-gen agent + eval
8e72e1f
Raw
History Blame Contribute Delete
3.32 kB
"""
Agent tools.
Two callable tools the LLM can invoke to gather context before writing tests:
- search_code(query) : semantic+keyword search over the repo
- get_definition(name) : fetch the full source of a function/class by name
Both are thin wrappers over the same retrieval/index the Q&A side uses. The
CodeTools object holds the live index so the tool functions can reach it.
"""
# Tool schemas advertised to the model, in OpenAI function-calling format.
def _function_tool(name, description, arg_name, arg_description):
    """Build the schema of a tool that takes one required string argument.

    Both tools exposed by this module share this shape, so the nesting is
    spelled out once here. Key order mirrors the function-calling layout:
    "type" first, then the "function" payload.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    arg_name: {"type": "string", "description": arg_description},
                },
                "required": [arg_name],
            },
        },
    }


# One entry per tool; the names must match the ones CodeTools.dispatch routes.
TOOL_SCHEMAS = [
    _function_tool(
        "search_code",
        "Search the repository for code relevant to a query. "
        "Returns matching definitions with their file and line range.",
        "query",
        "what to look for",
    ),
    _function_tool(
        "get_definition",
        "Return the full source code of a function or class by its name "
        "(e.g. 'login_user' or 'UserService.create_token').",
        "name",
        "function/class name",
    ),
]
class CodeTools:
    """Holds the live index and exposes the tool implementations.

    Every tool returns plain dicts/lists of dicts. Failures that the model can
    recover from (unknown tool, bad arguments, no matching definition) are
    reported as ``{"error": "<message>"}`` rather than raised.
    """

    # Smallest candidate pool handed to the reranker, so it always has more
    # to choose from than the handful of results finally returned.
    _MIN_CANDIDATES = 10

    def __init__(self, chunks, embedder, hybrid, reranker):
        """Store the index pieces the tools read from.

        Args:
            chunks: iterable of dicts with the keys "name", "file",
                "start_line", "end_line" and "code" (the keys read below).
            embedder: object providing ``create_embeddings(list_of_texts)``.
            hybrid: object providing ``search(query, embedding, k=...)``.
            reranker: object providing ``rerank(query, results)``.
        """
        self.chunks = chunks
        self.embedder = embedder
        self.hybrid = hybrid
        self.reranker = reranker

    @staticmethod
    def _summarize(chunk):
        """Convert an index chunk into the result shape shared by all tools."""
        return {
            "name": chunk["name"],
            "file": chunk["file"],
            "lines": f"{chunk['start_line']}-{chunk['end_line']}",
            "code": chunk["code"],
        }

    def search_code(self, query, k=5):
        """Search the repository and return the top ``k`` reranked definitions.

        Args:
            query: free-text description of what to look for.
            k: maximum number of results to return.

        Returns:
            A list of at most ``k`` dicts with "name", "file", "lines", "code".
        """
        emb = self.embedder.create_embeddings([query])[0]
        # Widen the pool when the caller asks for more than the default size;
        # a fixed pool of 10 silently capped every request with k > 10.
        pool_size = max(k, self._MIN_CANDIDATES)
        candidates = self.hybrid.search(query, emb, k=pool_size)
        ranked = self.reranker.rerank(query, candidates)
        return [self._summarize(hit["document"]) for hit in ranked[:k]]

    def get_definition(self, name):
        """Return the source of the definition that best matches ``name``.

        Matching is case-insensitive and tiered, best tier first:
          1. exact name match;
          2. qualified-name suffix ("create_token" finds
             "UserService.create_token");
          3. substring anywhere in the name.
        Inside a tier the first chunk in index order wins.

        Args:
            name: function or class name, optionally qualified.

        Returns:
            A dict with "name", "file", "lines", "code", or
            ``{"error": ...}`` when the name is empty or nothing matches.
        """
        # An empty string is a substring of every name, so without this guard
        # a blank query would return an arbitrary chunk.
        if not isinstance(name, str) or not name.strip():
            return {"error": "A non-empty definition name is required."}

        wanted = name.strip().lower()
        suffix = "." + wanted
        suffix_hit = None
        substring_hit = None
        for chunk in self.chunks:
            chunk_name = chunk["name"].lower()
            if chunk_name == wanted:
                # Nothing can outrank an exact match; stop scanning.
                return self._summarize(chunk)
            if chunk_name.endswith(suffix):
                if suffix_hit is None:
                    suffix_hit = chunk
            elif wanted in chunk_name:
                if substring_hit is None:
                    substring_hit = chunk

        best = suffix_hit if suffix_hit is not None else substring_hit
        if best is None:
            return {"error": f"No definition found named '{name}'."}
        return self._summarize(best)

    def dispatch(self, tool_name, args):
        """Route a tool call from the LLM to the right implementation.

        Args:
            tool_name: name of the requested tool.
            args: keyword arguments for the tool as a dict; ``None`` is
                treated as "no arguments".

        Returns:
            The tool's result, or ``{"error": ...}`` when the tool is unknown
            or the arguments do not fit the tool's signature.
        """
        import inspect  # local import: only needed for argument validation

        handlers = {
            "search_code": self.search_code,
            "get_definition": self.get_definition,
        }
        handler = handlers.get(tool_name)
        if handler is None:
            return {"error": f"Unknown tool '{tool_name}'."}

        if args is None:
            args = {}
        if not isinstance(args, dict):
            return {
                "error": f"Arguments for tool '{tool_name}' must be an object."
            }

        # Validate the model-supplied arguments against the signature before
        # calling, so a misspelled or missing parameter becomes an error the
        # model can read, while a TypeError raised inside the tool itself
        # still propagates.
        try:
            inspect.signature(handler).bind(**args)
        except TypeError as exc:
            return {"error": f"Invalid arguments for tool '{tool_name}': {exc}"}
        return handler(**args)