""" Test-generation agent. A minimal tool-calling loop (no LangGraph). The LLM is given two tools (search_code, get_definition) and asked to generate pytest tests for a target function. It decides which tools to call to gather the function's real source and its dependencies, then writes the tests grounded in that actual code. This is the "agent" capability: the model plans and acts via tools, rather than answering in a single shot. """ import json import os from dotenv import load_dotenv from openai import OpenAI from src.agent.tools import TOOL_SCHEMAS load_dotenv() SYSTEM_PROMPT = """You are a senior Python engineer that writes pytest unit tests. You have tools to explore a real codebase: - search_code(query): find relevant code - get_definition(name): fetch the full source of a function/class Workflow: 1. Use get_definition (and search_code if needed) to read the ACTUAL source of the target function and anything it depends on. Never guess at the code. 2. Then write focused pytest tests covering the main behavior and edge cases. Return ONLY the final pytest code in a single Python code block. Base the tests strictly on the real code you retrieved. """ MAX_TOOL_ROUNDS = 5 class TestAgent: def __init__(self, tools, model="gpt-4.1-mini"): self.tools = tools # a CodeTools instance self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) self.model = model def generate_tests(self, target): """target: a function/class name, e.g. 'create_access_token'.""" messages = [ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": f"Generate pytest tests for `{target}`."}, ] for _ in range(MAX_TOOL_ROUNDS): response = self.client.chat.completions.create( model=self.model, messages=messages, tools=TOOL_SCHEMAS, temperature=0, ) msg = response.choices[0].message # No tool calls -> the model produced its final answer (the tests). if not msg.tool_calls: return msg.content # Otherwise, run each requested tool and feed results back. messages.append(msg) for call in msg.tool_calls: args = json.loads(call.function.arguments) result = self.tools.dispatch(call.function.name, args) messages.append({ "role": "tool", "tool_call_id": call.id, "content": json.dumps(result)[:6000], # keep tool payload bounded }) # Safety net if it never stopped calling tools. return "Could not finish generating tests within the tool-call limit."