"""LLM-backed agent used by :mod:`training.eval` for trained checkpoints. The eval harness only needs the :class:`.eval.Agent` protocol (``reset(seed)`` + ``act(obs) -> SqlDriftAction``). This module supplies a minimal, chat-template-driven policy that: 1. Loads a saved model directory (either a full HF checkpoint or a PEFT adapter pointing at a base model). 2. Maintains a bounded chat history across the episode so the model sees its own prior tool calls and their observations. 3. Prompts the model to emit *exactly one* JSON tool-call envelope per turn (``{"tool": "...", "payload": {...}}``) and parses it into a :class:`models.SqlDriftAction`. 4. Falls back to a safe default action on parse failure rather than crashing the rollout — this matches the random-agent contract and keeps eval sweeps resilient to occasional generation noise. All heavy ML imports (``torch``, ``transformers``, ``peft``) are deferred into :meth:`LLMAgent.__init__` so the module is importable on CPU-only CI boxes for type checking. """ from __future__ import annotations import json import re from pathlib import Path from typing import Any from models import ( ConsultDBAResult, DescribeTableResult, ExplainQueryResult, ListTablesPayload, ListTablesResult, ReadChangelogResult, RunQueryResult, SampleRowsResult, SqlDriftAction, SqlDriftObservation, SubmitRewriteResult, ToolError, ToolName, ToolPayload, ) from training.prompt import render_system_prompt from utilities.logger import get_module_logger, log_interaction _LOG = get_module_logger(__name__) # Compact, model-facing JSON contract. Kept short because it ships with # every turn and its tokens count against ``max_seq_length``. _TOOL_CONTRACT = ( "Respond with EXACTLY ONE JSON object per turn and nothing else:\n" '{"tool": "", "payload": {...}}\n' "Valid tool names: list_tables, describe_table, sample_rows, run_query, " "explain_query, read_changelog, submit_rewrite, consult_dba.\n" "Payload schemas (match one):\n" '- list_tables: {"kind": "list_tables"}\n' '- describe_table: {"kind": "describe_table", "table": ""}\n' '- sample_rows: {"kind": "sample_rows", "table": "", "limit": 1..5}\n' '- run_query: {"kind": "run_query", "sql": ""}\n' '- read_changelog: {"kind": "read_changelog"}\n' '- submit_rewrite: {"kind": "submit_rewrite", "sql": "