"""Training helpers for ChargebackOps. Lightweight pure-Python wrappers that convert the environment into a prompt/completion/reward interface compatible with TRL's GRPO trainer. The module is import-safe without ``trl`` / ``torch`` installed so unit tests stay fast and offline. """ from __future__ import annotations from .curve import ( CheckpointEval, TaskOutcome, evaluate_checkpoint, evaluate_policy_across_tasks, plot_training_curve, ) from .env_adapter import ( action_from_completion, build_prompt, parse_completion, ) from .reward_adapter import ( EpisodeResult, compute_reward, run_episode_with_text_policy, ) from .sft_dataset import ( SFTSample, action_to_completion, build_sft_dataset, ) __all__ = [ "CheckpointEval", "EpisodeResult", "SFTSample", "TaskOutcome", "action_from_completion", "action_to_completion", "build_prompt", "build_sft_dataset", "compute_reward", "evaluate_checkpoint", "evaluate_policy_across_tasks", "parse_completion", "plot_training_curve", "run_episode_with_text_policy", ]