# ChargeBackOps/training/__init__.py
# Last commit by mitudrudutta (02a6a9f):
#   feat(training): SFT dataset + stall detection in eval rollout
"""Training helpers for ChargebackOps.

Lightweight pure-Python wrappers that convert the environment into a
prompt/completion/reward interface compatible with TRL's GRPO trainer.

The module is import-safe without ``trl`` / ``torch`` installed so unit
tests stay fast and offline.
"""
from __future__ import annotations
from .curve import (
CheckpointEval,
TaskOutcome,
evaluate_checkpoint,
evaluate_policy_across_tasks,
plot_training_curve,
)
from .env_adapter import (
action_from_completion,
build_prompt,
parse_completion,
)
from .reward_adapter import (
EpisodeResult,
compute_reward,
run_episode_with_text_policy,
)
from .sft_dataset import (
SFTSample,
action_to_completion,
build_sft_dataset,
)
# Public API of this package: every name imported above from ``.curve``,
# ``.env_adapter``, ``.reward_adapter`` and ``.sft_dataset`` is re-exported
# here, so ``from <package> import *`` exposes exactly these names.
# Entries are kept in sorted order (capitalized names first, then lowercase
# names, each group alphabetical) -- preserve that ordering when adding one.
__all__ = [
"CheckpointEval",
"EpisodeResult",
"SFTSample",
"TaskOutcome",
"action_from_completion",
"action_to_completion",
"build_prompt",
"build_sft_dataset",
"compute_reward",
"evaluate_checkpoint",
"evaluate_policy_across_tasks",
"parse_completion",
"plot_training_curve",
"run_episode_with_text_policy",
]