Spaces:
Sleeping
Sleeping
"""Training helpers for ChargebackOps.

Lightweight pure-Python wrappers that convert the environment into a
prompt/completion/reward interface compatible with TRL's GRPO trainer.

The module is import-safe without ``trl`` / ``torch`` installed so unit
tests stay fast and offline.
"""

from __future__ import annotations

from .curve import (
    CheckpointEval,
    TaskOutcome,
    evaluate_checkpoint,
    evaluate_policy_across_tasks,
    plot_training_curve,
)
from .env_adapter import (
    action_from_completion,
    build_prompt,
    parse_completion,
)
from .reward_adapter import (
    EpisodeResult,
    compute_reward,
    run_episode_with_text_policy,
)
from .sft_dataset import (
    SFTSample,
    action_to_completion,
    build_sft_dataset,
)

# Public API of this module: exactly the names re-exported by the imports
# above. Kept as a plain list literal in ASCII sort order (capitalized names
# first, then lowercase) so additions are easy to place and to review.
__all__ = [
    "CheckpointEval",
    "EpisodeResult",
    "SFTSample",
    "TaskOutcome",
    "action_from_completion",
    "action_to_completion",
    "build_prompt",
    "build_sft_dataset",
    "compute_reward",
    "evaluate_checkpoint",
    "evaluate_policy_across_tasks",
    "parse_completion",
    "plot_training_curve",
    "run_episode_with_text_policy",
]