File size: 1,130 Bytes
bd00c06
 
 
 
 
 
 
 
 
 
8fe3b35
 
 
 
 
 
 
bd00c06
 
 
 
 
 
 
 
 
 
02a6a9f
 
 
 
 
bd00c06
 
8fe3b35
bd00c06
02a6a9f
8fe3b35
bd00c06
02a6a9f
bd00c06
02a6a9f
bd00c06
8fe3b35
 
bd00c06
8fe3b35
bd00c06
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""Training helpers for ChargebackOps.

Lightweight pure-Python wrappers that convert the environment into a
prompt/completion/reward interface compatible with TRL's GRPO trainer.
The module is import-safe without ``trl`` / ``torch`` installed so unit
tests stay fast and offline.
"""

from __future__ import annotations

from .curve import (
    CheckpointEval,
    TaskOutcome,
    evaluate_checkpoint,
    evaluate_policy_across_tasks,
    plot_training_curve,
)
from .env_adapter import (
    action_from_completion,
    build_prompt,
    parse_completion,
)
from .reward_adapter import (
    EpisodeResult,
    compute_reward,
    run_episode_with_text_policy,
)
from .sft_dataset import (
    SFTSample,
    action_to_completion,
    build_sft_dataset,
)

# Public API of this package: exactly the names re-exported by the relative
# imports above (.curve, .env_adapter, .reward_adapter, .sft_dataset).
# Entries are kept in plain sorted order (capitalized names sort before
# lowercase ones); the trailing comment on each entry names the submodule it
# is imported from. Keep this a static list literal and update it together
# with the import block whenever a name is added or removed.
__all__ = [
    "CheckpointEval",  # .curve
    "EpisodeResult",  # .reward_adapter
    "SFTSample",  # .sft_dataset
    "TaskOutcome",  # .curve
    "action_from_completion",  # .env_adapter
    "action_to_completion",  # .sft_dataset
    "build_prompt",  # .env_adapter
    "build_sft_dataset",  # .sft_dataset
    "compute_reward",  # .reward_adapter
    "evaluate_checkpoint",  # .curve
    "evaluate_policy_across_tasks",  # .curve
    "parse_completion",  # .env_adapter
    "plot_training_curve",  # .curve
    "run_episode_with_text_policy",  # .reward_adapter
]