Spaces:

mitudrudutta
/

ChargeBackOps

Sleeping

feat(training): SFT dataset + stall detection in eval rollout

02a6a9f about 2 months ago

1.13 kB

	"""Training helpers for ChargebackOps.

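	Lightweight pure-Python wrappers that convert the environment into a
	prompt/completion/reward interface compatible with TRL's GRPO trainer.
	The package also re-exports the public names of the ``sft_dataset`` and
	``curve`` submodules, so callers can import everything from one place.
	The module is import-safe without ``trl`` / ``torch`` installed so unit
	tests stay fast and offline.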
	"""

	from __future__ import annotations

	# The four submodules below are imported eagerly when the package is
	# imported, and the names pulled from them are re-exported via ``__all__``.
	# NOTE(review): the package is only import-safe without ``trl`` / ``torch``
	# if none of these submodules import those libraries at module level --
	# confirm against the submodules, which are not visible from this file.
	from .curve import (
	CheckpointEval,
	TaskOutcome,
	evaluate_checkpoint,
	evaluate_policy_across_tasks,
	plot_training_curve,
	)
	from .env_adapter import (
	action_from_completion,
	build_prompt,
	parse_completion,
	)
	from .reward_adapter import (
	EpisodeResult,
	compute_reward,
	run_episode_with_text_policy,
	)
	from .sft_dataset import (
	SFTSample,
	action_to_completion,
	build_sft_dataset,
	)

	# Public API of the package: exactly the names imported above, one entry
	# per import. The list is kept in sorted (ASCII) order, so capitalized
	# names come before lowercase ones; add new re-exports in both places.
	__all__ = [
	"CheckpointEval",
	"EpisodeResult",
	"SFTSample",
	"TaskOutcome",
	"action_from_completion",
	"action_to_completion",
	"build_prompt",
	"build_sft_dataset",
	"compute_reward",
	"evaluate_checkpoint",
	"evaluate_policy_across_tasks",
	"parse_completion",
	"plot_training_curve",
	"run_episode_with_text_policy",
	]