Spaces:
Sleeping
Sleeping
"""Training: multi-turn rollout for GRPO / SFT.

Public surface:
    EnvClient — protocol; HttpEnvClient or InProcessEnvClient
    Policy — protocol; ScriptedPolicy or HfPolicy
    rollout(...) — drive one episode, return Trajectory
    Trajectory, TurnSample — per-turn (prompt, completion, reward, return)

The rollout is environment-agnostic and policy-agnostic — see
PROPOSAL.md §7.2 for the GRPOTrainer integration story.
"""
| from graphforge.training.client import ( | |
| EnvClient, | |
| HttpEnvClient, | |
| InProcessEnvClient, | |
| ) | |
| from graphforge.training.policy import HfPolicy, Policy, ScriptedPolicy | |
| from graphforge.training.protocol import ( | |
| ParseFailure, | |
| ParseSuccess, | |
| parse_completion, | |
| render_action, | |
| ) | |
| from graphforge.training.rollout import ( | |
| Trajectory, | |
| TurnSample, | |
| rollout, | |
| trajectory_summary, | |
| ) | |
# Names re-exported as this package's public surface; every entry is imported
# at the top of this module. The unimplemented train_grpo / train_sft stubs
# defined in this module are not listed here.
__all__ = [
    "EnvClient",
    "HfPolicy",
    "HttpEnvClient",
    "InProcessEnvClient",
    "ParseFailure",
    "ParseSuccess",
    "Policy",
    "ScriptedPolicy",
    "Trajectory",
    "TurnSample",
    "parse_completion",
    "render_action",
    "rollout",
    "trajectory_summary",
]
def train_grpo(config: object) -> None:  # pragma: no cover — TODO
    """Placeholder entry point for GRPO training; not implemented yet.

    Args:
        config: Training configuration. Currently unused: the function
            raises before reading it.

    Raises:
        NotImplementedError: Always. See PROPOSAL.md §7.
    """
    raise NotImplementedError("GRPO training TODO — see PROPOSAL.md §7")
def train_sft(config: object) -> None:  # pragma: no cover — TODO
    """Placeholder entry point for the SFT fallback ("plan B"); not implemented yet.

    Args:
        config: Training configuration. Currently unused: the function
            raises before reading it.

    Raises:
        NotImplementedError: Always. See PROPOSAL.md §7.4.
    """
    raise NotImplementedError("SFT plan B TODO — see PROPOSAL.md §7.4")