Spaces:
Sleeping
Sleeping
File size: 5,092 Bytes
41a9651 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | """
CurriculumBuilder — bridges game/rl_splits.py with OpenEnv environments.

Splits (from rl_splits.py):
    TRAIN  8 tracks — 2 per difficulty tier, easy → hard
    VAL    4 tracks — 1 per tier, performance gating
    TEST   4 tracks — 1 per tier, held-out evaluation

Usage:
    builder = CurriculumBuilder()

    # Training loop
    env = builder.next_env()
    obs = env.reset()
    while not obs.done:
        obs = env.step(DriveAction(accel=1.0, steer=0.0))
    builder.record(total_reward)   # logs the episode; never advances the frontier by itself

    # Validation
    for env in builder.val_envs():
        ...

    # Status
    print(builder.status)
"""
from typing import Iterator, List
from game.rl_splits import TRAIN, VAL, TEST, CurriculumSampler
from .environment import RaceEnvironment
class CurriculumBuilder:
    """
    Wrap a CurriculumSampler to produce ready-to-use RaceEnvironment instances.

    The sampler is created over the TRAIN split and picks the track for each
    training episode; this class turns tracks into environments and forwards
    episode results back to the sampler.

    Parameters
    ----------
    threshold   : mean episode reward needed to advance to the next track
    window      : rolling window size for reward averaging
    replay_frac : fraction of episodes replayed from already-mastered tracks
    max_steps   : max steps per episode for every environment created
    laps_target : lap target per episode
    use_image   : include 64×64 egocentric headlight image in observations
    """

    def __init__(
        self,
        threshold: float = 30.0,
        window: int = 50,
        replay_frac: float = 0.3,
        max_steps: int = 3000,
        laps_target: int = 3,
        use_image: bool = True,
    ):
        # The curriculum settings go straight to the sampler; the remaining
        # settings are kept here and applied to every environment created.
        self._sampler = CurriculumSampler(
            TRAIN,
            threshold=threshold,
            window=window,
            replay_frac=replay_frac,
        )
        self._max_steps = max_steps
        self._laps_target = laps_target
        self._use_image = use_image

    # ── Frontier ─────────────────────────────────────────────────────────────

    def next_env(self) -> "RaceEnvironment":
        """Return an environment for the next episode (respects replay schedule)."""
        return self._make_env(self._sampler.sample())

    def record(
        self,
        episode_reward: float,
        episode_crashes: int = 0,
        episode_laps: int = 0,
        is_frontier: bool = True,
    ) -> bool:
        """
        Record the reward, crashes, and laps for the last episode.

        The values are forwarded unchanged to the sampler's ``record``.
        Advancement is gated by greedy eval only — this method never
        auto-advances.

        Returns
        -------
        Always False.
        """
        self._sampler.record(
            episode_reward,
            episode_crashes,
            episode_laps,
            is_frontier=is_frontier,
        )
        return False

    # ── Fixed splits ─────────────────────────────────────────────────────────

    def train_envs(self) -> List["RaceEnvironment"]:
        """One environment per TRAIN track (all 8, in order)."""
        return self._make_envs(TRAIN)

    def val_envs(self) -> List["RaceEnvironment"]:
        """One environment per VAL track (4 tracks, one per difficulty tier)."""
        return self._make_envs(VAL)

    def test_envs(self) -> List["RaceEnvironment"]:
        """One environment per TEST track (4 tracks, held-out)."""
        return self._make_envs(TEST)

    # ── Iteration helper ─────────────────────────────────────────────────────

    def iter_train(self) -> Iterator["RaceEnvironment"]:
        """
        Yield one environment per TRAIN track, in order.

        Each track is built and wrapped only when the consumer asks for it,
        so stopping early never builds the remaining tracks.
        """
        for track in TRAIN:
            yield self._make_env(track)

    # ── Info ─────────────────────────────────────────────────────────────────

    @property
    def status(self) -> str:
        """Status string reported by the underlying sampler."""
        return self._sampler.status()

    @property
    def current_level(self) -> int:
        """0-based index of the current frontier track within TRAIN."""
        return self._sampler.current_level

    @property
    def is_complete(self) -> bool:
        """
        True once the frontier index has reached the last TRAIN track.

        NOTE(review): this was documented as "all TRAIN tracks have been
        mastered", but the check passes as soon as the frontier *reaches* the
        final track. Whether that track counts as mastered at that point
        depends on CurriculumSampler — confirm before relying on it.
        """
        return self._sampler.current_level >= len(TRAIN) - 1

    # ── Internal ─────────────────────────────────────────────────────────────

    def _make_env(self, track) -> "RaceEnvironment":
        """Build ``track`` and wrap it in an environment with this builder's settings."""
        track.build()
        return RaceEnvironment(
            track,
            self._max_steps,
            self._laps_target,
            self._use_image,
        )

    def _make_envs(self, tracks) -> List["RaceEnvironment"]:
        """Return one environment per entry of ``tracks``, preserving order."""
        return [self._make_env(track) for track in tracks]
|