File size: 5,092 Bytes
41a9651
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""

CurriculumBuilder — bridges game/rl_splits.py with OpenEnv environments.



Splits (from rl_splits.py):

    TRAIN  8 tracks  — 2 per difficulty tier, easy → hard

    VAL    4 tracks  — 1 per tier, performance gating

    TEST   4 tracks  — 1 per tier, held-out evaluation



Usage:

    builder = CurriculumBuilder()



    # Training loop

    env = builder.next_env()

    obs = env.reset()

    while not obs.done:

        obs = env.step(DriveAction(accel=1.0, steer=0.0))

    builder.record(total_reward)   # logs the episode; record() itself never advances the frontier



    # Validation

    for env in builder.val_envs():

        ...



    # Status

    print(builder.status)

"""

from typing import Iterator, List

from game.rl_splits import TRAIN, VAL, TEST, CurriculumSampler
from .environment import RaceEnvironment


class CurriculumBuilder:
    """Produce ready-to-use RaceEnvironment instances from a CurriculumSampler.

    A CurriculumSampler over the TRAIN split picks the track for each
    training episode; the fixed TRAIN / VAL / TEST splits are also exposed
    as plain lists of environments.

    Parameters
    ----------
    threshold    : mean episode reward needed to advance to the next track
                   (forwarded to CurriculumSampler)
    window       : rolling window size for reward averaging
                   (forwarded to CurriculumSampler)
    replay_frac  : fraction of episodes replayed from already-mastered tracks
                   (forwarded to CurriculumSampler)
    max_steps    : max steps per episode for every environment created
    laps_target  : lap target per episode
    use_image    : include 64×64 egocentric headlight image in observations
    """

    def __init__(
        self,
        threshold: float = 30.0,
        window: int = 50,
        replay_frac: float = 0.3,
        max_steps: int = 3000,
        laps_target: int = 3,
        use_image: bool = True,
    ):
        # Per-environment settings, reused for every RaceEnvironment created.
        self._max_steps = max_steps
        self._laps_target = laps_target
        self._use_image = use_image
        # The sampler decides which TRAIN track each training episode runs on.
        self._sampler = CurriculumSampler(
            TRAIN, threshold=threshold, window=window, replay_frac=replay_frac
        )

    # -- Frontier -------------------------------------------------------------

    def next_env(self) -> RaceEnvironment:
        """Return an environment on the track the sampler picks next."""
        return self._env_for(self._sampler.sample())

    def record(self, episode_reward: float, episode_crashes: int = 0, episode_laps: int = 0, is_frontier: bool = True) -> bool:
        """Forward the last episode's reward, crashes and laps to the sampler.

        Advancement is gated by greedy eval only — this method never
        auto-advances, so the return value is always False.
        """
        self._sampler.record(
            episode_reward,
            episode_crashes,
            episode_laps,
            is_frontier=is_frontier,
        )
        return False

    # -- Fixed splits ---------------------------------------------------------

    def train_envs(self) -> List[RaceEnvironment]:
        """Return one environment per TRAIN track, in split order."""
        return self._make_envs(TRAIN)

    def val_envs(self) -> List[RaceEnvironment]:
        """Return one environment per VAL track, in split order."""
        return self._make_envs(VAL)

    def test_envs(self) -> List[RaceEnvironment]:
        """Return one environment per TEST (held-out) track, in split order."""
        return self._make_envs(TEST)

    # -- Iteration helper -----------------------------------------------------

    def iter_train(self) -> Iterator[RaceEnvironment]:
        """Yield the TRAIN environments one at a time, in split order.

        All environments are created by ``train_envs()`` when iteration
        starts, not lazily per item.
        """
        for environment in self.train_envs():
            yield environment

    # -- Info -----------------------------------------------------------------

    @property
    def status(self) -> str:
        """Status text as reported by the underlying sampler."""
        return self._sampler.status()

    @property
    def current_level(self) -> int:
        """0-based index of the current frontier track within TRAIN."""
        return self._sampler.current_level

    @property
    def is_complete(self) -> bool:
        """True once the sampler's current level has reached the last TRAIN index.

        NOTE(review): intended to mean "all TRAIN tracks mastered"; whether
        reaching the last index implies mastery depends on CurriculumSampler
        — confirm against game/rl_splits.py.
        """
        last_index = len(TRAIN) - 1
        return self._sampler.current_level >= last_index

    # -- Internal -------------------------------------------------------------

    def _env_for(self, track) -> RaceEnvironment:
        """Build ``track`` and wrap it in a RaceEnvironment with this builder's settings."""
        track.build()
        return RaceEnvironment(track, self._max_steps, self._laps_target, self._use_image)

    def _make_envs(self, tracks) -> List[RaceEnvironment]:
        """Return one freshly built environment per track, preserving order."""
        return [self._env_for(track) for track in tracks]