File size: 2,449 Bytes
c745a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""EpisodeContext — the per-episode truth for tier-dependent runtime behavior.

An `EpisodeContext` is the single source of truth for everything about *this one
episode* that the env needs to know at runtime: which task is running, which
tier's dynamics apply (chaos probability, reported tier), and where — if
anywhere — a terminal result should be recorded.

Two construction sites encode the two episode-planning modes:

    # Local mode: env picks the task from its own curriculum. Terminal results
    # flow back to that same curriculum so local mastery/promotion tracking
    # continues to work.
    ctx = EpisodeContext.for_local(task=task, curriculum=self._curriculum)

    # Trainer mode: the trainer hands in a Task it picked from its own
    # (central) curriculum and owns result recording. The env must NOT mutate
    # any local tier-progression state for this episode.
    ctx = EpisodeContext.for_external(task=task)

With this split, `_sync_state`, chaos injection, and result recording all read
from `ctx` and no longer consult `self._curriculum.current_difficulty` — which
was the coupling that let external task injection corrupt local tier stats.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING, Callable, Optional

from models import Task, TaskDifficulty
from server.services.curriculum import TIER_CONFIGS

if TYPE_CHECKING:
    from server.services.curriculum import Curriculum


# Signature of the callback stored in `EpisodeContext.record_result`: it takes
# a Task, a bool and a float, and returns nothing.
# NOTE(review): the bool/float presumably carry the episode outcome and a
# score/reward — confirm against `Curriculum.record_result`.
RecordResultFn = Callable[[Task, bool, float], None]


@dataclass(frozen=True)
class EpisodeContext:
    """Frozen snapshot of what the env needs to know about a single episode.

    Only `task` and `record_result` are stored. `tier` and
    `chaos_probability` are recomputed from `task.difficulty` on every
    access, so they cannot disagree with the task they describe.
    """

    task: Task
    # Callback that receives the terminal result, or None when no recorder
    # is attached to this episode.
    record_result: Optional[RecordResultFn]

    # -- alternate constructors ------------------------------------------

    @classmethod
    def for_external(cls, task: Task) -> "EpisodeContext":
        """Build a trainer-mode context.

        No recorder is attached (`record_result` is None); recording the
        terminal result is left to the caller.
        """
        no_recorder = None
        return cls(task, no_recorder)

    @classmethod
    def for_local(cls, task: Task, curriculum: "Curriculum") -> "EpisodeContext":
        """Build a local-mode context.

        The context's `record_result` is `curriculum.record_result`, so
        terminal results are reported back to that curriculum.
        """
        recorder = curriculum.record_result
        return cls(task, recorder)

    # -- values derived from the task ------------------------------------

    @property
    def tier(self) -> TaskDifficulty:
        """Difficulty tier of this episode, read straight off the task."""
        difficulty = self.task.difficulty
        return difficulty

    @property
    def chaos_probability(self) -> float:
        """Chaos probability that `TIER_CONFIGS` lists for this episode's tier."""
        tier_config = TIER_CONFIGS[self.tier]
        return tier_config.chaos_probability