File size: 12,738 Bytes
8400d8c
 
 
 
 
 
 
 
 
 
f637227
 
8400d8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11f4e9a
 
8400d8c
11f4e9a
8400d8c
 
 
 
 
 
11f4e9a
 
8400d8c
11f4e9a
8400d8c
 
 
 
 
 
 
 
 
 
 
11f4e9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8400d8c
 
 
 
 
 
 
11f4e9a
 
8400d8c
 
 
 
 
 
 
 
 
 
 
f637227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8400d8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11f4e9a
8400d8c
 
 
 
f637227
 
 
8400d8c
 
f637227
 
 
 
8400d8c
 
 
 
 
f637227
 
 
 
 
 
 
 
 
 
 
 
 
8400d8c
 
 
 
 
 
7c81add
 
 
8400d8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c81add
 
 
 
 
8400d8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
"""The Commentator (``rafters-critic``) — cadence, abstain, and the feed card.

Zero mocks: the offline ``DeterministicTinyModel`` (wired by ``conftest``) drives the
funny line, and we assert the count-based cadence gate (which one silent speaker cannot
wedge), the self-trigger guard, offline determinism, that a beat actually lands in a
live show, and that the commentary renders as a graceful feed card (with and without
media).
"""

from __future__ import annotations

import dataclasses

from src.core.conductor import Conductor
from src.core.events import Event
from src.core.ledger import Ledger
from src.core.projections import StageProjection
from src.core.registry import default_registry
from src.tools.builtins import default_tool_registry
from src.ui.fishbowl import view_model_at
from src.ui.fishbowl.render.feed import render_feed


def _ev(kind: str, actor: str, turn: int = 1, **payload) -> Event:
    """Build an ``Event`` on the fixed run id ``"r"``.

    Every extra keyword argument is collected into the event's ``payload`` dict.
    """
    fields = {
        "run_id": "r",
        "turn": turn,
        "kind": kind,
        "actor": actor,
        "payload": payload,
    }
    return Event(**fields)


def _projection() -> StageProjection:
    """Build the fixed ``StageProjection`` the cadence tests hand to ``act``."""
    stage_fields = {
        "seed": "a village of stage props wakes up",
        "goal": "grow the wood",
        "current_scene": "the wood hums",
    }
    return StageProjection(**stage_fields)


def _critic(cast_names: list[str]):
    """Build a ``rafters-critic`` agent from the default registry and attach a cast.

    The agent receives its own copy of ``cast_names``, so the caller's list is never
    shared with (or mutated through) the agent.
    """
    registry = default_registry()
    router = registry.build_router()
    tools = default_tool_registry()
    critic = registry.build_agent("rafters-critic", router, tools)
    critic.cast_names = [*cast_names]
    return critic


class TestCadence:
    """When the critic chimes in: count threshold, rounds threshold, reset, determinism."""

    @staticmethod
    def _observed(text: str) -> Event:
        """A ``world.observed`` speech beat from ``scene-whisperer``."""
        return _ev("world.observed", "scene-whisperer", text=text)

    @staticmethod
    def _said(text: str) -> Event:
        """An ``agent.spoke`` speech beat from ``pocket-actor``."""
        return _ev("agent.spoke", "pocket-actor", text=text)

    @staticmethod
    def _full_cast_critic():
        """A critic whose cast is both speakers plus the critic itself."""
        return _critic(["scene-whisperer", "pocket-actor", "rafters-critic"])

    def test_abstains_below_count(self, monkeypatch):
        """Two speech beats against a cadence of four → the critic stays quiet."""
        monkeypatch.setenv("MAL_COMMENTATOR_EVERY", "4")
        critic = self._full_cast_critic()
        beats = (
            self._observed("the wood hums"),
            self._said("I want the moon"),
        )
        outcome = critic.act("r", 1, _projection(), beats)
        assert outcome is None

    def test_emits_one_beat_at_count(self, monkeypatch):
        """Four speech beats since the last remark → exactly one commentary event."""
        monkeypatch.setenv("MAL_COMMENTATOR_EVERY", "4")
        critic = self._full_cast_critic()
        beats = (
            self._observed("a"),
            self._said("b"),
            self._observed("c"),
            self._said("d"),
        )
        posted = critic.act("r", 5, _projection(), beats)
        assert posted is not None
        assert posted.kind == "commentary.posted"
        assert posted.payload.get("text")

    def test_one_silent_speaker_does_not_wedge_cadence(self, monkeypatch):
        """A stalled speaker can't block the beat — the old per-speaker quorum bug."""
        monkeypatch.setenv("MAL_COMMENTATOR_EVERY", "4")
        critic = self._full_cast_critic()
        # pocket-actor speaks once and then goes silent (errored out) while
        # scene-whisperer carries the show. Counting beats still reaches four;
        # waiting for every speaker to contribute never would.
        beats = (
            self._said("I want the moon"),
            self._observed("a"),
            self._observed("b"),
            self._observed("c"),
        )
        posted = critic.act("r", 5, _projection(), beats)
        assert posted is not None
        assert posted.kind == "commentary.posted"

    def test_no_speakers_means_silence(self, monkeypatch):
        """With only non-speech events in the log there is nothing to comment on."""
        monkeypatch.setenv("MAL_COMMENTATOR_EVERY", "1")
        critic = self._full_cast_critic()
        housekeeping_only = (_ev("run.started", "conductor", turn=0, seed="s"),)
        outcome = critic.act("r", 1, _projection(), housekeeping_only)
        assert outcome is None

    def test_window_resets_after_a_remark(self, monkeypatch):
        """The self-trigger guard: a posted beat resets the cadence window."""
        monkeypatch.setenv("MAL_COMMENTATOR_EVERY", "4")
        critic = self._full_cast_critic()
        beats_then_remark = (
            self._observed("a"),
            self._said("b"),
            self._observed("c"),
            self._said("d"),
            _ev("commentary.posted", "rafters-critic", text="bold choice, mushrooms"),
        )
        # Nothing NEW was said after the remark → abstain (no runaway self-trigger).
        outcome = critic.act("r", 6, _projection(), beats_then_remark)
        assert outcome is None

    def test_rounds_threshold_is_rounds_times_distinct_speakers(self, monkeypatch):
        """With no absolute override, the cadence is rounds × distinct cast speakers.

        The speaker count is taken from the beats seen so far: with rounds=1, a lone
        speaker's single beat already fires, and so does a full two-speaker round.
        """
        monkeypatch.delenv("MAL_COMMENTATOR_EVERY", raising=False)
        monkeypatch.setenv("MAL_COMMENTATOR_ROUNDS", "1")
        critic = self._full_cast_critic()

        # One distinct speaker so far → round_size is 1, so one beat meets it.
        lone_beat = (self._observed("a"),)
        after_one = critic.act("r", 1, _projection(), lone_beat)
        assert after_one is not None

        whole_round = (
            self._observed("a"),
            self._said("b"),
        )
        after_round = critic.act("r", 2, _projection(), whole_round)
        assert after_round is not None

    def test_more_rounds_means_more_beats_before_speaking(self, monkeypatch):
        """rounds=2 with two distinct speakers needs four beats; three is not enough."""
        monkeypatch.delenv("MAL_COMMENTATOR_EVERY", raising=False)
        monkeypatch.setenv("MAL_COMMENTATOR_ROUNDS", "2")
        critic = self._full_cast_critic()

        # round_size 2 × rounds 2 → threshold of 4 beats.
        first_three = (
            self._observed("a"),
            self._said("b"),
            self._observed("c"),
        )
        too_early = critic.act("r", 1, _projection(), first_three)
        assert too_early is None

        all_four = first_three + (self._said("d"),)
        on_time = critic.act("r", 2, _projection(), all_four)
        assert on_time is not None

    def test_manifest_rounds_default_used_when_env_unset(self, monkeypatch):
        """No env knobs set → the manifest's commentary.rounds (1) drives the cadence."""
        for knob in ("MAL_COMMENTATOR_EVERY", "MAL_COMMENTATOR_ROUNDS"):
            monkeypatch.delenv(knob, raising=False)
        critic = self._full_cast_critic()

        commentary = critic.manifest.commentary
        assert commentary is not None
        assert commentary.rounds == 1

        whole_round = (
            self._observed("a"),
            self._said("b"),
        )
        posted = critic.act("r", 1, _projection(), whole_round)
        assert posted is not None

    def test_offline_summary_is_deterministic(self, monkeypatch):
        """Two freshly built critics given the same events produce the same line."""
        monkeypatch.setenv("MAL_COMMENTATOR_EVERY", "1")
        beats = (
            self._observed("a"),
            self._said("b"),
        )
        first = _critic(["scene-whisperer", "pocket-actor"]).act("r", 1, _projection(), beats)
        second = _critic(["scene-whisperer", "pocket-actor"]).act("r", 1, _projection(), beats)
        assert first is not None
        assert second is not None
        assert first.payload["text"] == second.payload["text"]


class TestModularity:
    """The critic is a drop-in cast member: present → beats appear; absent → none do."""

    @staticmethod
    def _play(name: str, seed: str, turns: int, *, without: str | None = None):
        """Run scenario ``name`` for ``turns`` steps and return that run's ledger events.

        When ``without`` is given, the agent with that name is removed from the built
        scenario before the conductor is created.
        """
        registry = default_registry()
        scenario = registry.build_scenario(name, tools=default_tool_registry())
        if without is not None:
            remaining = tuple(agent for agent in scenario.agents if agent.name != without)
            scenario = dataclasses.replace(scenario, agents=remaining)
        conductor = Conductor(scenario, governor=registry.governor_for(name), ledger=Ledger())
        conductor.reset(seed)
        conductor.step(turns)
        return conductor.ledger.events_for_run(conductor.run_id)

    def test_beat_lands_in_a_live_thousand_token_wood_show(self):
        """Drop-in agent, no engine edit: a real offline run yields a commentary beat."""
        # default cadence (4 beats, ~2 turns) trips early in the run
        events = self._play("thousand-token-wood", "a village of stage props wakes up", 8)
        assert any(event.kind == "commentary.posted" for event in events)

    def test_scenario_without_critic_has_no_commentary(self):
        """Opt-out (ADR-0011): drop the critic from a cast and the engine never emits a
        beat. Proven by removing the agent from a built scenario, so it holds no matter
        which scenarios ship the critic in their YAML."""
        events = self._play(
            "mystery-roots",
            "who moved the standing stone?",
            6,
            without="rafters-critic",
        )
        seen_kinds = {event.kind for event in events}
        commentary_kinds = [kind for kind in seen_kinds if kind.startswith("commentary.")]
        assert not commentary_kinds

    def test_beat_lands_in_a_judged_scenario_without_skewing_the_verdict(self):
        """The critic is universal: it drops into a judged cast and chimes in, but its
        commentary.posted is not a competitor kind, so it can never be crowned winner."""
        events = self._play(
            "debate-duel",
            "this house believes the forest should never be mapped",
            10,
        )
        assert any(event.kind == "commentary.posted" for event in events)
        verdicts = [event for event in events if event.kind == "judge.verdict"]
        winners = [verdict.payload.get("winner") for verdict in verdicts]
        assert "rafters-critic" not in winners


class TestFeedCard:
    """How a ``commentary.posted`` event renders in the fishbowl feed HTML."""

    def _cast(self):
        """Manifests of every agent in the ``thousand-token-wood`` scenario."""
        registry = default_registry()
        scenario = registry.build_scenario("thousand-token-wood", tools=default_tool_registry())
        return [agent.manifest for agent in scenario.agents]

    def _feed_html(self, **commentary_payload) -> str:
        """Render the feed for a run start followed by one critic remark on turn 3.

        ``commentary_payload`` becomes the payload of the ``commentary.posted`` event.
        """
        events = (
            _ev("run.started", "conductor", turn=0, seed="s", goal="g"),
            _ev("commentary.posted", "rafters-critic", turn=3, **commentary_payload),
        )
        vm = view_model_at(events, 999, self._cast(), scenario_name="thousand-token-wood")
        return render_feed(vm, mind_reader=False)

    def test_renders_media_badges_not_inline_tags(self):
        """Media now plays in the native gr.Image/gr.Audio cutaway, so the feed card shows
        badges — not inline <img>/<audio> (those used a /file= route dead in Gradio 5+)."""
        html = self._feed_html(
            text="bold choice, unionising the mushrooms",
            image={"src": "/file=runs/media/r/003-img.png", "alt": "the vision"},
            audio={"src": "data:audio/wav;base64,UklGRg==", "mime": "audio/wav"},
        )
        assert "fe commentate" in html
        assert "FROM THE RAFTERS" in html
        assert "bold choice, unionising the mushrooms" in html
        # Badges note the media; the media itself is rendered by the native cutaway.
        for badge_marker in ("cm-badge", "illustrated", "voiced"):
            assert badge_marker in html
        # No dead inline media tags / hand-built /file= URLs.
        for dead_markup in ("<img", "<audio", "/file="):
            assert dead_markup not in html

    def test_degrades_to_text_only(self):
        """A remark with no media still renders its card, with no media tags at all."""
        html = self._feed_html(text="just the line, no pictures")
        assert "fe commentate" in html
        assert "just the line, no pictures" in html
        for media_tag in ("<img", "<audio"):
            assert media_tag not in html

    def test_escapes_caption(self):
        """Markup in the remark text is HTML-escaped rather than emitted raw."""
        html = self._feed_html(text="a <script> & friends")
        assert "<script>" not in html
        assert "&lt;script&gt;" in html