File size: 23,389 Bytes
3193174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
"""Tests for execution/runner.py — MACPRunner."""

import asyncio

import pytest
import rustworkx as rx
import torch

from core.graph import RoleGraph
from execution.budget import BudgetConfig
from execution.runner import MACPResult, MACPRunner, RunnerConfig


def create_test_graph(nodes, edges):
    """Build a RoleGraph fixture from node ids and directed edges.

    Every id in ``nodes`` other than ``"task"`` is added to the graph and gets
    a matching AgentProfile; a ``"task"`` node is always added afterwards.
    Edges whose endpoints are not both agent ids are ignored.
    """
    from core.agent import AgentProfile

    digraph = rx.PyDiGraph()

    # Agent ids in their original order; "task" is handled separately below.
    agent_ids = [nid for nid in nodes if nid != "task"]

    id_to_idx = {}
    agents = []
    for agent_id in agent_ids:
        id_to_idx[agent_id] = digraph.add_node({"id": agent_id})
        agents.append(
            AgentProfile(agent_id=agent_id, display_name=f"Agent {agent_id.upper()}")
        )

    # Adjacency lists are keyed by every requested id, agents and task alike.
    connections = {nid: [] for nid in nodes}
    for source, target in edges:
        if source not in id_to_idx or target not in id_to_idx:
            continue
        digraph.add_edge(id_to_idx[source], id_to_idx[target], {"weight": 1.0})
        connections[source].append(target)

    # One extra row/column is reserved for the task node.
    size = len(id_to_idx) + 1
    a_com = torch.zeros((size, size), dtype=torch.float32)

    id_to_idx["task"] = digraph.add_node({"id": "task"})

    # Mark agent-to-agent connections using positions in agent_ids.
    for row, source in enumerate(agent_ids):
        for target in connections[source]:
            if target not in agent_ids:
                continue
            a_com[row, agent_ids.index(target)] = 1.0

    role_graph = RoleGraph(
        node_ids=nodes,
        role_connections=connections,
        graph=digraph,
        A_com=a_com,
        task_node="task",
        query="test query",
    )
    role_graph.agents = agents

    return role_graph


def create_simple_llm_caller(response_text="Test response"):
    """Return a sync LLM stub that answers every prompt with ``response_text``."""

    def _canned_reply(prompt: str) -> str:
        # The prompt is intentionally ignored; the reply is fixed at creation time.
        return response_text

    return _canned_reply


def create_simple_async_llm_caller(response_text="Test response"):
    """Return an async LLM stub that answers every prompt with ``response_text``."""

    async def _canned_reply(prompt: str) -> str:
        # A tiny sleep stands in for the latency of a real call.
        await asyncio.sleep(0.001)
        return response_text

    return _canned_reply


class TestMACPRunnerCreation:
    """Construction of MACPRunner with and without an explicit config."""

    def test_basic_creation(self):
        """A runner built from just an LLM caller has a non-None llm_caller."""
        runner = MACPRunner(llm_caller=create_simple_llm_caller())

        assert runner is not None
        assert runner.llm_caller is not None

    def test_creation_with_config(self):
        """Values passed to RunnerConfig can be read back from runner.config."""
        caller = create_simple_llm_caller()
        settings = RunnerConfig(timeout=30.0, max_retries=3, adaptive=True)

        stored = MACPRunner(llm_caller=caller, config=settings).config

        assert stored.timeout == 30.0
        assert stored.max_retries == 3
        assert stored.adaptive


class TestSyncExecution:
    """Synchronous runs through MACPRunner.run_round."""

    def test_run_simple(self):
        """A two-agent chain yields a MACPResult with a final answer."""
        graph = create_test_graph(["a", "b"], [("a", "b")])
        runner = MACPRunner(llm_caller=create_simple_llm_caller())

        outcome = runner.run_round(graph)

        assert isinstance(outcome, MACPResult)
        assert outcome.final_answer is not None

    def test_run_linear_graph(self):
        """A three-agent chain reports three entries in execution_order."""
        chain_edges = [("a", "b"), ("b", "c")]
        graph = create_test_graph(["a", "b", "c"], chain_edges)
        runner = MACPRunner(llm_caller=create_simple_llm_caller())

        outcome = runner.run_round(graph)

        assert len(outcome.execution_order) == 3
        assert outcome.final_answer is not None

    def test_run_with_final_agent(self):
        """The requested final_agent_id is echoed back on the result."""
        graph = create_test_graph(["a", "b"], [("a", "b")])
        runner = MACPRunner(llm_caller=create_simple_llm_caller("final response"))

        outcome = runner.run_round(graph, final_agent_id="b")

        assert outcome.final_agent_id == "b"


class TestAsyncExecution:
    """Asynchronous runs through MACPRunner.arun_round."""

    @pytest.mark.asyncio
    async def test_arun_simple(self):
        """A two-agent chain run asynchronously yields a MACPResult with an answer."""
        graph = create_test_graph(["a", "b"], [("a", "b")])
        runner = MACPRunner(async_llm_caller=create_simple_async_llm_caller())

        outcome = await runner.arun_round(graph)

        assert isinstance(outcome, MACPResult)
        assert outcome.final_answer is not None

    @pytest.mark.asyncio
    async def test_arun_parallel_execution(self):
        """A diamond graph runs all four agents, starting with the root."""
        # Diamond: a fans out to b and c, which both feed d.
        diamond_edges = [("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")]
        graph = create_test_graph(["a", "b", "c", "d"], diamond_edges)

        caller = create_simple_async_llm_caller()
        settings = RunnerConfig(enable_parallel=True, adaptive=True)
        runner = MACPRunner(async_llm_caller=caller, config=settings)

        outcome = await runner.arun_round(graph)
        order = outcome.execution_order

        # Every agent ran, and the root came first.
        assert len(order) == 4
        assert order[0] == "a"


class TestTimeouts:
    """Runs where the LLM caller is slower than the configured timeout."""

    @pytest.mark.asyncio
    async def test_timeout_triggers(self):
        """A caller that sleeps far past the timeout still yields a result object."""
        graph = create_test_graph(["a"], [])

        async def slow_llm_caller(prompt: str) -> str:
            # Sleeps 10s against a 0.1s timeout.
            await asyncio.sleep(10.0)
            return "done"

        runner = MACPRunner(
            async_llm_caller=slow_llm_caller,
            config=RunnerConfig(timeout=0.1),
        )
        outcome = await runner.arun_round(graph)

        # Only the existence of a result is checked here.
        assert outcome is not None

    @pytest.mark.asyncio
    async def test_per_agent_timeout(self):
        """Only calls after the first are slow; the run still yields a result."""
        graph = create_test_graph(["a", "b"], [("a", "b")])

        seen = {"calls": 0}

        async def slow_llm_caller(prompt: str) -> str:
            seen["calls"] += 1
            if seen["calls"] <= 1:
                # The first call answers immediately.
                return "response"
            await asyncio.sleep(10.0)
            return "response"

        runner = MACPRunner(
            async_llm_caller=slow_llm_caller,
            config=RunnerConfig(timeout=0.1),
        )
        outcome = await runner.arun_round(graph)

        # Only the existence of a result is checked here.
        assert outcome is not None


class TestRetries:
    """Tests for the retry mechanism of MACPRunner.arun_round."""

    @pytest.mark.asyncio
    async def test_retry_on_failure(self):
        """A caller that fails twice is retried until its third call succeeds."""
        graph = create_test_graph(["a"], [])

        attempt_count = 0

        async def flaky_llm_caller(prompt: str) -> str:
            nonlocal attempt_count
            attempt_count += 1
            if attempt_count < 3:
                msg = "Temporary failure"
                raise RuntimeError(msg)
            return "success"

        config = RunnerConfig(max_retries=5, adaptive=True)
        runner = MACPRunner(async_llm_caller=flaky_llm_caller, config=config)
        result = await runner.arun_round(graph)

        assert attempt_count == 3
        assert result.messages.get("a") == "success"

    @pytest.mark.asyncio
    async def test_max_retries_exceeded(self):
        """A caller that always raises leaves the result's errors attribute set."""
        graph = create_test_graph(["a"], [])

        async def always_fails(prompt: str) -> str:
            msg = "Always fails"
            raise RuntimeError(msg)

        config = RunnerConfig(max_retries=2, adaptive=True)
        runner = MACPRunner(async_llm_caller=always_fails, config=config)
        result = await runner.arun_round(graph)

        # NOTE(review): `is not None` may also hold for an empty error list;
        # confirm MACPResult's default for `errors` and tighten if so.
        assert result.errors is not None

    @pytest.mark.asyncio
    async def test_retry_with_backoff(self):
        """The gap between attempts grows from one retry to the next."""
        graph = create_test_graph(["a"], [])

        import time

        timestamps = []

        async def timing_llm_caller(prompt: str) -> str:
            # perf_counter is monotonic, so intervals cannot be distorted by
            # wall-clock adjustments the way time.time() differences can.
            timestamps.append(time.perf_counter())
            if len(timestamps) < 3:
                msg = "Retry"
                raise RuntimeError(msg)
            return "done"

        config = RunnerConfig(
            max_retries=5,
            retry_delay=0.1,
            retry_backoff=2.0,
            adaptive=True,
        )

        runner = MACPRunner(async_llm_caller=timing_llm_caller, config=config)
        await runner.arun_round(graph)

        # Fail loudly if no retries happened instead of silently skipping the
        # backoff check.
        assert len(timestamps) >= 3, f"expected at least 3 attempts, got {len(timestamps)}"

        delay1 = timestamps[1] - timestamps[0]
        delay2 = timestamps[2] - timestamps[1]
        assert delay2 > delay1  # Backoff should increase delay


class TestBudgetControl:
    """Runs configured with a default BudgetConfig."""

    @pytest.mark.asyncio
    async def test_token_budget_respected(self):
        """A verbose caller under a budget config yields budget or token data."""
        graph = create_test_graph(["a", "b", "c"], [("a", "b"), ("b", "c")])

        async def token_hungry_llm_caller(prompt: str) -> str:
            # A long reply, to simulate heavy token usage.
            return "response " * 100

        # BudgetConfig is built with its defaults; no limits are set explicitly.
        settings = RunnerConfig(budget_config=BudgetConfig(), adaptive=True)
        runner = MACPRunner(async_llm_caller=token_hungry_llm_caller, config=settings)

        outcome = await runner.arun_round(graph)

        # Either a budget summary is attached or token usage was counted.
        assert outcome.budget_summary is not None or outcome.total_tokens > 0

    @pytest.mark.asyncio
    async def test_budget_warning(self):
        """A single-agent run with a default budget config yields a result."""
        graph = create_test_graph(["a"], [])

        caller = create_simple_async_llm_caller()
        # BudgetConfig is built with its defaults.
        settings = RunnerConfig(budget_config=BudgetConfig())
        runner = MACPRunner(async_llm_caller=caller, config=settings)

        outcome = await runner.arun_round(graph)

        # Only the existence of a result is checked here.
        assert outcome is not None


class TestMemoryUpdates:
    """Message flow between agents in a two-agent chain."""

    @pytest.mark.asyncio
    async def test_state_propagation(self):
        """The downstream agent b has a non-None entry in result.messages."""
        graph = create_test_graph(["a", "b"], [("a", "b")])
        runner = MACPRunner(async_llm_caller=create_simple_async_llm_caller())

        outcome = await runner.arun_round(graph)
        messages = outcome.messages

        # b sits downstream of a and must have produced a message.
        assert "b" in messages
        assert messages["b"] is not None

    @pytest.mark.asyncio
    async def test_hidden_state_channels(self):
        """A run with enable_hidden_channels=True yields a result."""
        graph = create_test_graph(["a", "b"], [("a", "b")])

        runner = MACPRunner(
            async_llm_caller=create_simple_async_llm_caller(),
            config=RunnerConfig(enable_hidden_channels=True),
        )
        outcome = await runner.arun_round(graph)

        assert outcome is not None


class TestAdaptiveMode:
    """Runs with RunnerConfig(adaptive=True)."""

    @pytest.mark.asyncio
    async def test_adaptive_routing(self):
        """An adaptive run over a three-agent graph executes at least one agent."""
        triangle_edges = [("a", "b"), ("a", "c"), ("b", "c")]
        graph = create_test_graph(["a", "b", "c"], triangle_edges)

        runner = MACPRunner(
            async_llm_caller=create_simple_async_llm_caller(),
            config=RunnerConfig(adaptive=True),
        )
        outcome = await runner.arun_round(graph)

        assert len(outcome.execution_order) > 0

    @pytest.mark.asyncio
    async def test_adaptive_topology_change(self):
        """A failure on the second LLM call still lets the run return a result."""
        graph = create_test_graph(
            ["a", "b", "fallback", "c"],
            [("a", "b"), ("a", "fallback"), ("b", "c"), ("fallback", "c")],
        )

        seen = {"calls": 0}

        async def maybe_failing_llm_caller(prompt: str) -> str:
            seen["calls"] += 1
            # Only the second call fails (intended to be agent b).
            if seen["calls"] != 2:
                return "response"
            msg = "b failed"
            raise RuntimeError(msg)

        settings = RunnerConfig(adaptive=True, max_retries=0)
        runner = MACPRunner(async_llm_caller=maybe_failing_llm_caller, config=settings)

        outcome = await runner.arun_round(graph)

        # Only the existence of a result is checked here.
        assert outcome is not None


class TestErrorHandling:
    """Tests for error handling when the LLM caller raises."""

    @pytest.mark.asyncio
    async def test_agent_exception_handled(self):
        """An exception on the first LLM call does not crash the run."""
        graph = create_test_graph(["a", "b"], [("a", "b")])

        call_count = 0

        async def failing_llm_caller(prompt: str) -> str:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                msg = "Agent error"
                raise ValueError(msg)
            return "response"

        config = RunnerConfig(max_retries=0)
        runner = MACPRunner(async_llm_caller=failing_llm_caller, config=config)

        result = await runner.arun_round(graph)

        # Should not crash, error should be recorded
        assert result is not None

    @pytest.mark.asyncio
    async def test_on_error_fail_policy(self):
        """With retries disabled, a failing caller leaves an error on the result."""
        graph = create_test_graph(["a"], [])

        async def failing_llm_caller(prompt: str) -> str:
            msg = "Critical error"
            raise RuntimeError(msg)

        config = RunnerConfig(max_retries=0, adaptive=True)
        runner = MACPRunner(async_llm_caller=failing_llm_caller, config=config)
        result = await runner.arun_round(graph)

        # An error must actually be recorded: either a non-empty `errors`
        # collection or an "[Error:" marker in agent a's message. Truthiness is
        # used because `errors is not None` would also accept an empty list.
        assert result.errors or "[Error:" in str(result.messages.get("a", ""))

    @pytest.mark.asyncio
    async def test_on_error_skip_policy(self):
        """A failure on the first LLM call still lets the run return a result."""
        graph = create_test_graph(["a", "b"], [("a", "b")])

        call_count = 0

        async def maybe_failing_llm_caller(prompt: str) -> str:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                msg = "a failed"
                raise RuntimeError(msg)
            return "response"

        config = RunnerConfig(max_retries=0)
        runner = MACPRunner(async_llm_caller=maybe_failing_llm_caller, config=config)
        result = await runner.arun_round(graph)

        # Should continue to agent b
        assert result is not None


class TestMACPResult:
    """Direct construction of MACPResult and read-back of its fields."""

    def test_result_structure(self):
        """Core fields passed to the constructor are readable afterwards."""
        fields = {
            "messages": {"a": "response"},
            "final_answer": "final answer",
            "final_agent_id": "a",
            "execution_order": ["a"],
            "errors": [],
        }

        outcome = MACPResult(**fields)

        assert outcome.final_answer == "final answer"
        assert outcome.final_agent_id == "a"
        assert outcome.execution_order == ["a"]
        assert outcome.errors == []

    def test_result_with_metrics(self):
        """Token and time totals are stored alongside an ExecutionMetrics object."""
        from datetime import datetime

        from execution.errors import ExecutionMetrics

        run_metrics = ExecutionMetrics(
            start_time=datetime.now(),
            total_agents=1,
            total_tokens=500,
        )

        fields = {
            "messages": {"a": "response"},
            "final_answer": "answer",
            "final_agent_id": "a",
            "execution_order": ["a"],
            "errors": [],
            "metrics": run_metrics,
            "total_tokens": 500,
            "total_time": 1.234,
        }
        outcome = MACPResult(**fields)

        assert outcome.total_tokens == 500
        assert outcome.total_time == 1.234


class TestConditionalEdgesAdaptive:
    """Tests for conditional edges (graph.edge_conditions) in adaptive mode."""

    def test_condition_true_executes_target(self):
        """If condition is met — target agent is executed."""
        graph = create_test_graph(["a", "b", "c"], [("a", "b"), ("b", "c")])

        # Condition: a→b executes only if a responds with "ok"
        graph.edge_conditions = {
            ("a", "b"): lambda ctx: "ok" in ctx.messages.get("a", ""),
        }

        llm_caller = create_simple_llm_caller("ok response")
        config = RunnerConfig(adaptive=True)
        runner = MACPRunner(llm_caller=llm_caller, config=config)

        result = runner.run_round(graph)

        assert "a" in result.execution_order
        assert "b" in result.execution_order

    def test_condition_false_skips_target(self):
        """If condition is not met — target agent is skipped."""
        graph = create_test_graph(["a", "b", "c"], [("a", "b"), ("a", "c")])

        # Condition: a→b executes only if a responds with "secret"
        graph.edge_conditions = {
            ("a", "b"): lambda ctx: "secret" in ctx.messages.get("a", ""),
        }

        llm_caller = create_simple_llm_caller("normal response")
        config = RunnerConfig(adaptive=True)
        runner = MACPRunner(llm_caller=llm_caller, config=config)

        result = runner.run_round(graph)

        assert "a" in result.execution_order
        # b's only incoming edge is a→b, whose condition is not met, so b must
        # be skipped — this is the behavior the test is named after.
        assert "b" not in result.execution_order
        # c should execute (unconditional edge)
        assert "c" in result.execution_order

    @pytest.mark.asyncio
    async def test_async_conditional_edges(self):
        """Conditional edges work in async mode."""
        graph = create_test_graph(["a", "b", "c"], [("a", "b"), ("a", "c")])

        graph.edge_conditions = {
            ("a", "b"): lambda ctx: ctx.source_succeeded(),
        }

        async_llm_caller = create_simple_async_llm_caller("response")
        config = RunnerConfig(adaptive=True)
        runner = MACPRunner(async_llm_caller=async_llm_caller, config=config)

        result = await runner.arun_round(graph)

        assert "a" in result.execution_order
        assert "b" in result.execution_order

    def test_topology_changed_count(self):
        """topology_changed_count is exposed on the result as an int."""
        graph = create_test_graph(["a", "b"], [("a", "b")])

        graph.edge_conditions = {
            ("a", "b"): lambda ctx: ctx.source_succeeded(),
        }

        llm_caller = create_simple_llm_caller("response")
        config = RunnerConfig(adaptive=True)
        runner = MACPRunner(llm_caller=llm_caller, config=config)

        result = runner.run_round(graph)

        # Only the type is checked here, not a specific count.
        assert result is not None
        assert isinstance(result.topology_changed_count, int)

    def test_multiple_incoming_conditional_edges(self):
        """Multiple incoming conditional edges: B is not skipped until all are evaluated."""
        graph = create_test_graph(
            ["a", "c", "b"],
            [("a", "b"), ("c", "b")],
        )

        # Different callers for different agents
        llm_callers = {
            "a": lambda _: "fail result",
            "c": lambda _: "good result",
            "b": lambda _: "final response",
        }

        # a→b: condition NOT met (no "success" in a's response)
        # c→b: condition MET ("good" in c's response)
        graph.edge_conditions = {
            ("a", "b"): lambda ctx: "success" in ctx.messages.get("a", ""),
            ("c", "b"): lambda ctx: "good" in ctx.messages.get("c", ""),
        }

        config = RunnerConfig(adaptive=True)
        runner = MACPRunner(
            llm_caller=lambda _: "default",
            llm_callers=llm_callers,
            config=config,
        )

        result = runner.run_round(graph)

        # b should execute because c→b condition is met
        assert "a" in result.execution_order
        assert "c" in result.execution_order
        assert "b" in result.execution_order

    def test_conditional_edges_with_hidden_states_and_chain(self):
        """
        Complex test: conditional edges + hidden states + cascading chain.

        Graph:
            solver → reviewer → finalize  (conditional edge solver→reviewer)
            solver → alt_end              (unconditional)

        Scenario 1 (condition=True):  solver("correct") → reviewer → finalize execute.
        Scenario 2 (condition=False): solver("wrong") → reviewer + finalize are skipped,
                                      alt_end executes.

        Covers:
        - Issue 1: hidden states + conditional edges
        - Issue 2: plan does not stop after the last agent
        - Issue 3: full chain executes after conditional transition
        """
        # --- Scenario 1: condition met → full chain ---
        graph1 = create_test_graph(
            ["solver", "reviewer", "finalize", "alt_end"],
            [
                ("solver", "reviewer"),
                ("reviewer", "finalize"),
                ("solver", "alt_end"),
            ],
        )
        graph1.edge_conditions = {
            ("solver", "reviewer"): lambda ctx: "correct" in ctx.messages.get("solver", ""),
        }

        callers1 = {
            "solver": lambda _: "answer is correct",
            "reviewer": lambda _: "review passed",
            "finalize": lambda _: "done",
            "alt_end": lambda _: "alt",
        }

        config = RunnerConfig(adaptive=True)
        runner1 = MACPRunner(
            llm_caller=lambda _: "default",
            llm_callers=callers1,
            config=config,
        )

        result1 = runner1.run_round_with_hidden(graph1)

        assert "solver" in result1.execution_order
        assert "reviewer" in result1.execution_order
        assert "finalize" in result1.execution_order  # full chain
        assert result1.hidden_states is not None
        assert "solver" in result1.hidden_states

        # --- Scenario 2: condition NOT met → cascading skip ---
        graph2 = create_test_graph(
            ["solver", "reviewer", "finalize", "alt_end"],
            [
                ("solver", "reviewer"),
                ("reviewer", "finalize"),
                ("solver", "alt_end"),
            ],
        )
        graph2.edge_conditions = {
            ("solver", "reviewer"): lambda ctx: "correct" in ctx.messages.get("solver", ""),
        }

        callers2 = {
            "solver": lambda _: "answer is wrong",
            "reviewer": lambda _: "review passed",
            "finalize": lambda _: "done",
            "alt_end": lambda _: "alt ending",
        }

        runner2 = MACPRunner(
            llm_caller=lambda _: "default",
            llm_callers=callers2,
            config=config,
        )

        result2 = runner2.run_round_with_hidden(graph2)

        assert "solver" in result2.execution_order
        assert "reviewer" not in result2.execution_order  # skipped
        assert "finalize" not in result2.execution_order  # cascaded skip
        assert "alt_end" in result2.execution_order  # unconditional path


if __name__ == "__main__":
    # pytest.main returns the exit code rather than exiting the process, so
    # propagate it; otherwise running this file directly exits 0 on failures.
    raise SystemExit(pytest.main([__file__, "-v"]))