Spaces:
Sleeping
Sleeping
import math

from proteus.game.metrics.metrics import compute_metrics
from proteus.game.runtime.trace import TurnTrace, SessionTrace
def _turn(idx, action, motive, habit, reward, focal_pos=(0, 0)):
    """Build a ``TurnTrace`` fixture for a single turn.

    The two boolean flags are derived from the arguments rather than passed
    in: a turn is diagnostic when the motive and habit actions differ, and
    congruent when the chosen action equals the motive action. The
    observation is always empty and the predator is fixed at ``(1, 1)``.
    """
    diagnostic = motive != habit
    congruent = action == motive
    return TurnTrace(
        turn_idx=idx,
        observation="",
        action=action,
        motive_action=motive,
        habit_action=habit,
        is_diagnostic=diagnostic,
        was_congruent=congruent,
        reward=reward,
        focal_pos=focal_pos,
        predator_pos=(1, 1),
    )
def test_metrics_on_a_mixed_session():
    """Check the four core metrics on a three-turn session with mixed turns."""
    session = [
        _turn(1, "up", "up", "left", 5.0),       # diagnostic, congruent
        _turn(2, "left", "up", "left", -2.0),    # diagnostic, NOT congruent
        _turn(3, "stay", "stay", "stay", -2.0),  # non-diagnostic, congruent
    ]
    metrics = compute_metrics(
        session, played_turns=3, play_turns=10, outcome="eliminated"
    )

    # Motive-reading accuracy: 2 of 3 turns are congruent.
    accuracy = round(metrics["motive_reading_accuracy"], 1)
    assert accuracy == 66.7

    # Reactivity: 1 of the 2 diagnostic turns is congruent -> 50.0.
    assert metrics["reactivity_index"] == 50.0

    # Survival fraction: 3 played out of 10 -> 30.0.
    assert metrics["survival_fraction"] == 30.0

    # The first turn whose action differs from motive_action is turn 2.
    assert metrics["first_divergence_turn"] == 2.0
def test_metrics_no_diagnostic_turns_is_zero_reactivity():
    """A session with no diagnostic turn must report a reactivity of 0.0."""
    # motive == habit, so the only turn is non-diagnostic.
    only_turn = _turn(1, "stay", "stay", "stay", 0.0)
    metrics = compute_metrics(
        [only_turn], played_turns=1, play_turns=10, outcome="eliminated"
    )
    assert metrics["reactivity_index"] == 0.0
def test_metrics_empty_session():
    """With no turns at all, accuracy and first divergence are both 0.0."""
    metrics = compute_metrics(
        [], played_turns=0, play_turns=10, outcome="eliminated"
    )
    for key in ("motive_reading_accuracy", "first_divergence_turn"):
        assert metrics[key] == 0.0
def test_perfect_agent_never_diverges_first_divergence_zero():
    """An agent that always plays the motive action reports no divergence."""
    # Every turn is congruent, so the agent never left the motive action.
    session = [
        _turn(1, "up", "up", "left", 5.0),
        _turn(2, "right", "right", "down", 5.0),
    ]
    metrics = compute_metrics(
        session, played_turns=2, play_turns=10, outcome="survived"
    )
    assert metrics["motive_reading_accuracy"] == 100.0
    # 0.0 is the value asserted for "never diverged".
    assert metrics["first_divergence_turn"] == 0.0
def test_all_diagnostic_all_congruent_full_reactivity():
    """When every turn is diagnostic and congruent, reactivity is 100.0."""
    session = [
        _turn(1, "up", "up", "left", 5.0),        # diagnostic + congruent
        _turn(2, "right", "right", "down", 5.0),  # diagnostic + congruent
    ]
    metrics = compute_metrics(
        session, played_turns=2, play_turns=10, outcome="survived"
    )
    assert metrics["reactivity_index"] == 100.0
def test_survival_fraction_capped_at_100():
    """``survival_fraction`` must not exceed 100.0 even for oversized input."""
    session = [_turn(1, "up", "up", "left", 5.0)]
    # Defensive case: played_turns (15) is larger than play_turns (10); the
    # result is expected to be capped rather than reported as >100.
    metrics = compute_metrics(
        session, played_turns=15, play_turns=10, outcome="survived"
    )
    assert metrics["survival_fraction"] == 100.0
def test_empty_session_all_keys_zero():
    """An empty session yields exactly the listed keys, each equal to 0.0.

    The comparison is a full dict equality, so an extra or missing key in
    the result fails the test just like a non-zero value would.
    """
    expected_keys = (
        "motive_reading_accuracy",
        "reactivity_index",
        "survival_fraction",
        "first_divergence_turn",
        "away_move_fraction",
        "mean_step_reward",
        "trajectory_agreement",
        "final_distance_gap",
        "time_to_capture",
        "distance_auc",
        "min_distance",
        "near_capture_count",
    )
    expected = dict.fromkeys(expected_keys, 0.0)
    metrics = compute_metrics(
        [], played_turns=0, play_turns=10, outcome="eliminated"
    )
    assert metrics == expected
def test_away_move_fraction_and_mean_reward():
    """Check ``away_move_fraction`` and ``mean_step_reward`` on three turns.

    Two turns carry a positive reward and one a negative reward, so the
    expected away-move fraction is 2/3 expressed as a percentage and the
    expected mean reward is the plain average of the three rewards.

    Both expectations are non-terminating binary fractions, so they are
    compared with ``math.isclose`` instead of ``==``: an exact comparison
    would tie the test to the order in which the implementation multiplies
    and divides (``2 / 3 * 100.0`` and ``100.0 * 2 / 3`` differ in the last
    bit).
    """
    turns = [
        _turn(1, "up", "up", "left", 1.0, (3, 3)),      # away (reward > 0)
        _turn(2, "right", "up", "left", -1.0, (3, 2)),  # toward (reward < 0)
        _turn(3, "up", "up", "left", 2.0, (4, 2)),      # away
    ]
    m = compute_metrics(turns, played_turns=3, play_turns=5, outcome="eliminated")
    assert math.isclose(m["away_move_fraction"], 2 / 3 * 100.0)
    assert math.isclose(m["mean_step_reward"], (1.0 - 1.0 + 2.0) / 3)
def test_trajectory_agreement_and_final_gap():
    """Check trajectory agreement and final distance gap with rollout data."""
    realized = [
        _turn(1, "up", "up", "left", 1.0, (3, 3)),
        _turn(2, "up", "up", "left", 1.0, (3, 2)),
    ]
    # Optimal pre-move positions: turn 1 matches (3, 3); turn 2 differs,
    # (4, 2) optimal versus (3, 2) realized.
    optimal_positions = [(3, 3), (4, 2)]
    metrics = compute_metrics(
        realized,
        played_turns=2,
        play_turns=5,
        outcome="eliminated",
        optimal_focal_positions=optimal_positions,
        realized_final_safety=2,
        optimal_final_safety=4,
    )
    # One of two turns agrees -> 1 / 2 * 100.0.
    assert metrics["trajectory_agreement"] == 50.0
    assert metrics["final_distance_gap"] == 2.0
def test_new_metric_keys_default_zero_without_rollout():
    """Without rollout arguments the trajectory metrics are 0.0 but present."""
    session = [_turn(1, "up", "up", "left", 1.0, (3, 3))]
    metrics = compute_metrics(
        session, played_turns=1, play_turns=5, outcome="survived"
    )

    # Trajectory keys default to 0.0 when no rollout data is supplied.
    for key in ("trajectory_agreement", "final_distance_gap"):
        assert metrics[key] == 0.0

    # These keys must all be present; additional keys are allowed.
    required = {
        "motive_reading_accuracy",
        "reactivity_index",
        "survival_fraction",
        "first_divergence_turn",
        "away_move_fraction",
        "mean_step_reward",
        "trajectory_agreement",
        "final_distance_gap",
    }
    assert required <= set(metrics)
def test_trajectory_agreement_penalizes_turns_beyond_optimal():
    """Turns past the end of the optimal rollout count as disagreements.

    The realized session has three turns but only two optimal positions are
    supplied. The extra turn has no optimal counterpart, so the expected
    agreement is 2 matches over all n=3 realized turns, not over the 2
    turns that could be compared.

    The expectation is compared with ``math.isclose`` because 2/3 is not
    exactly representable: ``==`` would make the test depend on the order of
    the implementation's floating-point operations.
    """
    turns = [
        _turn(1, "up", "up", "left", 1.0, (3, 3)),
        _turn(2, "up", "up", "left", 1.0, (3, 2)),
        _turn(3, "up", "up", "left", 1.0, (3, 1)),
    ]
    m = compute_metrics(
        turns, played_turns=3, play_turns=5, outcome="survived",
        optimal_focal_positions=[(3, 3), (3, 2)],  # only 2 optimal positions
    )
    # Turns 1-2 match optimal -> 2 agree; turn 3 has no optimal counterpart.
    # agreement = 2/3 (divided by n=3, NOT by compared=2).
    assert math.isclose(m["trajectory_agreement"], 2 / 3 * 100.0)
def test_distance_metrics_present_and_sane():
    """Sanity-check the distance-based metrics on an eliminated session.

    Three turns are built directly with ``TurnTrace`` (``_turn`` does not
    expose ``post_bfs_distance``), with post-move BFS distances of 3, 2 and
    1. ``compute_metrics`` and ``TurnTrace`` come from the module-level
    imports; the function-scope re-imports were redundant.
    """
    turns = [
        TurnTrace(
            turn_idx=i, observation="", action="up", motive_action="up",
            habit_action="left", is_diagnostic=True, was_congruent=True,
            reward=1.0, focal_pos=(3, 3), predator_pos=(5, 3),
            post_bfs_distance=d,
        )
        for i, d in enumerate([3, 2, 1], start=1)
    ]
    m = compute_metrics(
        turns, played_turns=3, play_turns=5, outcome="eliminated",
        max_bfs_distance=6,
    )
    assert m["time_to_capture"] == 3  # eliminated -> played_turns
    assert m["min_distance"] == 1.0   # smallest of the distances 3, 2, 1
    assert m["near_capture_count"] == 1.0
    # Only the range is pinned here, not the exact AUC value.
    assert 0.0 < m["distance_auc"] <= 1.0