import math

from proteus.game.runtime.trace import TurnTrace, SessionTrace
from proteus.game.metrics.metrics import compute_metrics


def _turn(idx, action, motive, habit, reward, focal_pos=(0, 0)):
    """Build a ``TurnTrace`` fixture with derived flags.

    The turn is marked diagnostic when ``motive`` and ``habit`` differ, and
    congruent when ``action`` equals ``motive``. The observation is left
    empty and the predator position is fixed at ``(1, 1)``.
    """
    return TurnTrace(
        turn_idx=idx,
        observation="",
        action=action,
        motive_action=motive,
        habit_action=habit,
        is_diagnostic=(motive != habit),
        was_congruent=(action == motive),
        reward=reward,
        focal_pos=focal_pos,
        predator_pos=(1, 1),
    )


def test_metrics_on_a_mixed_session():
    """Mixed session: one diagnostic hit, one diagnostic miss, one neutral turn."""
    turns = [
        _turn(1, "up", "up", "left", 5.0),       # diagnostic, congruent
        _turn(2, "left", "up", "left", -2.0),    # diagnostic, NOT congruent
        _turn(3, "stay", "stay", "stay", -2.0),  # non-diagnostic, congruent
    ]
    m = compute_metrics(turns, played_turns=3, play_turns=10, outcome="eliminated")
    # motive-reading accuracy: 2/3 congruent
    assert round(m["motive_reading_accuracy"], 1) == 66.7
    # reactivity: of 2 diagnostic turns, 1 congruent -> 50.0
    assert m["reactivity_index"] == 50.0
    # survival fraction: 3/10 -> 30.0
    assert m["survival_fraction"] == 30.0
    # first divergence from motive_action is turn 2
    assert m["first_divergence_turn"] == 2.0


def test_metrics_no_diagnostic_turns_is_zero_reactivity():
    """With no diagnostic turns the reactivity index is reported as 0.0."""
    turns = [_turn(1, "stay", "stay", "stay", 0.0)]
    m = compute_metrics(turns, played_turns=1, play_turns=10, outcome="eliminated")
    assert m["reactivity_index"] == 0.0


def test_metrics_empty_session():
    """An empty turn list yields zero accuracy and zero first-divergence."""
    m = compute_metrics([], played_turns=0, play_turns=10, outcome="eliminated")
    assert m["motive_reading_accuracy"] == 0.0
    assert m["first_divergence_turn"] == 0.0


def test_perfect_agent_never_diverges_first_divergence_zero():
    """A fully congruent session reports 100% accuracy and no divergence."""
    # All turns congruent → agent never diverged from the motive action.
    turns = [
        _turn(1, "up", "up", "left", 5.0),
        _turn(2, "right", "right", "down", 5.0),
    ]
    m = compute_metrics(turns, played_turns=2, play_turns=10, outcome="survived")
    assert m["motive_reading_accuracy"] == 100.0
    assert m["first_divergence_turn"] == 0.0  # never diverged


def test_all_diagnostic_all_congruent_full_reactivity():
    """Every turn diagnostic and congruent gives a reactivity index of 100."""
    turns = [
        _turn(1, "up", "up", "left", 5.0),        # diagnostic + congruent
        _turn(2, "right", "right", "down", 5.0),  # diagnostic + congruent
    ]
    m = compute_metrics(turns, played_turns=2, play_turns=10, outcome="survived")
    assert m["reactivity_index"] == 100.0


def test_survival_fraction_capped_at_100():
    """Survival fraction never exceeds 100 even if played_turns > play_turns."""
    turns = [_turn(1, "up", "up", "left", 5.0)]
    # played_turns exceeds play_turns (defensive) → capped, not >100.
    m = compute_metrics(turns, played_turns=15, play_turns=10, outcome="survived")
    assert m["survival_fraction"] == 100.0


def test_empty_session_all_keys_zero():
    """An empty session returns exactly this key set, all valued 0.0."""
    m = compute_metrics([], played_turns=0, play_turns=10, outcome="eliminated")
    assert m == {
        "motive_reading_accuracy": 0.0,
        "reactivity_index": 0.0,
        "survival_fraction": 0.0,
        "first_divergence_turn": 0.0,
        "away_move_fraction": 0.0,
        "mean_step_reward": 0.0,
        "trajectory_agreement": 0.0,
        "final_distance_gap": 0.0,
        "time_to_capture": 0.0,
        "distance_auc": 0.0,
        "min_distance": 0.0,
        "near_capture_count": 0.0,
    }


def test_away_move_fraction_and_mean_reward():
    """Two of three positive-reward turns -> 2/3 away fraction; mean reward 2/3."""
    turns = [
        _turn(1, "up", "up", "left", 1.0, (3, 3)),      # away (reward > 0)
        _turn(2, "right", "up", "left", -1.0, (3, 2)),  # toward (reward < 0)
        _turn(3, "up", "up", "left", 2.0, (4, 2)),      # away
    ]
    m = compute_metrics(turns, played_turns=3, play_turns=5, outcome="eliminated")
    # Thirds are not exactly representable in binary floating point, so compare
    # with a tolerance instead of depending on the implementation's exact
    # order of multiplication and division.
    assert math.isclose(m["away_move_fraction"], 2 / 3 * 100.0)
    assert math.isclose(m["mean_step_reward"], (1.0 - 1.0 + 2.0) / 3)


def test_trajectory_agreement_and_final_gap():
    """One of two positions matches the optimal rollout; safety gap is 4 - 2."""
    turns = [
        _turn(1, "up", "up", "left", 1.0, (3, 3)),
        _turn(2, "up", "up", "left", 1.0, (3, 2)),
    ]
    # Optimal pre-move positions: turn1 matches (3,3), turn2 differs (4,2) vs (3,2).
    m = compute_metrics(
        turns,
        played_turns=2,
        play_turns=5,
        outcome="eliminated",
        optimal_focal_positions=[(3, 3), (4, 2)],
        realized_final_safety=2,
        optimal_final_safety=4,
    )
    assert m["trajectory_agreement"] == 1 / 2 * 100.0
    assert m["final_distance_gap"] == 2.0


def test_new_metric_keys_default_zero_without_rollout():
    """Without rollout inputs the trajectory metrics are present and 0.0."""
    turns = [_turn(1, "up", "up", "left", 1.0, (3, 3))]
    m = compute_metrics(turns, played_turns=1, play_turns=5, outcome="survived")
    # Trajectory keys default to 0.0 when no rollout data is supplied.
    assert m["trajectory_agreement"] == 0.0
    assert m["final_distance_gap"] == 0.0
    assert set(m) >= {
        "motive_reading_accuracy",
        "reactivity_index",
        "survival_fraction",
        "first_divergence_turn",
        "away_move_fraction",
        "mean_step_reward",
        "trajectory_agreement",
        "final_distance_gap",
    }


def test_trajectory_agreement_penalizes_turns_beyond_optimal():
    """Turns past the end of the optimal rollout count as disagreements."""
    # Realized ran longer than the optimal rollout: the extra turn has no
    # optimal counterpart and counts as a disagreement (denominator = n).
    turns = [
        _turn(1, "up", "up", "left", 1.0, (3, 3)),
        _turn(2, "up", "up", "left", 1.0, (3, 2)),
        _turn(3, "up", "up", "left", 1.0, (3, 1)),
    ]
    m = compute_metrics(
        turns,
        played_turns=3,
        play_turns=5,
        outcome="survived",
        optimal_focal_positions=[(3, 3), (3, 2)],  # only 2 optimal positions
    )
    # turns 1-2 match optimal -> 2 agree; turn 3 has no optimal counterpart.
    # agreement = 2/3 (divided by n=3, NOT by compared=2).
    # Tolerance-based comparison: 2/3 is not exactly representable.
    assert math.isclose(m["trajectory_agreement"], 2 / 3 * 100.0)


def test_distance_metrics_present_and_sane():
    """Distance metrics over post-move BFS distances 3, 2, 1 on an eliminated run."""
    # Built directly rather than via _turn because these traces also need
    # post_bfs_distance, which the helper does not set.
    turns = [
        TurnTrace(
            turn_idx=i,
            observation="",
            action="up",
            motive_action="up",
            habit_action="left",
            is_diagnostic=True,
            was_congruent=True,
            reward=1.0,
            focal_pos=(3, 3),
            predator_pos=(5, 3),
            post_bfs_distance=d,
        )
        for i, d in enumerate([3, 2, 1], start=1)
    ]
    m = compute_metrics(
        turns,
        played_turns=3,
        play_turns=5,
        outcome="eliminated",
        max_bfs_distance=6,
    )
    assert m["time_to_capture"] == 3  # eliminated -> played_turns
    assert m["min_distance"] == 1.0
    assert m["near_capture_count"] == 1.0
    assert 0.0 < m["distance_auc"] <= 1.0