metropolis-chess / tests /test_phase_4_3_evolution.py
Forkei's picture
Phase 4.0–4.3: demo prep — auth, Elo fairness, rooms, lobbies, conversational entry, evolution
7bad702
"""Phase 4.3 — agent evolution tests.
Covers pure math functions, the apply_evolution entry point, and
long-horizon clamp guarantees.
"""
from __future__ import annotations
import math
from datetime import datetime, timedelta
import pytest
from app.db import SessionLocal
from app.models.character import (
Character,
CharacterState,
ContentRating,
Visibility,
)
from app.models.evolution import CharacterEvolutionState
from app.models.match import Color, Match, MatchResult, MatchStatus, Player
from app.models.memory import Memory, MemoryType
from app.post_match import evolution as ev
# --- seed helpers --------------------------------------------------------
def _mk_character(s, **over) -> Character:
    """Persist a Character built from baseline fields and return it.

    Any keyword in ``over`` replaces (or adds to) the baseline field of the
    same name, so a test only spells out the attributes it cares about.
    The row is added, committed and refreshed through session ``s``.
    """
    fields = {
        "name": "TestChar",
        "short_description": "x",
        "backstory": "A rich backstory." * 4,
        "voice_descriptor": "voice",
        "target_elo": 1500,
        "current_elo": 1500,
        "floor_elo": 1400,
        "max_elo": 1800,
        "adaptive": True,
        "is_preset": False,
        "owner_id": None,
        "state": CharacterState.READY,
        "visibility": Visibility.PUBLIC,
        "content_rating": ContentRating.FAMILY,
        "aggression": 5,
        "risk_tolerance": 5,
        "patience": 5,
        "trash_talk": 5,
    }
    fields.update(over)

    character = Character(**fields)
    s.add(character)
    s.commit()
    s.refresh(character)
    return character
def _mk_player(s, username="p", elo=1500) -> Player:
    """Persist a Player and return it.

    ``username`` is used for both the username and the display name. The
    row is added, committed and refreshed through session ``s``.
    """
    player = Player(
        username=username,
        display_name=username,
        elo=elo,
    )
    s.add(player)
    s.commit()
    s.refresh(player)
    return player
def _mk_match(
    s,
    *,
    character,
    player,
    result=MatchResult.WHITE_WIN,
    player_color=Color.WHITE,
    status=MatchStatus.COMPLETED,
    char_elo_start=1500,
    player_elo_start=1500,
    is_private=False,
    ended_at=None,
    move_count=30,
) -> Match:
    """Persist a Match between ``character`` and ``player`` and return it.

    Both FEN fields are set to the literal ``"startpos"``. When ``ended_at``
    is falsy the current ``datetime.utcnow()`` is used instead. The row is
    added, committed and refreshed through session ``s``.
    """
    match_fields = {
        "character_id": character.id,
        "player_id": player.id,
        "status": status,
        "result": result,
        "player_color": player_color,
        "initial_fen": "startpos",
        "current_fen": "startpos",
        "move_count": move_count,
        "character_elo_at_start": char_elo_start,
        "player_elo_at_start": player_elo_start,
        "is_private": is_private,
        # Same truthiness-based fallback as `ended_at or utcnow()`.
        "ended_at": ended_at if ended_at else datetime.utcnow(),
    }
    match = Match(**match_fields)
    s.add(match)
    s.commit()
    s.refresh(match)
    return match
# --- slider drift math --------------------------------------------------
def test_slider_nudge_lost_cautious_bumps_aggression():
    """A loss with char_acpl=15 must yield a positive aggression nudge."""
    expected = ("aggression", +ev.SLIDER_DELTA_STEP)
    result = ev.select_slider_nudge(
        won=False,
        lost=True,
        char_acpl=15,
        trash_talk_base=5,
        trash_talk_drift=0.0,
    )
    assert result == expected
def test_slider_nudge_lost_reckless_bumps_patience():
    """A loss with char_acpl=120 must yield a positive patience nudge."""
    expected = ("patience", +ev.SLIDER_DELTA_STEP)
    result = ev.select_slider_nudge(
        won=False,
        lost=True,
        char_acpl=120,
        trash_talk_base=5,
        trash_talk_drift=0.0,
    )
    assert result == expected
def test_slider_nudge_on_a_decisive_win_returns_none():
    """A win with zero trash-talk drift produces no slider nudge.

    Wins are meant to nudge tone, not sliders, so the character does not
    drift merely because it beat a weak opponent.
    """
    result = ev.select_slider_nudge(
        won=True,
        lost=False,
        char_acpl=25,
        trash_talk_base=5,
        trash_talk_drift=0.0,
    )
    assert result is None
def test_slider_nudge_homeostasis_pulls_trash_talk_back():
    """Positive trash_talk drift must be answered with a negative nudge.

    With trash_talk_drift=1.5 the selected nudge has to target
    ``trash_talk`` and carry a delta below zero, pulling the drift back
    toward zero instead of letting it keep climbing.
    """
    result = ev.select_slider_nudge(
        won=True,
        lost=False,
        char_acpl=25,
        trash_talk_base=5,
        trash_talk_drift=1.5,
    )
    assert result is not None

    target, amount = result
    assert target == "trash_talk"
    assert amount < 0
def test_apply_slider_drift_clamps_cumulatively():
    """Thirty stacked aggression nudges must land exactly on the clamp."""
    nudge = ("aggression", +ev.SLIDER_DELTA_STEP)
    accumulated = {}
    for _step in range(30):
        accumulated = ev.apply_slider_drift(accumulated, nudge)

    # Unclamped total would be 30 x SLIDER_DELTA_STEP (15.0 if the step is
    # 0.5, as the original note assumes); it should clamp to
    # SLIDER_DRIFT_CLAMP.
    assert accumulated["aggression"] == ev.SLIDER_DRIFT_CLAMP
# --- opening ema --------------------------------------------------------
def test_opening_ema_moves_toward_signal():
    """Each EMA step with signal=1.0 raises the opening's score.

    From an empty table the first step must equal OPENING_EMA_ALPHA
    (approximately); a second step must push the score above it.
    """
    label = "Sicilian Najdorf"

    after_one = ev.opening_ema_step({}, opening_label="Sicilian Najdorf", signal=1.0)
    assert after_one[label] == pytest.approx(ev.OPENING_EMA_ALPHA)

    after_two = ev.opening_ema_step(
        after_one, opening_label="Sicilian Najdorf", signal=1.0
    )
    assert after_two[label] > ev.OPENING_EMA_ALPHA
def test_opening_ema_clamps_and_ignores_empty_label():
    """A ``None`` opening label leaves an empty score table empty.

    NOTE(review): despite the name, only the empty-label path is asserted
    here; no clamp behaviour is exercised by this test.
    """
    scores = ev.opening_ema_step({}, opening_label=None, signal=1.0)
    assert scores == {}
# --- trap detection + memory -------------------------------------------
def test_detect_trap_when_character_blunders_early():
    """A ply-6 white blunder is a trap the white character fell for."""
    early_blunder = {
        "ply": 6,
        "side": "white",
        "eval_loss_cp": 500,
        "pattern": "scholar_mate",
    }
    found = ev.detect_trap(critical_moments=[early_blunder], character_is_white=True)

    assert found is not None
    assert found["fell_for"] is True
    assert found["pattern"] == "scholar_mate"
def test_detect_trap_ignores_opponent_blunder():
    """An early blunder by the OPPONENT is reported with fell_for=False.

    The entry may still be returned (as a trick the character used), but
    it must not be marked as something the character fell for.
    """
    opponent_blunder = {
        "ply": 4,
        "side": "black",
        "eval_loss_cp": 600,
        "pattern": "gambit_bluff",
    }
    found = ev.detect_trap(
        critical_moments=[opponent_blunder], character_is_white=True
    )

    assert found is not None
    assert found["fell_for"] is False
def test_detect_trap_none_for_late_blunder():
    """A blunder at ply 25 is not reported as a trap."""
    late_blunder = {
        "ply": 25,
        "side": "white",
        "eval_loss_cp": 800,
        "pattern": "late_mistake",
    }
    found = ev.detect_trap(critical_moments=[late_blunder], character_is_white=True)
    assert found is None
def test_update_trap_memory_first_time_sets_brand_new_flag():
    """The first sighting of a pattern adds one entry and flags it as new."""
    detection = {
        "pattern": "scholar",
        "fell_for": True,
        "ply": 6,
        "eval_loss_cp": 500,
    }
    memory, is_new = ev.update_trap_memory(
        [], detected=detection, now=datetime.utcnow()
    )

    assert is_new is True
    assert len(memory) == 1

    only_entry = memory[0]
    assert only_entry["pattern"] == "scholar"
    assert only_entry["fell_for"] == 1
def test_update_trap_memory_second_time_bumps_counter():
    """A repeat sighting increments fell_for and is not flagged as new."""
    first_hit = {
        "pattern": "scholar",
        "fell_for": True,
        "ply": 6,
        "eval_loss_cp": 500,
    }
    after_first, _ = ev.update_trap_memory(
        [], detected=first_hit, now=datetime.utcnow()
    )

    second_hit = {
        "pattern": "scholar",
        "fell_for": True,
        "ply": 8,
        "eval_loss_cp": 450,
    }
    after_second, is_new = ev.update_trap_memory(
        after_first, detected=second_hit, now=datetime.utcnow()
    )

    assert is_new is False
    assert after_second[0]["fell_for"] == 2
# --- tone drift ---------------------------------------------------------
def test_tone_drift_moves_toward_streak_target():
    """One EMA step on a 5-win streak lifts confidence, but not to the clamp."""
    stepped = ev.tone_ema_step({}, win_streak=5, loss_streak=0)
    confidence = stepped["confidence_baseline"]

    assert confidence > 0
    # A single step must stay short of the clamp.
    assert confidence < ev.TONE_CLAMP
def test_tone_drift_clamps_over_many_steps():
    """After 500 steps on a 10-win streak, confidence stays within TONE_CLAMP."""
    current = {}
    for _step in range(500):
        current = ev.tone_ema_step(current, win_streak=10, loss_streak=0)

    upper_bound = ev.TONE_CLAMP + 1e-9  # small tolerance for float error
    assert current["confidence_baseline"] <= upper_bound
# --- apply_evolution: end-to-end ---------------------------------------
def test_apply_evolution_skips_private_match():
    """A private match is reported as skipped and creates no evolution state."""
    with SessionLocal() as session:
        character = _mk_character(session)
        player = _mk_player(session, "ev_priv")
        private_match = _mk_match(
            session, character=character, player=player, is_private=True
        )

        outcome = ev.apply_evolution(
            session,
            match=private_match,
            analysis_moves=[],
            critical_moments=[],
        )

        assert outcome.skipped_private is True
        assert session.get(CharacterEvolutionState, character.id) is None
def test_apply_evolution_creates_state_on_first_run():
    """The first processed match creates the character's evolution state.

    The new state must record one processed match and point at that match
    as its last one.
    """
    with SessionLocal() as session:
        character = _mk_character(session)
        player = _mk_player(session, "ev_first", elo=1500)
        match = _mk_match(
            session,
            character=character,
            player=player,
            player_color=Color.WHITE,
            result=MatchResult.WHITE_WIN,
        )

        outcome = ev.apply_evolution(
            session, match=match, analysis_moves=[], critical_moments=[]
        )
        assert outcome.skipped_private is False

        evo_state = session.get(CharacterEvolutionState, character.id)
        assert evo_state is not None
        assert evo_state.matches_processed == 1
        assert evo_state.last_match_id == match.id
def test_apply_evolution_is_idempotent():
    """Re-running apply_evolution on the same match must change nothing.

    The second call is expected to report ``skipped_idempotent`` and to
    leave ``matches_processed`` and ``slider_drift`` exactly as the first
    call left them.
    """
    with SessionLocal() as s:
        char = _mk_character(s)
        p = _mk_player(s, "ev_id")
        m = _mk_match(s, character=char, player=p)

        ev.apply_evolution(s, match=m, analysis_moves=[], critical_moments=[])
        before_state = s.get(CharacterEvolutionState, char.id)
        # Fail with a readable assertion (not an AttributeError) if the
        # first run did not create the state row.
        assert before_state is not None
        mp_before = before_state.matches_processed
        # Snapshot a copy so a later in-place change to the stored drift
        # cannot silently alter the baseline we compare against.
        drift_before = dict(before_state.slider_drift or {})

        summary = ev.apply_evolution(
            s, match=m, analysis_moves=[], critical_moments=[]
        )
        assert summary.skipped_idempotent is True

        after_state = s.get(CharacterEvolutionState, char.id)
        assert after_state is not None
        assert after_state.matches_processed == mp_before
        assert (after_state.slider_drift or {}) == drift_before
def test_apply_evolution_records_trap_and_creates_learning_memory():
    """A first-time early blunder is recorded and creates a LEARNING memory.

    The summary must report the detected trap and the id of a newly
    created memory, and that memory must be of type ``MemoryType.LEARNING``.
    """
    with SessionLocal() as s:
        char = _mk_character(s)
        # Character played black (player_color=white). Player (white) wins
        # → character lost. Character's blunder on ply 6.
        p = _mk_player(s, "ev_trap")
        m = _mk_match(
            s, character=char, player=p,
            player_color=Color.WHITE, result=MatchResult.WHITE_WIN,
        )
        cms = [
            {"ply": 6, "side": "black", "eval_loss_cp": 550, "pattern": "opening_pin"},
        ]

        summary = ev.apply_evolution(
            s, match=m, analysis_moves=[], critical_moments=cms
        )
        assert summary.trap_detected is not None
        assert summary.new_learning_memory_id is not None

        mem = s.get(Memory, summary.new_learning_memory_id)
        # Guard the lookup: a dangling id should fail as an assertion, not
        # as an AttributeError on None.
        assert mem is not None
        assert mem.type == MemoryType.LEARNING
def test_apply_evolution_second_trap_hit_bumps_counter_but_no_new_memory():
    """A repeated trap bumps its counter without creating another memory.

    Two matches with the same early blunder pattern are processed in
    order. After the second one, the summary must carry no new learning
    memory id and the stored ``opening_pin`` entry must show
    ``fell_for == 2``.
    """
    with SessionLocal() as s:
        char = _mk_character(s)
        p = _mk_player(s, "ev_trap2")
        cms = [
            {"ply": 6, "side": "black", "eval_loss_cp": 550, "pattern": "opening_pin"},
        ]

        # First match ended five minutes before the second one.
        m1 = _mk_match(
            s, character=char, player=p,
            player_color=Color.WHITE, result=MatchResult.WHITE_WIN,
            ended_at=datetime.utcnow() - timedelta(minutes=5),
        )
        ev.apply_evolution(s, match=m1, analysis_moves=[], critical_moments=cms)

        m2 = _mk_match(
            s, character=char, player=p,
            player_color=Color.WHITE, result=MatchResult.WHITE_WIN,
        )
        summary = ev.apply_evolution(
            s, match=m2, analysis_moves=[], critical_moments=cms
        )
        assert summary.new_learning_memory_id is None

        state = s.get(CharacterEvolutionState, char.id)
        # Fail as an assertion, not an AttributeError, if no state exists.
        assert state is not None
        # `or []` keeps a missing trap list on the assertion path below
        # instead of raising TypeError while iterating None.
        entry = next(
            (e for e in (state.trap_memory or []) if e["pattern"] == "opening_pin"),
            None,
        )
        assert entry is not None
        assert entry["fell_for"] == 2
def test_50_match_simulation_respects_cumulative_clamps():
    """Long-horizon test: run the pipeline on 50 matches and confirm the
    character hasn't drifted outside its identity range.

    Every match is a loss for the character (it plays black, white wins).
    Afterwards each slider drift must stay within ``SLIDER_DRIFT_CLAMP``,
    ``tilt_baseline`` within ``TONE_CLAMP``, and all 50 matches must be
    counted as processed.
    """
    with SessionLocal() as s:
        char = _mk_character(s, aggression=3, patience=8)  # calm, patient base
        p = _mk_player(s, "long", elo=1700)
        now = datetime.utcnow()

        for i in range(50):
            # Spread end times one minute apart, oldest first.
            ended = now - timedelta(minutes=50 - i)
            m = _mk_match(
                s, character=char, player=p,
                player_color=Color.WHITE,
                result=MatchResult.WHITE_WIN,  # character loses every match (they're black)
                char_elo_start=1500, player_elo_start=1700,
                ended_at=ended,
            )
            # Half the matches — simulate reckless play (high ACPL).
            moves = [{"side": "black", "eval_loss_cp": 100}] * 15 if i % 2 else []
            ev.apply_evolution(s, match=m, analysis_moves=moves, critical_moments=[])

        state = s.get(CharacterEvolutionState, char.id)
        # Fail as an assertion, not an AttributeError, if no state exists.
        assert state is not None

        # Same `or {}` guard the idempotency test applies to slider_drift.
        slider_drift = state.slider_drift or {}
        tone_drift = state.tone_drift or {}
        for slider in ("aggression", "risk_tolerance", "patience", "trash_talk"):
            assert abs(slider_drift.get(slider, 0.0)) <= ev.SLIDER_DRIFT_CLAMP
        assert abs(tone_drift.get("tilt_baseline", 0.0)) <= ev.TONE_CLAMP + 1e-9
        assert state.matches_processed == 50
# --- integration helpers (sliders + tone) ------------------------------
def test_effective_sliders_applies_drift_and_clamps_1_to_10():
    """Base aggression 9 with +2.0 drift must come out as 10, not 11."""
    with SessionLocal() as session:
        character = _mk_character(session, aggression=9)
        # The state object is built in memory and passed straight to the
        # helper; it is never added to the session.
        drifted_state = CharacterEvolutionState(
            character_id=character.id,
            slider_drift={"aggression": +2.0},
            opening_scores={},
            trap_memory=[],
            tone_drift={},
            matches_processed=0,
            last_match_id=None,
        )

        effective = ev.effective_sliders(character, drifted_state)
        assert effective["aggression"] == 10  # clamped, not 11
def test_tone_bias_for_none_state_returns_zeros():
    """With no evolution state, both tone baselines are 0.0."""
    neutral = ev.tone_bias_for(None)
    for key in ("confidence_baseline", "tilt_baseline"):
        assert neutral[key] == 0.0