Spaces:
Running
Running
| """Phase 4.3 — agent evolution tests. | |
| Covers pure math functions, the apply_evolution entry point, and | |
| long-horizon clamp guarantees. | |
| """ | |
from __future__ import annotations

import math
from datetime import datetime, timedelta, timezone

import pytest

from app.db import SessionLocal
from app.models.character import (
    Character,
    CharacterState,
    ContentRating,
    Visibility,
)
from app.models.evolution import CharacterEvolutionState
from app.models.match import Color, Match, MatchResult, MatchStatus, Player
from app.models.memory import Memory, MemoryType
from app.post_match import evolution as ev
| # --- seed helpers -------------------------------------------------------- | |
def _mk_character(s, **over) -> Character:
    """Persist a ``Character`` built from baseline test values and return it.

    Args:
        s: Open database session; the row is added, committed and refreshed
            through it.
        **over: Field overrides layered on top of the baseline values.

    Returns:
        The committed ``Character`` instance, refreshed from the session.
    """
    baseline = {
        "name": "TestChar",
        "short_description": "x",
        "backstory": "A rich backstory." * 4,
        "voice_descriptor": "voice",
        "target_elo": 1500,
        "current_elo": 1500,
        "floor_elo": 1400,
        "max_elo": 1800,
        "adaptive": True,
        "is_preset": False,
        "owner_id": None,
        "state": CharacterState.READY,
        "visibility": Visibility.PUBLIC,
        "content_rating": ContentRating.FAMILY,
        "aggression": 5,
        "risk_tolerance": 5,
        "patience": 5,
        "trash_talk": 5,
    }
    # Caller-supplied overrides win over the baseline.
    fields = {**baseline, **over}
    character = Character(**fields)
    s.add(character)
    s.commit()
    s.refresh(character)
    return character
def _mk_player(s, username="p", elo=1500) -> Player:
    """Persist a ``Player`` whose display name equals its username.

    Args:
        s: Open database session used to add, commit and refresh the row.
        username: Value used for both ``username`` and ``display_name``.
        elo: Rating stored on the player.

    Returns:
        The committed ``Player`` instance, refreshed from the session.
    """
    player = Player(username=username, display_name=username, elo=elo)
    s.add(player)
    s.commit()
    s.refresh(player)
    return player
def _mk_match(
    s, *, character, player, result=MatchResult.WHITE_WIN,
    player_color=Color.WHITE, status=MatchStatus.COMPLETED,
    char_elo_start=1500, player_elo_start=1500, is_private=False,
    ended_at=None, move_count=30,
) -> Match:
    """Persist a ``Match`` between ``character`` and ``player`` and return it.

    Args:
        s: Open database session used to add, commit and refresh the row.
        character: Persisted character; only its ``id`` is read.
        player: Persisted player; only its ``id`` is read.
        result: Match result stored on the row.
        player_color: Colour the human player had.
        status: Match status stored on the row.
        char_elo_start: Stored as ``character_elo_at_start``.
        player_elo_start: Stored as ``player_elo_at_start``.
        is_private: Whether the match is flagged private.
        ended_at: End timestamp; when ``None`` the current UTC time is used.
        move_count: Stored move count.

    Returns:
        The committed ``Match`` instance, refreshed from the session.
    """
    if ended_at is None:
        # ``datetime.utcnow()`` is deprecated since Python 3.12. This yields
        # the same naive-UTC value, so it stays comparable with the naive
        # timestamps the tests pass in explicitly.
        ended_at = datetime.now(timezone.utc).replace(tzinfo=None)
    match_row = Match(
        character_id=character.id,
        player_id=player.id,
        status=status,
        result=result,
        player_color=player_color,
        initial_fen="startpos",
        current_fen="startpos",
        move_count=move_count,
        character_elo_at_start=char_elo_start,
        player_elo_at_start=player_elo_start,
        is_private=is_private,
        ended_at=ended_at,
    )
    s.add(match_row)
    s.commit()
    s.refresh(match_row)
    return match_row
| # --- slider drift math -------------------------------------------------- | |
def test_slider_nudge_lost_cautious_bumps_aggression():
    """A loss with ``char_acpl=15`` must select a positive aggression nudge."""
    selected = ev.select_slider_nudge(
        won=False,
        lost=True,
        char_acpl=15,
        trash_talk_base=5,
        trash_talk_drift=0.0,
    )
    expected = ("aggression", +ev.SLIDER_DELTA_STEP)
    assert selected == expected
def test_slider_nudge_lost_reckless_bumps_patience():
    """A loss with ``char_acpl=120`` must select a positive patience nudge."""
    selected = ev.select_slider_nudge(
        won=False,
        lost=True,
        char_acpl=120,
        trash_talk_base=5,
        trash_talk_drift=0.0,
    )
    expected = ("patience", +ev.SLIDER_DELTA_STEP)
    assert selected == expected
def test_slider_nudge_on_a_decisive_win_returns_none():
    """A win with zero trash-talk drift must not produce a slider nudge.

    Wins are meant to nudge tone, not sliders, so the character does not
    drift merely because it beat a weak opponent.
    """
    selected = ev.select_slider_nudge(
        won=True,
        lost=False,
        char_acpl=25,
        trash_talk_base=5,
        trash_talk_drift=0.0,
    )
    assert selected is None
def test_slider_nudge_homeostasis_pulls_trash_talk_back():
    """Positive trash-talk drift must be answered with a negative nudge.

    With ``trash_talk_drift=1.5`` on a win, the selected nudge has to target
    ``trash_talk`` and carry a negative delta, pulling the drift back toward
    zero instead of letting it keep climbing.
    """
    selected = ev.select_slider_nudge(
        won=True,
        lost=False,
        char_acpl=25,
        trash_talk_base=5,
        trash_talk_drift=1.5,
    )
    assert selected is not None
    target_slider, step = selected
    assert target_slider == "trash_talk"
    assert step < 0
def test_apply_slider_drift_clamps_cumulatively():
    """Thirty consecutive positive aggression nudges must end at the clamp."""
    nudge = ("aggression", +ev.SLIDER_DELTA_STEP)
    accumulated = {}
    for _step in range(30):
        accumulated = ev.apply_slider_drift(accumulated, nudge)
    # The unclamped sum would be 30 * SLIDER_DELTA_STEP (15.0 if the step is
    # 0.5, as the original note assumed — confirm against the constant), so
    # the stored value must be pinned to SLIDER_DRIFT_CLAMP.
    assert accumulated["aggression"] == ev.SLIDER_DRIFT_CLAMP
| # --- opening ema -------------------------------------------------------- | |
def test_opening_ema_moves_toward_signal():
    """Repeated positive signals must keep raising an opening's score.

    The first step from an empty mapping is expected to land on
    ``OPENING_EMA_ALPHA``; a second identical step must push it higher.
    """
    label = "Sicilian Najdorf"

    scores = ev.opening_ema_step({}, opening_label=label, signal=1.0)
    assert scores[label] == pytest.approx(ev.OPENING_EMA_ALPHA)

    scores = ev.opening_ema_step(scores, opening_label=label, signal=1.0)
    assert scores[label] > ev.OPENING_EMA_ALPHA
def test_opening_ema_clamps_and_ignores_empty_label():
    """A step with ``opening_label=None`` must leave the score map empty."""
    # NOTE(review): the test name mentions clamping, but only the
    # empty-label path is asserted here — confirm whether a clamp check
    # was intended.
    scores = ev.opening_ema_step({}, opening_label=None, signal=1.0)
    assert scores == {}
| # --- trap detection + memory ------------------------------------------- | |
def test_detect_trap_when_character_blunders_early():
    """An early blunder by the character's own side is reported as fallen-for.

    The character plays white and the single critical moment is a white
    move on ply 6 losing 500 cp.
    """
    moments = [
        {
            "ply": 6,
            "side": "white",
            "eval_loss_cp": 500,
            "pattern": "scholar_mate",
        },
    ]
    detected = ev.detect_trap(critical_moments=moments, character_is_white=True)
    assert detected is not None
    assert detected["fell_for"] is True
    assert detected["pattern"] == "scholar_mate"
def test_detect_trap_ignores_opponent_blunder():
    """An early blunder by the opponent is reported with ``fell_for=False``.

    The entry may still be returned (as a trick the character used), but it
    must not be counted as a trap the character fell for.
    """
    moments = [
        {
            "ply": 4,
            "side": "black",
            "eval_loss_cp": 600,
            "pattern": "gambit_bluff",
        },
    ]
    detected = ev.detect_trap(critical_moments=moments, character_is_white=True)
    assert detected is not None
    assert detected["fell_for"] is False
def test_detect_trap_none_for_late_blunder():
    """A blunder on ply 25 must not be reported as a trap."""
    moments = [
        {
            "ply": 25,
            "side": "white",
            "eval_loss_cp": 800,
            "pattern": "late_mistake",
        },
    ]
    detected = ev.detect_trap(critical_moments=moments, character_is_white=True)
    assert detected is None
def test_update_trap_memory_first_time_sets_brand_new_flag():
    """Recording a pattern into empty memory yields one brand-new entry.

    The returned flag must be ``True`` and the single entry must carry the
    pattern name with a ``fell_for`` counter of 1.
    """
    detected = {
        "pattern": "scholar",
        "fell_for": True,
        "ply": 6,
        "eval_loss_cp": 500,
    }
    # Naive-UTC "now": same value as the deprecated ``datetime.utcnow()``
    # without the Python 3.12+ DeprecationWarning.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    entries, brand_new = ev.update_trap_memory([], detected=detected, now=now)
    assert brand_new is True
    assert len(entries) == 1
    assert entries[0]["pattern"] == "scholar"
    assert entries[0]["fell_for"] == 1
def test_update_trap_memory_second_time_bumps_counter():
    """A repeat of a known pattern bumps its counter and is not brand-new."""
    first, _ = ev.update_trap_memory(
        [],
        detected={
            "pattern": "scholar",
            "fell_for": True,
            "ply": 6,
            "eval_loss_cp": 500,
        },
        # Naive-UTC "now"; replaces the deprecated ``datetime.utcnow()``.
        now=datetime.now(timezone.utc).replace(tzinfo=None),
    )
    second, brand_new = ev.update_trap_memory(
        first,
        detected={
            "pattern": "scholar",
            "fell_for": True,
            "ply": 8,
            "eval_loss_cp": 450,
        },
        now=datetime.now(timezone.utc).replace(tzinfo=None),
    )
    assert brand_new is False
    assert second[0]["fell_for"] == 2
| # --- tone drift --------------------------------------------------------- | |
def test_tone_drift_moves_toward_streak_target():
    """One EMA step on a 5-win streak raises confidence, but not to the clamp."""
    tone = ev.tone_ema_step({}, win_streak=5, loss_streak=0)
    confidence = tone["confidence_baseline"]
    assert confidence > 0
    # A single step must not be able to reach the target.
    assert confidence < ev.TONE_CLAMP
def test_tone_drift_clamps_over_many_steps():
    """After 500 winning-streak steps, confidence stays within ``TONE_CLAMP``."""
    current = {}
    for _step in range(500):
        current = ev.tone_ema_step(current, win_streak=10, loss_streak=0)
    # Small epsilon tolerates floating-point rounding at the boundary.
    upper_bound = ev.TONE_CLAMP + 1e-9
    assert current["confidence_baseline"] <= upper_bound
| # --- apply_evolution: end-to-end --------------------------------------- | |
def test_apply_evolution_skips_private_match():
    """A private match is reported as skipped and creates no evolution state."""
    with SessionLocal() as session:
        character = _mk_character(session)
        player = _mk_player(session, "ev_priv")
        game = _mk_match(
            session, character=character, player=player, is_private=True
        )

        summary = ev.apply_evolution(
            session, match=game, analysis_moves=[], critical_moments=[]
        )

        assert summary.skipped_private is True
        stored = session.get(CharacterEvolutionState, character.id)
        assert stored is None
def test_apply_evolution_creates_state_on_first_run():
    """The first processed match creates state pointing at that match."""
    with SessionLocal() as session:
        character = _mk_character(session)
        player = _mk_player(session, "ev_first", elo=1500)
        game = _mk_match(
            session,
            character=character,
            player=player,
            player_color=Color.WHITE,
            result=MatchResult.WHITE_WIN,
        )

        summary = ev.apply_evolution(
            session, match=game, analysis_moves=[], critical_moments=[]
        )

        assert summary.skipped_private is False
        stored = session.get(CharacterEvolutionState, character.id)
        assert stored is not None
        assert stored.matches_processed == 1
        assert stored.last_match_id == game.id
def test_apply_evolution_is_idempotent():
    """Re-applying evolution for the same match must not change the state.

    The second call has to flag ``skipped_idempotent`` and leave both the
    processed-match counter and the slider drift untouched.
    """
    with SessionLocal() as session:
        character = _mk_character(session)
        player = _mk_player(session, "ev_id")
        game = _mk_match(session, character=character, player=player)

        ev.apply_evolution(
            session, match=game, analysis_moves=[], critical_moments=[]
        )
        state_before = session.get(CharacterEvolutionState, character.id)
        processed_before = state_before.matches_processed
        # Copy the dict so a later in-place change cannot alter the snapshot.
        drift_snapshot = dict(state_before.slider_drift or {})

        summary = ev.apply_evolution(
            session, match=game, analysis_moves=[], critical_moments=[]
        )

        assert summary.skipped_idempotent is True
        state_after = session.get(CharacterEvolutionState, character.id)
        assert state_after.matches_processed == processed_before
        assert (state_after.slider_drift or {}) == drift_snapshot
def test_apply_evolution_records_trap_and_creates_learning_memory():
    """A first-time trap yields a detected trap plus a LEARNING memory row."""
    with SessionLocal() as session:
        character = _mk_character(session)
        player = _mk_player(session, "ev_trap")
        # The player has white and white wins, so the character (black)
        # lost. The critical moment is the character's blunder on ply 6.
        game = _mk_match(
            session,
            character=character,
            player=player,
            player_color=Color.WHITE,
            result=MatchResult.WHITE_WIN,
        )
        moments = [
            {
                "ply": 6,
                "side": "black",
                "eval_loss_cp": 550,
                "pattern": "opening_pin",
            },
        ]

        summary = ev.apply_evolution(
            session, match=game, analysis_moves=[], critical_moments=moments
        )

        assert summary.trap_detected is not None
        assert summary.new_learning_memory_id is not None
        memory_row = session.get(Memory, summary.new_learning_memory_id)
        assert memory_row.type == MemoryType.LEARNING
def test_apply_evolution_second_trap_hit_bumps_counter_but_no_new_memory():
    """Falling for the same trap twice bumps the counter without a new memory.

    Two lost matches carry the same ``opening_pin`` critical moment. The
    second run must not create another learning memory, and the stored trap
    entry must show ``fell_for == 2``.
    """
    with SessionLocal() as s:
        char = _mk_character(s)
        p = _mk_player(s, "ev_trap2")
        cms = [
            {
                "ply": 6,
                "side": "black",
                "eval_loss_cp": 550,
                "pattern": "opening_pin",
            },
        ]
        # Naive-UTC "now": same value as the deprecated ``datetime.utcnow()``
        # without the Python 3.12+ DeprecationWarning.
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        m1 = _mk_match(
            s, character=char, player=p,
            player_color=Color.WHITE, result=MatchResult.WHITE_WIN,
            ended_at=now - timedelta(minutes=5),
        )
        ev.apply_evolution(s, match=m1, analysis_moves=[], critical_moments=cms)

        # The second match ends "now" (the helper's default), i.e. after m1.
        m2 = _mk_match(
            s, character=char, player=p,
            player_color=Color.WHITE, result=MatchResult.WHITE_WIN,
        )
        summary = ev.apply_evolution(
            s, match=m2, analysis_moves=[], critical_moments=cms
        )

        assert summary.new_learning_memory_id is None
        state = s.get(CharacterEvolutionState, char.id)
        entry = next(
            (e for e in state.trap_memory if e["pattern"] == "opening_pin"),
            None,
        )
        assert entry is not None
        assert entry["fell_for"] == 2
def test_50_match_simulation_respects_cumulative_clamps():
    """Long-horizon check: 50 processed losses must stay inside the clamps.

    Runs the evolution pipeline on 50 consecutive lost matches and confirms
    that every slider drift and the tilt baseline remain within their clamp
    constants, and that all 50 matches were counted.
    """
    with SessionLocal() as s:
        char = _mk_character(s, aggression=3, patience=8)  # calm, patient base
        p = _mk_player(s, "long", elo=1700)
        # Naive-UTC "now": same value as the deprecated ``datetime.utcnow()``
        # without the Python 3.12+ DeprecationWarning.
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        for i in range(50):
            # Spread end times one minute apart, oldest first.
            ended = now - timedelta(minutes=50 - i)
            m = _mk_match(
                s, character=char, player=p,
                player_color=Color.WHITE,
                # Character plays black, so it loses every match.
                result=MatchResult.WHITE_WIN,
                char_elo_start=1500, player_elo_start=1700,
                ended_at=ended,
            )
            # Half the matches — simulate reckless play (high ACPL).
            moves = [{"side": "black", "eval_loss_cp": 100}] * 15 if i % 2 else []
            ev.apply_evolution(s, match=m, analysis_moves=moves, critical_moments=[])

        state = s.get(CharacterEvolutionState, char.id)
        for slider in ("aggression", "risk_tolerance", "patience", "trash_talk"):
            assert abs(state.slider_drift.get(slider, 0.0)) <= ev.SLIDER_DRIFT_CLAMP
        # Epsilon tolerates floating-point rounding at the clamp boundary.
        assert abs(state.tone_drift.get("tilt_baseline", 0.0)) <= ev.TONE_CLAMP + 1e-9
        assert state.matches_processed == 50
| # --- integration helpers (sliders + tone) ------------------------------ | |
def test_effective_sliders_applies_drift_and_clamps_1_to_10():
    """Base aggression 9 with +2.0 drift must yield 10, not 11."""
    with SessionLocal() as session:
        character = _mk_character(session, aggression=9)
        # The state object is only built in memory; it is never added to
        # the session.
        drifted_state = CharacterEvolutionState(
            character_id=character.id,
            slider_drift={"aggression": +2.0},
            opening_scores={},
            trap_memory=[],
            tone_drift={},
            matches_processed=0,
            last_match_id=None,
        )
        effective = ev.effective_sliders(character, drifted_state)
        assert effective["aggression"] == 10  # clamped, not 11
def test_tone_bias_for_none_state_returns_zeros():
    """With no evolution state, both tone baselines must be exactly 0.0."""
    neutral = ev.tone_bias_for(None)
    assert neutral["confidence_baseline"] == 0.0
    assert neutral["tilt_baseline"] == 0.0