Spaces:
Running
Running
File size: 6,376 Bytes
d5a6f3e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 | """
Tests for EWMA profile embedding computation.
Covers:
- ewma_update produces L2-normalised output
- First interaction sets the profile directly
- Multiple updates blend correctly
- Negative dismiss pushes vector away
- Storage round-trip (save + load)
"""
import asyncio
import pytest
import numpy as np
from app.recommend.profiles import (
ewma_update,
EMBEDDING_DIM,
ALPHA_LONG_TERM,
ALPHA_SHORT_TERM,
ALPHA_NEGATIVE,
_to_bytes,
_from_bytes,
)
# ── Helper ────────────────────────────────────────────────────────────────────
def _random_unit_vec(seed: int = 42) -> np.ndarray:
    """Return a reproducible pseudo-random float32 vector with unit L2 norm.

    The vector has ``EMBEDDING_DIM`` components drawn from a standard normal
    distribution by a ``RandomState`` seeded with ``seed``, so the same seed
    always produces the same vector.
    """
    raw = np.random.RandomState(seed).randn(EMBEDDING_DIM).astype(np.float32)
    length = np.linalg.norm(raw)
    return raw / length
def _assert_unit(v: np.ndarray, tol: float = 1e-5):
assert abs(np.linalg.norm(v) - 1.0) < tol, f"norm = {np.linalg.norm(v)}"
# ── ewma_update unit tests ───────────────────────────────────────────────────
def test_ewma_first_interaction_sets_profile():
    """With no prior profile (``None``), the update returns the input itself."""
    first_embedding = _random_unit_vec(1)
    profile = ewma_update(None, first_embedding, ALPHA_LONG_TERM)

    _assert_unit(profile)
    # The input is already unit-norm, so the new profile must match it closely.
    assert np.allclose(profile, first_embedding, atol=1e-5)
def test_ewma_update_is_normalised():
    """Blending two unit vectors must yield an L2-normalised profile."""
    blended = ewma_update(
        _random_unit_vec(10),
        _random_unit_vec(20),
        ALPHA_LONG_TERM,
    )
    _assert_unit(blended)
def test_ewma_long_term_alpha_is_stable():
    """One long-term update should barely move the profile.

    After a single update with ``ALPHA_LONG_TERM`` (the original note cites
    α=0.03 — confirm against the constant), the cosine similarity between
    the old and new profile must stay above 0.97.
    """
    old_profile = _random_unit_vec(100)
    incoming = _random_unit_vec(200)  # different seed, hence a different direction
    updated = ewma_update(old_profile, incoming, ALPHA_LONG_TERM)

    # Dot product with the unit-norm old profile; equals cosine similarity
    # provided the update result is unit-norm as well.
    sim = float(np.dot(old_profile, updated))
    assert sim > 0.97, f"cosine sim = {sim}"
def test_ewma_short_term_alpha_is_responsive():
    """One short-term update should pull the profile toward the new input.

    After a single update with ``ALPHA_SHORT_TERM`` (the original note cites
    α=0.40 — confirm against the constant), the similarity between the
    result and the new input must exceed 0.3.
    """
    current = _random_unit_vec(100)
    new = _random_unit_vec(200)
    result = ewma_update(current, new, ALPHA_SHORT_TERM)

    # Only the similarity to the new input is asserted; the previously
    # computed similarity to the old profile was never used and was removed.
    sim_to_new = float(np.dot(new, result))
    assert sim_to_new > 0.3, f"sim to new = {sim_to_new}"
def test_ewma_multiple_updates_converge():
    """Feeding the same input repeatedly drives the profile onto that input.

    The profile starts from an unrelated vector and receives 200 long-term
    updates with the same target. The original note attributes the 200-step
    budget to the slow long-term rate (cited as α=0.03, "Doc 06 correction").
    """
    target = _random_unit_vec(42)
    profile = _random_unit_vec(99)  # different seed, so it starts far from target

    step = 0
    while step < 200:
        profile = ewma_update(profile, target, ALPHA_LONG_TERM)
        step += 1

    sim = float(np.dot(profile, target))
    assert sim > 0.99, f"after 200 updates, sim = {sim}"
def test_ewma_dissimilar_input_shifts_profile():
    """A short-term update with an unrelated vector moves the profile off its start."""
    start = _random_unit_vec(10)
    other = _random_unit_vec(999)  # different seed, hence a different direction
    updated = ewma_update(start, other, ALPHA_SHORT_TERM)

    sim_to_old = float(np.dot(start, updated))
    sim_to_new = float(np.dot(other, updated))

    # The profile must have left its starting point ...
    assert sim_to_old < 1.0, f"profile didn't move, sim to old = {sim_to_old}"
    # ... and must have picked up some component of the new input.
    assert sim_to_new > 0.0, f"profile should have some similarity to new, got {sim_to_new}"
# ── Binary storage round-trip ─────────────────────────────────────────────────
def test_bytes_roundtrip():
    """``_to_bytes`` → ``_from_bytes`` returns the original vector (within 1e-7)."""
    original = _random_unit_vec(77)
    blob = _to_bytes(original)
    recovered = _from_bytes(blob)
    assert np.allclose(original, recovered, atol=1e-7)
def test_bytes_size():
    """A serialised profile vector occupies ``EMBEDDING_DIM * 4`` bytes.

    The factor 4 is the size of one float32 component.
    """
    expected_size = EMBEDDING_DIM * 4  # float32 = 4 bytes
    blob = _to_bytes(_random_unit_vec(0))
    assert len(blob) == expected_size
# ── DB integration tests ─────────────────────────────────────────────────────
@pytest.fixture
def setup_db(tmp_path, monkeypatch):
    """Point the app at a fresh SQLite file under ``tmp_path`` and initialise it.

    ``DB_PATH`` is patched on both ``app.config`` and ``app.db`` for the
    duration of the test, then ``app.db.init_db()`` is run to completion.
    """
    import app.config as cfg
    import app.db as db_mod

    db_path = str(tmp_path / "test_profiles.db")
    # Both modules expose a DB_PATH attribute, so both are patched
    # (presumably app.db copies the name from app.config — verify).
    monkeypatch.setattr(cfg, "DB_PATH", db_path)
    monkeypatch.setattr(db_mod, "DB_PATH", db_path)

    # asyncio.get_event_loop() without a running loop is deprecated and
    # raises RuntimeError on recent Python versions. Use a private loop and
    # close it afterwards; this does not alter the global current loop.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(db_mod.init_db())
    finally:
        loop.close()
    yield
def test_profile_save_and_load(setup_db):
    """A saved profile vector is returned unchanged by ``load_profile``."""
    from app.recommend import profiles

    vec = _random_unit_vec(55)

    async def _run():
        await profiles.save_profile("user-1", "long_term", vec, interaction_count=5)
        loaded = await profiles.load_profile("user-1", "long_term")
        assert loaded is not None
        assert np.allclose(vec, loaded, atol=1e-7)

    # asyncio.get_event_loop() without a running loop is deprecated and
    # raises RuntimeError on recent Python versions. Run on a private loop
    # that is always closed; this does not alter the global current loop.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_run())
    finally:
        loop.close()
def test_profile_interaction_count(setup_db):
    """The interaction count passed to ``save_profile`` is read back unchanged."""
    from app.recommend import profiles

    vec = _random_unit_vec(66)

    async def _run():
        await profiles.save_profile("user-2", "short_term", vec, interaction_count=12)
        count = await profiles.get_interaction_count("user-2", "short_term")
        assert count == 12

    # asyncio.get_event_loop() without a running loop is deprecated and
    # raises RuntimeError on recent Python versions. Run on a private loop
    # that is always closed; this does not alter the global current loop.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_run())
    finally:
        loop.close()
def test_profile_not_found_returns_none(setup_db):
    """An unknown user yields ``None`` from ``load_profile`` and a count of 0."""
    from app.recommend import profiles

    async def _run():
        result = await profiles.load_profile("nonexistent", "long_term")
        assert result is None
        count = await profiles.get_interaction_count("nonexistent", "long_term")
        assert count == 0

    # asyncio.get_event_loop() without a running loop is deprecated and
    # raises RuntimeError on recent Python versions. Run on a private loop
    # that is always closed; this does not alter the global current loop.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_run())
    finally:
        loop.close()
|