Spaces:
Running
Running
| """ | |
| Tests for EWMA profile embedding computation. | |
| Covers: | |
| - ewma_update produces L2-normalised output | |
| - First interaction sets the profile directly | |
| - Multiple updates blend correctly | |
| - Negative dismiss pushes vector away | |
| - Storage round-trip (save + load) | |
| """ | |
| import asyncio | |
| import pytest | |
| import numpy as np | |
| from app.recommend.profiles import ( | |
| ewma_update, | |
| EMBEDDING_DIM, | |
| ALPHA_LONG_TERM, | |
| ALPHA_SHORT_TERM, | |
| ALPHA_NEGATIVE, | |
| _to_bytes, | |
| _from_bytes, | |
| ) | |
# ── Helper ───────────────────────────────────────────────────────────────────
def _random_unit_vec(seed: int = 42) -> np.ndarray:
    """Return a reproducible float32 vector of length EMBEDDING_DIM with unit L2 norm.

    The components are drawn with ``RandomState(seed).randn``, so the same
    *seed* always produces the same vector.
    """
    raw = np.random.RandomState(seed).randn(EMBEDDING_DIM).astype(np.float32)
    length = np.linalg.norm(raw)
    return raw / length
| def _assert_unit(v: np.ndarray, tol: float = 1e-5): | |
| assert abs(np.linalg.norm(v) - 1.0) < tol, f"norm = {np.linalg.norm(v)}" | |
# ── ewma_update unit tests ───────────────────────────────────────────────────
def test_ewma_first_interaction_sets_profile():
    """With no existing profile, the update yields the normalised input."""
    first_embedding = _random_unit_vec(1)
    profile = ewma_update(None, first_embedding, ALPHA_LONG_TERM)
    _assert_unit(profile)
    # The input is already unit-norm, so the resulting profile should match
    # it up to floating-point tolerance.
    assert np.allclose(profile, first_embedding, atol=1e-5)
def test_ewma_update_is_normalised():
    """Blending an existing profile with a new embedding gives a unit-norm vector."""
    previous, incoming = _random_unit_vec(10), _random_unit_vec(20)
    _assert_unit(ewma_update(previous, incoming, ALPHA_LONG_TERM))
def test_ewma_long_term_alpha_is_stable():
    """A single long-term update should barely move the profile.

    With α=0.03, the updated profile is expected to keep a high cosine
    similarity to the profile it started from.
    """
    old_profile = _random_unit_vec(100)
    other_direction = _random_unit_vec(200)  # different direction
    updated = ewma_update(old_profile, other_direction, ALPHA_LONG_TERM)
    # Dot product of the old profile with the updated one, used as the
    # cosine similarity (both are expected to be unit-norm).
    sim = float(np.dot(old_profile, updated))
    # At α=0.03, should preserve >97% of old direction
    assert sim > 0.97, f"cosine sim = {sim}"
def test_ewma_short_term_alpha_is_responsive():
    """With α=0.40, the profile should shift significantly toward the new input.

    Only the similarity between the updated profile and the new input is
    checked here.
    """
    current = _random_unit_vec(100)
    new = _random_unit_vec(200)
    result = ewma_update(current, new, ALPHA_SHORT_TERM)
    sim_to_new = float(np.dot(new, result))
    # Short-term should move meaningfully toward new
    assert sim_to_new > 0.3, f"sim to new = {sim_to_new}"
def test_ewma_multiple_updates_converge():
    """Feeding the same input repeatedly should pull the profile onto it.

    With α=0.03 (Doc 06 correction), convergence is slower, so about 200
    updates are applied before the similarity is checked.
    """
    goal = _random_unit_vec(42)
    state = _random_unit_vec(99)  # start far away
    for _step in range(200):
        state = ewma_update(state, goal, ALPHA_LONG_TERM)
    sim = float(np.dot(state, goal))
    assert sim > 0.99, f"after 200 updates, sim = {sim}"
def test_ewma_dissimilar_input_shifts_profile():
    """A dissimilar input should move the profile off its original direction."""
    original = _random_unit_vec(10)
    other = _random_unit_vec(999)  # a genuinely different direction
    updated = ewma_update(original, other, ALPHA_SHORT_TERM)
    old_dot = np.dot(original, updated)
    new_dot = np.dot(other, updated)
    sim_to_old = float(old_dot)
    sim_to_new = float(new_dot)
    # With α=0.40, profile should move toward new input
    assert sim_to_old < 1.0, f"profile didn't move, sim to old = {sim_to_old}"
    assert sim_to_new > 0.0, f"profile should have some similarity to new, got {sim_to_new}"
# ── Binary storage round-trip ────────────────────────────────────────────────
def test_bytes_roundtrip():
    """_to_bytes → _from_bytes gives back the original values."""
    vec = _random_unit_vec(77)
    blob = _to_bytes(vec)
    restored = _from_bytes(blob)
    assert np.allclose(vec, restored, atol=1e-7)
def test_bytes_size():
    """A serialised profile vector takes four bytes per embedding dimension."""
    expected_len = EMBEDDING_DIM * 4  # float32 = 4 bytes
    assert len(_to_bytes(_random_unit_vec(0))) == expected_len
# ── DB integration tests ─────────────────────────────────────────────────────
@pytest.fixture
def setup_db(tmp_path, monkeypatch):
    """Provide a freshly initialised SQLite DB for a single test.

    Points ``DB_PATH`` in both ``app.config`` and ``app.db`` at a file under
    pytest's per-test ``tmp_path`` and then runs ``init_db()`` to completion.
    ``monkeypatch`` restores the original paths when the test finishes.

    The function must be registered with ``@pytest.fixture``: tests request
    it by parameter name, and pytest cannot resolve it otherwise.
    """
    import app.config as cfg
    import app.db as db_mod

    db_path = str(tmp_path / "test_profiles.db")
    monkeypatch.setattr(cfg, "DB_PATH", db_path)
    monkeypatch.setattr(db_mod, "DB_PATH", db_path)

    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # Newer Python versions no longer create an event loop implicitly.
        # Create one and install it as the current loop so that code calling
        # asyncio.get_event_loop() afterwards picks it up.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    loop.run_until_complete(db_mod.init_db())
    yield
def test_profile_save_and_load(setup_db):
    """A saved profile vector is loaded back with the same values."""
    from app.recommend import profiles

    stored = _random_unit_vec(55)

    async def _scenario():
        await profiles.save_profile("user-1", "long_term", stored, interaction_count=5)
        fetched = await profiles.load_profile("user-1", "long_term")
        assert fetched is not None
        assert np.allclose(stored, fetched, atol=1e-7)

    loop = asyncio.get_event_loop()
    loop.run_until_complete(_scenario())
def test_profile_interaction_count(setup_db):
    """The interaction count passed to save_profile is returned on lookup."""
    from app.recommend import profiles

    embedding = _random_unit_vec(66)

    async def _scenario():
        await profiles.save_profile("user-2", "short_term", embedding, interaction_count=12)
        stored_count = await profiles.get_interaction_count("user-2", "short_term")
        assert stored_count == 12

    loop = asyncio.get_event_loop()
    loop.run_until_complete(_scenario())
def test_profile_not_found_returns_none(setup_db):
    """Looking up an unknown profile gives None and a count of 0, not an error."""
    from app.recommend import profiles

    async def _scenario():
        missing = await profiles.load_profile("nonexistent", "long_term")
        assert missing is None
        missing_count = await profiles.get_interaction_count("nonexistent", "long_term")
        assert missing_count == 0

    loop = asyncio.get_event_loop()
    loop.run_until_complete(_scenario())