# ResearchIT / tests/test_profiles.py
# siddhm11
# Phase 3 complete: Hybrid Semantic Search pipeline
# d5a6f3e
"""
Tests for EWMA profile embedding computation.
Covers:
- ewma_update produces L2-normalised output
- First interaction sets the profile directly
- Multiple updates blend correctly
- Negative dismiss pushes vector away
- Storage round-trip (save + load)
"""
import asyncio
import pytest
import numpy as np
from app.recommend.profiles import (
ewma_update,
EMBEDDING_DIM,
ALPHA_LONG_TERM,
ALPHA_SHORT_TERM,
ALPHA_NEGATIVE,
_to_bytes,
_from_bytes,
)
# ── Helper ────────────────────────────────────────────────────────────────────
def _random_unit_vec(seed: int = 42) -> np.ndarray:
    """Return a reproducible float32 vector of length EMBEDDING_DIM with unit L2 norm.

    Args:
        seed: Seed for a private ``RandomState``; equal seeds give equal vectors.
    """
    generator = np.random.RandomState(seed)
    raw = generator.randn(EMBEDDING_DIM).astype(np.float32)
    length = np.linalg.norm(raw)
    return raw / length
def _assert_unit(v: np.ndarray, tol: float = 1e-5):
assert abs(np.linalg.norm(v) - 1.0) < tol, f"norm = {np.linalg.norm(v)}"
# ── ewma_update unit tests ───────────────────────────────────────────────────
def test_ewma_first_interaction_sets_profile():
    """With no existing profile (None), the update yields the normalised input."""
    first_embedding = _random_unit_vec(1)
    profile = ewma_update(None, first_embedding, ALPHA_LONG_TERM)

    _assert_unit(profile)
    # The input already has unit norm, so the profile should match it closely.
    matches_input = np.allclose(profile, first_embedding, atol=1e-5)
    assert matches_input
def test_ewma_update_is_normalised():
    """Blending an existing profile with a new vector yields a unit-norm result."""
    blended = ewma_update(
        _random_unit_vec(10),
        _random_unit_vec(20),
        ALPHA_LONG_TERM,
    )
    _assert_unit(blended)
def test_ewma_long_term_alpha_is_stable():
    """One update at the long-term alpha should barely rotate the profile.

    The original note gives alpha = 0.03 for ALPHA_LONG_TERM; the dot product
    between the old and the updated profile must stay above 0.97.
    """
    old_profile = _random_unit_vec(100)
    incoming = _random_unit_vec(200)  # different seed, hence a different direction
    updated = ewma_update(old_profile, incoming, ALPHA_LONG_TERM)

    # Dot product of the old profile with the updated one, used as cosine similarity.
    sim = float(np.dot(old_profile, updated))
    # At the long-term alpha, more than 97% of the old direction should survive.
    assert sim > 0.97, f"cosine sim = {sim}"
def test_ewma_short_term_alpha_is_responsive():
    """One update at the short-term alpha should pull the profile toward the input.

    The original note gives alpha = 0.40 for ALPHA_SHORT_TERM; after a single
    update the dot product between the result and the new vector must exceed 0.3.
    """
    current = _random_unit_vec(100)
    new = _random_unit_vec(200)
    result = ewma_update(current, new, ALPHA_SHORT_TERM)

    # Only similarity to the new input is asserted; the previously computed
    # similarity to the old profile was never used and has been dropped.
    sim_to_new = float(np.dot(new, result))
    assert sim_to_new > 0.3, f"sim to new = {sim_to_new}"
def test_ewma_multiple_updates_converge():
    """Feeding the same vector repeatedly should converge the profile onto it.

    Per the original note (alpha = 0.03, "Doc 06 correction"), convergence is
    slow, so roughly 200 updates are applied before checking similarity.
    """
    n_updates = 200
    goal = _random_unit_vec(42)
    running = _random_unit_vec(99)  # different seed: start far from the goal

    remaining = n_updates
    while remaining > 0:
        running = ewma_update(running, goal, ALPHA_LONG_TERM)
        remaining -= 1

    sim = float(np.dot(running, goal))
    assert sim > 0.99, f"after {n_updates} updates, sim = {sim}"
def test_ewma_dissimilar_input_shifts_profile():
    """A dissimilar input should move the profile off its original direction."""
    original = _random_unit_vec(10)
    other = _random_unit_vec(999)  # different seed: a genuinely different direction
    shifted = ewma_update(original, other, ALPHA_SHORT_TERM)

    sim_to_old = float(np.dot(original, shifted))
    sim_to_new = float(np.dot(other, shifted))

    # At the short-term alpha (0.40 per the original note) the profile should
    # have left its old direction and picked up some of the new one.
    assert sim_to_old < 1.0, f"profile didn't move, sim to old = {sim_to_old}"
    assert sim_to_new > 0.0, f"profile should have some similarity to new, got {sim_to_new}"
# ── Binary storage round-trip ─────────────────────────────────────────────────
def test_bytes_roundtrip():
    """Serialising with _to_bytes and parsing with _from_bytes preserves the vector."""
    source_vec = _random_unit_vec(77)
    encoded = _to_bytes(source_vec)
    decoded = _from_bytes(encoded)
    assert np.allclose(source_vec, decoded, atol=1e-7)
def test_bytes_size():
    """A serialised profile occupies EMBEDDING_DIM * 4 bytes.

    The original note states this as 4096 bytes, which holds when
    EMBEDDING_DIM is 1024.
    """
    payload = _to_bytes(_random_unit_vec(0))
    bytes_per_component = 4  # float32 = 4 bytes
    assert len(payload) == EMBEDDING_DIM * bytes_per_component
# ── DB integration tests ─────────────────────────────────────────────────────
@pytest.fixture
def setup_db(tmp_path, monkeypatch):
    """Provide a freshly initialised SQLite database for a single test.

    Points ``DB_PATH`` in both ``app.config`` and ``app.db`` at a file inside
    pytest's per-test ``tmp_path`` and runs ``app.db.init_db()`` against it.
    ``monkeypatch`` restores the original paths when the test finishes.
    """
    import app.config as cfg
    import app.db as db_mod

    db_path = str(tmp_path / "test_profiles.db")
    # Patch both modules: each holds its own DB_PATH attribute.
    monkeypatch.setattr(cfg, "DB_PATH", db_path)
    monkeypatch.setattr(db_mod, "DB_PATH", db_path)

    # Use an explicit loop instead of asyncio.get_event_loop(): calling that
    # with no current loop is deprecated and raises RuntimeError on Python 3.14.
    # A private loop also leaves the policy's current-loop state untouched.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(db_mod.init_db())
    finally:
        loop.close()
    yield
def test_profile_save_and_load(setup_db):
    """A saved profile vector loads back from SQLite unchanged (within 1e-7)."""
    from app.recommend import profiles

    vec = _random_unit_vec(55)

    async def _run():
        await profiles.save_profile("user-1", "long_term", vec, interaction_count=5)
        loaded = await profiles.load_profile("user-1", "long_term")
        assert loaded is not None
        assert np.allclose(vec, loaded, atol=1e-7)

    # Use an explicit loop instead of asyncio.get_event_loop(): calling that
    # with no current loop is deprecated and raises RuntimeError on Python 3.14.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_run())
    finally:
        loop.close()
def test_profile_interaction_count(setup_db):
    """The interaction count stored with a profile is returned on lookup."""
    from app.recommend import profiles

    vec = _random_unit_vec(66)

    async def _run():
        await profiles.save_profile("user-2", "short_term", vec, interaction_count=12)
        count = await profiles.get_interaction_count("user-2", "short_term")
        assert count == 12

    # Use an explicit loop instead of asyncio.get_event_loop(): calling that
    # with no current loop is deprecated and raises RuntimeError on Python 3.14.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_run())
    finally:
        loop.close()
def test_profile_not_found_returns_none(setup_db):
    """Looking up an unknown user yields None for the profile and 0 for the count."""
    from app.recommend import profiles

    async def _run():
        result = await profiles.load_profile("nonexistent", "long_term")
        assert result is None
        count = await profiles.get_interaction_count("nonexistent", "long_term")
        assert count == 0

    # Use an explicit loop instead of asyncio.get_event_loop(): calling that
    # with no current loop is deprecated and raises RuntimeError on Python 3.14.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_run())
    finally:
        loop.close()