# StressDetect / tests/test_feedback.py
# Ace-119's picture
# Add Streamlit dashboard, test suite, and Colab notebooks
# 0304d75
"""
tests/test_feedback.py
======================
Unit tests for the RL feedback subsystem:
- database/feedback.py (FeedbackStore)
- utils/reward.py (compute_reward, compute_combined_reward, reward_to_weight)
- utils/llm_reward.py (get_llm_reward — offline/no-key path only)
"""
from __future__ import annotations
import pytest
from database.feedback import FeedbackStore
from utils.reward import compute_combined_reward, compute_reward, reward_to_weight
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture()
def store() -> FeedbackStore:
    """Build a fresh ``FeedbackStore`` constructed with ``":memory:"`` for one test."""
    in_memory_store = FeedbackStore(":memory:")
    return in_memory_store
# ---------------------------------------------------------------------------
# FeedbackStore — table creation & basic CRUD
# ---------------------------------------------------------------------------
class TestFeedbackStore:
    """Table-creation, persistence and statistics checks for ``FeedbackStore``.

    Every test receives its own store through the ``store`` fixture.
    """

    def test_tables_created(self, store: FeedbackStore) -> None:
        """Both expected tables are present right after construction."""
        # Reaches into the private connection to inspect the SQLite catalogue.
        listing = store._conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table'"
        ).fetchall()
        table_names = {entry["name"] for entry in listing}
        assert "feedback" in table_names
        assert "experience" in table_names

    def test_save_feedback_correct(self, store: FeedbackStore) -> None:
        """A "correct" feedback entry yields one feedback row and one experience row."""
        payload = {
            "username": "alice",
            "text": "I am so stressed",
            "prediction": 0.8,
            "user_feedback": 1,
            "reward": 1.0,
        }
        feedback_id = store.save_feedback(**payload)
        assert feedback_id >= 1

        # Feedback table: a single row echoing what was saved, no LLM reward.
        saved_rows = store.get_all_feedback()
        assert len(saved_rows) == 1
        saved = saved_rows[0]
        assert saved["username"] == "alice"
        assert saved["user_feedback"] == 1
        assert saved["reward"] == pytest.approx(1.0)
        assert saved["llm_reward"] is None

        # Experience table: the prediction was confirmed, so the expected
        # label is round(0.8) = 1.
        experience = store.get_experience_for_training(min_samples=1)
        assert len(experience) == 1
        sample = experience[0]
        assert sample["label"] == 1
        assert sample["reward"] == pytest.approx(1.0)

    def test_save_feedback_wrong(self, store: FeedbackStore) -> None:
        """A "wrong" verdict is expected to flip the stored experience label."""
        payload = {
            "username": "bob",
            "text": "Everything is fine",
            "prediction": 0.8,  # model predicted stressed (class 1)
            "user_feedback": 0,  # user rejects it, so the corrected label is 0
            "reward": -1.0,
        }
        store.save_feedback(**payload)

        experience = store.get_experience_for_training(min_samples=1)
        assert len(experience) == 1
        assert experience[0]["label"] == 0

    def test_save_feedback_with_llm_reward(self, store: FeedbackStore) -> None:
        """An explicitly supplied LLM reward is read back unchanged."""
        payload = {
            "username": "carol",
            "text": "text",
            "prediction": 0.6,
            "user_feedback": 1,
            "reward": 1.0,
            "llm_reward": 1,
        }
        store.save_feedback(**payload)

        first_row = store.get_all_feedback()[0]
        assert first_row["llm_reward"] == 1

    def test_get_user_stats_no_data(self, store: FeedbackStore) -> None:
        """A user without any feedback reports zero total and zero mean reward."""
        empty_stats = store.get_user_stats("nobody")
        assert empty_stats["total"] == 0
        assert empty_stats["mean_reward"] == pytest.approx(0.0)

    def test_get_user_stats(self, store: FeedbackStore) -> None:
        """Aggregates over two correct and one wrong feedback for one user."""
        entries = (
            ("alice", "t1", 0.8, 1, 1.0),
            ("alice", "t2", 0.8, 0, -1.0),
            ("alice", "t3", 0.7, 1, 1.0),
        )
        for entry in entries:
            store.save_feedback(*entry)

        summary = store.get_user_stats("alice")
        assert summary["total"] == 3
        assert summary["n_correct"] == 2
        assert summary["n_wrong"] == 1
        # Rewards are +1, -1, +1, hence a mean of 1/3 and two of three correct.
        assert summary["mean_reward"] == pytest.approx(1 / 3, abs=0.01)
        assert summary["accuracy_rate"] == pytest.approx(2 / 3, abs=0.01)

    def test_min_samples_gate(self, store: FeedbackStore) -> None:
        """Training data is withheld (``[]``) while below ``min_samples``."""
        store.save_feedback("alice", "text", 0.7, 1, 1.0)

        below_threshold = store.get_experience_for_training(min_samples=5)
        assert below_threshold == []

        at_threshold = store.get_experience_for_training(min_samples=1)
        assert len(at_threshold) == 1

    def test_feedback_count(self, store: FeedbackStore) -> None:
        """Counts are reported both globally and per username."""
        assert store.get_feedback_count() == 0

        entries = (
            ("u1", "t", 0.5, 1, 1.0),
            ("u1", "t", 0.5, 0, -1.0),
            ("u2", "t", 0.5, 1, 1.0),
        )
        for entry in entries:
            store.save_feedback(*entry)

        assert store.get_feedback_count() == 3
        assert store.get_feedback_count("u1") == 2
        assert store.get_feedback_count("u2") == 1

    def test_multiple_users_isolated(self, store: FeedbackStore) -> None:
        """Statistics for one user must not include another user's feedback."""
        store.save_feedback("alice", "text", 0.9, 1, 1.0)
        store.save_feedback("bob", "text", 0.2, 0, -1.0)

        alice_stats = store.get_user_stats("alice")
        bob_stats = store.get_user_stats("bob")

        assert alice_stats["total"] == 1
        assert alice_stats["n_correct"] == 1
        assert bob_stats["total"] == 1
        assert bob_stats["n_wrong"] == 1

    def test_close(self, store: FeedbackStore) -> None:
        """Calling ``close()`` completes without raising."""
        store.close()
# ---------------------------------------------------------------------------
# Reward functions
# ---------------------------------------------------------------------------
class TestComputeReward:
    """Checks the mapping from binary user feedback to a scalar reward."""

    def test_correct_gives_positive(self) -> None:
        """Feedback ``1`` is expected to map to a reward of +1.0."""
        reward = compute_reward(1)
        assert reward == pytest.approx(1.0)

    def test_wrong_gives_negative(self) -> None:
        """Feedback ``0`` is expected to map to a reward of -1.0."""
        reward = compute_reward(0)
        assert reward == pytest.approx(-1.0)

    def test_correct_low_prediction(self) -> None:
        """Feedback ``1`` maps to +1.0.

        NOTE(review): currently identical to ``test_correct_gives_positive``;
        no prediction value is passed despite the name -- confirm intent.
        """
        reward = compute_reward(1)
        assert reward == pytest.approx(1.0)

    def test_wrong_low_prediction(self) -> None:
        """Feedback ``0`` maps to -1.0.

        NOTE(review): currently identical to ``test_wrong_gives_negative``;
        no prediction value is passed despite the name -- confirm intent.
        """
        reward = compute_reward(0)
        assert reward == pytest.approx(-1.0)
class TestComputeCombinedReward:
    """Checks how user feedback and an optional LLM reward are combined."""

    def test_no_llm_passes_through(self) -> None:
        """Without an LLM reward the user-derived reward is returned as is."""
        expectations = ((1, 1.0), (0, -1.0))
        for user_feedback, expected in expectations:
            assert compute_combined_reward(user_feedback, None) == pytest.approx(
                expected
            )

    def test_llm_agree_positive(self) -> None:
        """User (+1) and LLM (+1) agree, so the average is +1."""
        combined = compute_combined_reward(1, 1)
        assert combined == pytest.approx(1.0)

    def test_llm_agree_negative(self) -> None:
        """User (-1) and LLM (-1) agree, so the average is -1."""
        combined = compute_combined_reward(0, -1)
        assert combined == pytest.approx(-1.0)

    def test_llm_disagree_averages(self) -> None:
        """User says correct (+1), LLM says wrong (-1): the average is 0.0."""
        assert compute_combined_reward(1, -1) == pytest.approx(0.0)

    def test_llm_partial_agreement(self) -> None:
        """User says wrong (-1), LLM says correct (+1): the result is 0.0.

        Despite the name, this is the mirror image of the disagreement case.
        """
        assert compute_combined_reward(0, 1) == pytest.approx(0.0)
class TestRewardToWeight:
    """Checks the reward-to-sample-weight conversion."""

    def test_nonzero_reward_gives_1_5(self) -> None:
        """Any non-zero reward tried here is expected to yield a weight of 1.5."""
        for nonzero_reward in (1.0, -1.0, 0.5):
            assert reward_to_weight(nonzero_reward) == pytest.approx(1.5)

    def test_zero_reward_gives_1_0(self) -> None:
        """A reward of exactly zero is expected to yield a weight of 1.0."""
        weight = reward_to_weight(0.0)
        assert weight == pytest.approx(1.0)
# ---------------------------------------------------------------------------
# LLM reward (no-key / offline path)
# ---------------------------------------------------------------------------
class TestGetLlmRewardOffline:
    """Offline checks: with the API keys removed, ``get_llm_reward`` must give None."""

    def test_returns_none_without_keys(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """``provider="auto"`` returns None when neither key is set."""
        # Strip the keys before importing so the import sees a key-less environment.
        for env_name in ("OPENAI_API_KEY", "GEMINI_API_KEY"):
            monkeypatch.delenv(env_name, raising=False)
        from utils.llm_reward import get_llm_reward

        assert get_llm_reward("I am stressed", 0.8, provider="auto") is None

    def test_openai_returns_none_without_key(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """``provider="openai"`` returns None when ``OPENAI_API_KEY`` is unset."""
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        from utils.llm_reward import get_llm_reward

        outcome = get_llm_reward("text", 0.5, provider="openai")
        assert outcome is None

    def test_gemini_returns_none_without_key(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """``provider="gemini"`` returns None when ``GEMINI_API_KEY`` is unset."""
        monkeypatch.delenv("GEMINI_API_KEY", raising=False)
        from utils.llm_reward import get_llm_reward

        outcome = get_llm_reward("text", 0.5, provider="gemini")
        assert outcome is None