SciPeerAI-API / tests /test_stat_audit.py
Abu-Sameer-66
deploy: SciPeerAI v2.3.0 β€” 24 modules, 209 tests, Phase 6 complete
a53c25d
# tests/test_stat_audit.py
#
# Tests for the statistical audit engine.
# We test with synthetic text that mimics
# real paper patterns β€” both clean and problematic ones.
import pytest
from src.scipeerai.modules.stat_audit import StatAuditEngine, StatAuditResult
@pytest.fixture
def engine():
return StatAuditEngine()
# ── p-hacking tests ──────────────────────────────────────────
def test_flags_suspicious_p_clustering(engine):
# three p-values all hugging 0.05 β€” should trigger high severity
text = """
The intervention showed significant improvement (p=0.048).
Secondary outcome also significant (p=0.049).
Tertiary measure borderline significant (p=0.046).
"""
result = engine.analyze(text)
types = [f.flag_type for f in result.flags]
assert "p_hacking_suspected" in types or "borderline_p_values" in types
def test_clean_paper_passes(engine):
# well-spaced p-values, decent sample, no red flags
text = """
Participants (n=120) completed the protocol.
Group A showed significant improvement (p=0.002).
No effect on secondary measure (p=0.41).
"""
result = engine.analyze(text)
assert result.risk_level in ("low", "medium")
assert result.risk_score < 0.5
# ── sample size tests ────────────────────────────────────────
def test_flags_tiny_sample(engine):
text = "We recruited n=8 participants. Results were significant (p=0.03)."
result = engine.analyze(text)
types = [f.flag_type for f in result.flags]
assert "small_sample_size" in types
def test_acceptable_sample_passes(engine):
text = "Study enrolled n=95 subjects across three sites."
result = engine.analyze(text)
types = [f.flag_type for f in result.flags]
assert "small_sample_size" not in types
# ── round numbers ────────────────────────────────────────────
def test_flags_exact_p_value(engine):
text = "The result was statistically significant (p=0.05)."
result = engine.analyze(text)
types = [f.flag_type for f in result.flags]
assert "suspiciously_round_p_values" in types
# ── result structure tests ───────────────────────────────────
def test_result_is_correct_type(engine):
result = engine.analyze("A simple paper with no statistics.")
assert isinstance(result, StatAuditResult)
assert 0.0 <= result.risk_score <= 1.0
assert result.risk_level in ("low", "medium", "high", "critical")
def test_empty_text_doesnt_crash(engine):
result = engine.analyze("")
assert result.risk_score == 0.0
assert result.risk_level == "low"