Spaces:

Abu-Sameer-66
/

SciPeerAI-API

Running

SciPeerAI-API / tests /test_stat_audit.py

Abu-Sameer-66

deploy: SciPeerAI v2.3.0 — 24 modules, 209 tests, Phase 6 complete

a53c25d about 14 hours ago

2.91 kB

	# tests/test_stat_audit.py
	#
	# Tests for the statistical audit engine.
	# We test with synthetic text that mimics
	# real paper patterns — both clean and problematic ones.

	import pytest
	from src.scipeerai.modules.stat_audit import StatAuditEngine, StatAuditResult


	@pytest.fixture
	def engine():
	return StatAuditEngine()


	# ── p-hacking tests ──────────────────────────────────────────

	def test_flags_suspicious_p_clustering(engine):
	# three p-values all hugging 0.05 — should trigger high severity
	text = """
	The intervention showed significant improvement (p=0.048).
	Secondary outcome also significant (p=0.049).
	Tertiary measure borderline significant (p=0.046).
	"""
	result = engine.analyze(text)
	types = [f.flag_type for f in result.flags]
	assert "p_hacking_suspected" in types or "borderline_p_values" in types


	def test_clean_paper_passes(engine):
	# well-spaced p-values, decent sample, no red flags
	text = """
	Participants (n=120) completed the protocol.
	Group A showed significant improvement (p=0.002).
	No effect on secondary measure (p=0.41).
	"""
	result = engine.analyze(text)
	assert result.risk_level in ("low", "medium")
	assert result.risk_score < 0.5


	# ── sample size tests ────────────────────────────────────────

	def test_flags_tiny_sample(engine):
	text = "We recruited n=8 participants. Results were significant (p=0.03)."
	result = engine.analyze(text)
	types = [f.flag_type for f in result.flags]
	assert "small_sample_size" in types


	def test_acceptable_sample_passes(engine):
	text = "Study enrolled n=95 subjects across three sites."
	result = engine.analyze(text)
	types = [f.flag_type for f in result.flags]
	assert "small_sample_size" not in types


	# ── round numbers ────────────────────────────────────────────

	def test_flags_exact_p_value(engine):
	text = "The result was statistically significant (p=0.05)."
	result = engine.analyze(text)
	types = [f.flag_type for f in result.flags]
	assert "suspiciously_round_p_values" in types


	# ── result structure tests ───────────────────────────────────

	def test_result_is_correct_type(engine):
	result = engine.analyze("A simple paper with no statistics.")
	assert isinstance(result, StatAuditResult)
	assert 0.0 <= result.risk_score <= 1.0
	assert result.risk_level in ("low", "medium", "high", "critical")


	def test_empty_text_doesnt_crash(engine):
	result = engine.analyze("")
	assert result.risk_score == 0.0
	assert result.risk_level == "low"