Spaces:
Running
Running
| # tests/test_stat_audit.py | |
| # | |
| # Tests for the statistical audit engine. | |
| # We test with synthetic text that mimics | |
| # real paper patterns β both clean and problematic ones. | |
| import pytest | |
| from src.scipeerai.modules.stat_audit import StatAuditEngine, StatAuditResult | |
| def engine(): | |
| return StatAuditEngine() | |
| # ββ p-hacking tests ββββββββββββββββββββββββββββββββββββββββββ | |
| def test_flags_suspicious_p_clustering(engine): | |
| # three p-values all hugging 0.05 β should trigger high severity | |
| text = """ | |
| The intervention showed significant improvement (p=0.048). | |
| Secondary outcome also significant (p=0.049). | |
| Tertiary measure borderline significant (p=0.046). | |
| """ | |
| result = engine.analyze(text) | |
| types = [f.flag_type for f in result.flags] | |
| assert "p_hacking_suspected" in types or "borderline_p_values" in types | |
| def test_clean_paper_passes(engine): | |
| # well-spaced p-values, decent sample, no red flags | |
| text = """ | |
| Participants (n=120) completed the protocol. | |
| Group A showed significant improvement (p=0.002). | |
| No effect on secondary measure (p=0.41). | |
| """ | |
| result = engine.analyze(text) | |
| assert result.risk_level in ("low", "medium") | |
| assert result.risk_score < 0.5 | |
| # ββ sample size tests ββββββββββββββββββββββββββββββββββββββββ | |
| def test_flags_tiny_sample(engine): | |
| text = "We recruited n=8 participants. Results were significant (p=0.03)." | |
| result = engine.analyze(text) | |
| types = [f.flag_type for f in result.flags] | |
| assert "small_sample_size" in types | |
| def test_acceptable_sample_passes(engine): | |
| text = "Study enrolled n=95 subjects across three sites." | |
| result = engine.analyze(text) | |
| types = [f.flag_type for f in result.flags] | |
| assert "small_sample_size" not in types | |
| # ββ round numbers ββββββββββββββββββββββββββββββββββββββββββββ | |
| def test_flags_exact_p_value(engine): | |
| text = "The result was statistically significant (p=0.05)." | |
| result = engine.analyze(text) | |
| types = [f.flag_type for f in result.flags] | |
| assert "suspiciously_round_p_values" in types | |
| # ββ result structure tests βββββββββββββββββββββββββββββββββββ | |
| def test_result_is_correct_type(engine): | |
| result = engine.analyze("A simple paper with no statistics.") | |
| assert isinstance(result, StatAuditResult) | |
| assert 0.0 <= result.risk_score <= 1.0 | |
| assert result.risk_level in ("low", "medium", "high", "critical") | |
| def test_empty_text_doesnt_crash(engine): | |
| result = engine.analyze("") | |
| assert result.risk_score == 0.0 | |
| assert result.risk_level == "low" |