File size: 2,835 Bytes
33f3681
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# tests/test_effect_size_validator.py
#
# Test suite for Effect Size Validator.
# 8 tests covering inflated effects, underpowered studies,
# missing effect sizes, impossible r values, and risk levels.

import pytest
from src.scipeerai.modules.effect_size_validator import EffectSizeValidator

engine = EffectSizeValidator()


def test_inflated_effect_detected():
    """Cohen d=3.2 with N=12 — critical risk."""
    r = engine.analyze(
        "Cohen d = 3.2 with n=12 participants. "
        "Results were significant p=0.049."
    )
    assert len(r.inflated_effects) >= 1
    assert r.risk_level == "critical"


def test_reasonable_effect_passes():
    """Cohen d=0.5 with N=80 — low risk."""
    r = engine.analyze(
        "Cohen d = 0.50 with n=80 participants. "
        "Results showed significant improvement p=0.032."
    )
    assert len(r.inflated_effects) == 0
    assert r.risk_level in ("low", "medium")


def test_missing_effect_sizes_flagged():
    """No effect sizes reported — medium risk."""
    r = engine.analyze(
        "This study examined the effect of treatment "
        "on outcomes in n=45 participants. Results "
        "showed significant improvement with p=0.03."
    )
    flag_types = [f.flag_type for f in r.flags]
    assert "missing_effect_sizes" in flag_types


def test_underpowered_study_detected():
    """Small d with small N — underpowered."""
    r = engine.analyze(
        "Cohen d = 0.30 with n=15 participants. "
        "Significant result found p=0.048."
    )
    assert r.flags_count >= 1


def test_effect_score_range():
    """Score must be between 0 and 1."""
    r = engine.analyze(
        "Cohen d = 3.2 with n=12 participants "
        "showing significant effects p=0.049."
    )
    assert 0.0 <= r.effect_score <= 1.0


def test_flag_structure():
    """Flags have correct required fields."""
    r = engine.analyze(
        "Cohen d = 3.2 with n=12 participants. "
        "Results were significant p=0.049."
    )
    assert r.flags_count >= 1
    flag = r.flags[0]
    assert hasattr(flag, 'flag_type')
    assert hasattr(flag, 'severity')
    assert hasattr(flag, 'description')
    assert hasattr(flag, 'evidence')
    assert hasattr(flag, 'suggestion')


def test_summary_contains_key_info():
    """Summary mentions effect sizes and risk level."""
    r = engine.analyze(
        "Cohen d = 3.2 with n=12 participants. "
        "Results were significant p=0.049."
    )
    assert "Effect Size" in r.summary
    assert r.risk_level.upper() in r.summary


def test_power_estimation_calculated():
    """Power estimates calculated for Cohen d."""
    r = engine.analyze(
        "Cohen d = 0.50 with n=20 participants. "
        "Significant improvement found p=0.035."
    )
    assert len(r.power_estimates) >= 1
    assert "power" in r.power_estimates[0]