# src/scipeerai/modules/grim_test.py
#
# GRIM Test — Granularity-Related Inconsistency of Means
# Based on: Brown & Heathers (2017), Social Psychological
# and Personality Science β scientifically validated.
#
# Catches mathematically impossible means given sample size.
# Example: mean=2.34 with n=20 is IMPOSSIBLE.
import re
import math
from dataclasses import dataclass, field
@dataclass
class GrimFlag:
    """A single finding raised while checking one reported mean."""

    # Identifier of the kind of finding (e.g. "grim_impossible_mean").
    flag_type: str
    # Severity label attached to the finding (e.g. "high").
    severity: str
    # Human-readable explanation of what is wrong.
    description: str
    # The reported values that triggered the finding.
    evidence: str
    # Recommended follow-up action.
    suggestion: str
@dataclass
class GrimResult:
    """Aggregate outcome of one GRIM analysis run."""

    # (mean, n) pairs that failed the check.
    impossible_means: list
    # (mean, n) pairs that passed the check.
    possible_means: list
    # Share of checked pairs that failed, between 0.0 and 1.0.
    grim_score: float
    # Risk label derived from the score and the number of failures.
    risk_level: str
    # One-paragraph, human-readable recap of the run.
    summary: str
    # Findings raised during the run; each instance gets its own list.
    flags: list = field(default_factory=list)
    # Number of findings; defaults to 0 when not supplied.
    flags_count: int = 0
class GrimTest:
    """
    GRIM Test implementation.

    Checks whether reported means are mathematically possible given the
    reported sample size, assuming the underlying item scores are
    integers. A mean reported to ``d`` decimals is consistent with a
    sample of size ``n`` only if some integer total ``k`` exists such
    that ``k / n`` rounds to the reported value at ``d`` decimals.
    """

    # Patterns that pull mean/average values and sample sizes out of text.
    # The leading \b keeps the one-letter keywords from matching inside
    # longer words (the "m" of "sum", the "n" of "mean").
    MEAN_PATTERN = re.compile(
        r'\b(?:mean|average|m)\s*[=:]\s*(-?\d+\.?\d*)',
        re.IGNORECASE
    )
    N_PATTERN = re.compile(
        r'\bn\s*[=:]\s*(\d+)',
        re.IGNORECASE
    )
    # Either "mean ... n" (groups 1, 2) or "n ... mean" (groups 3, 4),
    # at most 80 characters apart on one line. The gap is lazy so each
    # value is paired with the NEAREST counterpart rather than the
    # farthest one inside the window.
    FULL_PATTERN = re.compile(
        r'\b(?:mean|average|m)\s*[=:]\s*(-?\d+\.\d+)'
        r'.{0,80}?'
        r'\bn\s*[=:]\s*(\d+)'
        r'|'
        r'\bn\s*[=:]\s*(\d+)'
        r'.{0,80}?'
        r'\b(?:mean|average|m)\s*[=:]\s*(-?\d+\.\d+)',
        re.IGNORECASE
    )

    def analyze(self, text: str) -> "GrimResult":
        """
        Run the GRIM check on every mean/N pair found in ``text``.

        Returns a GrimResult holding the (mean, n) tuples that failed
        and passed, the share of failures (``grim_score``, rounded to
        four places), the derived risk level, a summary sentence and
        one GrimFlag per impossible mean. Empty or ``None`` text yields
        a result with no pairs.
        """
        impossible = []
        possible = []
        flags = []

        for mean_val, n_val, decimals in self._extract_reports(text or ""):
            if self._grim_check(mean_val, n_val, decimals):
                possible.append((mean_val, n_val))
                continue

            impossible.append((mean_val, n_val))
            flags.append(GrimFlag(
                flag_type="grim_impossible_mean",
                severity="high",
                description=(
                    f"Mean={mean_val} is mathematically "
                    f"impossible with N={n_val}. "
                    f"This value cannot arise from integer "
                    f"item scores — potential data fabrication."
                ),
                evidence=(
                    f"Reported: M={mean_val}, N={n_val} | "
                    f"Closest valid means: "
                    f"{self._nearest_valid(mean_val, n_val, decimals)}"
                ),
                suggestion=(
                    "Re-check raw data and recalculate. "
                    "If using Likert scales, verify item "
                    "scoring and sample size."
                ),
            ))

        total = len(impossible) + len(possible)
        score = (len(impossible) / total) if total > 0 else 0.0
        level = self._risk(score, len(impossible))
        summary = self._build_summary(impossible, possible, score, level)

        return GrimResult(
            impossible_means=impossible,
            possible_means=possible,
            grim_score=round(score, 4),
            risk_level=level,
            summary=summary,
            flags=flags,
            flags_count=len(flags),
        )

    # -- internal helpers ---------------------------------------------

    @staticmethod
    def _infer_decimals(mean: float) -> int:
        """
        Guess how many decimals a mean was reported with from its float
        value. Trailing zeros cannot be recovered, so this is a lower
        bound; at least one decimal is always assumed.
        """
        digits = f"{float(mean):.10f}".rstrip("0").split(".")[-1]
        return max(1, len(digits))

    def _grim_check(self, mean: float, n: int,
                    decimals: "int | None" = None) -> bool:
        """
        Core GRIM logic.

        Returns True when some integer total ``k`` gives ``k / n`` that
        rounds to ``mean`` at ``decimals`` places. ``decimals`` is the
        precision the mean was reported with; when omitted it is
        inferred from the float value. A non-positive ``n`` returns
        False, since no sample of that size can produce a mean.
        """
        if n <= 0:
            return False
        if decimals is None:
            decimals = self._infer_decimals(mean)

        # Half a unit in the last reported place; the inclusive bound
        # accepts both half-up and half-even rounding of k / n. The
        # small epsilon absorbs floating-point noise.
        half_unit = 0.5 * 10 ** -max(decimals, 0)
        target = mean * n

        # Only the two integer totals surrounding mean * n can be the
        # nearest attainable ones.
        for total in (math.floor(target), math.ceil(target)):
            if abs(total / n - mean) <= half_unit + 1e-9:
                return True
        return False

    def _extract_reports(self, text: str):
        """
        Return a list of ``(mean, n, decimals)`` for every mean/N pair
        matched by FULL_PATTERN whose N lies in 2..10000. ``decimals``
        is counted from the matched text so trailing zeros ("2.30")
        are preserved.
        """
        reports = []
        for match in self.FULL_PATTERN.finditer(text):
            if match.group(1) is not None:
                # "mean ... n" order
                mean_text, n_text = match.group(1), match.group(2)
            else:
                # "n ... mean" order
                mean_text, n_text = match.group(4), match.group(3)

            n_val = int(n_text)
            if 2 <= n_val <= 10000:
                decimals = len(mean_text.split(".")[1])
                reports.append((float(mean_text), n_val, decimals))
        return reports

    def _extract_pairs(self, text: str):
        """Return the ``(mean, n)`` pairs found in ``text``."""
        return [
            (mean_val, n_val)
            for mean_val, n_val, _ in self._extract_reports(text or "")
        ]

    def _nearest_valid(self, mean: float, n: int,
                       decimals: "int | None" = None) -> str:
        """
        Describe the attainable means just below and just above
        ``mean`` for sample size ``n``, each rounded to ``decimals``
        places (inferred from the float value when omitted).
        """
        if decimals is None:
            decimals = self._infer_decimals(mean)
        lower = math.floor(mean * n) / n
        upper = math.ceil(mean * n) / n
        return f"{round(lower, decimals)} or {round(upper, decimals)}"

    def _risk(self, score: float, count: int) -> str:
        """
        Map the failure share ``score`` and the number of impossible
        means ``count`` to a risk label; the first matching tier wins.
        """
        if count >= 3 or score >= 0.6:
            return "critical"
        if count == 2 or score >= 0.4:
            return "high"
        if count == 1 or score >= 0.2:
            return "medium"
        return "low"

    def _build_summary(self, impossible, possible,
                       score, level) -> str:
        """Build the one-paragraph summary stored on the result."""
        total = len(impossible) + len(possible)
        if total == 0:
            return (
                "GRIM Test: No mean/N pairs detected in text. "
                "Add explicit M= and N= values for analysis."
            )
        pct = round(score * 100)
        return (
            f"GRIM Test analyzed {total} mean/N pair(s). "
            f"{len(impossible)} impossible mean(s) detected "
            f"({pct}% failure rate). "
            f"Risk level: {level.upper()}."
        )