Spaces:
Running
Running
| # src/scipeerai/modules/sprite_test.py | |
| # | |
| # SPRITE Test β Sample Parameter Reconstruction via Iterative Techniques | |
| # Based on: Heathers & Brown (2019) | |
| # | |
| # GRIM checks if a mean is possible. | |
| # SPRITE checks if a mean AND standard deviation are | |
| # simultaneously possible given N and scale bounds. | |
| # Catches fabricated datasets that pass GRIM alone. | |
| import re | |
| import math | |
| import itertools | |
| from dataclasses import dataclass, field | |
| class SpriteFlag: | |
| flag_type: str | |
| severity: str | |
| description: str | |
| evidence: str | |
| suggestion: str | |
| class SpriteResult: | |
| impossible_combinations: list | |
| possible_combinations: list | |
| sprite_score: float | |
| risk_level: str | |
| summary: str | |
| flags: list = field(default_factory=list) | |
| flags_count: int = 0 | |
| class SpriteTest: | |
| """ | |
| SPRITE Test implementation. | |
| Reconstructs possible integer distributions and checks | |
| whether reported mean + SD are jointly achievable. | |
| """ | |
| # matches: mean=X, sd=Y, n=Z, scale=A-B | |
| MEAN_PAT = re.compile(r'(?:mean|m)\s*[=:]\s*(-?\d+\.\d+)', re.I) | |
| SD_PAT = re.compile(r'(?:sd|std|s\.d\.)\s*[=:]\s*(\d+\.\d+)', re.I) | |
| N_PAT = re.compile(r'\bn\s*[=:]\s*(\d+)', re.I) | |
| SCALE_PAT = re.compile(r'(?:scale|range)\s*[=:]\s*(\d+)\s*[-β]\s*(\d+)', re.I) | |
| # max N to attempt full reconstruction β above this use sampling | |
| RECONSTRUCTION_LIMIT = 12 | |
| def analyze(self, text: str) -> SpriteResult: | |
| groups = self._extract_groups(text) | |
| impossible = [] | |
| possible = [] | |
| flags = [] | |
| for g in groups: | |
| mean, sd, n, lo, hi = g | |
| ok = self._sprite_check(mean, sd, n, lo, hi) | |
| if ok: | |
| possible.append(g) | |
| else: | |
| impossible.append(g) | |
| flags.append(SpriteFlag( | |
| flag_type = "sprite_impossible_distribution", | |
| severity = "high", | |
| description = ( | |
| f"No integer distribution exists that produces " | |
| f"M={mean}, SD={sd} with N={n} on a {lo}-{hi} scale. " | |
| f"The reported statistics are mathematically " | |
| f"inconsistent β potential data fabrication." | |
| ), | |
| evidence = ( | |
| f"M={mean}, SD={sd}, N={n}, Scale={lo}-{hi} | " | |
| f"Exhaustive reconstruction failed." | |
| ), | |
| suggestion = ( | |
| "Re-verify raw data. Recalculate mean and SD " | |
| "from original scores. Check scale bounds and " | |
| "sample size reporting." | |
| ), | |
| )) | |
| total = len(impossible) + len(possible) | |
| score = (len(impossible) / total) if total > 0 else 0.0 | |
| level = self._risk(score, len(impossible)) | |
| summary = self._build_summary(impossible, possible, score, level) | |
| return SpriteResult( | |
| impossible_combinations = impossible, | |
| possible_combinations = possible, | |
| sprite_score = round(score, 4), | |
| risk_level = level, | |
| summary = summary, | |
| flags = flags, | |
| flags_count = len(flags), | |
| ) | |
| # ββ internal helpers βββββββββββββββββββββββββββββββββββββββββ | |
| def _sprite_check(self, mean: float, sd: float, | |
| n: int, lo: int, hi: int) -> bool: | |
| """ | |
| Core SPRITE logic. | |
| For small N: exhaustive search over all integer distributions. | |
| For large N: use variance bounds check (fast approximation). | |
| """ | |
| if n > self.RECONSTRUCTION_LIMIT: | |
| return self._variance_bounds_check(mean, sd, n, lo, hi) | |
| return self._exhaustive_check(mean, sd, n, lo, hi) | |
| def _exhaustive_check(self, mean: float, sd: float, | |
| n: int, lo: int, hi: int) -> bool: | |
| """Try all combinations of n integers in [lo, hi].""" | |
| target_sum = mean * n | |
| # only proceed if sum is near-integer (GRIM passes) | |
| if abs(target_sum - round(target_sum)) > 0.01: | |
| return False | |
| int_sum = round(target_sum) | |
| target_var = sd ** 2 | |
| scale = list(range(lo, hi + 1)) | |
| for combo in itertools.combinations_with_replacement(scale, n): | |
| if sum(combo) != int_sum: | |
| continue | |
| var = sum((x - mean) ** 2 for x in combo) / n | |
| if abs(math.sqrt(var) - sd) < 0.01: | |
| return True | |
| return False | |
| def _variance_bounds_check(self, mean: float, sd: float, | |
| n: int, lo: int, hi: int) -> bool: | |
| """ | |
| Fast check for large N. | |
| The maximum possible variance occurs when values are | |
| as extreme as possible (all lo or hi). | |
| """ | |
| # minimum SD = 0 (all values equal) | |
| # maximum SD approximation | |
| p = (mean - lo) / (hi - lo) if hi != lo else 0.5 | |
| max_var = p * (1 - p) * (hi - lo) ** 2 | |
| max_sd = math.sqrt(max_var) | |
| return sd <= max_sd + 0.05 | |
| def _extract_groups(self, text: str): | |
| """Extract (mean, sd, n, scale_lo, scale_hi) tuples.""" | |
| groups = [] | |
| means = [(m.start(), float(m.group(1))) for m in self.MEAN_PAT.finditer(text)] | |
| sds = [(m.start(), float(m.group(1))) for m in self.SD_PAT.finditer(text)] | |
| ns = [(m.start(), int(m.group(1))) for m in self.N_PAT.finditer(text)] | |
| scales = [(m.start(), int(m.group(1)), int(m.group(2))) | |
| for m in self.SCALE_PAT.finditer(text)] | |
| if not (means and sds and ns): | |
| return groups | |
| # default scale if not found | |
| default_lo, default_hi = 1, 7 | |
| for (mp, mean), (sp, sd) in zip(means, sds): | |
| # find closest n | |
| if not ns: | |
| continue | |
| n_pos, n_val = min(ns, key=lambda x: abs(x[0] - mp)) | |
| if n_val < 2 or n_val > 500: | |
| continue | |
| # find closest scale | |
| if scales: | |
| _, lo, hi = min(scales, key=lambda x: abs(x[0] - mp)) | |
| else: | |
| lo, hi = default_lo, default_hi | |
| groups.append((mean, sd, n_val, lo, hi)) | |
| return groups | |
| def _risk(self, score: float, count: int) -> str: | |
| if count >= 2 or score >= 0.6: | |
| return "critical" | |
| if count == 1 or score >= 0.3: | |
| return "high" | |
| if score > 0: | |
| return "medium" | |
| return "low" | |
| def _build_summary(self, impossible, possible, | |
| score, level) -> str: | |
| total = len(impossible) + len(possible) | |
| if total == 0: | |
| return ( | |
| "SPRITE Test: No mean/SD/N groups detected. " | |
| "Include M=, SD=, N= and scale=X-Y for analysis." | |
| ) | |
| pct = round(score * 100) | |
| return ( | |
| f"SPRITE Test analyzed {total} mean/SD/N group(s). " | |
| f"{len(impossible)} impossible distribution(s) detected " | |
| f"({pct}% failure rate). " | |
| f"Risk level: {level.upper()}." | |
| ) |