Spaces:

Abu-Sameer-66
/

SciPeerAI-API

Running

File size: 7,438 Bytes

c94f46f

# src/scipeerai/modules/sprite_test.py
#
# SPRITE Test — Sample Parameter Reconstruction via Iterative Techniques
# Based on: Heathers & Brown (2019)
#
# GRIM checks if a mean is possible.
# SPRITE checks if a mean AND standard deviation are
# simultaneously possible given N and scale bounds.
# Catches fabricated datasets that pass GRIM alone.

import re
import math
import itertools
from dataclasses import dataclass, field


@dataclass
class SpriteFlag:
    flag_type:   str
    severity:    str
    description: str
    evidence:    str
    suggestion:  str


@dataclass
class SpriteResult:
    impossible_combinations: list
    possible_combinations:   list
    sprite_score:            float
    risk_level:              str
    summary:                 str
    flags:                   list = field(default_factory=list)
    flags_count:             int  = 0


class SpriteTest:
    """
    SPRITE Test implementation.
    Reconstructs possible integer distributions and checks
    whether reported mean + SD are jointly achievable.
    """

    # matches: mean=X, sd=Y, n=Z, scale=A-B
    MEAN_PAT  = re.compile(r'(?:mean|m)\s*[=:]\s*(-?\d+\.\d+)', re.I)
    SD_PAT    = re.compile(r'(?:sd|std|s\.d\.)\s*[=:]\s*(\d+\.\d+)', re.I)
    N_PAT     = re.compile(r'\bn\s*[=:]\s*(\d+)', re.I)
    SCALE_PAT = re.compile(r'(?:scale|range)\s*[=:]\s*(\d+)\s*[-–]\s*(\d+)', re.I)

    # max N to attempt full reconstruction — above this use sampling
    RECONSTRUCTION_LIMIT = 12

    def analyze(self, text: str) -> SpriteResult:
        groups     = self._extract_groups(text)
        impossible = []
        possible   = []
        flags      = []

        for g in groups:
            mean, sd, n, lo, hi = g
            ok = self._sprite_check(mean, sd, n, lo, hi)
            if ok:
                possible.append(g)
            else:
                impossible.append(g)
                flags.append(SpriteFlag(
                    flag_type   = "sprite_impossible_distribution",
                    severity    = "high",
                    description = (
                        f"No integer distribution exists that produces "
                        f"M={mean}, SD={sd} with N={n} on a {lo}-{hi} scale. "
                        f"The reported statistics are mathematically "
                        f"inconsistent — potential data fabrication."
                    ),
                    evidence    = (
                        f"M={mean}, SD={sd}, N={n}, Scale={lo}-{hi} | "
                        f"Exhaustive reconstruction failed."
                    ),
                    suggestion  = (
                        "Re-verify raw data. Recalculate mean and SD "
                        "from original scores. Check scale bounds and "
                        "sample size reporting."
                    ),
                ))

        total  = len(impossible) + len(possible)
        score  = (len(impossible) / total) if total > 0 else 0.0
        level  = self._risk(score, len(impossible))
        summary = self._build_summary(impossible, possible, score, level)

        return SpriteResult(
            impossible_combinations = impossible,
            possible_combinations   = possible,
            sprite_score            = round(score, 4),
            risk_level              = level,
            summary                 = summary,
            flags                   = flags,
            flags_count             = len(flags),
        )

    # ── internal helpers ─────────────────────────────────────────

    def _sprite_check(self, mean: float, sd: float,
                      n: int, lo: int, hi: int) -> bool:
        """
        Core SPRITE logic.
        For small N: exhaustive search over all integer distributions.
        For large N: use variance bounds check (fast approximation).
        """
        if n > self.RECONSTRUCTION_LIMIT:
            return self._variance_bounds_check(mean, sd, n, lo, hi)
        return self._exhaustive_check(mean, sd, n, lo, hi)

    def _exhaustive_check(self, mean: float, sd: float,
                          n: int, lo: int, hi: int) -> bool:
        """Try all combinations of n integers in [lo, hi]."""
        target_sum = mean * n
        # only proceed if sum is near-integer (GRIM passes)
        if abs(target_sum - round(target_sum)) > 0.01:
            return False
        int_sum = round(target_sum)
        target_var = sd ** 2

        scale = list(range(lo, hi + 1))
        for combo in itertools.combinations_with_replacement(scale, n):
            if sum(combo) != int_sum:
                continue
            var = sum((x - mean) ** 2 for x in combo) / n
            if abs(math.sqrt(var) - sd) < 0.01:
                return True
        return False

    def _variance_bounds_check(self, mean: float, sd: float,
                               n: int, lo: int, hi: int) -> bool:
        """
        Fast check for large N.
        The maximum possible variance occurs when values are
        as extreme as possible (all lo or hi).
        """
        # minimum SD = 0 (all values equal)
        # maximum SD approximation
        p = (mean - lo) / (hi - lo) if hi != lo else 0.5
        max_var = p * (1 - p) * (hi - lo) ** 2
        max_sd  = math.sqrt(max_var)
        return sd <= max_sd + 0.05

    def _extract_groups(self, text: str):
        """Extract (mean, sd, n, scale_lo, scale_hi) tuples."""
        groups = []
        means  = [(m.start(), float(m.group(1))) for m in self.MEAN_PAT.finditer(text)]
        sds    = [(m.start(), float(m.group(1))) for m in self.SD_PAT.finditer(text)]
        ns     = [(m.start(), int(m.group(1)))   for m in self.N_PAT.finditer(text)]
        scales = [(m.start(), int(m.group(1)), int(m.group(2)))
                  for m in self.SCALE_PAT.finditer(text)]

        if not (means and sds and ns):
            return groups

        # default scale if not found
        default_lo, default_hi = 1, 7

        for (mp, mean), (sp, sd) in zip(means, sds):
            # find closest n
            if not ns:
                continue
            n_pos, n_val = min(ns, key=lambda x: abs(x[0] - mp))
            if n_val < 2 or n_val > 500:
                continue
            # find closest scale
            if scales:
                _, lo, hi = min(scales, key=lambda x: abs(x[0] - mp))
            else:
                lo, hi = default_lo, default_hi
            groups.append((mean, sd, n_val, lo, hi))

        return groups

    def _risk(self, score: float, count: int) -> str:
        if count >= 2 or score >= 0.6:
            return "critical"
        if count == 1 or score >= 0.3:
            return "high"
        if score >  0:
            return "medium"
        return "low"

    def _build_summary(self, impossible, possible,
                       score, level) -> str:
        total = len(impossible) + len(possible)
        if total == 0:
            return (
                "SPRITE Test: No mean/SD/N groups detected. "
                "Include M=, SD=, N= and scale=X-Y for analysis."
            )
        pct = round(score * 100)
        return (
            f"SPRITE Test analyzed {total} mean/SD/N group(s). "
            f"{len(impossible)} impossible distribution(s) detected "
            f"({pct}% failure rate). "
            f"Risk level: {level.upper()}."
        )