Spaces:

Abu-Sameer-66
/

SciPeerAI-API

Running

SciPeerAI-API / src /scipeerai /modules /sprite_test.py

Abu-Sameer-66

feat: add SPRITE Test module - impossible distribution detector

7f89197 30 days ago

7.44 kB

	# src/scipeerai/modules/sprite_test.py
	#
	# SPRITE Test — Sample Parameter Reconstruction via Iterative Techniques
	# Based on: Heathers & Brown (2019)
	#
	# GRIM checks if a mean is possible.
	# SPRITE checks if a mean AND standard deviation are
	# simultaneously possible given N and scale bounds.
	# Catches fabricated datasets that pass GRIM alone.

	import re
	import math
	import itertools
	from dataclasses import dataclass, field


	@dataclass
	class SpriteFlag:
	flag_type: str
	severity: str
	description: str
	evidence: str
	suggestion: str


	@dataclass
	class SpriteResult:
	impossible_combinations: list
	possible_combinations: list
	sprite_score: float
	risk_level: str
	summary: str
	flags: list = field(default_factory=list)
	flags_count: int = 0


	class SpriteTest:
	"""
	SPRITE Test implementation.
	Reconstructs possible integer distributions and checks
	whether reported mean + SD are jointly achievable.
	"""

	# matches: mean=X, sd=Y, n=Z, scale=A-B
	MEAN_PAT = re.compile(r'(?:mean\|m)\s[=:]\s(-?\d+\.\d+)', re.I)
	SD_PAT = re.compile(r'(?:sd\|std\|s\.d\.)\s[=:]\s(\d+\.\d+)', re.I)
	N_PAT = re.compile(r'\bn\s[=:]\s(\d+)', re.I)
	SCALE_PAT = re.compile(r'(?:scale\|range)\s[=:]\s(\d+)\s[-–]\s(\d+)', re.I)

	# max N to attempt full reconstruction — above this use sampling
	RECONSTRUCTION_LIMIT = 12

	def analyze(self, text: str) -> SpriteResult:
	groups = self._extract_groups(text)
	impossible = []
	possible = []
	flags = []

	for g in groups:
	mean, sd, n, lo, hi = g
	ok = self._sprite_check(mean, sd, n, lo, hi)
	if ok:
	possible.append(g)
	else:
	impossible.append(g)
	flags.append(SpriteFlag(
	flag_type = "sprite_impossible_distribution",
	severity = "high",
	description = (
	f"No integer distribution exists that produces "
	f"M={mean}, SD={sd} with N={n} on a {lo}-{hi} scale. "
	f"The reported statistics are mathematically "
	f"inconsistent — potential data fabrication."
	),
	evidence = (
	f"M={mean}, SD={sd}, N={n}, Scale={lo}-{hi} \| "
	f"Exhaustive reconstruction failed."
	),
	suggestion = (
	"Re-verify raw data. Recalculate mean and SD "
	"from original scores. Check scale bounds and "
	"sample size reporting."
	),
	))

	total = len(impossible) + len(possible)
	score = (len(impossible) / total) if total > 0 else 0.0
	level = self._risk(score, len(impossible))
	summary = self._build_summary(impossible, possible, score, level)

	return SpriteResult(
	impossible_combinations = impossible,
	possible_combinations = possible,
	sprite_score = round(score, 4),
	risk_level = level,
	summary = summary,
	flags = flags,
	flags_count = len(flags),
	)

	# ── internal helpers ─────────────────────────────────────────

	def _sprite_check(self, mean: float, sd: float,
	n: int, lo: int, hi: int) -> bool:
	"""
	Core SPRITE logic.
	For small N: exhaustive search over all integer distributions.
	For large N: use variance bounds check (fast approximation).
	"""
	if n > self.RECONSTRUCTION_LIMIT:
	return self._variance_bounds_check(mean, sd, n, lo, hi)
	return self._exhaustive_check(mean, sd, n, lo, hi)

	def _exhaustive_check(self, mean: float, sd: float,
	n: int, lo: int, hi: int) -> bool:
	"""Try all combinations of n integers in [lo, hi]."""
	target_sum = mean * n
	# only proceed if sum is near-integer (GRIM passes)
	if abs(target_sum - round(target_sum)) > 0.01:
	return False
	int_sum = round(target_sum)
	target_var = sd ** 2

	scale = list(range(lo, hi + 1))
	for combo in itertools.combinations_with_replacement(scale, n):
	if sum(combo) != int_sum:
	continue
	var = sum((x - mean) ** 2 for x in combo) / n
	if abs(math.sqrt(var) - sd) < 0.01:
	return True
	return False

	def _variance_bounds_check(self, mean: float, sd: float,
	n: int, lo: int, hi: int) -> bool:
	"""
	Fast check for large N.
	The maximum possible variance occurs when values are
	as extreme as possible (all lo or hi).
	"""
	# minimum SD = 0 (all values equal)
	# maximum SD approximation
	p = (mean - lo) / (hi - lo) if hi != lo else 0.5
	max_var = p * (1 - p) * (hi - lo) ** 2
	max_sd = math.sqrt(max_var)
	return sd <= max_sd + 0.05

	def _extract_groups(self, text: str):
	"""Extract (mean, sd, n, scale_lo, scale_hi) tuples."""
	groups = []
	means = [(m.start(), float(m.group(1))) for m in self.MEAN_PAT.finditer(text)]
	sds = [(m.start(), float(m.group(1))) for m in self.SD_PAT.finditer(text)]
	ns = [(m.start(), int(m.group(1))) for m in self.N_PAT.finditer(text)]
	scales = [(m.start(), int(m.group(1)), int(m.group(2)))
	for m in self.SCALE_PAT.finditer(text)]

	if not (means and sds and ns):
	return groups

	# default scale if not found
	default_lo, default_hi = 1, 7

	for (mp, mean), (sp, sd) in zip(means, sds):
	# find closest n
	if not ns:
	continue
	n_pos, n_val = min(ns, key=lambda x: abs(x[0] - mp))
	if n_val < 2 or n_val > 500:
	continue
	# find closest scale
	if scales:
	_, lo, hi = min(scales, key=lambda x: abs(x[0] - mp))
	else:
	lo, hi = default_lo, default_hi
	groups.append((mean, sd, n_val, lo, hi))

	return groups

	def _risk(self, score: float, count: int) -> str:
	if count >= 2 or score >= 0.6:
	return "critical"
	if count == 1 or score >= 0.3:
	return "high"
	if score > 0:
	return "medium"
	return "low"

	def _build_summary(self, impossible, possible,
	score, level) -> str:
	total = len(impossible) + len(possible)
	if total == 0:
	return (
	"SPRITE Test: No mean/SD/N groups detected. "
	"Include M=, SD=, N= and scale=X-Y for analysis."
	)
	pct = round(score * 100)
	return (
	f"SPRITE Test analyzed {total} mean/SD/N group(s). "
	f"{len(impossible)} impossible distribution(s) detected "
	f"({pct}% failure rate). "
	f"Risk level: {level.upper()}."
	)