Spaces:
Sleeping
Sleeping
| # src/scipeerai/modules/pcurve_analyzer.py | |
| # | |
| # P-Curve Analyzer | |
| # Detects publication bias by analyzing p-value distribution. | |
| # Real effects: p-values uniformly distributed 0.00-0.05 | |
| # P-hacking: p-values cluster just below 0.05 | |
| # | |
| # Based on Simonsohn, Nelson & Simmons (2014) | |
| # Published in Journal of Experimental Psychology | |
| import re | |
| import math | |
| from dataclasses import dataclass, field | |
| class PCurveFlag: | |
| flag_type: str | |
| severity: str | |
| description: str | |
| evidence: str | |
| suggestion: str | |
| class PCurveResult: | |
| p_values_found: list | |
| significant_p: list | |
| right_skew_ratio: float | |
| clustering_score: float | |
| pcurve_score: float | |
| risk_level: str | |
| summary: str | |
| flags: list = field(default_factory=list) | |
| flags_count: int = 0 | |
| class PCurveAnalyzer: | |
| """ | |
| P-Curve Analyzer. | |
| Analyzes distribution of p-values to detect | |
| publication bias and p-hacking patterns. | |
| Key insight: | |
| - Real effects β p-values RIGHT skewed (more near 0.01) | |
| - P-hacking β p-values cluster near 0.05 | |
| - No effect β p-values uniformly distributed | |
| """ | |
| P_PATTERN = re.compile( | |
| r'p\s*[=<>β€]\s*(0?\.\d+)', | |
| re.IGNORECASE | |
| ) | |
| def analyze(self, text: str) -> PCurveResult: | |
| all_p = self._extract_p_values(text) | |
| sig_p = [p for p in all_p if p <= 0.05] | |
| flags = [] | |
| if len(sig_p) < 3: | |
| return PCurveResult( | |
| p_values_found = all_p, | |
| significant_p = sig_p, | |
| right_skew_ratio = 0.0, | |
| clustering_score = 0.0, | |
| pcurve_score = 0.0, | |
| risk_level = "low", | |
| summary = ( | |
| f"P-Curve Analysis: {len(sig_p)} significant p-value(s) " | |
| f"found. Minimum 3 required for curve analysis." | |
| ), | |
| flags = [], | |
| flags_count= 0, | |
| ) | |
| right_skew = self._right_skew_ratio(sig_p) | |
| clustering = self._clustering_score(sig_p) | |
| score = self._aggregate_score(right_skew, clustering, sig_p) | |
| level = self._risk(score, clustering, right_skew) | |
| # ββ Flag 1: P-value clustering near 0.05 βββββββββββββββββ | |
| if clustering > 0.5: | |
| near_05 = sum(1 for p in sig_p if p >= 0.04) | |
| flags.append(PCurveFlag( | |
| flag_type = "p_value_clustering", | |
| severity = "high" if clustering > 0.7 else "medium", | |
| description = ( | |
| f"{near_05}/{len(sig_p)} significant p-values " | |
| f"({round(near_05/len(sig_p)*100)}%) fall between " | |
| f"0.040-0.050. This clustering pattern is the " | |
| f"hallmark of p-hacking β results were likely " | |
| f"manipulated to just reach significance." | |
| ), | |
| evidence = ( | |
| f"Significant p-values: {[round(p,4) for p in sig_p]} | " | |
| f"Near-0.05 ratio: {round(clustering*100)}%" | |
| ), | |
| suggestion = ( | |
| "Pre-register hypotheses before data collection. " | |
| "Report all tests conducted including non-significant. " | |
| "Use sequential testing or Bayesian methods." | |
| ), | |
| )) | |
| # ββ Flag 2: Lack of right skew (no real effect) βββββββββββ | |
| if right_skew < 0.3 and len(sig_p) >= 4: | |
| flags.append(PCurveFlag( | |
| flag_type = "flat_pcurve", | |
| severity = "medium", | |
| description = ( | |
| f"P-curve lacks right skew β only {round(right_skew*100)}% " | |
| f"of p-values fall below 0.025. A genuine effect " | |
| f"produces a right-skewed p-curve. Flat curve suggests " | |
| f"the findings may lack evidentiary value." | |
| ), | |
| evidence = ( | |
| f"Right-skew ratio: {round(right_skew*100)}% " | |
| f"(expected >50% for real effects) | " | |
| f"P-values: {[round(p,4) for p in sig_p]}" | |
| ), | |
| suggestion = ( | |
| "Conduct a power analysis. If the effect is real, " | |
| "p-values should skew toward 0. Consider increasing " | |
| "sample size for a more definitive test." | |
| ), | |
| )) | |
| # ββ Flag 3: Too many exactly 0.05 values ββββββββββββββββββ | |
| exact_05 = sum(1 for p in all_p if abs(p - 0.05) < 0.001) | |
| if exact_05 >= 2: | |
| flags.append(PCurveFlag( | |
| flag_type = "exact_threshold_reporting", | |
| severity = "medium", | |
| description = ( | |
| f"{exact_05} p-values reported as exactly p=0.05. " | |
| f"This is statistically rare in real data and " | |
| f"suggests threshold-seeking behavior or rounding." | |
| ), | |
| evidence = ( | |
| f"{exact_05} values equal to exactly 0.050 found" | |
| ), | |
| suggestion = ( | |
| "Report exact p-values to 3+ decimal places. " | |
| "Avoid rounding to threshold values." | |
| ), | |
| )) | |
| summary = self._build_summary( | |
| all_p, sig_p, score, level, | |
| right_skew, clustering | |
| ) | |
| return PCurveResult( | |
| p_values_found = all_p, | |
| significant_p = sig_p, | |
| right_skew_ratio = round(right_skew, 4), | |
| clustering_score = round(clustering, 4), | |
| pcurve_score = round(score, 4), | |
| risk_level = level, | |
| summary = summary, | |
| flags = flags, | |
| flags_count = len(flags), | |
| ) | |
| # ββ internal helpers βββββββββββββββββββββββββββββββββββββββββ | |
| def _extract_p_values(self, text: str) -> list: | |
| values = [] | |
| for m in self.P_PATTERN.finditer(text): | |
| try: | |
| v = float(m.group(1)) | |
| if 0 < v <= 1: | |
| values.append(round(v, 4)) | |
| except ValueError: | |
| pass | |
| return values | |
| def _right_skew_ratio(self, sig_p: list) -> float: | |
| """ | |
| Ratio of p-values below 0.025 vs 0.025-0.05. | |
| Real effects: >50% below 0.025 (right skewed). | |
| """ | |
| if not sig_p: | |
| return 0.0 | |
| below_half = sum(1 for p in sig_p if p <= 0.025) | |
| return below_half / len(sig_p) | |
| def _clustering_score(self, sig_p: list) -> float: | |
| """ | |
| Ratio of p-values in 0.04-0.05 range. | |
| High clustering = p-hacking signature. | |
| """ | |
| if not sig_p: | |
| return 0.0 | |
| near_05 = sum(1 for p in sig_p if p >= 0.04) | |
| return near_05 / len(sig_p) | |
| def _aggregate_score(self, right_skew: float, | |
| clustering: float, | |
| sig_p: list) -> float: | |
| """Combine signals into 0-1 risk score.""" | |
| cluster_risk = clustering | |
| no_skew_risk = 1.0 - right_skew | |
| score = (cluster_risk * 0.6 + no_skew_risk * 0.4) | |
| return min(round(score, 4), 1.0) | |
| def _risk(self, score: float, | |
| clustering: float, | |
| right_skew: float) -> str: | |
| if clustering > 0.7 or score >= 0.7: | |
| return "critical" | |
| if clustering > 0.5 or score >= 0.5: | |
| return "high" | |
| if clustering > 0.3 or score >= 0.3: | |
| return "medium" | |
| return "low" | |
| def _build_summary(self, all_p, sig_p, score, | |
| level, right_skew, clustering) -> str: | |
| pct = round(score * 100) | |
| return ( | |
| f"P-Curve analyzed {len(all_p)} p-value(s), " | |
| f"{len(sig_p)} significant (pβ€0.05). " | |
| f"Clustering score: {round(clustering*100)}% near p=0.05. " | |
| f"Right-skew ratio: {round(right_skew*100)}%. " | |
| f"Overall bias score: {pct}%. " | |
| f"Risk level: {level.upper()}." | |
| ) |