Spaces:
Running
Running
# src/scipeerai/modules/reproducibility_scanner.py
#
# Reproducibility Scanner
# -----------------------
# The reproducibility crisis exists largely because
# researchers cannot access the code, data, and exact
# methods used in published papers.
#
# This module scans paper text for reproducibility
# signals — what is present and what is critically
# missing for independent replication.
import re
from dataclasses import dataclass, field
# ── data structures ───────────────────────────────────────────
@dataclass
class ReproducibilityFlag:
    """A single reproducibility problem raised by the scanner.

    Declared as a dataclass so instances can be built with keyword
    arguments, which is how the scanner constructs them.
    """

    # machine-readable identifier, e.g. "missing_code_availability"
    flag_type: str
    # severity label; the scanner emits "high" or "medium"
    severity: str
    # human-readable explanation of the problem
    description: str
    # short note on what in the text triggered the flag
    evidence: str
    # recommended remedy for the authors
    suggestion: str
@dataclass
class ReproducibilityResult:
    """Outcome of one reproducibility scan.

    Declared as a dataclass so the scanner can build it with keyword
    arguments. The ``has_*`` fields record which indicators were found;
    the remaining fields carry the score and the flags raised.
    """

    # what was found
    has_code_link: bool
    has_data_link: bool
    has_software_versions: bool
    has_statistical_software: bool
    has_preregistration: bool
    has_ethics_statement: bool
    has_conflict_statement: bool
    has_sample_size_justification: bool

    # scoring
    reproducibility_score: float  # 0.0 = not reproducible, 1.0 = fully
    flags: list                   # ReproducibilityFlag items, possibly empty
    risk_level: str               # "critical" | "high" | "medium" | "low"
    summary: str                  # one-paragraph human-readable summary
# ── main class ────────────────────────────────────────────────
class ReproducibilityScanner:
    """
    Scans paper text for reproducibility indicators.

    Two layers:
      1. Presence checks — what good papers SHOULD have
      2. Absence flags — what is missing and how serious

    Scoring is inverted from other modules:
    HIGH reproducibility score = LOW risk.
    We report both for clarity.
    """

    # code/data sharing signals
    CODE_PATTERNS = [
        r'github\.com/\S+',
        r'gitlab\.com/\S+',
        r'bitbucket\.org/\S+',
        r'code.*available.*at',
        r'code.*provided.*at',
        r'source code.*available',
        r'scripts.*available',
        r'zenodo\.org/\S+',
        r'osf\.io/\S+',
        r'code ocean',
        r'figshare\.com/\S+',
    ]

    DATA_PATTERNS = [
        r'data.*available.*at',
        r'dataset.*available',
        r'data.*deposited',
        r'data.*repository',
        r'data.*doi',
        r'supplementary data',
        r'data.*provided',
        r'open data',
        r'zenodo\.org/\S+',
        r'osf\.io/\S+',
        r'dryad',
        r'figshare',
        r'harvard dataverse',
        r'data.*upon.*request',  # weaker — noted separately
    ]

    # Each pattern starts at a word boundary so that, for example,
    # "earlier version 2" is not read as "R version 2".
    SOFTWARE_PATTERNS = [
        r'\br\s+version\s+\d',
        r'\bpython\s+\d+\.\d+',
        r'\bspss\s+version',
        r'\bstata\s+\d+',
        r'\bmatlab\s+r\d+',
        r'\bsas\s+version',
        r'\bscipy\s+\d',
        r'\bnumpy\s+\d',
        r'\bsklearn\s+\d',
        r'\bscikit-learn\s+\d',
        r'\btensorflow\s+\d',
        r'\bpytorch\s+\d',
    ]

    STAT_SOFTWARE = [
        'r software', 'rstudio', 'spss', 'stata',
        'sas', 'matlab', 'python', 'excel', 'graphpad'
    ]

    PREREG_PATTERNS = [
        r'pre.?registered',
        r'preregistered',
        r'clinicaltrials\.gov',
        r'osf\.io',
        r'aspredicted\.org',
        r'registered report',
        r'trial registration',
        r'isrctn',
        r'anzctr',
    ]

    # "Available on request" phrasing. Compiled once at class level so the
    # detector does not depend on per-instance state.
    _ON_REQUEST_RE = re.compile(
        r'data.*available.*upon.*request|'
        r'data.*available.*on.*request|'
        r'available.*from.*corresponding.*author',
        re.IGNORECASE,
    )

    def __init__(self):
        """Compile every pattern list once so repeated scans reuse them."""
        self._code_re = [re.compile(p, re.IGNORECASE) for p in self.CODE_PATTERNS]
        self._data_re = [re.compile(p, re.IGNORECASE) for p in self.DATA_PATTERNS]
        self._sw_re = [re.compile(p, re.IGNORECASE) for p in self.SOFTWARE_PATTERNS]
        self._prereg_re = [re.compile(p, re.IGNORECASE) for p in self.PREREG_PATTERNS]
        # A package name must not be glued to a preceding letter/digit or a
        # following letter: "excellent", "disaster" and "other software" no
        # longer count, while "Stata15" or "SPSS," still do.
        names = '|'.join(re.escape(name) for name in self.STAT_SOFTWARE)
        self._stat_sw_re = re.compile(
            r'(?<![A-Za-z0-9])(?:' + names + r')(?![A-Za-z])',
            re.IGNORECASE,
        )

    # ── public method ─────────────────────────────────────────

    def analyze(self, text: str) -> "ReproducibilityResult":
        """
        Full reproducibility scan.

        Args:
            text: Raw paper text.

        Returns:
            A ReproducibilityResult describing what is present, which
            gaps were flagged, the reproducibility score and risk level.
        """
        lowered = text.lower()

        # presence checks
        has_code = self._check_patterns(text, self._code_re)
        has_data = self._check_patterns(text, self._data_re)
        has_sw_version = self._check_patterns(text, self._sw_re)
        has_stat_sw = self._has_statistical_software(text)
        has_prereg = self._check_patterns(text, self._prereg_re)
        has_ethics = self._has_ethics_statement(lowered)
        has_conflict = self._has_conflict_statement(lowered)
        has_n_justify = self._has_sample_size_justification(lowered)

        # build flags for what is missing
        flags = []
        flags.extend(self._flag_missing_code(has_code, lowered))
        flags.extend(self._flag_missing_data(has_data, lowered))
        flags.extend(self._flag_missing_software(has_sw_version, has_stat_sw, lowered))
        flags.extend(self._flag_missing_prereg(has_prereg, lowered))
        flags.extend(self._flag_missing_ethics(has_ethics, lowered))
        flags.extend(self._flag_data_on_request(text))

        # reproducibility score: fraction of key items present
        checklist = [
            has_code, has_data, has_sw_version,
            has_stat_sw, has_prereg, has_ethics,
            has_conflict, has_n_justify,
        ]
        repro_score = sum(checklist) / len(checklist)

        # risk is the inverse of reproducibility
        risk_score = round(1.0 - repro_score, 3)
        risk_level = self._get_risk_level(risk_score)

        return ReproducibilityResult(
            has_code_link=has_code,
            has_data_link=has_data,
            has_software_versions=has_sw_version,
            has_statistical_software=has_stat_sw,
            has_preregistration=has_prereg,
            has_ethics_statement=has_ethics,
            has_conflict_statement=has_conflict,
            has_sample_size_justification=has_n_justify,
            reproducibility_score=round(repro_score, 3),
            flags=flags,
            risk_level=risk_level,
            summary=self._write_summary(
                repro_score, risk_level, flags,
                has_code, has_data
            ),
        )

    # ── presence detectors ────────────────────────────────────

    def _check_patterns(self, text: str, patterns: list) -> bool:
        """Return True if any compiled pattern matches somewhere in text."""
        return any(p.search(text) for p in patterns)

    def _has_statistical_software(self, text: str) -> bool:
        """Return True if text names a STAT_SOFTWARE package as its own word."""
        return self._stat_sw_re.search(text) is not None

    @staticmethod
    def _mentions_any(text: str, terms: list) -> bool:
        """Return True if any term occurs in text (plain substring test).

        Substring matching is deliberate here: it lets a stem such as
        'experiment' also catch 'experiments' and 'experimental'.
        """
        return any(term in text for term in terms)

    def _has_ethics_statement(self, text: str) -> bool:
        """Return True if lower-cased text contains an ethics-approval phrase."""
        markers = [
            'ethics committee', 'institutional review board',
            'irb approval', 'ethics approval', 'ethical approval',
            'helsinki declaration', 'informed consent',
            'ethical clearance', 'ethics board'
        ]
        return self._mentions_any(text, markers)

    def _has_conflict_statement(self, text: str) -> bool:
        """Return True if lower-cased text contains a conflict/funding phrase."""
        markers = [
            'conflict of interest', 'competing interest',
            'no conflict', 'declare no', 'disclose',
            'funding source', 'financial disclosure'
        ]
        return self._mentions_any(text, markers)

    def _has_sample_size_justification(self, text: str) -> bool:
        """Return True if lower-cased text mentions a power/sample-size analysis."""
        markers = [
            'power analysis', 'sample size calculation',
            'power calculation', 'statistical power',
            'a priori power', 'effect size calculation',
            'g*power', 'gpower'
        ]
        return self._mentions_any(text, markers)

    # ── flag generators ───────────────────────────────────────

    def _flag_missing_code(self, has_code: bool, text: str) -> list:
        """
        Code absence is critical for computational papers.
        We detect if the paper is computational first.

        Returns a list with one high-severity flag, or an empty list.
        """
        is_computational = self._mentions_any(text, [
            'algorithm', 'code', 'software', 'script',
            'simulation', 'model', 'neural network',
            'machine learning', 'deep learning'
        ])
        if has_code or not is_computational:
            return []
        return [ReproducibilityFlag(
            flag_type="missing_code_availability",
            severity="high",
            description=(
                "Computational study does not provide a link to "
                "source code or analysis scripts. Independent "
                "replication is not possible without this."
            ),
            evidence="Computational methods detected — no code link found",
            suggestion=(
                "Deposit code on GitHub/GitLab/Zenodo and include "
                "the URL in a 'Code Availability' section."
            ),
        )]

    def _flag_missing_data(self, has_data: bool, text: str) -> list:
        """Flag an empirical paper that has no data-availability signal.

        Returns a list with one high-severity flag, or an empty list.
        """
        has_empirical = self._mentions_any(text, [
            'dataset', 'data', 'sample', 'participants',
            'measurements', 'observations', 'collected'
        ])
        if has_data or not has_empirical:
            return []
        return [ReproducibilityFlag(
            flag_type="missing_data_availability",
            severity="high",
            description=(
                "Empirical study does not specify where raw data "
                "can be accessed. Results cannot be independently verified."
            ),
            evidence="Empirical data detected — no data availability statement found",
            suggestion=(
                "Deposit raw data in a repository (OSF, Zenodo, Dryad, "
                "Harvard Dataverse) and include a Data Availability statement."
            ),
        )]

    def _flag_missing_software(
        self, has_versions: bool, has_sw: bool, text: str
    ) -> list:
        """Flag a quantitative paper that reports no software version.

        Args:
            has_versions: Whether a versioned software mention was found.
            has_sw: Whether a statistical package was named at all; used
                only to word the description accurately.
            text: Lower-cased paper text.

        Returns a list with one medium-severity flag, or an empty list.
        """
        is_quantitative = self._mentions_any(text, [
            'statistical', 'analysis', 'test', 'regression',
            'anova', 'correlation', 't-test', 'chi-square'
        ])
        if has_versions or not is_quantitative:
            return []

        if has_sw:
            # The package is named, so only the version is missing.
            description = (
                "Statistical software is named but its version number is "
                "not reported. Results may not replicate "
                "across different software versions."
            )
        else:
            description = (
                "Statistical analysis performed but software name and "
                "version number not reported. Results may not replicate "
                "across different software versions."
            )
        return [ReproducibilityFlag(
            flag_type="missing_software_versions",
            severity="medium",
            description=description,
            evidence="Statistical analysis detected — no software version found",
            suggestion=(
                "Specify the exact software and version used "
                "(e.g., 'R version 4.3.1', 'Python 3.10.12 with "
                "scikit-learn 1.3.0')."
            ),
        )]

    def _flag_missing_prereg(self, has_prereg: bool, text: str) -> list:
        """Flag an experimental/clinical paper with no preregistration signal.

        Returns a list with one medium-severity flag, or an empty list.
        """
        is_clinical_or_experimental = self._mentions_any(text, [
            'clinical trial', 'randomized', 'experiment',
            'intervention', 'treatment', 'placebo',
            'hypothesis', 'we predicted', 'we hypothesized'
        ])
        if has_prereg or not is_clinical_or_experimental:
            return []
        return [ReproducibilityFlag(
            flag_type="missing_preregistration",
            severity="medium",
            description=(
                "Experimental or clinical study with no preregistration "
                "detected. Without preregistration, it is difficult to "
                "distinguish confirmatory from exploratory analyses."
            ),
            evidence="Experimental design detected — no preregistration link",
            suggestion=(
                "For future studies, preregister hypotheses on OSF "
                "(osf.io) or ClinicalTrials.gov before data collection."
            ),
        )]

    def _flag_missing_ethics(self, has_ethics: bool, text: str) -> list:
        """Flag a human-participants paper with no ethics statement.

        Returns a list with one high-severity flag, or an empty list.
        """
        involves_humans = self._mentions_any(text, [
            'participants', 'subjects', 'patients', 'volunteers',
            'respondents', 'human', 'children', 'adults'
        ])
        if has_ethics or not involves_humans:
            return []
        return [ReproducibilityFlag(
            flag_type="missing_ethics_statement",
            severity="high",
            description=(
                "Human participants study with no ethics approval "
                "or IRB statement detected. This is required by "
                "most journals and funding bodies."
            ),
            evidence="Human participants detected — no ethics statement found",
            suggestion=(
                "Include an Ethics Statement specifying the approving "
                "body, protocol number, and that informed consent was obtained."
            ),
        )]

    def _flag_data_on_request(self, text: str) -> list:
        """
        'Data available upon request' is widely considered
        a reproducibility red flag — studies show that
        most such requests are never fulfilled.

        Returns a list with one medium-severity flag, or an empty list.
        """
        if not self._ON_REQUEST_RE.search(text):
            return []
        return [ReproducibilityFlag(
            flag_type="data_available_on_request",
            severity="medium",
            description=(
                "'Data available upon request' is a reproducibility "
                "risk. Research shows that over 80% of such requests "
                "go unfulfilled or receive no response."
            ),
            evidence="'Data available upon request' language detected",
            suggestion=(
                "Deposit data in a public repository instead. "
                "This increases citation rates and research trust."
            ),
        )]

    # ── scoring ───────────────────────────────────────────────

    def _get_risk_level(self, risk_score: float) -> str:
        """Map a 0..1 risk score to "critical", "high", "medium" or "low"."""
        if risk_score >= 0.7:
            return "critical"
        if risk_score >= 0.4:
            return "high"
        if risk_score >= 0.2:
            return "medium"
        return "low"

    def _write_summary(
        self,
        repro_score: float,
        risk_level: str,
        flags: list,
        has_code: bool,
        has_data: bool,
    ) -> str:
        """Compose the one-paragraph summary of a scan.

        Args:
            repro_score: Fraction (0..1) of checklist items present.
            risk_level: Label produced by _get_risk_level.
            flags: Flags raised; each must expose a ``severity`` attribute.
            has_code: Whether a code-availability signal was found.
            has_data: Whether a data-availability signal was found.

        Returns:
            The summary string. "All key reproducibility indicators
            detected." is only claimed when the score is actually 100%.
        """
        pct = round(repro_score * 100)
        header = f"Reproducibility score: {pct}%. "

        if not flags:
            if repro_score >= 1.0:
                return header + "All key reproducibility indicators detected."
            # No flag fired (no trigger words), yet indicators are absent:
            # say so instead of reporting a clean bill of health.
            missing = []
            if not has_code:
                missing.append("code")
            if not has_data:
                missing.append("data")
            detail = (
                f" (missing availability statements: {', '.join(missing)})"
                if missing else ""
            )
            return (
                header
                + "No specific gaps were flagged, but not every "
                + f"reproducibility indicator was detected{detail}. "
                + f"Risk level: {risk_level.upper()}."
            )

        high = sum(1 for f in flags if f.severity == "high")
        med = sum(1 for f in flags if f.severity == "medium")
        parts = []
        if high:
            parts.append(f"{high} critical gap{'s' if high > 1 else ''}")
        if med:
            parts.append(f"{med} concern{'s' if med > 1 else ''}")
        if not parts:
            # Flags with an unrecognised severity: still report a count
            # rather than an empty "Flagged ." sentence.
            total = len(flags)
            parts.append(f"{total} issue{'s' if total > 1 else ''}")

        return (
            header
            + f"Flagged {', '.join(parts)}. "
            + f"Risk level: {risk_level.upper()}."
        )