File size: 15,446 Bytes
b5c2bb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# src/scipeerai/modules/reproducibility_scanner.py
#
# Reproducibility Scanner
# -----------------------
# The reproducibility crisis exists largely because
# researchers cannot access the code, data, and exact
# methods used in published papers.
#
# This module scans paper text for reproducibility
# signals — what is present and what is critically
# missing for independent replication.

import re
from dataclasses import dataclass, field


# ── data structures ───────────────────────────────────────────

@dataclass
class ReproducibilityFlag:
    """One reproducibility concern raised by the scanner."""

    # machine-readable identifier, e.g. "missing_code_availability"
    flag_type: str
    # the scanner emits "high" or "medium"
    severity: str
    # human-readable explanation of why this matters
    description: str
    # short note on what in the text triggered the flag
    evidence: str
    # recommended remedy for the authors
    suggestion: str


@dataclass
class ReproducibilityResult:
    """Outcome of a reproducibility scan: presence booleans plus scoring."""

    # what was found (True = at least one matching signal in the text)
    has_code_link: bool
    has_data_link: bool
    has_software_versions: bool
    has_statistical_software: bool
    has_preregistration: bool
    has_ethics_statement: bool
    has_conflict_statement: bool
    has_sample_size_justification: bool

    # scoring
    reproducibility_score: float   # 0.0 = not reproducible, 1.0 = fully
    flags: list                    # ReproducibilityFlag items, one per detected gap
    risk_level: str                # "critical" | "high" | "medium" | "low"
    summary: str                   # one-paragraph human-readable recap


# ── main class ────────────────────────────────────────────────

class ReproducibilityScanner:
    """
    Scans paper text for reproducibility indicators.

    Two layers:
    1. Presence checks — what good papers SHOULD have
    2. Absence flags — what is missing and how serious

    Scoring is inverted from other modules:
    HIGH reproducibility score = LOW risk.
    We report both for clarity.

    All detection is heuristic: regular expressions and keyword
    lookups over the paper text. Nothing is fetched or verified.
    """

    # code/data sharing signals
    CODE_PATTERNS = [
        r'github\.com/\S+',
        r'gitlab\.com/\S+',
        r'bitbucket\.org/\S+',
        r'code.*available.*at',
        r'code.*provided.*at',
        r'source code.*available',
        r'scripts.*available',
        r'zenodo\.org/\S+',
        r'osf\.io/\S+',
        r'code ocean',
        r'figshare\.com/\S+',
    ]

    DATA_PATTERNS = [
        r'data.*available.*at',
        r'dataset.*available',
        r'data.*deposited',
        r'data.*repository',
        r'data.*doi',
        r'supplementary data',
        r'data.*provided',
        r'open data',
        r'zenodo\.org/\S+',
        r'osf\.io/\S+',
        r'dryad',
        r'figshare',
        r'harvard dataverse',
        r'data.*upon.*request',  # weaker — noted separately
    ]

    # Software name followed by a version number. The leading \b keeps
    # e.g. "for version 2" from being read as "R version 2".
    SOFTWARE_PATTERNS = [
        r'\br\s+version\s+\d',
        r'\bpython\s+\d+\.\d+',
        r'\bspss\s+version',
        r'\bstata\s+\d+',
        r'\bmatlab\s+r\d+',
        r'\bsas\s+version',
        r'\bscipy\s+\d',
        r'\bnumpy\s+\d',
        r'\bsklearn\s+\d',
        r'\bscikit-learn\s+\d',
        r'\btensorflow\s+\d',
        r'\bpytorch\s+\d',
    ]

    # Matched as whole words/phrases (see __init__), case-insensitively.
    STAT_SOFTWARE = [
        'r software', 'rstudio', 'spss', 'stata',
        'sas', 'matlab', 'python', 'excel', 'graphpad'
    ]

    PREREG_PATTERNS = [
        # "preregistered", "pre-registered", "preregistration", ...
        r'pre.?regist(?:ered|ration)',
        r'clinicaltrials\.gov',
        r'osf\.io',
        r'aspredicted\.org',
        r'registered report',
        r'trial registration',
        r'isrctn',
        r'anzctr',
    ]

    # Wording that offers data only via personal request.
    ON_REQUEST_PATTERN = (
        r'data.*available.*upon.*request|'
        r'data.*available.*on.*request|'
        r'available.*from.*corresponding.*author'
    )

    def __init__(self):
        """Compile every pattern once so repeated scans stay cheap."""
        self._code_re = self._compile_all(self.CODE_PATTERNS)
        self._data_re = self._compile_all(self.DATA_PATTERNS)
        self._sw_re = self._compile_all(self.SOFTWARE_PATTERNS)
        self._prereg_re = self._compile_all(self.PREREG_PATTERNS)
        self._on_request_re = re.compile(self.ON_REQUEST_PATTERN, re.IGNORECASE)

        # Word-bounded alternation so "excellent" does not count as Excel
        # and "Kansas" does not count as SAS. An empty list must never
        # match, hence the None sentinel instead of an empty group.
        if self.STAT_SOFTWARE:
            names = '|'.join(re.escape(name) for name in self.STAT_SOFTWARE)
            self._stat_sw_re = re.compile(
                r'\b(?:' + names + r')\b', re.IGNORECASE
            )
        else:
            self._stat_sw_re = None

    @staticmethod
    def _compile_all(patterns) -> list:
        """Compile an iterable of regex strings case-insensitively."""
        return [re.compile(p, re.IGNORECASE) for p in patterns]

    # ── public method ─────────────────────────────────────────

    def analyze(self, text: str) -> "ReproducibilityResult":
        """
        Full reproducibility scan.

        Args:
            text: raw paper text. ``None`` or an empty string is
                treated as an empty paper.

        Returns:
            ReproducibilityResult with one boolean per checklist item,
            the fraction of items present (``reproducibility_score``),
            the generated flags, a risk level and a summary string.
        """
        text = text or ""
        t = text.lower()

        # presence checks (regexes are case-insensitive, so they get
        # the original text; keyword lookups get the lowercased copy)
        has_code = self._check_patterns(text, self._code_re)
        has_data = self._check_patterns(text, self._data_re)
        has_sw_version = self._check_patterns(text, self._sw_re)
        has_stat_sw = self._mentions_stat_software(text)
        has_prereg = self._check_patterns(text, self._prereg_re)
        has_ethics = self._has_ethics_statement(t)
        has_conflict = self._has_conflict_statement(t)
        has_n_justify = self._has_sample_size_justification(t)

        # build flags for what is missing
        flags = []
        flags.extend(self._flag_missing_code(has_code, t))
        flags.extend(self._flag_missing_data(has_data, t))
        flags.extend(self._flag_missing_software(has_sw_version, has_stat_sw, t))
        flags.extend(self._flag_missing_prereg(has_prereg, t))
        flags.extend(self._flag_missing_ethics(has_ethics, t))
        flags.extend(self._flag_data_on_request(text))

        # reproducibility score: fraction of key items present
        checklist = [
            has_code, has_data, has_sw_version,
            has_stat_sw, has_prereg, has_ethics,
            has_conflict, has_n_justify,
        ]
        repro_score = sum(checklist) / len(checklist)

        # risk is inverse of reproducibility
        risk_score = round(1.0 - repro_score, 3)
        risk_level = self._get_risk_level(risk_score)

        return ReproducibilityResult(
            has_code_link=has_code,
            has_data_link=has_data,
            has_software_versions=has_sw_version,
            has_statistical_software=has_stat_sw,
            has_preregistration=has_prereg,
            has_ethics_statement=has_ethics,
            has_conflict_statement=has_conflict,
            has_sample_size_justification=has_n_justify,
            reproducibility_score=round(repro_score, 3),
            flags=flags,
            risk_level=risk_level,
            summary=self._write_summary(
                repro_score, risk_level, flags,
                has_code, has_data
            ),
        )

    # ── presence detectors ────────────────────────────────────

    def _check_patterns(self, text: str, patterns: list) -> bool:
        """Return True if any compiled pattern matches somewhere in text."""
        return any(p.search(text) for p in patterns)

    def _mentions_stat_software(self, text: str) -> bool:
        """Return True if a STAT_SOFTWARE name occurs as a whole word."""
        if self._stat_sw_re is None:
            return False
        return self._stat_sw_re.search(text) is not None

    def _has_ethics_statement(self, text: str) -> bool:
        """Return True if lowercased text contains an ethics-approval marker."""
        markers = [
            'ethics committee', 'institutional review board',
            'irb approval', 'ethics approval', 'ethical approval',
            'helsinki declaration', 'declaration of helsinki',
            'informed consent',
            'ethical clearance', 'ethics board'
        ]
        return any(m in text for m in markers)

    def _has_conflict_statement(self, text: str) -> bool:
        """Return True if lowercased text contains a disclosure marker."""
        markers = [
            'conflict of interest', 'competing interest',
            'no conflict', 'declare no', 'disclose',
            'funding source', 'financial disclosure'
        ]
        return any(m in text for m in markers)

    def _has_sample_size_justification(self, text: str) -> bool:
        """Return True if lowercased text mentions a power/sample-size analysis."""
        markers = [
            'power analysis', 'sample size calculation',
            'power calculation', 'statistical power',
            'a priori power', 'effect size calculation',
            'g*power', 'gpower'
        ]
        return any(m in text for m in markers)

    # ── flag generators ───────────────────────────────────────
    # Each generator takes the lowercased text (except the on-request
    # check, whose regex is case-insensitive) and returns a list holding
    # zero or one ReproducibilityFlag.

    def _flag_missing_code(self, has_code: bool, text: str) -> "list[ReproducibilityFlag]":
        """
        Code absence is critical for computational papers.
        We detect if the paper is computational first.
        """
        flags = []
        is_computational = any(w in text for w in [
            'algorithm', 'code', 'software', 'script',
            'simulation', 'model', 'neural network',
            'machine learning', 'deep learning'
        ])

        if is_computational and not has_code:
            flags.append(ReproducibilityFlag(
                flag_type="missing_code_availability",
                severity="high",
                description=(
                    "Computational study does not provide a link to "
                    "source code or analysis scripts. Independent "
                    "replication is not possible without this."
                ),
                evidence="Computational methods detected — no code link found",
                suggestion=(
                    "Deposit code on GitHub/GitLab/Zenodo and include "
                    "the URL in a 'Code Availability' section."
                ),
            ))
        return flags

    def _flag_missing_data(self, has_data: bool, text: str) -> "list[ReproducibilityFlag]":
        """Flag text with empirical-data keywords but no data availability signal."""
        flags = []
        has_empirical = any(w in text for w in [
            'dataset', 'data', 'sample', 'participants',
            'measurements', 'observations', 'collected'
        ])

        if has_empirical and not has_data:
            flags.append(ReproducibilityFlag(
                flag_type="missing_data_availability",
                severity="high",
                description=(
                    "Empirical study does not specify where raw data "
                    "can be accessed. Results cannot be independently verified."
                ),
                evidence="Empirical data detected — no data availability statement found",
                suggestion=(
                    "Deposit raw data in a repository (OSF, Zenodo, Dryad, "
                    "Harvard Dataverse) and include a Data Availability statement."
                ),
            ))
        return flags

    def _flag_missing_software(
        self, has_versions: bool, has_sw: bool, text: str
    ) -> "list[ReproducibilityFlag]":
        """
        Flag quantitative text that reports no software version.

        ``has_sw`` tells whether a statistical package was at least
        named, so the wording does not claim the name is missing when
        only the version is.
        """
        flags = []
        is_quantitative = any(w in text for w in [
            'statistical', 'analysis', 'test', 'regression',
            'anova', 'correlation', 't-test', 'chi-square'
        ])

        if is_quantitative and not has_versions:
            if has_sw:
                description = (
                    "Statistical analysis performed and software named, "
                    "but its version number is not reported. Results may "
                    "not replicate across different software versions."
                )
                evidence = "Statistical software named — no version number found"
            else:
                description = (
                    "Statistical analysis performed but software name and "
                    "version number not reported. Results may not replicate "
                    "across different software versions."
                )
                evidence = "Statistical analysis detected — no software version found"

            flags.append(ReproducibilityFlag(
                flag_type="missing_software_versions",
                severity="medium",
                description=description,
                evidence=evidence,
                suggestion=(
                    "Specify the exact software and version used "
                    "(e.g., 'R version 4.3.1', 'Python 3.10.12 with "
                    "scikit-learn 1.3.0')."
                ),
            ))
        return flags

    def _flag_missing_prereg(self, has_prereg: bool, text: str) -> "list[ReproducibilityFlag]":
        """Flag text with experimental/clinical keywords but no preregistration signal."""
        flags = []
        is_clinical_or_experimental = any(w in text for w in [
            'clinical trial', 'randomized', 'experiment',
            'intervention', 'treatment', 'placebo',
            'hypothesis', 'we predicted', 'we hypothesized'
        ])

        if is_clinical_or_experimental and not has_prereg:
            flags.append(ReproducibilityFlag(
                flag_type="missing_preregistration",
                severity="medium",
                description=(
                    "Experimental or clinical study with no preregistration "
                    "detected. Without preregistration, it is difficult to "
                    "distinguish confirmatory from exploratory analyses."
                ),
                evidence="Experimental design detected — no preregistration link",
                suggestion=(
                    "For future studies, preregister hypotheses on OSF "
                    "(osf.io) or ClinicalTrials.gov before data collection."
                ),
            ))
        return flags

    def _flag_missing_ethics(self, has_ethics: bool, text: str) -> "list[ReproducibilityFlag]":
        """Flag text with human-participant keywords but no ethics marker."""
        flags = []
        involves_humans = any(w in text for w in [
            'participants', 'subjects', 'patients', 'volunteers',
            'respondents', 'human', 'children', 'adults'
        ])

        if involves_humans and not has_ethics:
            flags.append(ReproducibilityFlag(
                flag_type="missing_ethics_statement",
                severity="high",
                description=(
                    "Human participants study with no ethics approval "
                    "or IRB statement detected. This is required by "
                    "most journals and funding bodies."
                ),
                evidence="Human participants detected — no ethics statement found",
                suggestion=(
                    "Include an Ethics Statement specifying the approving "
                    "body, protocol number, and that informed consent was obtained."
                ),
            ))
        return flags

    def _flag_data_on_request(self, text: str) -> "list[ReproducibilityFlag]":
        """
        'Data available upon request' is widely considered
        a reproducibility red flag, so it is reported even
        though the same wording also counts as a (weak) data signal.
        """
        flags = []
        if self._on_request_re.search(text):
            flags.append(ReproducibilityFlag(
                flag_type="data_available_on_request",
                severity="medium",
                description=(
                    "'Data available upon request' is a reproducibility "
                    "risk. Research shows that over 80% of such requests "
                    "go unfulfilled or receive no response."
                ),
                evidence="'Data available upon request' language detected",
                suggestion=(
                    "Deposit data in a public repository instead. "
                    "This increases citation rates and research trust."
                ),
            ))
        return flags

    # ── scoring ───────────────────────────────────────────────

    def _get_risk_level(self, risk_score: float) -> str:
        """Map a risk score in [0, 1] to critical / high / medium / low."""
        if risk_score >= 0.7:
            return "critical"
        if risk_score >= 0.4:
            return "high"
        if risk_score >= 0.2:
            return "medium"
        return "low"

    def _write_summary(
        self,
        repro_score: float,
        risk_level: str,
        flags: list,
        has_code: bool,
        has_data: bool,
    ) -> str:
        """
        Build the human-readable summary line.

        The "all indicators detected" wording is used only when there
        are no flags AND the score is full; an empty flag list alone
        does not mean every checklist item was found.
        """
        pct = round(repro_score * 100)
        sentences = [f"Reproducibility score: {pct}%."]

        if not flags and repro_score >= 1.0:
            sentences.append("All key reproducibility indicators detected.")
            return " ".join(sentences)

        if flags:
            high = sum(1 for f in flags if f.severity == "high")
            med = sum(1 for f in flags if f.severity == "medium")
            other = len(flags) - high - med

            parts = []
            if high:
                parts.append(f"{high} critical gap{'s' if high > 1 else ''}")
            if med:
                parts.append(f"{med} concern{'s' if med > 1 else ''}")
            if other:
                # keeps the sentence well-formed for unexpected severities
                parts.append(f"{other} other issue{'s' if other > 1 else ''}")
            sentences.append(f"Flagged {', '.join(parts)}.")
        else:
            sentences.append(
                "No specific gaps flagged, but not every indicator was detected."
            )

        missing = []
        if not has_code:
            missing.append("code")
        if not has_data:
            missing.append("data")
        if missing:
            sentences.append(f"Not found: {' and '.join(missing)} availability.")

        sentences.append(f"Risk level: {risk_level.upper()}.")
        return " ".join(sentences)