Spaces:

Abu-Sameer-66
/

SciPeerAI-API

Sleeping

SciPeerAI-API / src /scipeerai /modules /methodology_checker.py

Abu-Sameer-66

feat: citation integrity analyzer — self-citation, unsupported claims, density — 27 total tests

c4139c8 about 2 months ago

18.6 kB

	# Methodology Logic Checker
	# -------------------------
	# The most subtle form of scientific fraud is not
	# fabricating data — it is making claims that your
	# method cannot actually support.
	#
	# "Correlation does not imply causation" is the
	# famous example. But there are dozens of variations:
	# underpowered claims, wrong timeframes, missing
	# controls, self-report data for clinical conclusions.
	#
	# This module catches them — using both rule-based
	# pattern matching and LLM-powered reasoning.

	import re
	import os
	import json
	import urllib.request
	import urllib.error
	from dataclasses import dataclass
	from dotenv import load_dotenv

	load_dotenv()


	# ── data structures ───────────────────────────────────────────

	@dataclass
	class MethodologyFlag:
	    """One methodology concern raised by a rule-based check."""

	    # machine-readable identifier of the rule that fired
	    flag_type: str
	    # severity label; the checker's scoring looks this up in its weight table
	    severity: str
	    # short statement of the claim being questioned
	    claim: str
	    # explanation of why the claim is not supported
	    issue: str
	    # text (or description) that triggered the flag
	    evidence: str
	    # recommended remedy for the authors
	    suggestion: str


	@dataclass
	class MethodologyResult:
	    """Aggregate outcome of one methodology analysis run."""

	    # claim-like sentences extracted from the text
	    claims_found: list
	    # method-describing sentences extracted from the text
	    methods_found: list
	    # rule-based flags that fired
	    flags: list
	    # free-text assessment from the LLM, or a fallback message
	    llm_assessment: str
	    # whether the LLM call produced an assessment
	    llm_available: bool
	    # numeric risk score derived from the flags
	    risk_score: float
	    # risk bucket label derived from risk_score
	    risk_level: str
	    # one-paragraph human-readable summary
	    summary: str


	# ── main class ────────────────────────────────────────────────

	class MethodologyChecker:
	    """
	    Two-layer methodology analysis:

	    Layer 1 — Rule-based: fast, deterministic, catches
	    known patterns (causation language, tiny samples,
	    missing controls, short timeframes for long-term claims)

	    Layer 2 — LLM reasoning: slower, probabilistic,
	    catches subtle logical gaps that rules miss.
	    Falls back gracefully if API unavailable.

	    All keyword lookups are case-insensitive and anchored at the
	    start of a word, so "rct" does not fire on "infarction" and
	    "2 weeks" does not fire on "12 weeks", while plural and
	    inflected forms ("surveys", "drugs") still match.
	    """

	    # words that claim causation — need RCT to justify
	    CAUSATION_WORDS = [
	        "causes", "caused by", "leads to", "results in",
	        "produces", "induces", "drives", "responsible for",
	        "due to", "because of", "proves that", "demonstrates that",
	    ]

	    # words that only justify correlation
	    # (kept for callers; not referenced by the checks in this class)
	    CORRELATION_WORDS = [
	        "associated with", "correlated", "linked to",
	        "related to", "predicts", "suggests",
	    ]

	    # study designs that cannot prove causation
	    WEAK_DESIGNS = [
	        "survey", "questionnaire", "self-report", "cross-sectional",
	        "retrospective", "observational", "case study", "anecdotal",
	    ]

	    # long-term claims need long-term studies
	    LONGTERM_CLAIMS = [
	        "long-term", "chronic", "sustained", "permanent",
	        "lasting", "durable", "years", "lifetime",
	    ]

	    # phrases that signal a conclusion or finding
	    _CLAIM_MARKERS = [
	        "we found", "we show", "we demonstrate", "we conclude",
	        "our results", "our findings", "this study shows",
	        "this study demonstrates", "results indicate",
	        "results suggest", "data show", "analysis reveals",
	        "we report", "evidence suggests", "we establish",
	    ]

	    # phrases that signal a description of how the study was run
	    _METHOD_MARKERS = [
	        "we used", "we conducted", "we recruited", "we collected",
	        "we measured", "we analyzed", "participants were",
	        "subjects were", "samples were", "data were collected",
	        "randomized", "controlled", "double-blind", "survey",
	        "questionnaire", "interview", "experiment",
	    ]

	    # designs that CAN support causal claims
	    _RCT_MARKERS = [
	        "randomized", "randomised", "rct",
	        "control group", "placebo", "double-blind",
	    ]

	    _SELF_REPORT_MARKERS = [
	        "self-report", "self report", "questionnaire",
	        "survey", "interview", "participants reported",
	    ]

	    _STRONG_CLAIM_MARKERS = [
	        "proves", "demonstrates", "establishes",
	        "confirms", "validates", "clinical evidence",
	    ]

	    _SHORT_DURATION_MARKERS = [
	        "two weeks", "2 weeks", "one week", "1 week",
	        "3 days", "7 days", "short-term pilot",
	        "preliminary study",
	    ]

	    _TREATMENT_MARKERS = [
	        "treatment", "intervention", "drug", "therapy",
	        "program", "training",
	    ]

	    _EFFECT_MARKERS = [
	        "improved", "reduced", "increased", "effective",
	        "significant effect",
	    ]

	    # "controlled trial" / "control arm" added so that an RCT described
	    # without the literal words "control group" is not flagged
	    _CONTROL_MARKERS = [
	        "control group", "control condition", "placebo",
	        "comparison group", "waitlist",
	        "controlled trial", "control arm",
	    ]

	    _BROAD_CLAIM_MARKERS = [
	        "all patients", "general population", "universally",
	        "across all", "globally applicable", "all humans",
	    ]

	    _LIMITED_SAMPLE_MARKERS = [
	        "undergraduate students", "college students",
	        "single institution", "convenience sample",
	        "homogeneous sample",
	    ]

	    # contribution of each flag severity to the risk score
	    _SEVERITY_WEIGHTS = {"high": 0.35, "medium": 0.20, "low": 0.08}

	    # sentences shorter than this are ignored during extraction
	    _MIN_SENTENCE_CHARS = 20
	    # at most this many claims / methods are returned — enough signal
	    _MAX_EXTRACTED = 8
	    # seconds to wait for the inference endpoint
	    _HF_TIMEOUT_SECONDS = 30

	    # Sentence boundary: "!", "?" or "." — except a period directly
	    # followed by a digit, which is a decimal point ("p < 0.05").
	    _SENTENCE_END = re.compile(r"[!?]|\.(?!\d)")

	    def __init__(self):
	        """Read the HuggingFace token from the environment and fix the model endpoint."""
	        self._hf_token = os.getenv("HF_API_TOKEN", "")
	        # free model on HuggingFace — good at reasoning
	        self._hf_model = "HuggingFaceH4/zephyr-7b-beta"
	        self._hf_api_url = (
	            f"https://api-inference.huggingface.co/models/{self._hf_model}"
	        )

	    # ── public method ─────────────────────────────────────────

	    def analyze(self, text: str, abstract: str = "") -> "MethodologyResult":
	        """
	        Full methodology analysis.

	        Pass full paper text. Optionally pass abstract separately
	        for cleaner claim extraction: when given, claims are taken
	        from the abstract while methods and all rule checks still
	        use the full text.

	        Returns a MethodologyResult. The risk score is computed from
	        the rule-based flags only; the LLM assessment is reported
	        alongside but does not change the score.
	        """
	        claims = self._extract_claims(abstract or text)
	        methods = self._extract_methods(text)

	        flags = []
	        flags.extend(self._check_causation_without_rct(text, claims))
	        flags.extend(self._check_weak_design_strong_claim(text, claims))
	        flags.extend(self._check_longterm_claim_shortterm_study(text, claims))
	        flags.extend(self._check_missing_control_group(text))
	        flags.extend(self._check_generalization(text, claims))

	        # try LLM reasoning — graceful fallback if unavailable
	        llm_text, llm_ok = self._llm_assess(
	            claims=claims,
	            methods=methods,
	            paper_snippet=text[:1500],
	        )

	        risk_score = self._calculate_risk(flags)
	        risk_level = self._get_risk_level(risk_score)

	        return MethodologyResult(
	            claims_found=claims,
	            methods_found=methods,
	            flags=flags,
	            llm_assessment=llm_text,
	            llm_available=llm_ok,
	            risk_score=round(risk_score, 3),
	            risk_level=risk_level,
	            summary=self._write_summary(flags, risk_level, llm_ok),
	        )

	    # ── text helpers ──────────────────────────────────────────

	    @staticmethod
	    def _terms_in(haystack: str, terms: list) -> list:
	        """
	        Return the terms that occur in ``haystack`` starting at a
	        word boundary, in the order they appear in ``terms``.
	        ``haystack`` is expected to be lower-cased already.
	        """
	        return [
	            term for term in terms
	            if re.search(r"\b" + re.escape(term), haystack)
	        ]

	    @staticmethod
	    def _mentions(haystack: str, terms: list) -> bool:
	        """True if any term occurs in ``haystack`` at a word start."""
	        return any(
	            re.search(r"\b" + re.escape(term), haystack) for term in terms
	        )

	    @classmethod
	    def _sentences(cls, text: str) -> list:
	        """Split text into stripped sentences without their end punctuation."""
	        return [piece.strip() for piece in cls._SENTENCE_END.split(text)]

	    def _sentences_matching(self, text: str, markers: list) -> list:
	        """
	        Return up to _MAX_EXTRACTED sentences of at least
	        _MIN_SENTENCE_CHARS characters that contain any marker.
	        """
	        matches = []
	        for sentence in self._sentences(text):
	            if len(sentence) < self._MIN_SENTENCE_CHARS:
	                continue
	            if self._mentions(sentence.lower(), markers):
	                matches.append(sentence)
	                if len(matches) == self._MAX_EXTRACTED:
	                    break
	        return matches

	    # ── claim / method extraction ─────────────────────────────

	    def _extract_claims(self, text: str) -> list:
	        """
	        Pull claim-like sentences — those that assert findings,
	        conclusions, or implications.
	        Looks for language that signals a conclusion.
	        Returns at most 8 sentences.
	        """
	        return self._sentences_matching(text, self._CLAIM_MARKERS)

	    def _extract_methods(self, text: str) -> list:
	        """
	        Pull sentences that describe how the study was actually
	        conducted. The whole text is scanned; no attempt is made
	        to locate a dedicated methods section.
	        Returns at most 8 sentences.
	        """
	        return self._sentences_matching(text, self._METHOD_MARKERS)

	    # ── rule-based checks ─────────────────────────────────────

	    def _check_causation_without_rct(self, text: str, claims: list) -> list:
	        """
	        Detects causal language combined with study designs
	        that cannot establish causation.
	        Classic example: observational study claiming X causes Y.

	        ``claims`` is accepted for interface symmetry but not used;
	        the full text is scanned. Returns a list with zero or one flag.
	        """
	        text_lower = text.lower()
	        causal_terms = self._terms_in(text_lower, self.CAUSATION_WORDS)
	        weak_designs = self._terms_in(text_lower, self.WEAK_DESIGNS)

	        if not causal_terms or not weak_designs:
	            return []
	        if self._mentions(text_lower, self._RCT_MARKERS):
	            return []

	        # find the actual causal sentence as evidence
	        evidence_sentence = next(
	            (
	                sentence for sentence in self._sentences(text)
	                if self._mentions(sentence.lower(), self.CAUSATION_WORDS)
	            ),
	            "",
	        )

	        return [MethodologyFlag(
	            flag_type="causation_without_rct",
	            severity="high",
	            claim="Causal language detected in conclusions",
	            issue=(
	                "The study uses causal language "
	                f"({', '.join(causal_terms)}) "
	                "but the study design "
	                f"({', '.join(weak_designs)}) "
	                "cannot establish causation."
	            ),
	            evidence=evidence_sentence or "See causal language in conclusions",
	            suggestion=(
	                "Causal claims require randomized controlled trials. "
	                "Replace causal language with correlation language, "
	                "or acknowledge the design limitation explicitly."
	            ),
	        )]

	    def _check_weak_design_strong_claim(
	        self, text: str, claims: list
	    ) -> list:
	        """
	        Self-report surveys and questionnaires cannot support
	        strong clinical or behavioral conclusions.

	        ``claims`` is accepted but not used. Returns zero or one flag.
	        """
	        text_lower = text.lower()
	        uses_self_report = self._mentions(text_lower, self._SELF_REPORT_MARKERS)
	        makes_strong_claim = self._mentions(text_lower, self._STRONG_CLAIM_MARKERS)

	        if not (uses_self_report and makes_strong_claim):
	            return []

	        return [MethodologyFlag(
	            flag_type="weak_design_strong_claim",
	            severity="medium",
	            claim="Strong claim based on self-report data",
	            issue=(
	                "Self-report or questionnaire data has known limitations "
	                "(social desirability bias, recall bias) that undermine "
	                "strong conclusive claims."
	            ),
	            evidence="Self-report instrument combined with conclusive language",
	            suggestion=(
	                "Acknowledge self-report limitations explicitly. "
	                "Soften conclusions to match data quality."
	            ),
	        )]

	    def _check_longterm_claim_shortterm_study(
	        self, text: str, claims: list
	    ) -> list:
	        """
	        Studies lasting days or weeks cannot make
	        long-term or chronic effect claims.

	        ``claims`` is accepted but not used. Returns zero or one flag.
	        """
	        text_lower = text.lower()
	        claims_long_term = self._mentions(text_lower, self.LONGTERM_CLAIMS)
	        ran_short_term = self._mentions(text_lower, self._SHORT_DURATION_MARKERS)

	        if not (claims_long_term and ran_short_term):
	            return []

	        return [MethodologyFlag(
	            flag_type="timeframe_mismatch",
	            severity="medium",
	            claim="Long-term claim from short-term study",
	            issue=(
	                "The study duration appears insufficient to support "
	                "long-term or chronic effect claims."
	            ),
	            evidence="Long-term language with short study duration",
	            suggestion=(
	                "Either extend the study duration or explicitly "
	                "limit claims to short-term effects only."
	            ),
	        )]

	    def _check_missing_control_group(self, text: str) -> list:
	        """
	        Studies measuring treatment effects without a
	        control group cannot isolate the treatment's impact.

	        Returns zero or one flag.
	        """
	        text_lower = text.lower()
	        has_treatment = self._mentions(text_lower, self._TREATMENT_MARKERS)
	        has_effect_claim = self._mentions(text_lower, self._EFFECT_MARKERS)
	        has_control = self._mentions(text_lower, self._CONTROL_MARKERS)

	        if not (has_treatment and has_effect_claim) or has_control:
	            return []

	        return [MethodologyFlag(
	            flag_type="missing_control_group",
	            severity="high",
	            claim="Treatment effect claimed without control group",
	            issue=(
	                "Effect claims for a treatment or intervention "
	                "require a control group to rule out confounds, "
	                "placebo effects, and natural recovery."
	            ),
	            evidence="Treatment + effect language with no control group mention",
	            suggestion=(
	                "Add a control/comparison condition, or acknowledge "
	                "that without a control group, the effect cannot be "
	                "attributed to the intervention specifically."
	            ),
	        )]

	    def _check_generalization(self, text: str, claims: list) -> list:
	        """
	        Small, homogeneous samples cannot support
	        broad population-level generalizations.

	        ``claims`` is accepted but not used. Returns zero or one flag.
	        """
	        text_lower = text.lower()
	        claims_broadly = self._mentions(text_lower, self._BROAD_CLAIM_MARKERS)
	        sample_is_limited = self._mentions(text_lower, self._LIMITED_SAMPLE_MARKERS)

	        if not (claims_broadly and sample_is_limited):
	            return []

	        return [MethodologyFlag(
	            flag_type="overgeneralization",
	            severity="medium",
	            claim="Broad generalization from limited sample",
	            issue=(
	                "The sample characteristics (e.g., undergraduate students, "
	                "single institution) limit generalizability beyond "
	                "the studied population."
	            ),
	            evidence="Broad claim language with limited sample description",
	            suggestion=(
	                "Explicitly acknowledge sampling limitations "
	                "and restrict claims to the studied population."
	            ),
	        )]

	    # ── llm reasoning ─────────────────────────────────────────

	    def _llm_assess(
	        self, claims: list, methods: list, paper_snippet: str
	    ) -> tuple:
	        """
	        Ask an LLM to reason about whether the methods
	        logically support the claims.

	        Only the first three claims, the first three methods and the
	        first 800 characters of ``paper_snippet`` are sent.

	        Returns (assessment_text, success_bool).
	        Falls back gracefully if token missing or API down: every
	        failure is reported as a message with success_bool False,
	        never as an exception.
	        """
	        if not self._hf_token or self._hf_token == "hf_xxxxxxxxxxxxxxxx":
	            return (
	                "LLM assessment unavailable — HF_API_TOKEN not configured.",
	                False,
	            )

	        claims_text = " | ".join(claims[:3]) if claims else "Not extracted"
	        methods_text = " | ".join(methods[:3]) if methods else "Not extracted"

	        # Built from explicit newline-terminated pieces so the prompt
	        # text does not depend on how this source file is indented.
	        prompt = (
	            "[INST] You are a scientific peer reviewer.\n"
	            "\n"
	            "Paper excerpt:\n"
	            f"{paper_snippet[:800]}\n"
	            "\n"
	            f"Claims made: {claims_text}\n"
	            f"Methods used: {methods_text}\n"
	            "\n"
	            "In 2-3 sentences, identify the most critical logical gap "
	            "between the methods and claims. Be specific and direct. [/INST]"
	        )

	        payload = json.dumps({
	            "inputs": prompt,
	            "parameters": {
	                "max_new_tokens": 150,
	                "temperature": 0.3,
	                "return_full_text": False,
	            },
	        }).encode("utf-8")

	        request = urllib.request.Request(
	            self._hf_api_url,
	            data=payload,
	            headers={
	                "Authorization": f"Bearer {self._hf_token}",
	                "Content-Type": "application/json",
	            },
	            method="POST",
	        )

	        try:
	            with urllib.request.urlopen(
	                request, timeout=self._HF_TIMEOUT_SECONDS
	            ) as response:
	                result = json.loads(response.read().decode("utf-8"))
	        except urllib.error.HTTPError as e:
	            if e.code == 503:
	                return ("LLM model loading — try again in 20 seconds.", False)
	            return (f"API error {e.code}: {e}", False)
	        except Exception as e:
	            # deliberate catch-all: this layer is best-effort and must
	            # never break the rule-based analysis
	            return (f"LLM unavailable: {e}", False)

	        # expected shape: a non-empty list whose first item is a dict
	        # carrying "generated_text"
	        if isinstance(result, list) and result and isinstance(result[0], dict):
	            generated = str(result[0].get("generated_text") or "").strip()
	            if generated:
	                return (generated, True)
	            return ("No assessment generated.", False)

	        return ("Unexpected API response format.", False)

	    # ── scoring ───────────────────────────────────────────────

	    def _calculate_risk(self, flags: list) -> float:
	        """
	        Sum the per-severity weights of all flags, capped at 1.0.
	        Flags with an unrecognised severity contribute nothing.
	        """
	        score = sum(self._SEVERITY_WEIGHTS.get(f.severity, 0) for f in flags)
	        return min(score, 1.0)

	    def _get_risk_level(self, score: float) -> str:
	        """Map a risk score to "critical", "high", "medium" or "low"."""
	        if score >= 0.7:
	            return "critical"
	        if score >= 0.4:
	            return "high"
	        if score >= 0.2:
	            return "medium"
	        return "low"

	    def _write_summary(
	        self, flags: list, risk_level: str, llm_ok: bool
	    ) -> str:
	        """
	        Compose the one-line summary: flag counts by severity, the
	        upper-cased risk level, and whether LLM reasoning was included.
	        """
	        if not flags:
	            base = "No methodology logic issues detected."
	        else:
	            high = sum(1 for f in flags if f.severity == "high")
	            med = sum(1 for f in flags if f.severity == "medium")
	            low = sum(1 for f in flags if f.severity == "low")
	            parts = []
	            if high:
	                parts.append(f"{high} high-severity issue{'s' if high > 1 else ''}")
	            if med:
	                parts.append(f"{med} medium-severity concern{'s' if med > 1 else ''}")
	            if low:
	                parts.append(f"{low} low-severity note{'s' if low > 1 else ''}")
	            if not parts:
	                # only unrecognised severities — still report a count
	                total = len(flags)
	                parts.append(f"{total} issue{'s' if total > 1 else ''}")
	            base = (
	                f"Methodology checker flagged {', '.join(parts)}. "
	                f"Risk level: {risk_level.upper()}."
	            )

	        llm_note = " LLM reasoning included." if llm_ok else " LLM reasoning unavailable."
	        return base + llm_note