Spaces:

SanidhyaDhangar
/

Plaiglab

Sleeping

App Files Files Community

Plaiglab / scripts /eval_typeclf.py

SanidhyaDhangar

PlaigLab — Hugging Face Space (Docker) clean deploy

ebebfe8 11 days ago

Raw

History Blame Contribute Delete

6.14 kB

	"""Cross-distribution honesty check for the plagiarism-TYPE classifier.

	The classifier was trained on synthetic gen_corpus variants; its reported
	held-out accuracy used a split from the SAME generator (circular). Here we build
	the same kind of (variant -> true type) cases from REAL PLOS academic text the
	generator never produced, push them through the CURRENT evidence pipeline (now
	the MiniLM+Siamese ensemble), and measure:

	1. TYPE accuracy per class (does the learned feature->type map generalise?)
	2. The thing the VERDICT actually depends on: does the evidence SCORE separate
	reuse (clone/find_replace/mosaic) from clean? (verdict = score + rules, the
	type label is secondary/cosmetic.)

	If type accuracy collapses but score-separation holds, the honest takeaway is:
	trust the verdict, treat the fine-grained type as a hint.
	"""
	import os
	import sys

	import numpy as np

	# ROOT is the parent of the directory containing this script. It is pushed to
	# the front of sys.path BEFORE the plagdetect imports below, which is why those
	# imports carry `noqa: E402` (module-level import not at top of file).
	# NOTE(review): presumably the plagdetect package lives directly under ROOT —
	# confirm against the repository layout.
	ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	sys.path.insert(0, ROOT)
	from plagdetect.evidence import EvidenceWeights, compare_section # noqa: E402
	from plagdetect.forensics import CLASSES, rule_type # noqa: E402
	from plagdetect.ingestion import Document # noqa: E402
	from plagdetect.siamese import EnsembleEncoder # noqa: E402
	from plagdetect.textutils import mosaic_mix, sentences, synonymize # noqa: E402
	from plagdetect.understanding import build_idf # noqa: E402

	import json

	# Path of the PLOS corpus file read by plos_docs(), resolved relative to ROOT.
	PLOS = os.path.join(ROOT, "data", "oa_corpus", "plos_complex_systems.jsonl")


	def plos_docs(limit=40, cap_sents=12):
	    """Load real PLOS records as ``Document`` objects with short, capped sections.

	    The corpus at ``PLOS`` is read as JSON lines (one record per line). Only a
	    handful of sentences per section are kept so the MiniLM ensemble stays
	    fast (type signatures don't need the whole paper).

	    Args:
	        limit: Maximum number of records to load, taken from the start of the
	            file. ``None`` loads every record.
	        cap_sents: Number of sentences per synthetic section. The first
	            ``cap_sents`` sentences of the full text become "introduction",
	            the next ``cap_sents`` become "results".

	    Returns:
	        A list of ``Document`` objects, in file order.

	    Raises:
	        OSError: If the corpus file cannot be opened.
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	        KeyError: If a record lacks "doi" or "title".
	    """
	    docs = []
	    # Context manager so the handle is always closed; parsing line by line
	    # lets us stop after `limit` records and tolerate blank lines, which the
	    # previous "join every line into one JSON array" approach could not.
	    with open(PLOS, encoding="utf-8") as fh:
	        for line in fh:
	            if limit is not None and len(docs) >= limit:
	                break
	            if not line.strip():
	                continue
	            r = json.loads(line)
	            ft = r.get("fulltext", "") or ""
	            # Split once and slice twice instead of re-tokenising the text.
	            sents = sentences(ft)
	            intro = " ".join(sents[:cap_sents])
	            results = " ".join(sents[cap_sents:cap_sents * 2])
	            sec = {
	                # `or ""` mirrors the fulltext handling for explicit nulls.
	                "abstract": r.get("abstract", "") or "",
	                "introduction": intro,
	                "results": results,
	            }
	            # NOTE(review): `references` is filled from the record's "authors"
	            # field, exactly as before — confirm this is intended.
	            docs.append(Document(doc_id=r["doi"], title=r["title"], sections=sec,
	                                 references=list(r.get("authors") or [])))
	    return docs


	def _word_dropout(text, rng, p=0.15):
	    """Randomly drop whitespace-separated words from ``text``.

	    One ``rng.rand()`` draw is made per word, in order; a word survives only
	    when its draw is strictly greater than ``p``. If three or fewer words
	    survive, the untouched word list is used instead. The result is joined
	    with single spaces.
	    """
	    words = text.split()
	    survivors = []
	    for word in words:
	        if rng.rand() > p:
	            survivors.append(word)
	    # Too few survivors: fall back to the full word list.
	    chosen = survivors if len(survivors) > 3 else words
	    return " ".join(chosen)


	def main():
	    """Run the cross-distribution check and print a plain-text report.

	    Steps:
	      * keep the PLOS docs whose capped introduction exceeds 400 characters;
	      * for each source doc with at least 4 introduction sentences, build four
	        labelled variants of that introduction: "clean" (another doc's
	        introduction), "clone" (verbatim copy), "find_replace"
	        (``synonymize`` output) and "mosaic" (``mosaic_mix`` of source and
	        other sentences);
	      * run every variant through ``compare_section``, ``rule_type`` and
	        ``EvidenceWeights.score`` against the source doc;
	      * print per-label type accuracy, the reuse-vs-clean binary view, and the
	        evidence-score separation around the 0.28 floor.

	    The RNG is seeded with 7 and draws happen in a fixed order.

	    Raises:
	        SystemExit: If fewer than two usable docs are available (a distinct
	            "other" doc could never be drawn), or if no source doc produced
	            any evaluable case.
	    """
	    # Imported once here rather than on every iteration of the report loop.
	    from collections import Counter

	    labels_used = ["clean", "clone", "find_replace", "mosaic"]
	    reuse_labels = ("clone", "find_replace", "mosaic")
	    # Verdict floor is EVIDENCE_FLOOR=0.28 in the pipeline (per the original
	    # comment) — keep in sync if that constant changes.
	    evidence_floor = 0.28

	    rng = np.random.RandomState(7)
	    docs = [d for d in plos_docs() if len(d.sections.get("introduction", "")) > 400]
	    if len(docs) < 2:
	        # With a single doc the "pick a different doc" loop below would never
	        # terminate; with none the report would be all NaN.
	        raise SystemExit(
	            f"need at least 2 PLOS docs with a usable introduction, found {len(docs)}")
	    idf, default_idf = build_idf(docs)
	    enc = EnsembleEncoder.load(os.path.join(ROOT, "models", "siamese.npz"))
	    weights = EvidenceWeights.load(os.path.join(ROOT, "models", "weights.json"))

	    y_true, y_pred = [], []
	    scores_by_type = {c: [] for c in CLASSES}
	    n = 0
	    for src in docs:
	        sec_text = src.sections["introduction"]
	        src_sents = sentences(sec_text)
	        if len(src_sents) < 4:
	            continue
	        # Rejection-sample a different doc to act as the unrelated text.
	        other = docs[rng.randint(len(docs))]
	        while other.doc_id == src.doc_id:
	            other = docs[rng.randint(len(docs))]
	        other_text = other.sections["introduction"]
	        # Dict order matters: synonymize and mosaic_mix consume the RNG in
	        # this order.
	        variants = {
	            "clean": other_text,
	            "clone": sec_text,
	            "find_replace": synonymize(sec_text, rng, p=0.9),
	            "mosaic": mosaic_mix(src_sents, sentences(other_text), rng),
	        }
	        for label, text in variants.items():
	            ev = compare_section(text, src.references, src, enc, idf, default_idf)
	            # Only the predicted class is used; the other two values are ignored.
	            cls, _prob, _dist = rule_type(ev["features"])
	            score = weights.score(ev["features"])
	            y_true.append(label)
	            y_pred.append(cls)
	            scores_by_type[label].append(score)
	        n += 1
	        print(f" ...{n} docs done", flush=True)

	    if not y_true:
	        raise SystemExit("no evaluable cases: every introduction had fewer than 4 sentences")

	    print(f"cross-distribution cases: {n} source docs x 4 variants = {len(y_true)}")
	    print("\n1) TYPE-CLASSIFIER accuracy on REAL text (was overfit to gen_corpus):")
	    for lab in labels_used:
	        idxs = [i for i, t in enumerate(y_true) if t == lab]
	        acc = np.mean([y_pred[i] == lab for i in idxs]) if idxs else 0.0
	        # most common predictions for this true label
	        common = Counter(y_pred[i] for i in idxs).most_common(2)
	        print(f" {lab:13s} acc={acc:4.0%} predicted-as={common}")
	    overall = np.mean([p == t for p, t in zip(y_pred, y_true)])
	    # NOTE(review): the text says "5-way" while four variant labels are built
	    # here; presumably CLASSES has five entries — confirm.
	    print(f" OVERALL 5-way accuracy = {overall:.0%} (noisy: find_replace/mosaic"
	          " are near-verbatim on real text, all confused AMONG reuse types)")

	    # The verdict-relevant axis: REUSE vs CLEAN (mislabelling among reuse types is
	    # cosmetic; calling clean a reuse-type, or reuse 'clean', is what hurts).
	    print("\n verdict-relevant BINARY (reuse vs clean):")
	    clean_idx = [i for i, t in enumerate(y_true) if t == "clean"]
	    reuse_idx = [i for i, t in enumerate(y_true) if t != "clean"]
	    clean_ok = np.mean([y_pred[i] == "clean" for i in clean_idx])
	    reuse_ok = np.mean([y_pred[i] != "clean" for i in reuse_idx])
	    print(f" clean kept clean (specificity) = {clean_ok:.0%}")
	    print(f" reuse flagged as some reuse-type (recall) = {reuse_ok:.0%}")

	    print("\n2) EVIDENCE SCORE separation (what the VERDICT uses):")
	    clean_mu = np.mean(scores_by_type["clean"])
	    for lab in labels_used:
	        mu = np.mean(scores_by_type[lab])
	        print(f" {lab:13s} mean score = {mu:.3f}")
	    reuse = np.concatenate([scores_by_type[l] for l in reuse_labels])
	    reuse_mu = reuse.mean()
	    print(f"\n clean mean {clean_mu:.3f} vs reuse mean {reuse_mu:.3f} "
	          f"-> gap {reuse_mu-clean_mu:+.3f}")
	    # How cleanly does the evidence floor separate reuse from clean?
	    clean_above = np.mean(np.array(scores_by_type["clean"]) >= evidence_floor)
	    reuse_above = np.mean(reuse >= evidence_floor)
	    print(f" reuse above evidence-floor({evidence_floor}): {reuse_above:.0%} "
	          f"clean false-trigger: {clean_above:.0%}")


	if __name__ == "__main__":
	    # Run the evaluation only when executed as a script, not on import.
	    main()