Plaiglab / scripts /probe_ai_features.py
SanidhyaDhangar's picture
PlaigLab — Hugging Face Space (Docker) clean deploy
ebebfe8
Raw
History Blame Contribute Delete
2.63 kB
"""Quick, network-free discrimination probe: extract the new model-agnostic
AI features on the 13 real drafts and see which separate the Turnitin-flagged
AI papers (AI% >= 40) from the clearly-human ones (suppressed <20). This tells
us whether the features are worth building a full training corpus around,
BEFORE investing in that. Pure ranking sanity check, not a trained model.
"""
import json, os, sys
import numpy as np
# Repository root: two directory levels above this file, i.e. the parent of
# the directory that contains this script.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put ROOT first on the import path BEFORE the `plagdetect` imports below run.
# Presumably this lets the package resolve when the file is executed directly
# as a script (assumes `plagdetect` lives under ROOT -- confirm).
sys.path.insert(0, ROOT)
from plagdetect.webpipeline import extract_text, split_body_references # noqa
from plagdetect.aifeatures import extract, FEATURES # noqa
# Directory holding the draft files; main() joins each record's "draft" name
# onto this path.
DSET = os.path.join(ROOT, "DATASET FOR training of turnitin")
# JSON file with the per-draft Turnitin ground truth that main() loads.
GT = os.path.join(ROOT, "data", "turnitin_groundtruth.json")
def _label_for(ai):
    """Map a Turnitin AI score to a class label.

    Returns 1 for AI (score >= 40), 0 for human (the suppressed marker "*" or
    a numeric score below 20), -1 for the ambiguous 20-39 band, and None when
    the value is neither "*" nor a real number, so the caller can skip the
    record instead of crashing on a str/int comparison.
    """
    if ai == "*":
        return 0
    # bool is a subclass of int; a True/False score is a data error, not 1%/0%.
    if isinstance(ai, bool) or not isinstance(ai, (int, float)):
        return None
    if ai >= 40:
        return 1
    return 0 if ai < 20 else -1


def _auc(pos, neg):
    """Return P(random pos > random neg) with ties counted as one half.

    `pos` and `neg` are 1-D numpy arrays. Falls back to 0.5 (no separation)
    when either side is empty.
    """
    if not pos.size or not neg.size:
        return 0.5
    # Broadcast to an all-pairs comparison grid of shape (len(pos), len(neg)).
    greater = pos[:, None] > neg[None, :]
    equal = pos[:, None] == neg[None, :]
    return float((greater.sum() + 0.5 * equal.sum()) / (pos.size * neg.size))


def main():
    """Print a per-feature AI-vs-human separation table for the labelled drafts.

    Loads the ground-truth records from GT, extracts the features of each
    draft's body text, and labels each draft from its Turnitin AI score.
    Then prints, for every name in FEATURES, the class means and a crude AUC,
    strongest separators first, followed by a per-paper table of the four
    best features ordered by Turnitin AI%.

    Records without a draft name or an AI score are skipped silently; records
    whose score is neither "*" nor a number are skipped with a note on stderr.
    """
    # Context manager so the ground-truth file is closed deterministically.
    with open(GT, encoding="utf-8") as fh:
        gt = json.load(fh)

    rows = []
    for rec in gt:
        draft = rec.get("draft")
        ai = (rec.get("ai") or {}).get("ai_pct")
        if not draft or ai is None:
            continue
        lbl = _label_for(ai)
        if lbl is None:
            print(f"skipping {draft}: unrecognised ai_pct {ai!r}", file=sys.stderr)
            continue
        _t, text = extract_text(os.path.join(DSET, draft))
        body, _ = split_body_references(text)
        rows.append((draft, ai, lbl, extract(body)))

    ai_rows = [r for r in rows if r[2] == 1]
    hu_rows = [r for r in rows if r[2] == 0]
    print(f"AI(>=40%): {len(ai_rows)} human(<20%): {len(hu_rows)} "
          f"ambiguous: {sum(1 for r in rows if r[2]==-1)}\n")

    # per-feature separation: mean(AI) vs mean(human), and a crude AUC
    print(f"{'feature':22s} {'mean_AI':>9s} {'mean_HU':>9s} {'AUC':>6s}")
    print("-" * 50)
    nan = float("nan")
    scored = []
    for k in FEATURES:
        a = np.asarray([r[3][k] for r in ai_rows], dtype=float)
        h = np.asarray([r[3][k] for r in hu_rows], dtype=float)
        auc = _auc(a, h)
        # Guard the means so an empty class prints "nan" without numpy's
        # "Mean of empty slice" RuntimeWarning.
        scored.append((abs(auc - 0.5), auc, k,
                       a.mean() if a.size else nan,
                       h.mean() if h.size else nan))
    # Rank by distance from chance, then AUC, then name; the key keeps the
    # (possibly NaN) means out of the comparison.
    scored.sort(key=lambda s: s[:3], reverse=True)
    for _, auc, k, ma, mh in scored:
        flag = " <<" if abs(auc - 0.5) >= 0.25 else ""
        print(f"{k:22s} {ma:9.3f} {mh:9.3f} {auc:6.2f}{flag}")

    print("\nper-paper (sorted by Turnitin AI%):")
    top = [s[2] for s in scored[:4]]
    print(f"{'draft':30s} {'aiT':>4s} " + " ".join(f"{k[:10]:>10s}" for k in top))

    def _sort_key(row):
        # Suppressed "*" scores sort first; numeric scores (int or float) by value.
        return row[1] if isinstance(row[1], (int, float)) else -1

    for draft, ai, lbl, f in sorted(rows, key=_sort_key):
        print(f"{draft[:29]:30s} {str(ai):>4s} " + " ".join(f"{f[k]:10.3f}" for k in top))
# Run the probe only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()