Spaces:

SanidhyaDhangar
/

Plaiglab

Sleeping

App Files Files Community

Plaiglab / scripts /test_web.py

SanidhyaDhangar

PlaigLab — Hugging Face Space (Docker) clean deploy

ebebfe8 11 days ago

Raw

History Blame Contribute Delete

1.63 kB

	"""Smoke test for the real-world web pipeline against live academic APIs."""
	import os
	import sys

	# Repository root: two directory levels above this file's absolute path.
	ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

	# Put the repository root first on sys.path so the in-repo ``plagdetect``
	# package is importable when this file is run directly as a script.
	sys.path.insert(0, ROOT)
	from plagdetect.webpipeline import analyze_document  # noqa: E402


	def main():
	    """Run ``analyze_document`` on one file and print a summary of the result.

	    The document path is taken from the first command-line argument; when no
	    argument is given, ``<ROOT>/data/test_real.txt`` is used. Progress
	    messages emitted by the pipeline are echoed to stdout prefixed with
	    ``" >"``.

	    The result ``r`` is indexed like a mapping; the keys read below
	    (``verdict``, ``sources``, ``coverage``, ...) are the only ones this
	    script relies on. A missing key raises ``KeyError`` and fails the smoke
	    test, which is the intended behavior for a schema check.
	    """
	    path = sys.argv[1] if len(sys.argv) > 1 else os.path.join(ROOT, "data", "test_real.txt")

	    r = analyze_document(path, progress=lambda m: print(" >", m))
	    print()

	    # Headline verdict and scores. The separator is a plain "|" -- the
	    # previous "\|" was an invalid escape sequence that printed a backslash.
	    print("VERDICT:", r["verdict"], "|", "; ".join(r["verdict_reasons"]))
	    print("similarity_index:", r["similarity_index"], "% risk:", r["plagiarism_risk"])
	    print("ai_score:", r["ai_score"], r["ai_band"],
	          "| detectors:", {d["name"]: d["score"] for d in r["ai_detectors"]})
	    print("semantic:", r["semantic_enabled"],
	          "| obfuscation spoof:", r["obfuscation"]["spoof_suspected"])

	    # Coverage and bookkeeping fields.
	    print("coverage providers:", r["coverage"]["providers"])
	    print("deep-read PDFs:", len(r["coverage"]["deep_read_pdfs"]),
	          "| manifest case:", r["case_id"])
	    print("near_misses:", [(n["title"][:40], n["match_percent"]) for n in r["near_misses"]])
	    print("candidates:", r["candidates_screened"], "| elapsed:", r["elapsed_seconds"], "s")

	    # One line per source, limited to the first six entries; titles are
	    # truncated to 64 characters to keep the rows readable.
	    for s in r["sources"][:6]:
	        print(f" {s['match_percent']:>5}% exact={s['exact_fingerprint_overlap']:>4}% "
	              f"deep={s['deep_scraped']} {s['title'][:64]} "
	              f"({s['provider']}, {s['year']})")

	    # Show the first sentence pair of the first source, if there is one.
	    if r["sources"] and r["sources"][0]["top_pairs"]:
	        p = r["sources"][0]["top_pairs"][0]
	        print("\nstrongest pair sim", p["similarity"])
	        print(" SUB:", p["submitted"][:120])
	        print(" SRC:", p["source"][:120])


	# Guarded entry point: the file name matches pytest's default ``test_*.py``
	# pattern, so merely importing it must not start a live pipeline run.
	if __name__ == "__main__":
	    main()