Spaces:
Sleeping
Sleeping
"""Smoke test for the real-world web pipeline against live academic APIs.

Runs ``analyze_document`` on one document and prints a human-readable summary
of the returned report.  The document is the first command-line argument when
one is given; otherwise it is ``data/test_real.txt`` under the directory two
levels above this file.
"""
import os
import sys

# Directory two levels above this file; it is put first on sys.path so that the
# ``plagdetect`` package below resolves, and it anchors the default data path.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ROOT)

from plagdetect.webpipeline import analyze_document  # noqa: E402


def _echo_progress(m):
    """Print one progress message handed over by the pipeline."""
    print(" >", m)


# Pick the document to analyse: explicit CLI argument wins over the default.
if len(sys.argv) > 1:
    path = sys.argv[1]
else:
    path = os.path.join(ROOT, "data", "test_real.txt")

result = analyze_document(path, progress=_echo_progress)

# --- Headline numbers -------------------------------------------------------
print()
print("VERDICT:", result["verdict"], "|", "; ".join(result["verdict_reasons"]))
print(
    "similarity_index:",
    result["similarity_index"],
    "% risk:",
    result["plagiarism_risk"],
)
print(
    "ai_score:",
    result["ai_score"],
    result["ai_band"],
    "| detectors:",
    {det["name"]: det["score"] for det in result["ai_detectors"]},
)
print(
    "semantic:",
    result["semantic_enabled"],
    "| obfuscation spoof:",
    result["obfuscation"]["spoof_suspected"],
)

# --- Coverage and screening statistics --------------------------------------
coverage = result["coverage"]
print("coverage providers:", coverage["providers"])
print(
    "deep-read PDFs:",
    len(coverage["deep_read_pdfs"]),
    "| manifest case:",
    result["case_id"],
)
near_miss_summary = [
    (miss["title"][:40], miss["match_percent"]) for miss in result["near_misses"]
]
print("near_misses:", near_miss_summary)
print(
    "candidates:",
    result["candidates_screened"],
    "| elapsed:",
    result["elapsed_seconds"],
    "s",
)

# --- Matched sources: at most the first six entries of the list -------------
sources = result["sources"]
for s in sources[:6]:
    print(f" {s['match_percent']:>5}% exact={s['exact_fingerprint_overlap']:>4}% "
          f"deep={s['deep_scraped']} {s['title'][:64]} "
          f"({s['provider']}, {s['year']})")

# --- First sentence pair of the first source, when there is one -------------
if sources and sources[0]["top_pairs"]:
    strongest = sources[0]["top_pairs"][0]
    print("\nstrongest pair sim", strongest["similarity"])
    # Both excerpts are cut to 120 characters to keep the output on one line.
    print(" SUB:", strongest["submitted"][:120])
    print(" SRC:", strongest["source"][:120])