"""Smoke test for the real-world web pipeline against live academic APIs."""
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from plagdetect.webpipeline import analyze_document  # noqa: E402

# Directory one level above the folder that contains this script.
_script_dir = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.dirname(_script_dir)

# Document to analyze: the first command-line argument when one is given,
# otherwise data/test_real.txt under ROOT.
if len(sys.argv) > 1:
    path = sys.argv[1]
else:
    path = os.path.join(ROOT, "data", "test_real.txt")


def _show_progress(m):
    """Print a progress message passed to the callback, prefixed with '>'."""
    print("  >", m)


r = analyze_document(path, progress=_show_progress)
print()

# Verdict line, with the individual reasons joined by "; ".
verdict = r["verdict"]
verdict_reasons = "; ".join(r["verdict_reasons"])
print("VERDICT:", verdict, "|", verdict_reasons)

similarity = r["similarity_index"]
risk = r["plagiarism_risk"]
print("similarity_index:", similarity, "%  risk:", risk)

# Collapse the detector entries into a name -> score mapping for display.
ai_score = r["ai_score"]
ai_band = r["ai_band"]
detector_scores = {}
for detector in r["ai_detectors"]:
    detector_name = detector["name"]
    detector_scores[detector_name] = detector["score"]
print("ai_score:", ai_score, ai_band, "| detectors:", detector_scores)

semantic_enabled = r["semantic_enabled"]
spoof_suspected = r["obfuscation"]["spoof_suspected"]
print("semantic:", semantic_enabled, "| obfuscation spoof:", spoof_suspected)

print("coverage providers:", r["coverage"]["providers"])

deep_read_count = len(r["coverage"]["deep_read_pdfs"])
print("deep-read PDFs:", deep_read_count, "| manifest case:", r["case_id"])

# Each near miss is shown as (title truncated to 40 chars, match percent).
near_miss_summary = []
for miss in r["near_misses"]:
    near_miss_summary.append((miss["title"][:40], miss["match_percent"]))
print("near_misses:", near_miss_summary)

print("candidates:", r["candidates_screened"], "| elapsed:", r["elapsed_seconds"], "s")

# One line per source, limited to the first six entries.
for src in r["sources"][:6]:
    match = src["match_percent"]
    exact = src["exact_fingerprint_overlap"]
    deep = src["deep_scraped"]
    title = src["title"][:64]
    origin = f"({src['provider']}, {src['year']})"
    print(f"  {match:>5}%  exact={exact:>4}%  deep={deep}  {title} {origin}")

# Show the first sentence pair of the first source, when there is one.
sources = r["sources"]
first_pairs = sources[0]["top_pairs"] if sources else None
if first_pairs:
    pair = first_pairs[0]
    print("\nstrongest pair sim", pair["similarity"])
    print("  SUB:", pair["submitted"][:120])
    print("  SRC:", pair["source"][:120])