"""Manual smoke check for ``plagdetect.websearch.federated_search``.

Runs two fixed queries through ``federated_search`` and prints summary
counts about the returned candidates:

* the total number of candidates,
* a per-provider breakdown,
* how many carry at least one full-text locator
  (``pdf_url``, ``text_url`` or ``fulltext``),
* how many have a truthy ``oa_via`` field, plus a short preview of the
  first five of those.

NOTE(review): the candidate schema is inferred from the keys this script
reads; confirm against ``federated_search`` itself.
"""

import os
import sys
from collections import Counter

# Put the parent of this file's directory on sys.path so the ``plagdetect``
# package resolves when the script is run directly (presumably that parent is
# the repository root -- verify against the project layout).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Must come after the sys.path tweak above.
from plagdetect.websearch import federated_search  # noqa: E402

queries = [
    '"attention is all you need" transformer',
    "neural machine translation attention",
]

# Progress messages emitted by federated_search are echoed to stdout.
cands = federated_search(
    queries,
    per_query=4,
    progress=lambda m: print(" >", m),
)

print("\ntotal unique candidates:", len(cands))
print("by provider:", dict(Counter(c["provider"] for c in cands)))

# A candidate counts as locatable if any one of the full-text fields is truthy.
with_pdf = [
    c for c in cands
    if c.get("pdf_url") or c.get("text_url") or c.get("fulltext")
]
print("candidates with a full-text locator:", len(with_pdf))

merged = [c for c in cands if c.get("oa_via")]
print("candidates whose OA copy was merged from another index:", len(merged))

for c in merged[:5]:
    # Titles may be absent or None in a record; fall back to an empty string
    # so the preview never aborts the report. Truthy titles print unchanged.
    title = c.get("title") or ""
    print(f" via {c['oa_via']}: {title[:55]}")