Plaiglab / scripts /test_fed.py
SanidhyaDhangar's picture
PlaigLab — Hugging Face Space (Docker) clean deploy
ebebfe8
Raw
History Blame Contribute Delete
878 Bytes
import os
import sys
from collections import Counter
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from plagdetect.websearch import federated_search
# Manual smoke check for the federated search: run two sample queries,
# then print a short summary of the candidates that came back.
queries = [
    '"attention is all you need" transformer',
    "neural machine translation attention",
]


def _show_progress(m):
    """Echo one progress message handed over by the search call."""
    print(" >", m)


cands = federated_search(queries, per_query=4, progress=_show_progress)

print("\ntotal unique candidates:", len(cands))

# Tally how many candidates each provider contributed; every candidate is
# indexed by "provider" directly, so a missing key raises KeyError here.
providers = [c["provider"] for c in cands]
print("by provider:", dict(Counter(providers)))

# A candidate counts as having a full-text locator when any of these keys
# holds a truthy value (checked in this order, stopping at the first hit).
_LOCATOR_KEYS = ("pdf_url", "text_url", "fulltext")
with_pdf = [c for c in cands if any(c.get(k) for k in _LOCATOR_KEYS)]
print("candidates with a full-text locator:", len(with_pdf))

# Candidates carrying a truthy "oa_via" entry are reported as merged.
merged = list(filter(lambda c: c.get("oa_via"), cands))
print("candidates whose OA copy was merged from another index:", len(merged))

# Show at most the first five merged candidates, titles cut to 55 chars.
for c in merged[:5]:
    print(f" via {c['oa_via']}: {c['title'][:55]}")