# Spaces: Sleeping / Sleeping  (status text captured along with this file; not part of the script)
# Manual check of plagdetect.websearch.federated_search: runs two sample
# queries and prints summary counts about the returned candidates.
import os
import sys
from collections import Counter

# Prepend the parent of this file's directory to sys.path before importing
# plagdetect (presumably so the package resolves when this file is run
# directly -- confirm against the repository layout).
_this_file = os.path.abspath(__file__)
_parent_of_script_dir = os.path.dirname(os.path.dirname(_this_file))
sys.path.insert(0, _parent_of_script_dir)

from plagdetect.websearch import federated_search


def _show_progress(m):
    """Print one progress message handed back by the search call."""
    print(" >", m)


queries = [
    '"attention is all you need" transformer',
    "neural machine translation attention",
]
cands = federated_search(queries, per_query=4, progress=_show_progress)

print("\ntotal unique candidates:", len(cands))

# Tally candidates per "provider" value, in first-seen order.
provider_counts = Counter()
for cand in cands:
    provider_counts[cand["provider"]] += 1
print("by provider:", dict(provider_counts))

# A candidate counts here when any one of these keys holds a truthy value.
_LOCATOR_KEYS = ("pdf_url", "text_url", "fulltext")
with_pdf = [
    cand for cand in cands
    if any(cand.get(key) for key in _LOCATOR_KEYS)
]
print("candidates with a full-text locator:", len(with_pdf))

# Candidates carrying a truthy "oa_via" entry.
merged = list(filter(lambda cand: cand.get("oa_via"), cands))
print("candidates whose OA copy was merged from another index:", len(merged))

# Show at most the first five of those, with the title cut to 55 characters.
for hit in merged[:5]:
    via = hit["oa_via"]
    short_title = hit["title"][:55]
    print(f" via {via}: {short_title}")