Spaces:
Sleeping
Sleeping
File size: 1,462 Bytes
b7f3196 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
#! /usr/bin/env python3
import json
import readline
import sys
from pyserini.search.lucene import LuceneSearcher
def _abstract_snippet(title, contents, limit=120):
    """Return a preview of *contents* with a leading *title* removed.

    The preview is cut to at most *limit* characters; an ellipsis is
    appended only when text was actually cut off.
    """
    abstract = contents[len(title):] if contents.startswith(title) else contents
    if len(abstract) > limit:
        return abstract[:limit] + "..."
    return abstract


def main():
    """Run an interactive search prompt over a Lucene index.

    The index directory is read from the first command-line argument and
    defaults to ``indexes/pubmed``. Each query typed at the ``PubMed>``
    prompt is searched for its top 10 hits; every hit is printed with its
    rank, document id, title, score and a preview of its abstract.

    The session ends on an empty line, on ``quit``/``exit`` (any case), or
    on Ctrl-D / Ctrl-C, the latter two printing a farewell message.
    """
    index_dir = sys.argv[1] if len(sys.argv) > 1 else "indexes/pubmed"
    searcher = LuceneSearcher(index_dir)
    print(f"Loaded {searcher.num_docs} documents from {index_dir}")
    print("(Ctrl-D or 'quit' to exit)\n")

    # One handler around the whole loop: Ctrl-D at the prompt and Ctrl-C at
    # any point (including mid-search) both end the session the same way.
    try:
        while True:
            query = input("PubMed> ").strip()
            if not query or query.lower() in ('quit', 'exit'):
                break

            hits = searcher.search(query, k=10)
            print(f"{len(hits)}/{searcher.num_docs} matching documents found\n")
            if not hits:
                print("No results found.\n")
                continue

            for rank, hit in enumerate(hits, 1):
                raw = json.loads(searcher.doc(hit.docid).raw())
                # `or ''` also covers fields stored as JSON null, which
                # would otherwise break the string operations below.
                title = raw.get('title') or ''
                contents = raw.get('contents') or ''
                snippet = _abstract_snippet(title, contents)
                print(f"{rank}. PMID {hit.docid} \"{title}\" (score: {hit.score:.4f})")
                print(f" {snippet}\n")
    except (EOFError, KeyboardInterrupt):
        print("\nBye!")
# Start the interactive prompt only when this file is executed as a script.
if __name__ == "__main__":
    main()
|