taraky's picture
Upload folder using huggingface_hub
b7f3196 verified
raw
history blame
1.46 kB
#! /usr/bin/env python3
import json
import readline
import sys
from pyserini.search.lucene import LuceneSearcher
def _load_title_and_contents(searcher, docid):
    """Return ``(title, contents)`` for *docid*, tolerating missing data.

    Assumes the stored raw payload is a JSON object with string ``title``
    and ``contents`` fields (the only fields this script reads). When the
    document or its raw payload is unavailable, both values are empty
    strings; when the payload is not a JSON object, it is returned as the
    contents with an empty title so that something can still be shown.
    """
    doc = searcher.doc(docid)
    raw_text = doc.raw() if doc is not None else None
    if not raw_text:
        return "", ""
    try:
        record = json.loads(raw_text)
    except json.JSONDecodeError:
        return "", raw_text
    if not isinstance(record, dict):
        return "", raw_text
    # `or ""` also covers an explicit JSON null, which would otherwise
    # break the str.startswith() call made on the result.
    return record.get('title') or "", record.get('contents') or ""


def _preview(title, contents, limit=120):
    """Return at most *limit* characters of *contents* without its title.

    If *contents* begins with *title*, that prefix is dropped. Surrounding
    whitespace (such as a separator left between title and abstract) is
    stripped, and an ellipsis is appended only when text was actually cut.
    """
    body = contents[len(title):] if contents.startswith(title) else contents
    body = body.strip()
    if len(body) > limit:
        return body[:limit] + "..."
    return body


def main():
    """Run an interactive search prompt over a Lucene index.

    The index directory is taken from the first command-line argument and
    defaults to ``indexes/pubmed``. Each query is searched for its top 10
    hits, which are printed with rank, document id, title, score and a
    short abstract preview. The loop ends on an empty line, ``quit`` or
    ``exit`` (case-insensitive), Ctrl-D, or Ctrl-C.
    """
    index_dir = sys.argv[1] if len(sys.argv) > 1 else "indexes/pubmed"
    searcher = LuceneSearcher(index_dir)
    print(f"Loaded {searcher.num_docs} documents from {index_dir}")
    print("(Ctrl-D or 'quit' to exit)\n")

    while True:
        # The try deliberately spans the whole iteration so that Ctrl-C
        # during a search or while printing also exits cleanly.
        try:
            query = input("PubMed> ").strip()
            if not query or query.lower() in ('quit', 'exit'):
                break

            hits = searcher.search(query, k=10)
            print(f"{len(hits)}/{searcher.num_docs} matching documents found\n")
            if not hits:
                print("No results found.\n")
                continue

            for rank, hit in enumerate(hits, 1):
                title, contents = _load_title_and_contents(searcher, hit.docid)
                print(f"{rank}. PMID {hit.docid} \"{title}\" (score: {hit.score:.4f})")
                print(f" {_preview(title, contents)}\n")
        except (EOFError, KeyboardInterrupt):
            print("\nBye!")
            break
# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()