File size: 1,462 Bytes
b7f3196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#! /usr/bin/env python3

import json
import readline
import sys

from pyserini.search.lucene import LuceneSearcher


def _title_and_abstract(raw_json):
    """Split a stored raw document into a ``(title, abstract)`` pair.

    The raw document is expected to be a JSON object with optional ``title``
    and ``contents`` fields.  When ``contents`` begins with the title, that
    prefix is dropped so the title is not shown twice.  Whitespace in the
    abstract is then collapsed to single spaces and trimmed, so the separator
    left behind by the title and any embedded newlines cannot break the
    one-line preview.

    Args:
        raw_json: The raw stored document text, or None if nothing is stored.

    Returns:
        ``(title, abstract)`` as strings, or None when *raw_json* is empty,
        is not valid JSON, or does not decode to a JSON object.
    """
    if not raw_json:
        return None

    try:
        record = json.loads(raw_json)
    except (TypeError, ValueError):  # JSONDecodeError is a ValueError
        return None

    if not isinstance(record, dict):
        return None

    # ``or ''`` also covers explicit JSON nulls, which ``.get(key, '')`` would
    # pass through as None and break ``startswith`` below.
    title = str(record.get('title') or '')
    contents = str(record.get('contents') or '')

    abstract = contents[len(title):] if contents.startswith(title) else contents

    return title, " ".join(abstract.split())


def _preview(text, limit=120):
    """Return *text* cut to *limit* characters, with "..." only if it was cut."""
    return text if len(text) <= limit else f"{text[:limit]}..."


def _print_hit(searcher, rank, hit):
    """Print one ranked search hit: a header line and an abstract preview.

    Args:
        searcher: Object whose ``doc(docid)`` returns the stored document
            (or None) for a hit.
        rank: 1-based position of the hit in the result list.
        hit: Search hit exposing ``docid`` and ``score``.
    """
    doc = searcher.doc(hit.docid)
    fields = _title_and_abstract(doc.raw() if doc is not None else None)

    if fields is None:
        # Keep the session alive when the stored document cannot be shown
        # instead of aborting the whole REPL on one bad record.
        print(f"{rank}. PMID {hit.docid} (score: {hit.score:.4f})")
        print("   [stored document unavailable]\n")
        return

    title, abstract = fields

    print(f'{rank}. PMID {hit.docid} "{title}" (score: {hit.score:.4f})')
    if abstract:
        print(f"   {_preview(abstract)}")
    print()


def main():
    """Run an interactive search prompt over a Lucene index.

    The index directory is taken from the first command-line argument and
    defaults to ``indexes/pubmed``.  Each query is searched for the top 10
    hits, which are printed with their rank, document id, title, score and a
    short abstract preview.

    The loop ends on an empty line, on ``quit``/``exit`` (case-insensitive),
    or on Ctrl-D / Ctrl-C; the latter two print a farewell message.
    """
    index_dir = sys.argv[1] if len(sys.argv) > 1 else "indexes/pubmed"

    searcher = LuceneSearcher(index_dir)
    total_docs = searcher.num_docs

    print(f"Loaded {total_docs} documents from {index_dir}")
    print("(Ctrl-D or 'quit' to exit)\n")

    # One try around the whole loop: Ctrl-D at the prompt and Ctrl-C anywhere
    # (including mid-search) both end the session the same way.
    try:
        while True:
            query = input("PubMed> ").strip()
            if not query or query.lower() in ('quit', 'exit'):
                break

            hits = searcher.search(query, k=10)

            print(f"{len(hits)}/{total_docs} matching documents found\n")

            if not hits:
                print("No results found.\n")
                continue

            for rank, hit in enumerate(hits, 1):
                _print_hit(searcher, rank, hit)
    except (EOFError, KeyboardInterrupt):
        print("\nBye!")


# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()