Spaces:
Sleeping
Sleeping
| #! /usr/bin/env python3 | |
| import json | |
| import readline | |
| import sys | |
| from pyserini.search.lucene import LuceneSearcher | |
| def main(): | |
| index_dir = sys.argv[1] if len(sys.argv) > 1 else "indexes/pubmed" | |
| searcher = LuceneSearcher(index_dir) | |
| print(f"Loaded {searcher.num_docs} documents from {index_dir}") | |
| print(f"(Ctrl-D or 'quit' to exit)\n") | |
| while True: | |
| try: | |
| query = input("PubMed> ").strip() | |
| if not query or query.lower() in ['quit', 'exit']: | |
| break | |
| hits = searcher.search(query, k=10) | |
| print(f"{len(hits)}/{searcher.num_docs} matching documents found\n") | |
| if not hits: | |
| print("No results found.\n") | |
| continue | |
| for i, hit in enumerate(hits, 1): | |
| doc = searcher.doc(hit.docid) | |
| raw = json.loads(doc.raw()) | |
| title = raw.get('title', '') | |
| contents = raw.get('contents', '') | |
| abstract = contents[len(title):] if contents.startswith(title) else contents | |
| print(f"{i}. PMID {hit.docid} \"{title}\" (score: {hit.score:.4f})") | |
| print(f" {abstract[:120]}...\n") | |
| except EOFError: | |
| print("\nBye!") | |
| break | |
| except KeyboardInterrupt: | |
| print("\nBye!") | |
| break | |
| if __name__ == "__main__": | |
| main() | |