Steps to Run

# Imports
import faiss
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer

# Load Corpus
# NOTE: pickle.load can execute arbitrary code, so only point this at a
# corpus file obtained from a trusted source.
with open("/kaggle/input/rag-pubmed/pytorch/default/1/pubmed_25k_bge_chunks.pkl", "rb") as f:
    corpus = pickle.load(f)

# The pickle may hold either dict records (text under the "text" key) or the
# chunk values directly; the first element decides which layout is assumed.
corpus_list = (
    [c["text"] for c in corpus] if isinstance(corpus[0], dict) else corpus
)

print("Loaded corpus size:", len(corpus_list))

# Load FAISS index
index = faiss.read_index("/kaggle/input/faiss-rag-pubmed/pytorch/default/1/pubmed_25k_bge.faiss")

print("FAISS index size:", index.ntotal)

# Search results are used as positions into corpus_list, so the two must have
# the same number of entries. This is an explicit raise rather than an assert
# because assert statements are removed when Python runs with -O, which would
# let a mismatched index silently return the wrong passages.
if index.ntotal != len(corpus_list):
    raise ValueError(
        "FAISS index and corpus size mismatch! "
        f"(index: {index.ntotal}, corpus: {len(corpus_list)})"
    )

# Load embed model
# Encoder used to embed queries at search time.
# NOTE(review): presumably the same model that produced the vectors stored in
# the FAISS index -- confirm before swapping it for another checkpoint.
# Change device to "cpu" when no GPU is available.
embed_model = SentenceTransformer("BAAI/bge-base-en-v1.5", device="cuda")

# RAG Query
def rag_query(case_text: str, k: int = 10):
    """Retrieve the corpus chunks closest to a clinical case description.

    The case text is prefixed with a retrieval instruction, embedded with the
    module-level ``embed_model`` (L2-normalised), and searched against the
    module-level FAISS ``index``.

    Args:
        case_text: Free-text description of the clinical case.
        k: Number of neighbours to request from the index.

    Returns:
        A tuple ``(D, I, retrieved_texts)`` where ``D`` and ``I`` are the raw
        score and label arrays returned by ``index.search`` (one row, ``k``
        columns) and ``retrieved_texts`` is the list of corpus entries for
        every valid label in ``I[0]``, in the same order. It can be shorter
        than ``k`` when the index has fewer than ``k`` results.
    """
    query = (
        "Represent this query for retrieving relevant clinical case reports: "
        + case_text
    )

    query_emb = embed_model.encode(
        [query],
        convert_to_numpy=True,
        normalize_embeddings=True,
    )

    # FAISS expects float32 input vectors.
    D, I = index.search(query_emb.astype("float32"), k)

    # FAISS fills missing neighbours with the label -1. A bare upper-bound
    # check would let -1 through, and Python's negative indexing would then
    # return the *last* corpus chunk as a bogus hit, so both bounds are tested.
    corpus_size = len(corpus_list)
    retrieved_texts = [
        corpus_list[i] for i in I[0]
        if 0 <= i < corpus_size
    ]

    return D, I, retrieved_texts

# Sample Query
# Example case description used to exercise the retrieval pipeline end to end.
sample_case = """
A 45-year-old male with persistent fever, night sweats,
unintentional weight loss, enlarged cervical lymph nodes,
and elevated lactate dehydrogenase.
"""

D, I, texts = rag_query(sample_case, k=10)

# Print each hit with its rank, its score, and the first 400 characters of
# the retrieved text, followed by a separator line.
for rank, (score, text) in enumerate(zip(D[0], texts), start=1):
    print(f"\nRank {rank} | Score {score:.4f}")
    print(f"{text[:400]}...")
    print("-" * 80)
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support