File size: 1,216 Bytes
cfc2b8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""Pre-download all models and build index during Docker build."""
import os
# Set the Hugging Face Hub switch that turns off its symlink warning.
os.environ.update(HF_HUB_DISABLE_SYMLINKS_WARNING="1")

# Hub identifiers of the two models this script instantiates.
EMBEDDING_MODEL_ID = "NeuML/pubmedbert-base-embeddings"
NLI_MODEL_ID = "pritamdeka/PubMedBERT-MNLI-MedNLI"


def _fetch_embedding_model():
    """Step 1: instantiate the sentence-transformers embedding model once.

    The instance is discarded; per the script's own progress messages the
    point of constructing it is the download it triggers.
    """
    print("[1/3] Downloading embedding model...")
    # Imported here, not at file top, so the progress line is printed
    # before the library import runs.
    from sentence_transformers import SentenceTransformer

    SentenceTransformer(EMBEDDING_MODEL_ID)
    print("  Done.")


def _fetch_nli_model():
    """Step 2: load the NLI tokenizer and then its classification model."""
    print("[2/3] Downloading NLI model...")
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    # Tokenizer first, then the sequence-classification model; both results
    # are thrown away.
    for loader in (AutoTokenizer, AutoModelForSequenceClassification):
        loader.from_pretrained(NLI_MODEL_ID)
    print("  Done.")


def _build_index():
    """Step 3: load the diabetes samples and hand them to the KB builder.

    NOTE(review): `load_or_build` is defined elsewhere in the project; the
    "built and saved" message below is the only evidence here that it
    persists the index -- confirm against `KnowledgeBaseBuilder`.
    """
    print("[3/3] Building FAISS index...")
    from src.bio_rag.config import BioRAGConfig
    from src.bio_rag.data_loader import load_diabetes_pubmedqa
    from src.bio_rag.knowledge_base import KnowledgeBaseBuilder

    settings = BioRAGConfig()
    diabetes_samples = load_diabetes_pubmedqa(
        settings.dataset_name,
        max_samples=settings.max_samples,
    )
    print(f"  Filtered {len(diabetes_samples)} diabetes samples.")
    KnowledgeBaseBuilder(settings).load_or_build(diabetes_samples)
    print("  FAISS index built and saved.")


# Run the three steps unconditionally, in order, between the two banners.
print("=== PRE-DOWNLOADING MODELS ===")
for _step in (_fetch_embedding_model, _fetch_nli_model, _build_index):
    _step()
print("=== ALL MODELS READY ===")