Spaces:
Running
Running
File size: 1,216 Bytes
"""Pre-download all models and build index during Docker build.

Runs three steps in order, printing progress before each one:

1. instantiate the sentence-embedding model,
2. load the NLI tokenizer and classifier,
3. load the diabetes PubMedQA samples and hand them to
   ``KnowledgeBaseBuilder.load_or_build``.

The model objects created in steps 1 and 2 are discarded; they are created
only for the download they trigger.
"""
import os

# Must be assigned before the Hugging Face libraries are imported further
# down, so the setting is already in the environment when they load.
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

# Model identifiers, named once so each step refers to a single source.
EMBEDDING_MODEL_ID = "NeuML/pubmedbert-base-embeddings"
NLI_MODEL_ID = "pritamdeka/PubMedBERT-MNLI-MedNLI"

print("=== PRE-DOWNLOADING MODELS ===")

# 1. Download embedding model
print("[1/3] Downloading embedding model...")
# Imported here rather than at the top of the file so the progress line
# above is printed before the import runs.
from sentence_transformers import SentenceTransformer

SentenceTransformer(EMBEDDING_MODEL_ID)
print(" Done.")

# 2. Download NLI model
print("[2/3] Downloading NLI model...")
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer first, then the classifier, both from the same model id.
for auto_cls in (AutoTokenizer, AutoModelForSequenceClassification):
    auto_cls.from_pretrained(NLI_MODEL_ID)
print(" Done.")

# 3. Build FAISS index
print("[3/3] Building FAISS index...")
from src.bio_rag.config import BioRAGConfig
from src.bio_rag.data_loader import load_diabetes_pubmedqa
from src.bio_rag.knowledge_base import KnowledgeBaseBuilder

config = BioRAGConfig()
samples = load_diabetes_pubmedqa(
    config.dataset_name,
    max_samples=config.max_samples,
)
print(f" Filtered {len(samples)} diabetes samples.")

# The returned store is not needed here, so it is not bound to a name.
# NOTE(review): the message below assumes load_or_build persists the index;
# confirm against KnowledgeBaseBuilder.
KnowledgeBaseBuilder(config).load_or_build(samples)
print(" FAISS index built and saved.")

print("=== ALL MODELS READY ===")