#!/usr/bin/env python3
"""
Pre-download and cache models for Hugging Face Spaces deployment.
Run this during Docker build to avoid runtime downloads.
PRE-CACHED MODELS (downloaded during build):
- facebook/bart-large-cnn (Summarization)
- patrickvonplaten/longformer2roberta-cnn_dailymail-fp16 (Seq2Seq)
- google/flan-t5-large (Summarization)
- microsoft/Phi-3-mini-4k-instruct (Causal OpenVINO)
- OpenVINO/Phi-3-mini-4k-instruct-fp16-ov (Causal OpenVINO)
- microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf (GGUF - PRIMARY)
RUNTIME BEHAVIOR:
- If you request a pre-cached model: Loads instantly from cache (30-60 sec)
- If you request a different model: Downloads and uses at runtime automatically
- System supports both pre-cached and on-demand model loading
PRIMARY MODEL for patient summaries:
- microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf (is_active: true)
"""
import os
import sys
import logging
from pathlib import Path
# Add src to path for benchmarking
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir)
sys.path.insert(0, os.path.join(project_root, "services", "ai-service", "src"))
try:
    from ai_med_extract.utils.benchmark import BenchmarkContext
except ImportError:
    # Fall back to a no-op context manager if the benchmark module is missing
    # or the path above is wrong
    logging.warning("Benchmark module not found; creating dummy context.")

    class BenchmarkContext:
        def __init__(self, *args, **kwargs): pass
        def __enter__(self): return self
        def __exit__(self, *args): pass
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Set cache directories - these will be baked into the Docker image
MODEL_CACHE_DIR = os.environ.get('MODEL_CACHE_DIR', '/app/models')
HF_HOME = os.environ.get('HF_HOME', '/app/.cache/huggingface')
TORCH_HOME = os.environ.get('TORCH_HOME', '/app/.cache/torch')
WHISPER_CACHE = os.environ.get('WHISPER_CACHE', '/app/.cache/whisper')
# Create cache directories
for cache_dir in [MODEL_CACHE_DIR, HF_HOME, TORCH_HOME, WHISPER_CACHE]:
    Path(cache_dir).mkdir(parents=True, exist_ok=True)
    logger.info(f"Created cache directory: {cache_dir}")

def preload_transformers_models():
    """Pre-download Hugging Face transformers models."""
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM

    # Models for patient summary generation - as specified by user
    models = [
        # Summarization models
        {
            "name": "facebook/bart-large-cnn",
            "type": "seq2seq",
            "description": "BART Large CNN - Summarization",
            "is_active": False  # Available but not primary
        },
        {
            "name": "patrickvonplaten/longformer2roberta-cnn_dailymail-fp16",
            "type": "seq2seq",
            "description": "Longformer2Roberta - Seq2Seq Summarization",
            "is_active": False
        },
        {
            "name": "google/flan-t5-large",
            "type": "seq2seq",
            "description": "FLAN-T5 Large - Summarization",
            "is_active": False
        },
        # OpenVINO models for patient summaries
        {
            "name": "microsoft/Phi-3-mini-4k-instruct",
            "type": "causal",
            "description": "Phi-3 Mini - Causal OpenVINO (base model)",
            "is_active": False
        },
        {
            "name": "OpenVINO/Phi-3-mini-4k-instruct-fp16-ov",
            "type": "causal",
            "description": "Phi-3 Mini - FP16 OpenVINO optimized",
            "is_active": False
        },
    ]

    for model_info in models:
        model_name = model_info["name"]
        model_type = model_info["type"]
        description = model_info["description"]
        try:
            logger.info(f"📥 Downloading {description}: {model_name}")
            # Download tokenizer
            logger.info("  ↳ Downloading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                cache_dir=HF_HOME,
                trust_remote_code=False
            )
            # Download model weights
            logger.info("  ↳ Downloading model weights...")
            if model_type == "seq2seq":
                model = AutoModelForSeq2SeqLM.from_pretrained(
                    model_name,
                    cache_dir=HF_HOME,
                    trust_remote_code=False
                )
            else:
                # Causal LM models (the Phi-3 variants above)
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    cache_dir=HF_HOME,
                    trust_remote_code=False
                )
            logger.info(f"  ✅ Successfully cached {model_name}")
            # Free memory before the next download
            del model
            del tokenizer
        except Exception as e:
            logger.error(f"  ❌ Failed to download {model_name}: {e}")
            # Don't fail the entire script if one model fails
            continue
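
# Illustrative only (not executed during the build): a minimal sketch of how a
# runtime consumer could reload one of the summarizers above from the same
# HF_HOME cache. The function name and default model are example values, not
# part of the deployed service.
def _example_load_cached_summarizer(model_name="facebook/bart-large-cnn"):
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
    # Hits the baked cache when the model was pre-downloaded; otherwise
    # transformers falls back to an on-demand download (the documented
    # runtime behavior above).
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=HF_HOME)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir=HF_HOME)
    return pipeline("summarization", model=model, tokenizer=tokenizer)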

def preload_gguf_models():
    """Pre-download GGUF models."""
    from huggingface_hub import hf_hub_download

    # GGUF model for patient summaries - PRIMARY MODEL (is_active: true)
    gguf_models = [
        {
            "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
            "filename": "Phi-3-mini-4k-instruct-q4.gguf",
            "description": "Phi-3 Mini GGUF (Q4 quantized) - PRIMARY for patient summaries",
            "is_active": True  # This is the active model for patient summaries
        }
    ]

    for model_info in gguf_models:
        try:
            logger.info(f"📥 Downloading GGUF: {model_info['description']}")
            file_path = hf_hub_download(
                repo_id=model_info["repo_id"],
                filename=model_info["filename"],
                cache_dir=HF_HOME,
                local_dir=MODEL_CACHE_DIR,
                # Copy real files instead of symlinks so the model survives in
                # the image (deprecated and ignored in huggingface_hub >= 0.23,
                # where local_dir downloads always produce real files)
                local_dir_use_symlinks=False
            )
            logger.info(f"  ✅ Successfully cached GGUF model at: {file_path}")
        except Exception as e:
            logger.error(f"  ❌ Failed to download GGUF model: {e}")
            continue
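
# Illustrative only (not executed during the build): a minimal sketch of how the
# PRIMARY GGUF model could be loaded at runtime. llama-cpp-python is assumed to
# be the GGUF runtime here; the actual service may wire this up differently.
def _example_load_primary_gguf():
    from llama_cpp import Llama
    # hf_hub_download(local_dir=...) above leaves the file directly under MODEL_CACHE_DIR
    model_path = os.path.join(MODEL_CACHE_DIR, "Phi-3-mini-4k-instruct-q4.gguf")
    # n_ctx=4096 matches the model's 4k context window
    return Llama(model_path=model_path, n_ctx=4096)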

def preload_whisper_models():
    """Pre-download Whisper models."""
    try:
        logger.info("📥 Downloading Whisper tiny model...")
        import whisper
        model = whisper.load_model(
            "tiny",
            device="cpu",
            download_root=WHISPER_CACHE
        )
        logger.info("  ✅ Successfully cached Whisper tiny model")
        del model
    except Exception as e:
        logger.error(f"  ❌ Failed to download Whisper model: {e}")

def preload_spacy_models():
    """Warm the spaCy model (installed as a pip package, not downloaded here)."""
    try:
        logger.info("📥 Loading spaCy en_core_web_sm model...")
        import spacy
        # en_core_web_sm must already be installed (e.g. via pip) for this to succeed
        nlp = spacy.load("en_core_web_sm")
        logger.info("  ✅ Successfully loaded spaCy model")
        del nlp
    except Exception as e:
        logger.error(f"  ❌ Failed to load spaCy model: {e}")

def preload_nltk_data():
    """Pre-download NLTK data."""
    try:
        logger.info("📥 Downloading NLTK data...")
        import nltk
        nltk_data_dir = os.path.join(HF_HOME, 'nltk_data')
        Path(nltk_data_dir).mkdir(parents=True, exist_ok=True)
        # Download common NLTK datasets. Since this is a non-default location,
        # the runtime must point NLTK at it (see the sketch below this function).
        for package in ['punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger']:
            try:
                nltk.download(package, download_dir=nltk_data_dir, quiet=True)
                logger.info(f"  ✅ Downloaded NLTK package: {package}")
            except Exception:
                logger.warning(f"  ⚠️ Failed to download NLTK package: {package}")
    except Exception as e:
        logger.error(f"  ❌ Failed to download NLTK data: {e}")

def print_cache_summary():
    """Print summary of cached models."""
    logger.info("\n" + "=" * 80)
    logger.info("CACHE SUMMARY")
    logger.info("=" * 80)
    for cache_dir in [MODEL_CACHE_DIR, HF_HOME, TORCH_HOME, WHISPER_CACHE]:
        if os.path.exists(cache_dir):
            # Walk the tree and total up file count and size
            total_size = 0
            file_count = 0
            for dirpath, dirnames, filenames in os.walk(cache_dir):
                for f in filenames:
                    fp = os.path.join(dirpath, f)
                    if os.path.exists(fp):
                        total_size += os.path.getsize(fp)
                        file_count += 1
            size_mb = total_size / (1024 * 1024)
            size_gb = size_mb / 1024
            logger.info(f"\n📁 {cache_dir}")
            logger.info(f"  Files: {file_count}")
            logger.info(f"  Size: {size_mb:.2f} MB ({size_gb:.2f} GB)")
    logger.info("\n" + "=" * 80)

def main():
    """Main preload function."""
    logger.info("🚀 Starting model pre-download process...")
    logger.info(f"  HF_HOME: {HF_HOME}")
    logger.info(f"  MODEL_CACHE_DIR: {MODEL_CACHE_DIR}")
    logger.info(f"  TORCH_HOME: {TORCH_HOME}")
    logger.info(f"  WHISPER_CACHE: {WHISPER_CACHE}")
    logger.info("")

    # Import torch early to ensure CUDA detection works
    try:
        import torch
        logger.info(f"🔧 PyTorch version: {torch.__version__}")
        logger.info(f"🔧 CUDA available: {torch.cuda.is_available()}")
        if torch.cuda.is_available():
            logger.info(f"🔧 CUDA version: {torch.version.cuda}")
            logger.info(f"🔧 GPU: {torch.cuda.get_device_name(0)}")
    except Exception as e:
        logger.warning(f"⚠️ Could not detect PyTorch/CUDA info: {e}")
    logger.info("")

    # Preload all models
    steps = [
        ("Transformers Models", preload_transformers_models),
        ("GGUF Models", preload_gguf_models),
        ("Whisper Models", preload_whisper_models),
        ("spaCy Models", preload_spacy_models),
        ("NLTK Data", preload_nltk_data),
    ]
    for step_name, step_func in steps:
        logger.info(f"\n{'=' * 80}")
        logger.info(f"STEP: {step_name}")
        logger.info(f"{'=' * 80}\n")
        try:
            with BenchmarkContext(f"preload_{step_name.replace(' ', '_')}"):
                step_func()
        except Exception as e:
            logger.error(f"❌ Failed during {step_name}: {e}")
            import traceback
            traceback.print_exc()

    # Print summary
    print_cache_summary()
    logger.info("\n✅ Model pre-download completed!")


if __name__ == "__main__":
    main()