Spaces:

joytheslothh
/

MediRAG-API

Running

App Files Files Community

MediRAG-API / scripts /warmup.py

joytheslothh

deploy: clean build

b6f9fa8 1 day ago

raw

history blame contribute delete

1.95 kB

	"""
	scripts/warmup.py
	=================
	Sends one warm-up query to an already-running API instance so that the server
	loads its heavy ML models (FAISS, DeBERTa, SciSpaCy) into memory ahead of time,
	so the first real API request during the live demo does not pay the model-loading cost.

	Usage:
	python scripts/warmup.py
	"""

	import sys
	from pathlib import Path

	# Put the directory two levels above this file (the parent of the folder that
	# contains this script) at the front of sys.path. This must run before the
	# imports below, so the statement order is intentional.
	# NOTE(review): nothing imported in this file visibly needs it — presumably
	# kept so project packages are importable from scripts; confirm.
	sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

	import logging
	import time
	import requests

	# Root logging setup for this script: INFO and above, timestamped, with the
	# level and logger name in front of each message.
	logging.basicConfig(
	    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
	    level=logging.INFO,
	)
	# Script-wide logger; every message below is emitted under the name "warmup".
	logger = logging.getLogger("warmup")

	def main(api_url: str = "http://localhost:8000") -> None:
	    """Warm up a running API instance by sending it one throwaway query.

	    First checks ``GET {api_url}/health``; if that fails the script exits
	    immediately. Then sends a single ``POST {api_url}/query`` request so the
	    server initializes its models before real traffic arrives, and logs how
	    long the request took.

	    Args:
	        api_url: Base URL of the API. Defaults to the local development
	            server; a trailing slash is tolerated.

	    Raises:
	        SystemExit: With exit code 1 when the health check or the warm-up
	            request fails, so callers (shell scripts, CI) can detect it.
	    """
	    api_url = api_url.rstrip("/")

	    logger.info("Verifying API is running...")
	    try:
	        health = requests.get(f"{api_url}/health", timeout=5)
	        health.raise_for_status()
	    except requests.exceptions.RequestException as e:
	        logger.error(
	            "API is not running at %s. Please start it with 'uvicorn src.api.main:app' first.",
	            api_url,
	        )
	        # Surface the underlying cause (connection refused, timeout, HTTP 5xx, ...).
	        logger.error("Health check error: %s", e)
	        sys.exit(1)

	    # A reachable API that answers with a non-JSON body should not be
	    # reported as "not running"; fall back to the raw text instead.
	    try:
	        health_body = health.json()
	    except ValueError:
	        health_body = health.text
	    logger.info("API Health: %s", health_body)

	    logger.info("Sending WARMUP query to load DeBERTa, SciSpaCy, and FAISS into RAM... (This may take 15-25s)")
	    # perf_counter is monotonic, so the measured duration cannot be skewed by
	    # system clock adjustments.
	    t0 = time.perf_counter()

	    # We send a basic query to force all models to initialize
	    payload = {
	        "question": "What is the recommended dosage of Metformin for elderly Type 2 Diabetes patients?",
	        "top_k": 1,
	        "run_ragas": False,
	    }

	    try:
	        resp = requests.post(f"{api_url}/query", json=payload, timeout=60)
	        resp.raise_for_status()
	    except requests.exceptions.RequestException as e:
	        logger.error("Warmup failed: %s", e)
	        # HTTP errors carry the server's response; its body usually explains why.
	        if e.response is not None:
	            logger.error("Response: %s", e.response.text)
	        # Exit non-zero so a failed warm-up is not mistaken for success.
	        sys.exit(1)
	    else:
	        elapsed = time.perf_counter() - t0
	        logger.info("Warmup successful in %.1fs!", elapsed)
	        logger.info("All machine learning models are now cached in RAM.")
	        logger.info("The next API requests will be completely instantaneous.")

	# Run the warm-up only when this file is executed as a script
	# (python scripts/warmup.py), not when it is imported as a module.
	if __name__ == "__main__":
	    main()