"""
src/scripts/warmup.py
=====================
Pre-loads heavy ML models (FAISS, DeBERTa, SciSpaCy) into memory and
guarantees instantaneous responses for the first API request during the
live demo.

Usage:
    python scripts/warmup.py

Exit status:
    0  the health check and the warmup query both succeeded
    1  the API was unreachable/unhealthy, or the warmup query failed
"""
import sys
from pathlib import Path

# Make the directory two levels above this file importable before the
# remaining imports run (kept in the original position on purpose).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

import logging
import time

import requests

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger("warmup")


def main(api_url: str = "http://localhost:8000") -> None:
    """Check that the API is up, then send one query to warm it.

    Args:
        api_url: Base URL of the running API. Defaults to the local
            development server.

    Raises:
        SystemExit: With code 1 when the health check fails or when the
            warmup query fails, so calling scripts can detect the failure.
    """
    logger.info("Verifying API is running...")
    try:
        health = requests.get(f"{api_url}/health", timeout=5)
        health.raise_for_status()
        logger.info("API Health: %s", health.json())
    except requests.exceptions.RequestException as e:
        logger.error(
            f"API is not running at {api_url}. "
            "Please start it with 'uvicorn src.api.main:app' first."
        )
        # Surface the underlying cause (connection refused, timeout,
        # HTTP error status, ...) instead of silently discarding it.
        logger.error("Cause: %s", e)
        sys.exit(1)

    logger.info(
        "Sending WARMUP query to load DeBERTa, SciSpaCy, and FAISS into RAM... "
        "(This may take 15-25s)"
    )
    t0 = time.time()

    # We send a basic query to force all models to initialize
    payload = {
        "question": "What is the recommended dosage of Metformin for elderly Type 2 Diabetes patients?",
        "top_k": 1,
        "run_ragas": False,
    }

    try:
        resp = requests.post(f"{api_url}/query", json=payload, timeout=60)
        resp.raise_for_status()
        elapsed = time.time() - t0
        logger.info(f"Warmup successful in {elapsed:.1f}s!")
        logger.info("All machine learning models are now cached in RAM.")
        logger.info("The next API requests will be completely instantaneous.")
    except Exception as e:
        # Top-level boundary of the script: log whatever went wrong.
        logger.error(f"Warmup failed: {e}")
        # HTTP errors raised by requests carry the server response; show
        # its body because it usually contains the actual error detail.
        response = getattr(e, "response", None)
        if response is not None:
            logger.error(f"Response: {response.text}")
        # A failed warmup must not look like success to the caller.
        sys.exit(1)


if __name__ == "__main__":
    main()