import json
import time

import requests


def verify_vllm(
    url: str = "http://localhost:8001/v1/models",
    model_id: str = "iquest-coder-40b-loop",
    timeout_s: float = 300,
    poll_interval_s: float = 5,
) -> bool:
    """Poll a model-listing endpoint until the expected model is served.

    Repeatedly issues ``GET url`` and looks for ``model_id`` among the
    ``id`` fields of the ``data`` array in the JSON response. Progress is
    printed to stdout while waiting.

    Args:
        url: Endpoint to poll; its JSON body is expected to look like
            ``{"data": [{"id": ...}, ...]}``.
        model_id: Model identifier that must appear in the listing.
        timeout_s: Overall budget, in seconds, before giving up.
        poll_interval_s: Pause, in seconds, between attempts.

    Returns:
        True once the model is listed, False if ``timeout_s`` elapsed first.
    """
    print(f"Checking vLLM status at {url}...")
    # Monotonic clock: elapsed time must not jump if the wall clock is adjusted.
    start = time.monotonic()
    last_failure = None

    while True:
        try:
            response = requests.get(url, timeout=5)
            if response.status_code == 200:
                payload = response.json()  # parse once, reuse for the dump below
                served = [entry["id"] for entry in payload["data"]]
                if model_id in served:
                    print(f"\n✅ API is READY with model: {model_id}")
                    print(json.dumps(payload, indent=2))
                    return True
                last_failure = f"model {model_id!r} not listed yet"
            else:
                last_failure = f"HTTP {response.status_code}"
        except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
            # Expected while the server is still starting: connection refused,
            # request timeout, non-JSON body, or an unexpected payload shape.
            # Keep polling, but remember the cause so a timeout is diagnosable.
            last_failure = exc

        elapsed = time.monotonic() - start
        # Check the deadline *before* sleeping so we never wait an extra
        # interval only to report a timeout that had already happened.
        if elapsed > timeout_s:
            print("\n❌ Timeout waiting for vLLM to start.")
            if last_failure is not None:
                print(f"Last failure: {last_failure}")
            return False

        print(
            f"\rStill loading weights... ({int(elapsed)}s elapsed)",
            end="",
            flush=True,
        )
        time.sleep(poll_interval_s)


if __name__ == "__main__":
    # Propagate the outcome as the process exit status so shell callers
    # (e.g. `verify && run_tests`) can react to a failed startup.
    raise SystemExit(0 if verify_vllm() else 1)