| import requests | |
| import json | |
| import time | |
def verify_vllm(
    *,
    url: str = "http://localhost:8001/v1/models",
    model: str = "iquest-coder-40b-loop",
    timeout: float = 300.0,
    poll_interval: float = 5.0,
    request_timeout: float = 5.0,
) -> bool:
    """Poll a models endpoint until the expected model is listed.

    Repeatedly issues ``GET url`` and inspects the ``data[*].id`` entries of
    the JSON response. Progress and the final outcome are printed to stdout.

    Args:
        url: Models listing endpoint to poll.
        model: Model id that must appear in the response for the API to be
            considered ready.
        timeout: Overall time budget in seconds before giving up.
        poll_interval: Seconds to wait between attempts.
        request_timeout: Per-request timeout in seconds passed to
            ``requests.get``.

    Returns:
        True if the model was listed before the deadline, False on timeout.
    """
    print(f"Checking vLLM status at {url}...")
    # Monotonic clock: immune to wall-clock adjustments during a long wait.
    start_time = time.monotonic()
    while True:
        last_error = None
        try:
            response = requests.get(url, timeout=request_timeout)
            if response.status_code == 200:
                payload = response.json()  # parse once, reuse below
                models = [m["id"] for m in payload["data"]]
                if model in models:
                    print(f"\n✅ API is READY with model: {model}")
                    print(json.dumps(payload, indent=2))
                    return True
        except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
            # Expected while the server is still starting: connection refused
            # or timed out, a non-JSON body, or a payload without the
            # data/id fields. Anything else is a real bug and should surface.
            last_error = exc

        elapsed = time.monotonic() - start_time
        # Check the deadline BEFORE sleeping so we neither overshoot the
        # budget nor waste a final sleep when we are about to give up.
        if elapsed >= timeout:
            print("\n❌ Timeout waiting for vLLM to start.")
            if last_error is not None:
                print(f"Last error: {last_error!r}")
            return False

        print(
            f"\rStill loading weights... ({int(elapsed)}s elapsed)",
            end="",
            flush=True,
        )
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, timeout - elapsed))
# Script entry point: run the readiness check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    verify_vllm()