#!/usr/bin/env bash
# test-api.sh — smoke tests for the vLLM API
# Usage:
# bash test-api.sh # test localhost:8000
# bash test-api.sh 192.168.1.50 # test remote host
# bash test-api.sh 192.168.1.50 8080 # remote host, custom port
# Strict mode: abort on a failing command, on use of an unset variable, and
# on a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Target server, taken from the positional arguments:
#   $1 - host (default: localhost)
#   $2 - port (default: 8000)
HOST="${1:-localhost}"
PORT="${2:-8000}"
BASE_URL="http://${HOST}:${PORT}/v1"

# Colour codes start out empty and are only filled in when stdout is a
# terminal, so redirected or piped output carries no escape sequences.
GREEN=""
RED=""
YELLOW=""
NC=""
if [[ -t 1 ]]; then
  GREEN="\033[0;32m"
  RED="\033[0;31m"
  YELLOW="\033[0;33m"
  NC="\033[0m"
fi
# Status-line helpers. Each prints one line to stdout: a coloured tag followed
# by all of its arguments joined with spaces.
#
# Globals:   GREEN, RED, YELLOW, NC (read) - colour escapes, or empty strings
# Arguments: $@ - message text
# Outputs:   one line on stdout
#
# The colour variables hold backslash escapes, so they are expanded with %b.
# The message itself goes through %s so that any backslashes in it are
# printed verbatim instead of being interpreted as escape sequences.
ok() { printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$*"; }
fail() { printf '%b[FAIL]%b %s\n' "${RED}" "${NC}" "$*"; }
info() { printf '%b[INFO]%b %s\n' "${YELLOW}" "${NC}" "$*"; }
# Header banner naming the endpoint under test, followed by a blank line.
printf '%s\n' \
  "============================================================" \
  " vLLM API smoke tests — ${BASE_URL}" \
  "============================================================" \
  ""
# ---------------------------------------------------------------------------
# Test 1: Health endpoint
# ---------------------------------------------------------------------------
# Requests /health at the server root (BASE_URL with its trailing /v1
# stripped) and reports the HTTP status code.
info "Test 1: /health"
# curl exits non-zero when it cannot connect. Without the `|| true` guard the
# failed assignment would trip `set -e` and abort the script before the
# failure could be reported. curl writes 000 for %{http_code} when no response
# was received; the default below covers curl producing no output at all.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${BASE_URL%/v1}/health") || true
HTTP_CODE="${HTTP_CODE:-000}"
if [[ "${HTTP_CODE}" == "200" ]]; then
  ok "/health returned HTTP 200"
else
  fail "/health returned HTTP ${HTTP_CODE} (server may still be loading)"
fi
echo ""
# ---------------------------------------------------------------------------
# Test 2: Model list
# ---------------------------------------------------------------------------
# Fetches /v1/models, prints the response, and stores the first model's id in
# MODEL_ID for the tests that follow. Falls back to a hard-coded model id when
# the response cannot be parsed.
info "Test 2: GET /v1/models"
# Guard the assignment: a curl failure (e.g. connection refused) would
# otherwise trip `set -e` and abort before the fallback below can apply.
MODELS_RESPONSE=$(curl -s "${BASE_URL}/models") || true
# Pretty-print when the body is valid JSON; otherwise show it verbatim.
printf '%s\n' "${MODELS_RESPONSE}" | python3 -m json.tool 2>/dev/null \
  || printf '%s\n' "${MODELS_RESPONSE}"
# Any parse error (invalid JSON, missing keys, empty list) yields an empty id.
MODEL_ID=$(printf '%s\n' "${MODELS_RESPONSE}" | python3 -c \
  "import sys,json; data=json.load(sys.stdin); print(data['data'][0]['id'])" 2>/dev/null || echo "")
if [[ -n "${MODEL_ID}" ]]; then
  ok "Model loaded: ${MODEL_ID}"
else
  fail "Could not parse model list"
  MODEL_ID="GadflyII/Qwen3-Coder-Next-NVFP4"
fi
echo ""
# ---------------------------------------------------------------------------
# Test 3: Chat completion (reasoning off)
# ---------------------------------------------------------------------------
# Sends a short chat request with enable_thinking=false and prints the
# assistant message content. On failure the raw response is shown instead.
info "Test 3: POST /v1/chat/completions (reasoning off)"
# `|| true` keeps a curl failure from tripping `set -e`; the empty RESPONSE
# then falls through to the failure branch below.
RESPONSE=$(curl -s \
  -X POST "${BASE_URL}/chat/completions" \
  -H "Content-Type: application/json" \
  -d "{
    \"model\": \"${MODEL_ID}\",
    \"messages\": [{\"role\": \"user\", \"content\": \"Reply in one sentence: what is the capital of France?\"}],
    \"max_tokens\": 60,
    \"temperature\": 0.1,
    \"chat_template_kwargs\": {\"enable_thinking\": false}
  }") || true
# Any parse error (invalid JSON, missing keys) yields an empty CONTENT.
CONTENT=$(printf '%s\n' "${RESPONSE}" | python3 -c \
  "import sys,json; r=json.load(sys.stdin); print(r['choices'][0]['message']['content'])" 2>/dev/null || echo "")
if [[ -n "${CONTENT}" ]]; then
  ok "Chat completion works."
  echo " >> ${CONTENT}"
else
  fail "No response"
  # Show whatever came back, pretty-printed when it is valid JSON.
  printf '%s\n' "${RESPONSE}" | python3 -m json.tool 2>/dev/null \
    || printf '%s\n' "${RESPONSE}"
fi
echo ""
# ---------------------------------------------------------------------------
# Test 4: Chat completion (reasoning on)
# ---------------------------------------------------------------------------
# Sends a chat request with enable_thinking=true and prints the first 80
# characters of the repr of the reasoning text plus the final answer.
info "Test 4: POST /v1/chat/completions (reasoning on)"
# `|| true` keeps a curl failure from tripping `set -e`; the empty RESPONSE
# then falls through to the failure branch below.
RESPONSE=$(curl -s \
  -X POST "${BASE_URL}/chat/completions" \
  -H "Content-Type: application/json" \
  -d "{
    \"model\": \"${MODEL_ID}\",
    \"messages\": [{\"role\": \"user\", \"content\": \"What is 17 * 23? Show your work.\"}],
    \"max_tokens\": 1000,
    \"temperature\": 0.1,
    \"chat_template_kwargs\": {\"enable_thinking\": true}
  }") || true
# The reasoning text is read from 'reasoning_content', falling back to
# 'reasoning'. The Python snippet always prints its labels when parsing
# succeeds, so a non-empty CONTENT means the response was well-formed; it does
# not by itself show that any reasoning text was returned.
CONTENT=$(printf '%s\n' "${RESPONSE}" | python3 -c \
  "import sys,json; r=json.load(sys.stdin); m=r['choices'][0]['message']; thinking=m.get('reasoning_content') or m.get('reasoning',''); print('thinking:', repr(thinking)[:80], '\nanswer:', m.get('content',''))" \
  2>/dev/null || echo "")
if [[ -n "${CONTENT}" ]]; then
  ok "Reasoning mode works."
  printf '%s\n' "${CONTENT}"
else
  fail "No response from reasoning mode"
  # Show whatever came back, pretty-printed when it is valid JSON.
  printf '%s\n' "${RESPONSE}" | python3 -m json.tool 2>/dev/null \
    || printf '%s\n' "${RESPONSE}"
fi
echo ""
# ---------------------------------------------------------------------------
# Test 5: Code generation
# ---------------------------------------------------------------------------
# Asks the model for a small Python function and prints the first ten lines
# of the reply.
info "Test 5: Code generation"
# `|| true` keeps a curl failure from tripping `set -e`; the empty RESPONSE
# then falls through to the failure branch below.
RESPONSE=$(curl -s \
  -X POST "${BASE_URL}/chat/completions" \
  -H "Content-Type: application/json" \
  -d "{
    \"model\": \"${MODEL_ID}\",
    \"messages\": [{\"role\": \"user\", \"content\": \"Write a Python function that returns the nth Fibonacci number using memoization.\"}],
    \"max_tokens\": 300,
    \"temperature\": 0.1,
    \"chat_template_kwargs\": {\"enable_thinking\": false}
  }") || true
# Any parse error (invalid JSON, missing keys) yields an empty CODE.
CODE=$(printf '%s\n' "${RESPONSE}" | python3 -c \
  "import sys,json; r=json.load(sys.stdin); print(r['choices'][0]['message']['content'])" 2>/dev/null || echo "")
if [[ -n "${CODE}" ]]; then
  ok "Code generation works."
  # A here-string feeds head directly: there is no upstream writer that could
  # receive SIGPIPE and fail the pipeline under `pipefail` once head exits.
  head -n 10 <<<"${CODE}"
  echo " ..."
else
  fail "No code response"
  # Show whatever came back, pretty-printed when it is valid JSON.
  printf '%s\n' "${RESPONSE}" | python3 -m json.tool 2>/dev/null \
    || printf '%s\n' "${RESPONSE}"
fi
echo ""
# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
# Prints the connection settings to enter in Cline's "OpenAI Compatible"
# provider: the base URL and model id used by this run.
cat <<EOF
============================================================
 Cline configuration (OpenAI Compatible provider):

 Base URL : ${BASE_URL}
 Model ID : ${MODEL_ID}
 API Key : none (any non-empty string)
============================================================
EOF
|