File size: 5,494 Bytes
327350d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env bash
# test-api.sh — smoke tests for the vLLM API
# Usage:
#   bash test-api.sh                      # test localhost:8000
#   bash test-api.sh 192.168.1.50         # test remote host
#   bash test-api.sh 192.168.1.50 8080    # remote host, custom port

set -o errexit -o nounset -o pipefail

# Target server: the first argument is the host and the second the port.
# An absent or empty argument falls back to localhost / 8000.
HOST="localhost"
if [[ -n "${1:-}" ]]; then
    HOST="$1"
fi
PORT="8000"
if [[ -n "${2:-}" ]]; then
    PORT="$2"
fi
BASE_URL="http://${HOST}:${PORT}/v1"

# Colour codes are stored as literal backslash sequences (interpreted only when
# printed). They start out blank and are filled in only if stdout is a terminal.
GREEN=""
RED=""
YELLOW=""
NC=""
if [[ -t 1 ]]; then
    GREEN="\033[0;32m"
    RED="\033[0;31m"
    YELLOW="\033[0;33m"
    NC="\033[0m"
fi

# Status-line helpers: print a (possibly coloured) tag followed by all
# arguments joined via "$*".
# Only the colour variables are expanded with %b, because they hold backslash
# escape sequences. The message itself is printed with %s so that backslashes
# in it (for example a model id that is a filesystem path) appear verbatim
# instead of being interpreted as escapes.
ok()   { printf '%b[OK]%b  %s\n' "${GREEN}" "${NC}" "$*"; }
fail() { printf '%b[FAIL]%b %s\n' "${RED}" "${NC}" "$*"; }
info() { printf '%b[INFO]%b %s\n' "${YELLOW}" "${NC}" "$*"; }

echo "============================================================"
echo "  vLLM API smoke tests — ${BASE_URL}"
echo "============================================================"
echo ""

# ---------------------------------------------------------------------------
# Test 1: Health endpoint
# ---------------------------------------------------------------------------
# The /v1 suffix is stripped from BASE_URL so the request goes to
# http://<host>:<port>/health. Only the HTTP status code is captured.
info "Test 1: /health"
# curl exits non-zero when the server cannot be reached. Without the `||`
# guard the failed assignment would abort the whole script under errexit
# before the [FAIL] line below is printed. The captured value (curl writes
# 000 when no response was received) is kept, defaulting to 000 if empty.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${BASE_URL%/v1}/health") \
    || HTTP_CODE="${HTTP_CODE:-000}"
if [[ "${HTTP_CODE}" == "200" ]]; then
    ok "/health returned HTTP 200"
else
    fail "/health returned HTTP ${HTTP_CODE} (server may still be loading)"
fi
echo ""

# ---------------------------------------------------------------------------
# Test 2: Model list
# ---------------------------------------------------------------------------
# Fetches /v1/models, shows the response, and stores the id of the first
# listed model in MODEL_ID for the tests that follow.
info "Test 2: GET /v1/models"
# Guard the assignment: a curl failure (e.g. connection refused) would
# otherwise terminate the script under errexit, and the fallback model id
# below would never be reached.
MODELS_RESPONSE=$(curl -s "${BASE_URL}/models") || MODELS_RESPONSE="${MODELS_RESPONSE:-}"
# Pretty-print when the body is valid JSON; otherwise show it unmodified.
printf '%s\n' "${MODELS_RESPONSE}" | python3 -m json.tool 2>/dev/null \
    || printf '%s\n' "${MODELS_RESPONSE}"

# Any parse error (invalid JSON, missing keys, empty list) yields an empty id.
MODEL_ID=$(printf '%s\n' "${MODELS_RESPONSE}" | python3 -c \
    "import sys,json; data=json.load(sys.stdin); print(data['data'][0]['id'])" 2>/dev/null || echo "")

if [[ -n "${MODEL_ID}" ]]; then
    ok "Model loaded: ${MODEL_ID}"
else
    fail "Could not parse model list"
    # Fall back to a fixed model id so the remaining tests can still be attempted.
    MODEL_ID="GadflyII/Qwen3-Coder-Next-NVFP4"
fi
echo ""

# ---------------------------------------------------------------------------
# Test 3: Chat completion (reasoning off)
# ---------------------------------------------------------------------------
# Sends a short question with enable_thinking=false and passes when the
# first choice carries non-empty message content.
info "Test 3: POST /v1/chat/completions (reasoning off)"
# JSON-encode the model id so quotes or backslashes in it cannot break the
# request body; if python3 is unavailable, fall back to plain quoting.
MODEL_JSON=$(python3 -c 'import json,sys; print(json.dumps(sys.argv[1]))' "${MODEL_ID}" 2>/dev/null) \
    || MODEL_JSON="\"${MODEL_ID}\""
# The `||` guard keeps a curl failure from aborting the script under errexit;
# the empty response is then reported through the [FAIL] branch below.
RESPONSE=$(curl -s \
    -X POST "${BASE_URL}/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{
        \"model\": ${MODEL_JSON},
        \"messages\": [{\"role\": \"user\", \"content\": \"Reply in one sentence: what is the capital of France?\"}],
        \"max_tokens\": 60,
        \"temperature\": 0.1,
        \"chat_template_kwargs\": {\"enable_thinking\": false}
    }") || RESPONSE="${RESPONSE:-}"

# `or ''` maps a JSON null content to an empty string, so it is reported as a
# failure instead of being printed as the literal text "None".
CONTENT=$(printf '%s\n' "${RESPONSE}" | python3 -c \
    "import sys,json; r=json.load(sys.stdin); print(r['choices'][0]['message']['content'] or '')" 2>/dev/null || echo "")

if [[ -n "${CONTENT}" ]]; then
    ok "Chat completion works."
    echo "  >> ${CONTENT}"
else
    fail "No response"
    # Show whatever came back (pretty-printed when it is JSON) to aid debugging.
    printf '%s\n' "${RESPONSE}" | python3 -m json.tool 2>/dev/null \
        || printf '%s\n' "${RESPONSE}"
fi
echo ""

# ---------------------------------------------------------------------------
# Test 4: Chat completion (reasoning on)
# ---------------------------------------------------------------------------
# Sends an arithmetic question with enable_thinking=true and passes when the
# first choice carries reasoning text and/or answer content.
info "Test 4: POST /v1/chat/completions (reasoning on)"
# JSON-encode the model id so quotes or backslashes in it cannot break the
# request body; if python3 is unavailable, fall back to plain quoting.
MODEL_JSON=$(python3 -c 'import json,sys; print(json.dumps(sys.argv[1]))' "${MODEL_ID}" 2>/dev/null) \
    || MODEL_JSON="\"${MODEL_ID}\""
# The `||` guard keeps a curl failure from aborting the script under errexit;
# the empty response is then reported through the [FAIL] branch below.
RESPONSE=$(curl -s \
    -X POST "${BASE_URL}/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{
        \"model\": ${MODEL_JSON},
        \"messages\": [{\"role\": \"user\", \"content\": \"What is 17 * 23? Show your work.\"}],
        \"max_tokens\": 1000,
        \"temperature\": 0.1,
        \"chat_template_kwargs\": {\"enable_thinking\": true}
    }") || RESPONSE="${RESPONSE:-}"

# The reasoning text is read from `reasoning_content`, falling back to
# `reasoning`. Nothing is printed when both the reasoning and the answer are
# empty, so a response that parses but carries no text is reported as a failure.
CONTENT=$(printf '%s\n' "${RESPONSE}" | python3 -c '
import sys, json
m = json.load(sys.stdin)["choices"][0]["message"]
thinking = m.get("reasoning_content") or m.get("reasoning") or ""
answer = m.get("content") or ""
if thinking or answer:
    print("thinking:", repr(thinking)[:80], "\nanswer:", answer)
' 2>/dev/null || echo "")

if [[ -n "${CONTENT}" ]]; then
    ok "Reasoning mode works."
    echo "${CONTENT}"
else
    fail "No response from reasoning mode"
    # Show whatever came back (pretty-printed when it is JSON) to aid debugging.
    printf '%s\n' "${RESPONSE}" | python3 -m json.tool 2>/dev/null \
        || printf '%s\n' "${RESPONSE}"
fi
echo ""

# ---------------------------------------------------------------------------
# Test 5: Code generation
# ---------------------------------------------------------------------------
# Asks for a small Python function with enable_thinking=false and passes when
# the first choice carries non-empty message content; shows the first 10 lines.
info "Test 5: Code generation"
# JSON-encode the model id so quotes or backslashes in it cannot break the
# request body; if python3 is unavailable, fall back to plain quoting.
MODEL_JSON=$(python3 -c 'import json,sys; print(json.dumps(sys.argv[1]))' "${MODEL_ID}" 2>/dev/null) \
    || MODEL_JSON="\"${MODEL_ID}\""
# The `||` guard keeps a curl failure from aborting the script under errexit;
# the empty response is then reported through the [FAIL] branch below.
RESPONSE=$(curl -s \
    -X POST "${BASE_URL}/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{
        \"model\": ${MODEL_JSON},
        \"messages\": [{\"role\": \"user\", \"content\": \"Write a Python function that returns the nth Fibonacci number using memoization.\"}],
        \"max_tokens\": 300,
        \"temperature\": 0.1,
        \"chat_template_kwargs\": {\"enable_thinking\": false}
    }") || RESPONSE="${RESPONSE:-}"

# `or ''` maps a JSON null content to an empty string, so it is reported as a
# failure instead of being printed as the literal text "None".
CODE=$(printf '%s\n' "${RESPONSE}" | python3 -c \
    "import sys,json; r=json.load(sys.stdin); print(r['choices'][0]['message']['content'] or '')" 2>/dev/null || echo "")

if [[ -n "${CODE}" ]]; then
    ok "Code generation works."
    # A here-string instead of `echo | head`: no pipeline is involved, so head
    # closing its input early cannot produce a pipefail error.
    head -10 <<<"${CODE}"
    echo "  ..."
else
    fail "No code response"
    # Show whatever came back (pretty-printed when it is JSON) to aid debugging.
    printf '%s\n' "${RESPONSE}" | python3 -m json.tool 2>/dev/null \
        || printf '%s\n' "${RESPONSE}"
fi
echo ""

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
# Print the client connection settings in a single call; every argument
# becomes one output line.
printf '%s\n' \
    "============================================================" \
    "  Cline configuration (OpenAI Compatible provider):" \
    "" \
    "    Base URL : ${BASE_URL}" \
    "    Model ID : ${MODEL_ID}" \
    "    API Key  : none  (any non-empty string)" \
    "============================================================"