Commit
·
1055891
1
Parent(s):
b91238a
Fix tool calling: make tool-call-parser optional
Browse files
- Remove hardcoded --tool-call-parser json (may not be valid for Qwen)
- Make parser optional via TOOL_CALL_PARSER env var
- Use --enable-auto-tool-choice only (should work for most models)
- This fixes Koyeb deployment failure
- start-vllm.sh +23 -11
start-vllm.sh
CHANGED
|
@@ -22,19 +22,31 @@ echo "Model: $MODEL"
|
|
| 22 |
echo "Port: $PORT"
|
| 23 |
echo "Max Model Len: $MAX_MODEL_LEN"
|
| 24 |
echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
|
| 25 |
-
echo "Tool Calling: ENABLED (auto-tool-choice)"
|
| 26 |
echo "HF Token: ${HF_TOKEN:+set (${#HF_TOKEN} chars)}"
|
| 27 |
echo "=========================================="
|
| 28 |
|
| 29 |
# Execute vLLM server (use python3, not python)
|
| 30 |
# Enable tool calling support for OpenAI-compatible API
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
--
|
| 35 |
-
--
|
| 36 |
-
--
|
| 37 |
-
--
|
| 38 |
-
--
|
| 39 |
-
--
|
| 40 |
-
--
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
echo "Port: $PORT"
|
| 23 |
echo "Max Model Len: $MAX_MODEL_LEN"
|
| 24 |
echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
|
| 25 |
+
echo "Tool Calling: ENABLED (auto-tool-choice)"
|
| 26 |
echo "HF Token: ${HF_TOKEN:+set (${#HF_TOKEN} chars)}"
|
| 27 |
echo "=========================================="
|
| 28 |
|
| 29 |
# Execute vLLM server (use python3, not python)
|
| 30 |
# Enable tool calling support for OpenAI-compatible API
|
| 31 |
+
# Note: tool-call-parser may not be needed for all models
|
| 32 |
+
# If deployment fails, try removing --tool-call-parser or use model-specific parser
|
| 33 |
+
VLLM_ARGS=(
|
| 34 |
+
--model "$MODEL"
|
| 35 |
+
--trust-remote-code
|
| 36 |
+
--dtype "$DTYPE"
|
| 37 |
+
--max-model-len "$MAX_MODEL_LEN"
|
| 38 |
+
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
|
| 39 |
+
--port "$PORT"
|
| 40 |
+
--host 0.0.0.0
|
| 41 |
+
--enable-auto-tool-choice
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
# Add tool-call-parser only if specified (Qwen may not need it)
|
| 45 |
+
if [ -n "${TOOL_CALL_PARSER:-}" ]; then
|
| 46 |
+
VLLM_ARGS+=(--tool-call-parser "$TOOL_CALL_PARSER")
|
| 47 |
+
echo "Tool Call Parser: $TOOL_CALL_PARSER"
|
| 48 |
+
else
|
| 49 |
+
echo "Tool Call Parser: auto (default)"
|
| 50 |
+
fi
|
| 51 |
+
|
| 52 |
+
exec python3 -m vllm.entrypoints.openai.api_server "${VLLM_ARGS[@]}"
|