jeanbaptdzd committed on
Commit
1055891
·
1 Parent(s): b91238a

Fix tool calling: make tool-call-parser optional

Browse files

- Remove hardcoded --tool-call-parser json (may not be valid for Qwen)
- Make parser optional via TOOL_CALL_PARSER env var
- Use --enable-auto-tool-choice only (should work for most models)
- This fixes Koyeb deployment failure

Files changed (1) hide show
  1. start-vllm.sh +23 -11
start-vllm.sh CHANGED
@@ -22,19 +22,31 @@ echo "Model: $MODEL"
22
  echo "Port: $PORT"
23
  echo "Max Model Len: $MAX_MODEL_LEN"
24
  echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
25
- echo "Tool Calling: ENABLED (auto-tool-choice, json parser)"
26
  echo "HF Token: ${HF_TOKEN:+set (${#HF_TOKEN} chars)}"
27
  echo "=========================================="
28
 
29
  # Execute vLLM server (use python3, not python)
30
  # Enable tool calling support for OpenAI-compatible API
31
- exec python3 -m vllm.entrypoints.openai.api_server \
32
- --model "$MODEL" \
33
- --trust-remote-code \
34
- --dtype "$DTYPE" \
35
- --max-model-len "$MAX_MODEL_LEN" \
36
- --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
37
- --port "$PORT" \
38
- --host 0.0.0.0 \
39
- --enable-auto-tool-choice \
40
- --tool-call-parser json
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  echo "Port: $PORT"
23
  echo "Max Model Len: $MAX_MODEL_LEN"
24
  echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
25
+ echo "Tool Calling: ENABLED (auto-tool-choice)"
26
  echo "HF Token: ${HF_TOKEN:+set (${#HF_TOKEN} chars)}"
27
  echo "=========================================="
28
 
29
  # Execute vLLM server (use python3, not python)
30
  # Enable tool calling support for OpenAI-compatible API
31
+ # Note: tool-call-parser may not be needed for all models
32
+ # If deployment fails, try removing --tool-call-parser or use model-specific parser
33
+ VLLM_ARGS=(
34
+ --model "$MODEL"
35
+ --trust-remote-code
36
+ --dtype "$DTYPE"
37
+ --max-model-len "$MAX_MODEL_LEN"
38
+ --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
39
+ --port "$PORT"
40
+ --host 0.0.0.0
41
+ --enable-auto-tool-choice
42
+ )
43
+
44
+ # Add tool-call-parser only if specified (Qwen may not need it)
45
+ if [ -n "${TOOL_CALL_PARSER:-}" ]; then
46
+ VLLM_ARGS+=(--tool-call-parser "$TOOL_CALL_PARSER")
47
+ echo "Tool Call Parser: $TOOL_CALL_PARSER"
48
+ else
49
+ echo "Tool Call Parser: auto (default)"
50
+ fi
51
+
52
+ exec python3 -m vllm.entrypoints.openai.api_server "${VLLM_ARGS[@]}"