jeanbaptdzd committed on
Commit
b91238a
·
1 Parent(s): 9566d4f

Enable tool calling in vLLM server

Browse files

- Add --enable-auto-tool-choice flag
- Add --tool-call-parser json for JSON-based tool call parsing
- Update startup logging to show tool calling is enabled
- This enables OpenAI-compatible tool calling API support

Files changed (1) hide show
  1. start-vllm.sh +5 -1
start-vllm.sh CHANGED
@@ -22,10 +22,12 @@ echo "Model: $MODEL"
22
  echo "Port: $PORT"
23
  echo "Max Model Len: $MAX_MODEL_LEN"
24
  echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
 
25
  echo "HF Token: ${HF_TOKEN:+set (${#HF_TOKEN} chars)}"
26
  echo "=========================================="
27
 
28
  # Execute vLLM server (use python3, not python)
 
29
  exec python3 -m vllm.entrypoints.openai.api_server \
30
  --model "$MODEL" \
31
  --trust-remote-code \
@@ -33,4 +35,6 @@ exec python3 -m vllm.entrypoints.openai.api_server \
33
  --max-model-len "$MAX_MODEL_LEN" \
34
  --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
35
  --port "$PORT" \
36
- --host 0.0.0.0
 
 
 
22
  echo "Port: $PORT"
23
  echo "Max Model Len: $MAX_MODEL_LEN"
24
  echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
25
+ echo "Tool Calling: ENABLED (auto-tool-choice, json parser)"
26
  echo "HF Token: ${HF_TOKEN:+set (${#HF_TOKEN} chars)}"
27
  echo "=========================================="
28
 
29
  # Execute vLLM server (use python3, not python)
30
+ # Enable tool calling support for OpenAI-compatible API
31
  exec python3 -m vllm.entrypoints.openai.api_server \
32
  --model "$MODEL" \
33
  --trust-remote-code \
 
35
  --max-model-len "$MAX_MODEL_LEN" \
36
  --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
37
  --port "$PORT" \
38
+ --host 0.0.0.0 \
39
+ --enable-auto-tool-choice \
40
+ --tool-call-parser json