Commit
·
1055891
1
Parent(s):
b91238a
Fix tool calling: make tool-call-parser optional
Browse files
- Remove hardcoded --tool-call-parser json (may not be valid for Qwen)
- Make parser optional via TOOL_CALL_PARSER env var
- Use --enable-auto-tool-choice only (should work for most models)
- This fixes Koyeb deployment failure
- start-vllm.sh +23 -11
start-vllm.sh
CHANGED
|
@@ -22,19 +22,31 @@ echo "Model: $MODEL"
|
|
| 22 |
echo "Port: $PORT"
|
| 23 |
echo "Max Model Len: $MAX_MODEL_LEN"
|
| 24 |
echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
|
| 25 |
-
echo "Tool Calling: ENABLED (auto-tool-choice)"
|
| 26 |
echo "HF Token: ${HF_TOKEN:+set (${#HF_TOKEN} chars)}"
|
| 27 |
echo "=========================================="
|
| 28 |
|
| 29 |
# Execute vLLM server (use python3, not python)
|
| 30 |
# Enable tool calling support for OpenAI-compatible API
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
--
|
| 35 |
-
--
|
| 36 |
-
--
|
| 37 |
-
--
|
| 38 |
-
--
|
| 39 |
-
--
|
| 40 |
-
--
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
echo "Port: $PORT"
|
| 23 |
echo "Max Model Len: $MAX_MODEL_LEN"
|
| 24 |
echo "GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
|
| 25 |
+
echo "Tool Calling: ENABLED (auto-tool-choice)"
|
| 26 |
echo "HF Token: ${HF_TOKEN:+set (${#HF_TOKEN} chars)}"
|
| 27 |
echo "=========================================="
|
| 28 |
|
| 29 |
# Execute vLLM server (use python3, not python)
|
| 30 |
# Enable tool calling support for OpenAI-compatible API
|
| 31 |
+
# Note: tool-call-parser may not be needed for all models
|
| 32 |
+
# If deployment fails, try removing --tool-call-parser or use model-specific parser
|
| 33 |
+
VLLM_ARGS=(
|
| 34 |
+
--model "$MODEL"
|
| 35 |
+
--trust-remote-code
|
| 36 |
+
--dtype "$DTYPE"
|
| 37 |
+
--max-model-len "$MAX_MODEL_LEN"
|
| 38 |
+
--gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
|
| 39 |
+
--port "$PORT"
|
| 40 |
+
--host 0.0.0.0
|
| 41 |
+
--enable-auto-tool-choice
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
# Add tool-call-parser only if specified (Qwen may not need it)
|
| 45 |
+
if [ -n "${TOOL_CALL_PARSER:-}" ]; then
|
| 46 |
+
VLLM_ARGS+=(--tool-call-parser "$TOOL_CALL_PARSER")
|
| 47 |
+
echo "Tool Call Parser: $TOOL_CALL_PARSER"
|
| 48 |
+
else
|
| 49 |
+
echo "Tool Call Parser: auto (default)"
|
| 50 |
+
fi
|
| 51 |
+
|
| 52 |
+
exec python3 -m vllm.entrypoints.openai.api_server "${VLLM_ARGS[@]}"
|