#!/usr/bin/env bash
set -euo pipefail
# ==========================================================================
# Terminator-Qwen3-8B — Server Launcher
#
# Starts the vLLM server with the Terminator model.
# Run setup.sh first to create the model directory.
#
# Configuration (set as environment variables before running):
#
# VLLM_GPU_UTIL GPU memory fraction to use (default: 0.90)
#
# VLLM_MAX_MODEL_LEN Maximum context length in tokens (default: server picks)
#
# VLLM_PORT Server port (default: 8000)
#
# VLLM_ENFORCE_EAGER Set to 1 to disable CUDA graphs (default: 0)
# Use if you encounter CUDA graph compilation errors.
# NOTE: enabling this (VLLM_ENFORCE_EAGER=1) results in slower responses, since CUDA graphs are disabled
#
# VLLM_API_KEY Require this API key from clients (default: none)
#
# Usage:
# ./start_server.sh
# or, to override the default environment variables:
# VLLM_GPU_UTIL=0.70 VLLM_MAX_MODEL_LEN=8192 ./start_server.sh
# ==========================================================================
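#
# Example (a hedged sketch, assuming the default port, the default served
# model name set below, and no VLLM_API_KEY): once the server is up, you can
# sanity-check it against vLLM's OpenAI-compatible API:
#
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Terminator-Qwen3-8B",
#          "messages": [{"role": "user", "content": "Hello"}]}'
#
# If VLLM_API_KEY is set, also pass: -H "Authorization: Bearer $VLLM_API_KEY"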
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MODEL_DIR="${SCRIPT_DIR}/model_dir"
if [ ! -d "$MODEL_DIR" ]; then
echo "ERROR: Model directory not found at: $MODEL_DIR" >&2
echo "" >&2
echo "Run setup first:" >&2
echo " ./setup.sh" >&2
echo "" >&2
echo "Or manually:" >&2
echo " python setup_model_dir.py" >&2
exit 1
fi
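# Environment consumed by the Python launcher below (an assumption about
# serve.py: VLLM_SERVED_NAME presumably maps to vLLM's --served-model-name
# and REASONING_PARSER to --reasoning-parser; the qwen3 parser extracts the
# model's <think> reasoning content from responses).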
export VLLM_MODEL="$MODEL_DIR"
export REASONING_PARSER="${REASONING_PARSER:-qwen3}"
export VLLM_SERVED_NAME="${VLLM_SERVED_NAME:-Terminator-Qwen3-8B}"
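# exec replaces this shell with the Python process, so signals (e.g. Ctrl-C)
# and the exit status reach the vLLM server directly.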
exec python "$SCRIPT_DIR/serve.py"