#!/usr/bin/env bash
set -euo pipefail
# ==========================================================================
# Terminator-Qwen3-14B — Server Launcher
#
# Starts the vLLM server with the Terminator model.
# Run setup.sh first to create the model directory.
#
# Configuration (set as environment variables before running):
#
#   VLLM_GPU_UTIL        GPU memory fraction to use (default: 0.90)
#
#   VLLM_MAX_MODEL_LEN   Maximum context length in tokens (default: server picks)
#
#   VLLM_PORT            Server port (default: 8000)
#
#   VLLM_ENFORCE_EAGER   Set to 1 to disable CUDA graphs (default: 0).
#                        Use if you encounter CUDA graph compilation errors.
#                        NOTE: VLLM_ENFORCE_EAGER=1 will result in slower responses.
#
#   VLLM_API_KEY         Require this API key from clients (default: none)
#
# Usage:
#   ./start_server.sh
# or to manually override default environment variables:
#   VLLM_GPU_UTIL=0.70 VLLM_MAX_MODEL_LEN=8192 ./start_server.sh
# ==========================================================================
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MODEL_DIR="${SCRIPT_DIR}/model_dir"

if [ ! -d "$MODEL_DIR" ]; then
  echo "ERROR: Model directory not found at: $MODEL_DIR" >&2
  echo "" >&2
  echo "Run setup first:" >&2
  echo "  ./setup.sh" >&2
  echo "" >&2
  echo "Or manually:" >&2
  echo "  python setup_model_dir.py" >&2
  exit 1
fi
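
# Export the model path and serving defaults for serve.py to pick up.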
export VLLM_MODEL="$MODEL_DIR"
export REASONING_PARSER="${REASONING_PARSER:-qwen3}"
export VLLM_SERVED_NAME="${VLLM_SERVED_NAME:-Terminator-Qwen3-14B}"

exec python "$SCRIPT_DIR/serve.py"
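
# Smoke test (run from another shell once the server is up). This assumes
# serve.py exposes vLLM's OpenAI-compatible API on VLLM_PORT (default 8000):
#
#   curl http://localhost:8000/v1/models
#
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Terminator-Qwen3-14B", "messages": [{"role": "user", "content": "Hello"}]}'
#
# If VLLM_API_KEY is set, add: -H "Authorization: Bearer $VLLM_API_KEY"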