#!/usr/bin/env bash
set -euo pipefail
# ==========================================================================
# Terminator-Qwen3-8B — Server Launcher
#
# Starts the vLLM server with the Terminator model.
# Run setup.sh first to create the model directory.
#
# Configuration (set as environment variables before running):
#
#   VLLM_GPU_UTIL        GPU memory fraction to use (default: 0.90)
#
#   VLLM_MAX_MODEL_LEN   Maximum context length in tokens (default: server picks)
#
#   VLLM_PORT            Server port (default: 8000)
#
#   VLLM_ENFORCE_EAGER   Set to 1 to disable CUDA graphs (default: 0)
#                        Use if you encounter CUDA graph compilation errors.
#                        NOTE: VLLM_ENFORCE_EAGER=1 will result in slower responses
#
#   VLLM_API_KEY         Require this API key from clients (default: none)
#
# Usage:
#   ./start_server.sh
# or to manually override default environment variables:
#   VLLM_GPU_UTIL=0.70 VLLM_MAX_MODEL_LEN=8192 ./start_server.sh
# ==========================================================================

# Resolve the directory containing this script so it can be run from anywhere.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MODEL_DIR="${SCRIPT_DIR}/model_dir"
readonly SCRIPT_DIR MODEL_DIR

# Fail early, with remediation steps, if setup has not been run yet.
if [[ ! -d "$MODEL_DIR" ]]; then
  echo "ERROR: Model directory not found at: $MODEL_DIR" >&2
  echo "" >&2
  echo "Run setup first:" >&2
  echo "  ./setup.sh" >&2
  echo "" >&2
  echo "Or manually:" >&2
  echo "  python setup_model_dir.py" >&2
  exit 1
fi

# serve.py reads its configuration from the environment; the VLLM_GPU_UTIL /
# VLLM_MAX_MODEL_LEN / VLLM_PORT / VLLM_ENFORCE_EAGER / VLLM_API_KEY variables
# documented above are presumably consumed there as well — they pass through
# the environment untouched.
export VLLM_MODEL="$MODEL_DIR"
export REASONING_PARSER="${REASONING_PARSER:-qwen3}"
export VLLM_SERVED_NAME="${VLLM_SERVED_NAME:-Terminator-Qwen3-8B}"

# Replace this shell with the server process so signals (SIGINT/SIGTERM)
# reach the server directly.
exec python "$SCRIPT_DIR/serve.py"