File size: 1,631 Bytes
88b9f90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa7a04b
88b9f90
 
 
 
 
aa7a04b
88b9f90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa7a04b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env bash
set -euo pipefail

# ==========================================================================
# Terminator-Qwen3-8B — Server Launcher
#
# Starts the vLLM server with the Terminator model.
# Run setup.sh first to create the model directory.
#
# Configuration (set as environment variables before running):
#
#   VLLM_GPU_UTIL       GPU memory fraction to use (default: 0.90)
#
#   VLLM_MAX_MODEL_LEN  Maximum context length in tokens (default: server picks)
#
#   VLLM_PORT           Server port (default: 8000)
#
#   VLLM_ENFORCE_EAGER  Set to 1 to disable CUDA graphs (default: 0)
#                       Use if you encounter CUDA graph compilation errors.
#                       NOTE: VLLM_ENFORCE_EAGER=1 will result in slower responses
#
#   VLLM_API_KEY        Require this API key from clients (default: none)
#
# Usage:
#   ./start_server.sh
#   or to manually override default environment variables:
#   VLLM_GPU_UTIL=0.70 VLLM_MAX_MODEL_LEN=8192 ./start_server.sh
# ==========================================================================

# Resolve the directory this script lives in so it works from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
MODEL_DIR="${SCRIPT_DIR}/model_dir"
readonly MODEL_DIR

# The model directory is produced by setup.sh; fail early with guidance
# rather than letting vLLM error out with a less obvious message.
if [[ ! -d "$MODEL_DIR" ]]; then
    {
        echo "ERROR: Model directory not found at: $MODEL_DIR"
        echo ""
        echo "Run setup first:"
        echo "  ./setup.sh"
        echo ""
        echo "Or manually:"
        echo "  python setup_model_dir.py"
    } >&2
    exit 1
fi

# Guard the exec target too: a missing serve.py would otherwise surface as a
# Python "can't open file" traceback instead of a clear launcher error.
if [[ ! -f "${SCRIPT_DIR}/serve.py" ]]; then
    echo "ERROR: serve.py not found at: ${SCRIPT_DIR}/serve.py" >&2
    exit 1
fi

# serve.py reads its configuration (VLLM_GPU_UTIL, VLLM_PORT, ...) from the
# environment; here we only pin the model path and sensible defaults.
export VLLM_MODEL="$MODEL_DIR"
export REASONING_PARSER="${REASONING_PARSER:-qwen3}"
export VLLM_SERVED_NAME="${VLLM_SERVED_NAME:-Terminator-Qwen3-8B}"

# Replace this shell with the server process so signals reach it directly.
exec python "$SCRIPT_DIR/serve.py"