File size: 2,276 Bytes
01ee73c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
# Launch script: optionally download the model, then start the Gradio app
# (the app itself drives vLLM.LLM internally).

set -euo pipefail

# Every setting below is overridable via an environment variable of the
# same name; the ${VAR:-default} form falls back when unset OR empty.
# Quoting is applied uniformly (the original quoted only the first three).
MODEL_REPO="${MODEL_REPO:-stepfun-ai/Step-Audio-2-mini-Think}"
MODEL_DIR="${MODEL_DIR:-/root/models/Step-Audio-2-mini-Think}"
PRELOAD_MODEL="${PRELOAD_MODEL:-1}"
GRADIO_PORT="${GRADIO_PORT:-7860}"
HOST="${HOST:-0.0.0.0}"
TENSOR_PARALLEL_SIZE="${TENSOR_PARALLEL_SIZE:-4}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
TOKENIZER_MODE="${TOKENIZER_MODE:-step_audio_2}"
SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-step-audio-2-mini-think}"

# Startup banner: echo the effective configuration for log inspection.
echo "=========================================="
echo "Step Audio 2 Gradio 启动脚本"
echo "MODEL_REPO: $MODEL_REPO"
echo "MODEL_DIR : $MODEL_DIR"
echo "PRELOAD_MODEL: $PRELOAD_MODEL"
echo "HOST/PORT: $HOST:$GRADIO_PORT"
echo "TP: $TENSOR_PARALLEL_SIZE | MAX_LEN: $MAX_MODEL_LEN"
echo "=========================================="

# Download the model snapshot into $MODEL_DIR.
# Globals:   MODEL_REPO (read), MODEL_DIR (read)
# Outputs:   progress messages to stdout
# Returns:   non-zero if the download fails (propagated under set -e)
download_model() {
    if command -v huggingface-cli &> /dev/null; then
        echo "[Download] 使用 huggingface-cli"
        # NOTE(review): --local-dir-use-symlinks is deprecated and ignored
        # by recent huggingface_hub releases; kept for older CLI versions.
        huggingface-cli download "$MODEL_REPO" --local-dir "$MODEL_DIR" --local-dir-use-symlinks False
    else
        echo "[Download] 使用 python + huggingface_hub"
        # Pass values through the environment rather than interpolating
        # shell variables into Python source: a repo/dir containing a
        # quote would previously break the generated code (injection).
        MODEL_REPO="$MODEL_REPO" MODEL_DIR="$MODEL_DIR" python3 - <<'PY'
import os
from huggingface_hub import snapshot_download

repo = os.environ["MODEL_REPO"]
dest = os.environ["MODEL_DIR"]
print(f"开始下载: {repo}")
snapshot_download(repo_id=repo, local_dir=dest, local_dir_use_symlinks=False)
print("下载完成")
PY
    fi
}

# Resolve MODEL_PATH: either ensure a local copy exists (preload) or hand
# the bare repo name straight to the loader.
case "$PRELOAD_MODEL" in
    1)
        # A model counts as "ready" only when the directory exists AND
        # contains config.json (a partial download lacks it).
        if [[ -d "$MODEL_DIR" && -f "$MODEL_DIR/config.json" ]]; then
            echo "检测到本地模型: $MODEL_DIR"
        else
            echo "模型未就绪,开始下载..."
            mkdir -p "$MODEL_DIR"
            download_model
        fi
        export MODEL_PATH="$MODEL_DIR"
        ;;
    *)
        echo "跳过预下载,直接使用仓库名称加载"
        export MODEL_PATH="${MODEL_PATH:-$MODEL_REPO}"
        ;;
esac

echo "模型路径: ${MODEL_PATH}"
echo "启动 Gradio..."

# 'exec' replaces this wrapper shell with the Python process, so signals
# (SIGTERM/SIGINT from Docker, systemd, Ctrl-C) reach the app directly
# instead of stopping at an intermediate bash parent. No code runs after
# this line.
exec python app.py \
    --host "$HOST" \
    --port "$GRADIO_PORT" \
    --model "$MODEL_PATH" \
    --tensor-parallel-size "$TENSOR_PARALLEL_SIZE" \
    --max-model-len "$MAX_MODEL_LEN" \
    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
    --tokenizer-mode "$TOKENIZER_MODE" \
    --served-model-name "$SERVED_MODEL_NAME"