File size: 3,440 Bytes
01ee73c
 
 
 
 
740536a
01ee73c
 
 
 
 
 
 
 
 
51164b8
 
 
 
01ee73c
 
 
 
 
 
 
51164b8
01ee73c
 
 
51164b8
 
 
 
 
 
 
 
 
 
 
 
01ee73c
 
 
 
 
 
51164b8
 
 
01ee73c
 
 
51164b8
01ee73c
 
 
 
 
 
 
51164b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70d0425
 
 
 
 
ae1236e
 
 
 
 
 
 
70d0425
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/bin/bash
# Startup script: optionally download the model, then launch the Gradio app
# (which drives vLLM.LLM internally).
set -euo pipefail

# Model source and local destination (all values overridable via environment).
MODEL_REPO="${MODEL_REPO:-stepfun-ai/Step-Audio-2-mini-Think}"
MODEL_DIR="${MODEL_DIR:-/tmp/app/models/Step-Audio-2-mini-Think}"
PRELOAD_MODEL="${PRELOAD_MODEL:-1}"

# Serving endpoint.
HOST="${HOST:-0.0.0.0}"
GRADIO_PORT="${GRADIO_PORT:-7860}"

# vLLM engine parameters.
TENSOR_PARALLEL_SIZE="${TENSOR_PARALLEL_SIZE:-4}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
TOKENIZER_MODE="${TOKENIZER_MODE:-step_audio_2}"
SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-step-audio-2-mini-think}"

# Point the Hugging Face / XDG caches at a writable location.
export HF_HOME="${HF_HOME:-/tmp/hf_cache}"
export XDG_CACHE_HOME="${XDG_CACHE_HOME:-/tmp/hf_cache}"

echo "=========================================="
echo "Step Audio 2 Gradio 启动脚本"
echo "MODEL_REPO: $MODEL_REPO"
echo "MODEL_DIR : $MODEL_DIR"
echo "PRELOAD_MODEL: $PRELOAD_MODEL"
echo "HOST/PORT: $HOST:$GRADIO_PORT"
echo "TP: $TENSOR_PARALLEL_SIZE | MAX_LEN: $MAX_MODEL_LEN"
echo "缓存目录: $HF_HOME"
echo "=========================================="

#######################################
# Download the model repository into MODEL_DIR.
# Globals:   MODEL_REPO (read), MODEL_DIR (read), HF_HOME (read)
# Outputs:   progress messages to stdout, errors to stderr
# Returns:   non-zero if no Hugging Face CLI is available or the download fails
#######################################
download_model() {
    # Ensure destination and cache directories exist (both may live under /tmp).
    mkdir -p "$MODEL_DIR" "$HF_HOME"

    echo "[Download] 开始下载模型到: $MODEL_DIR"
    echo "[Download] 缓存目录: $HF_HOME"

    # Prefer the newer 'hf' CLI; fall back to the legacy 'huggingface-cli'.
    if command -v hf >/dev/null 2>&1; then
        echo "[Download] 使用 hf download 命令"
        hf download "$MODEL_REPO" --local-dir "$MODEL_DIR" --cache-dir "$HF_HOME"
    elif command -v huggingface-cli >/dev/null 2>&1; then
        echo "[Download] 使用 huggingface-cli"
        huggingface-cli download "$MODEL_REPO" --local-dir "$MODEL_DIR" --local-dir-use-symlinks False
    else
        # Bug fix: previously the if/elif fell through silently when neither
        # CLI was installed, returning 0 and letting the script continue
        # without a model on disk.
        echo "[Download] 错误: 未找到 hf 或 huggingface-cli,无法下载模型" >&2
        return 1
    fi
}

# Decide where the model is loaded from. With PRELOAD_MODEL=1 the model is
# fetched to (and served from) MODEL_DIR; otherwise vLLM resolves the repo
# name itself at load time.
if [[ "$PRELOAD_MODEL" != "1" ]]; then
    echo "跳过预下载,直接使用仓库名称加载"
    export MODEL_PATH="${MODEL_PATH:-$MODEL_REPO}"
else
    # The model is considered ready only when its key files are all present
    # (a file test on $MODEL_DIR/config.json implies the directory exists).
    if [[ -f "$MODEL_DIR/config.json" && -f "$MODEL_DIR/model.safetensors.index.json" ]]; then
        echo "检测到本地模型: $MODEL_DIR"
        echo "模型文件检查通过"
    else
        echo "模型未就绪或文件不完整,开始下载..."
        download_model
    fi
    export MODEL_PATH="$MODEL_DIR"
fi

# Post-download sanity check: list the first few entries of MODEL_DIR and
# confirm the key model files exist. Purely informational — missing files are
# reported with ✗ but do not abort the launch.
if [[ "$PRELOAD_MODEL" == "1" ]]; then
    echo "=== 模型文件验证 ==="
    # '|| true' is required: under 'set -o pipefail' ls dies with SIGPIPE
    # (status 141) when head closes the pipe after 10 lines, which would
    # otherwise kill the whole script via 'set -e'. It also keeps this
    # diagnostic block non-fatal if MODEL_DIR is missing.
    ls -la "$MODEL_DIR" | head -10 || true
    if [[ -f "$MODEL_DIR/config.json" ]]; then
        echo "✓ config.json 存在"
    else
        echo "✗ config.json 缺失"
    fi
    if [[ -f "$MODEL_DIR/model.safetensors.index.json" ]]; then
        echo "✓ model.safetensors.index.json 存在"
    else
        echo "✗ model.safetensors.index.json 缺失"
    fi
    echo "==================="
fi

echo "模型路径: ${MODEL_PATH}"
echo "启动 Gradio..."

# Interpreter is overridable (PYTHON_BIN); fail fast with a clear message if
# it is not installed in the image.
PYTHON_BIN="${PYTHON_BIN:-python3}"
if ! command -v "$PYTHON_BIN" >/dev/null 2>&1; then
    # Fix: this diagnostic previously went to stdout; errors belong on stderr.
    echo "未找到 Python 解释器(当前设置: $PYTHON_BIN),请确认镜像已安装 python3。" >&2
    exit 1
fi

# 'exec' replaces the shell so the Python server becomes the foreground
# process and receives signals (e.g. SIGTERM on container shutdown) directly,
# instead of being orphaned behind bash.
exec "$PYTHON_BIN" app.py \
    --host "$HOST" \
    --port "$GRADIO_PORT" \
    --model "$MODEL_PATH" \
    --tensor-parallel-size "$TENSOR_PARALLEL_SIZE" \
    --max-model-len "$MAX_MODEL_LEN" \
    --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
    --tokenizer-mode "$TOKENIZER_MODE" \
    --served-model-name "$SERVED_MODEL_NAME"