File size: 1,595 Bytes
4853067
0359836
 
f3703d9
 
 
 
 
 
e00dc72
f3703d9
0359836
30dea22
23c6786
d4f638d
0359836
 
d4f638d
03657b8
f3703d9
 
 
 
 
 
 
0359836
e00dc72
0359836
f3703d9
 
 
 
 
d4f638d
f3703d9
0359836
f3703d9
b488c6c
0359836
23c6786
 
f3703d9
 
 
 
 
 
 
03657b8
f3703d9
 
 
 
0359836
5a8fb72
b57cd88
4853067
 
03657b8
e00dc72
1dd8a15
 
4853067
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash
# Entry point: fetch the Qwen2.5-Omni-7B GGUF weights (main model + multimodal
# projector) from Hugging Face, then launch llama.cpp's HTTP server on :7860.
#
# Strict mode: -e exit on unhandled error, -u error on unset variables,
# -o pipefail so a failing pipeline stage is not masked by a later one.
set -euo pipefail

echo "================================================"
echo "πŸš€ Axon - Qwen2.5-Omni-7B Multimodal Server"
echo "================================================"
echo ""
echo "πŸ“‹ Capabilities: Text | Images | Audio"
echo "πŸ”§ Quantization: Q8_0 (near-lossless)"
echo "⚑ Optimizations: Flash Attention, Continuous Batching"
echo ""

# Destination for both GGUF files downloaded below.
mkdir -p /app/models/qwen2.5-omni-7b
#######################################
# Download one file from a Hugging Face repo unless it is already present.
# Arguments:
#   $1 - repo id (e.g. "ggml-org/Qwen2.5-Omni-7B-GGUF")
#   $2 - filename inside the repo
#   $3 - absolute destination path
# Outputs: progress messages to stdout, download errors to stderr
# Returns: 0 on success or if the file already exists; non-zero on failure
#######################################
download_file () {
    local repo_id=$1
    local filename=$2
    local dest_path=$3

    if [ -f "$dest_path" ]; then
        # Quote the expansion: an unquoted path with spaces would make
        # basename treat the second word as a suffix to strip.
        echo "βœ… Already exists: $(basename "$dest_path")"
        return 0
    fi

    echo "⬇️  Downloading $filename ..."
    # Pass values via the environment and a quoted heredoc instead of
    # interpolating shell variables into Python source: a path or filename
    # containing a quote would otherwise break (or inject into) the script.
    HF_REPO_ID="$repo_id" HF_FILENAME="$filename" HF_DEST="$dest_path" \
    python3 - <<'PY'
import os
import shutil
import sys

from huggingface_hub import hf_hub_download

repo_id = os.environ['HF_REPO_ID']
filename = os.environ['HF_FILENAME']
dest = os.environ['HF_DEST']

try:
    path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        cache_dir='/app/.cache'
    )
    shutil.copy(path, dest)
    print(f'βœ… Downloaded: {filename}')
except Exception as e:
    # Diagnostics go to stderr; non-zero exit propagates to the shell.
    print(f'❌ Error downloading {filename}: {e}', file=sys.stderr)
    sys.exit(1)
PY
}

download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
    "Qwen2.5-Omni-7B-Q8_0.gguf" \
    "/app/models/qwen2.5-omni-7b/Qwen2.5-Omni-7B-Q8_0.gguf"

download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
    "mmproj-Qwen2.5-Omni-7B-Q8_0.gguf" \
    "/app/models/qwen2.5-omni-7b/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf"

echo ""
echo "πŸš€ Starting llama.cpp Server"
echo "🌐 Server will be available at http://0.0.0.0:7860"
echo ""

# exec replaces this shell with llama-server so signals (e.g. container
# SIGTERM) reach the server process directly.
# NOTE(review): llama-server commonly takes explicit -m <model> and
# --mmproj <file> for multimodal use; confirm this build supports
# --models-dir and picks up both GGUF files downloaded above.
exec /usr/local/bin/llama-server \
    --models-dir /app/models \
    --host 0.0.0.0 \
    --port 7860 \
    -c 8192 \
    -t 4 \
    -fa on \
    -cb \
    --n-gpu-layers 0
    # -c 8192        : context size
    # -t 4           : CPU threads
    # -fa on         : Flash Attention (per banner above)
    # -cb            : continuous batching (per banner above)
    # --n-gpu-layers 0 : CPU-only inference, no layers offloaded to GPU