#!/bin/bash
set -e
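# set -e: abort on the first failing command, so a broken download stops startup early.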
echo "================================================"
echo "πŸš€ Axon - Qwen2.5-Omni-7B Multimodal Server"
echo "================================================"
echo ""
echo "πŸ“‹ Capabilities: Text | Images | Audio"
echo "πŸ”§ Quantization: Q8_0 (near-lossless)"
echo "⚑ Optimizations: Flash Attention, Continuous Batching"
echo ""
MODEL_DIR=/app/models/qwen2.5-omni-7b
mkdir -p "$MODEL_DIR"
# Download a file from the Hugging Face Hub unless it is already on disk.
download_file () {
  local REPO_ID="$1"
  local FILENAME="$2"
  local DEST_PATH="$3"
  if [ -f "$DEST_PATH" ]; then
    echo "✅ Already exists: $(basename "$DEST_PATH")"
    return 0
  fi
  echo "⬇️ Downloading $FILENAME ..."
  python3 -c "
from huggingface_hub import hf_hub_download
import shutil, sys
try:
    path = hf_hub_download(
        repo_id='$REPO_ID',
        filename='$FILENAME',
        cache_dir='/app/.cache'
    )
    shutil.copy(path, '$DEST_PATH')
    print('✅ Downloaded: $FILENAME')
except Exception as e:
    print(f'❌ Error downloading $FILENAME: {e}')
    sys.exit(1)
"
}
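# Fetch the main model weights plus the multimodal projector (mmproj);
# llama.cpp needs both to handle image and audio input alongside text.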
download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
"Qwen2.5-Omni-7B-Q8_0.gguf" \
"/app/models/qwen2.5-omni-7b/Qwen2.5-Omni-7B-Q8_0.gguf"
download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
"mmproj-Qwen2.5-Omni-7B-Q8_0.gguf" \
"/app/models/qwen2.5-omni-7b/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf"
echo ""
echo "πŸš€ Starting llama.cpp Server"
echo "🌐 Server will be available at http://0.0.0.0:7860"
echo ""
# Pass the weights and the multimodal projector explicitly (-m / --mmproj);
# -c sets the context window, -t the CPU threads, and --n-gpu-layers 0
# keeps inference fully on the CPU.
exec /usr/local/bin/llama-server \
  -m "$MODEL_DIR/Qwen2.5-Omni-7B-Q8_0.gguf" \
  --mmproj "$MODEL_DIR/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf" \
  --host 0.0.0.0 \
  --port 7860 \
  -c 8192 \
  -t 4 \
  -fa on \
  -cb \
  --n-gpu-layers 0