Spaces:
Sleeping
Sleeping
# Fail fast: abort on command errors, unset variables, and pipeline failures.
set -euo pipefail

echo "================================================"
echo "🚀 Axon - Qwen2.5-Omni-7B Multimodal Server"
echo "================================================"
echo ""
echo "🎯 Capabilities: Text | Images | Audio"
echo "🧠 Quantization: Q8_0 (near-lossless)"
echo "⚡ Optimizations: Flash Attention, Continuous Batching"
echo ""

# Ensure the model directory exists before downloads write into it.
mkdir -p /app/models/qwen2.5-omni-7b
#######################################
# Download one file from a HuggingFace repo unless it already exists locally.
# Arguments: $1 - HF repo id, $2 - filename in repo, $3 - destination path
# Outputs:   progress messages to stdout
# Returns:   0 on success / already present; non-zero on download failure
#######################################
download_file() {
  local repo_id=$1
  local filename=$2
  local dest_path=$3

  # Skip the (large) download if a previous run already fetched this file.
  if [[ -f "$dest_path" ]]; then
    echo "✅ Already exists: $(basename -- "$dest_path")"
    return 0
  fi

  echo "⬇️  Downloading $filename ..."
  # Pass parameters through the environment instead of interpolating shell
  # variables into the Python source: interpolation breaks on quotes or
  # backslashes in paths and is a code-injection hazard.
  HF_REPO_ID="$repo_id" HF_FILENAME="$filename" HF_DEST_PATH="$dest_path" \
    python3 - <<'PY'
import os, shutil, sys
from huggingface_hub import hf_hub_download

repo_id = os.environ["HF_REPO_ID"]
filename = os.environ["HF_FILENAME"]
dest_path = os.environ["HF_DEST_PATH"]
try:
    path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        cache_dir='/app/.cache',
    )
    shutil.copy(path, dest_path)
    print(f'✅ Downloaded: {filename}')
except Exception as e:
    print(f'❌ Error downloading {filename}: {e}')
    sys.exit(1)
PY
}
# Fetch the main model weights and the multimodal projector (both Q8_0)
# from the same repo into the same local directory.
MODEL_REPO="ggml-org/Qwen2.5-Omni-7B-GGUF"
MODEL_DIR="/app/models/qwen2.5-omni-7b"
for gguf in "Qwen2.5-Omni-7B-Q8_0.gguf" "mmproj-Qwen2.5-Omni-7B-Q8_0.gguf"; do
  download_file "$MODEL_REPO" "$gguf" "$MODEL_DIR/$gguf"
done
echo ""
echo "🚀 Starting llama.cpp Server"
echo "🌐 Server will be available at http://0.0.0.0:7860"
echo ""

# Point llama-server at the downloaded model and multimodal projector
# explicitly: without --mmproj the server cannot handle image/audio input,
# so the capabilities advertised above would silently not work.
# NOTE(review): original used `--models-dir /app/models`, which never binds
# the mmproj file — confirm against the deployed llama.cpp build's CLI.
exec /usr/local/bin/llama-server \
  -m /app/models/qwen2.5-omni-7b/Qwen2.5-Omni-7B-Q8_0.gguf \
  --mmproj /app/models/qwen2.5-omni-7b/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf \
  --host 0.0.0.0 \
  --port 7860 \
  -c 8192 \
  -t 4 \
  -fa on \
  -cb \
  --n-gpu-layers 0