Update start.sh
start.sh CHANGED
@@ -7,12 +7,9 @@ echo "================================================"
 echo ""
 echo "🌟 Capabilities: Text | Images | Audio"
 echo "🧠 Quantization: Q8_0 (near-lossless)"
+echo "⚡ Optimizations: Flash Attention, Continuous Batching"
 echo ""
-echo "------------------------------------------------"
-echo "🛠️ Setting up Environment"
-echo "------------------------------------------------"
 
-# Create model directory structure
 mkdir -p /app/models/qwen2.5-omni-7b
 
 download_file () {
@@ -28,9 +25,7 @@ download_file () {
     echo "⬇️ Downloading $FILENAME ..."
     python3 -c "
 from huggingface_hub import hf_hub_download
-import shutil
-import sys
-
+import shutil, sys
 try:
     path = hf_hub_download(
         repo_id='$REPO_ID',
@@ -45,11 +40,6 @@ except Exception as e:
 "
 }
 
-echo ""
-echo "📦 Downloading Qwen2.5-Omni-7B Q8_0..."
-echo ""
-
-# Download model and multimodal projector
 download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
     "Qwen2.5-Omni-7B-Q8_0.gguf" \
     "/app/models/qwen2.5-omni-7b/Qwen2.5-Omni-7B-Q8_0.gguf"
@@ -59,10 +49,7 @@ download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
     "/app/models/qwen2.5-omni-7b/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf"
 
 echo ""
-echo "------------------------------------------------"
 echo "🚀 Starting llama.cpp Server"
-echo "------------------------------------------------"
-echo ""
 echo "🌐 Server will be available at http://0.0.0.0:7860"
 echo ""
 
@@ -71,4 +58,7 @@ exec /usr/local/bin/llama-server \
     --host 0.0.0.0 \
     --port 7860 \
     -c 8192 \
+    -t 4 \
+    --flash-attn \
+    --cont-batching \
     --n-gpu-layers 0
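
The download_file helper appears only in fragments above. Here is a minimal standalone Python sketch of the logic those fragments imply, assuming the helper receives a repo id, a filename, and a destination path as visible in the diff (the copy-from-cache step and the error handling are assumptions, not the Space's exact code):

import shutil, sys
from huggingface_hub import hf_hub_download

def download_file(repo_id: str, filename: str, dest: str) -> None:
    try:
        # hf_hub_download fetches the file into the local HF cache
        # and returns the cached path.
        path = hf_hub_download(repo_id=repo_id, filename=filename)
        # Assumed step: copy from the cache into the directory that
        # start.sh creates for llama-server.
        shutil.copy(path, dest)
        print(f"Saved {filename} to {dest}")
    except Exception as e:
        print(f"Download failed: {e}", file=sys.stderr)
        sys.exit(1)

download_file(
    "ggml-org/Qwen2.5-Omni-7B-GGUF",
    "Qwen2.5-Omni-7B-Q8_0.gguf",
    "/app/models/qwen2.5-omni-7b/Qwen2.5-Omni-7B-Q8_0.gguf",
)

Folding import shutil and import sys onto one line, as this revision does, just shortens the inline python3 -c script without changing behavior.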
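
The three new flags match the banner's new Optimizations line: -t 4 caps generation at four CPU threads (the whole model runs on CPU here, given --n-gpu-layers 0), --flash-attn enables flash attention, and --cont-batching enables continuous batching so concurrent requests share the decode loop. Once the server is up, a quick smoke test can hit llama-server's health and OpenAI-compatible chat endpoints; the prompt and token limit below are illustrative:

import json
from urllib.request import Request, urlopen

BASE = "http://127.0.0.1:7860"  # server binds 0.0.0.0:7860; reach it via localhost

# /health returns 200 once the model has finished loading.
with urlopen(f"{BASE}/health") as resp:
    print("health:", resp.status)

# Chat completion against the OpenAI-compatible endpoint.
payload = {
    "messages": [{"role": "user", "content": "Describe yourself in one sentence."}],
    "max_tokens": 32,
}
req = Request(
    f"{BASE}/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])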