AIencoder committed on
Commit
e00dc72
·
verified ·
1 Parent(s): c9ab7da

Update start.sh

Browse files
Files changed (1) hide show
  1. start.sh +5 -15
start.sh CHANGED
@@ -7,12 +7,9 @@ echo "================================================"
7
  echo ""
8
  echo "πŸ“‹ Capabilities: Text | Images | Audio"
9
  echo "πŸ”§ Quantization: Q8_0 (near-lossless)"
 
10
  echo ""
11
- echo "------------------------------------------------"
12
- echo "πŸ› οΈ Setting up Environment"
13
- echo "------------------------------------------------"
14
 
15
- # Create model directory structure
16
  mkdir -p /app/models/qwen2.5-omni-7b
17
 
18
  download_file () {
@@ -28,9 +25,7 @@ download_file () {
28
  echo "⬇️ Downloading $FILENAME ..."
29
  python3 -c "
30
  from huggingface_hub import hf_hub_download
31
- import shutil
32
- import sys
33
-
34
  try:
35
  path = hf_hub_download(
36
  repo_id='$REPO_ID',
@@ -45,11 +40,6 @@ except Exception as e:
45
  "
46
  }
47
 
48
- echo ""
49
- echo "πŸ“¦ Downloading Qwen2.5-Omni-7B Q8_0..."
50
- echo ""
51
-
52
- # Download model and multimodal projector
53
  download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
54
  "Qwen2.5-Omni-7B-Q8_0.gguf" \
55
  "/app/models/qwen2.5-omni-7b/Qwen2.5-Omni-7B-Q8_0.gguf"
@@ -59,10 +49,7 @@ download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
59
  "/app/models/qwen2.5-omni-7b/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf"
60
 
61
  echo ""
62
- echo "------------------------------------------------"
63
  echo "πŸš€ Starting llama.cpp Server"
64
- echo "------------------------------------------------"
65
- echo ""
66
  echo "🌐 Server will be available at http://0.0.0.0:7860"
67
  echo ""
68
 
@@ -71,4 +58,7 @@ exec /usr/local/bin/llama-server \
71
  --host 0.0.0.0 \
72
  --port 7860 \
73
  -c 8192 \
 
 
 
74
  --n-gpu-layers 0
 
7
  echo ""
8
  echo "πŸ“‹ Capabilities: Text | Images | Audio"
9
  echo "πŸ”§ Quantization: Q8_0 (near-lossless)"
10
+ echo "⚑ Optimizations: Flash Attention, Continuous Batching"
11
  echo ""
 
 
 
12
 
 
13
  mkdir -p /app/models/qwen2.5-omni-7b
14
 
15
  download_file () {
 
25
  echo "⬇️ Downloading $FILENAME ..."
26
  python3 -c "
27
  from huggingface_hub import hf_hub_download
28
+ import shutil, sys
 
 
29
  try:
30
  path = hf_hub_download(
31
  repo_id='$REPO_ID',
 
40
  "
41
  }
42
 
 
 
 
 
 
43
  download_file "ggml-org/Qwen2.5-Omni-7B-GGUF" \
44
  "Qwen2.5-Omni-7B-Q8_0.gguf" \
45
  "/app/models/qwen2.5-omni-7b/Qwen2.5-Omni-7B-Q8_0.gguf"
 
49
  "/app/models/qwen2.5-omni-7b/mmproj-Qwen2.5-Omni-7B-Q8_0.gguf"
50
 
51
  echo ""
 
52
  echo "πŸš€ Starting llama.cpp Server"
 
 
53
  echo "🌐 Server will be available at http://0.0.0.0:7860"
54
  echo ""
55
 
 
58
  --host 0.0.0.0 \
59
  --port 7860 \
60
  -c 8192 \
61
+ -t 4 \
62
+ --flash-attn \
63
+ --cont-batching \
64
  --n-gpu-layers 0