ubergarm committed on
Commit
93cabe1
·
1 Parent(s): 1acbccf

give a very rough full offload example

Browse files
Files changed (1) hide show
  1. README.md +21 -4
README.md CHANGED
@@ -230,10 +230,27 @@ $ cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON
230
  $ cmake --build build --config Release -j $(nproc)
231
 
232
  # Hybrid CPU and Single GPU
233
- echo TODO or look at my Step-3.5-Flash for rough example for now
234
-
235
- # Hybrid CPU and Multi GPU
236
- echo TODO or look at my Step-3.5-Flash for rough example for now
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
  # CPU-Only
239
  numactl -N "$SOCKET" -m "$SOCKET" \
 
230
  $ cmake --build build --config Release -j $(nproc)
231
 
232
  # Hybrid CPU and Single GPU
233
+ echo TODO or look at my Step-3.5-Flash for rough example for now using --cpu-moe or --n-cpu-moe XX etc
234
+
235
+ # Hybrid CPU and Multi GPU 128k context full offload in 96GB VRAM
236
+ model=MiniMax-M2.5-IQ2_KS-00001-of-00003.gguf
237
+ _GLIBCXX_REGEX_STATE_LIMIT=1000000 \
238
+ CUDA_VISIBLE_DEVICES="0,1" \
239
+ ./build/bin/llama-sweep-bench \
240
+ --model "$model" \
241
+ --alias ubergarm/MiniMax-M2.5 \
242
+ -khad -ctk q6_0 -ctv q8_0 \
243
+ -c 131072 \
244
+ -ger \
245
+ -sm graph \
246
+ -ngl 99 \
247
+ -ub 4096 -b 4096 \
248
+ -ts 47,48 \
249
+ --threads 1 \
250
+ --host 127.0.0.1 \
251
+ --port 8080 \
252
+ --no-mmap \
253
+ --jinja
254
 
255
  # CPU-Only
256
  numactl -N "$SOCKET" -m "$SOCKET" \