Update README.md
Browse files
README.md
CHANGED
|
@@ -57,11 +57,11 @@ snapshot_download(
|
|
| 57 |
allow_patterns = ["*UD-IQ1_S*"], # Select quant type UD-IQ1_S for 1.58bit
|
| 58 |
)
|
| 59 |
```
|
| 60 |
-
5. Example with
|
| 61 |
```bash
|
| 62 |
./llama.cpp/llama-cli \
|
| 63 |
--model DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf \
|
| 64 |
-
--cache-type-k
|
| 65 |
--threads 12 -no-cnv --prio 2 \
|
| 66 |
--temp 0.6 \
|
| 67 |
--ctx-size 8192 \
|
|
@@ -83,7 +83,7 @@ snapshot_download(
|
|
| 83 |
```bash
|
| 84 |
./llama.cpp/llama-cli \
|
| 85 |
--model DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf \
|
| 86 |
-
--cache-type-k
|
| 87 |
--threads 12 -no-cnv --prio 2 \
|
| 88 |
--n-gpu-layers 7 \
|
| 89 |
--temp 0.6 \
|
|
|
|
| 57 |
allow_patterns = ["*UD-IQ1_S*"], # Select quant type UD-IQ1_S for 1.58bit
|
| 58 |
)
|
| 59 |
```
|
| 60 |
+
5. Example with the K cache quantized to Q8_0. **Note: `-no-cnv` disables auto conversation mode.**
|
| 61 |
```bash
|
| 62 |
./llama.cpp/llama-cli \
|
| 63 |
--model DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf \
|
| 64 |
+
--cache-type-k q8_0 \
|
| 65 |
--threads 12 -no-cnv --prio 2 \
|
| 66 |
--temp 0.6 \
|
| 67 |
--ctx-size 8192 \
|
|
|
|
| 83 |
```bash
|
| 84 |
./llama.cpp/llama-cli \
|
| 85 |
--model DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf \
|
| 86 |
+
--cache-type-k q8_0 \
|
| 87 |
--threads 12 -no-cnv --prio 2 \
|
| 88 |
--n-gpu-layers 7 \
|
| 89 |
--temp 0.6 \
|