presets : add 32GB
Browse files- preset-128GB.ini +0 -32
- preset-32GB.ini +99 -0
preset-128GB.ini
CHANGED
|
@@ -86,38 +86,6 @@ top-p = 0.95
|
|
| 86 |
top-k = 0
|
| 87 |
temp = 1.0
|
| 88 |
|
| 89 |
-
[qwen3.6-35b-a3b-hf]
|
| 90 |
-
hf = ggml-org/Qwen3.6-35B-A3B-GGUF:Q8_0
|
| 91 |
-
ctx-size = 262144
|
| 92 |
-
batch-size = 2048
|
| 93 |
-
ubatch-size = 2048
|
| 94 |
-
alias = qwen3.6-35b-a3b-hf
|
| 95 |
-
chat-template-kwargs = {"enable_thinking": false}
|
| 96 |
-
|
| 97 |
-
[qwen3.6-27b-hf]
|
| 98 |
-
hf = ggml-org/Qwen3.6-27B-GGUF:Q8_0
|
| 99 |
-
ctx-size = 262144
|
| 100 |
-
batch-size = 2048
|
| 101 |
-
ubatch-size = 2048
|
| 102 |
-
alias = qwen3.6-27b-hf
|
| 103 |
-
chat-template-kwargs = {"enable_thinking": false}
|
| 104 |
-
|
| 105 |
-
[qwen3.6-35b-a3b-hf-think]
|
| 106 |
-
hf = ggml-org/Qwen3.6-35B-A3B-GGUF:Q8_0
|
| 107 |
-
ctx-size = 262144
|
| 108 |
-
batch-size = 2048
|
| 109 |
-
ubatch-size = 2048
|
| 110 |
-
alias = qwen3.6-35b-a3b-hf-think
|
| 111 |
-
chat-template-kwargs = {"preserve_thinking": true}
|
| 112 |
-
|
| 113 |
-
[qwen3.6-27b-hf-think]
|
| 114 |
-
hf = ggml-org/Qwen3.6-27B-GGUF:Q8_0
|
| 115 |
-
ctx-size = 262144
|
| 116 |
-
batch-size = 2048
|
| 117 |
-
ubatch-size = 2048
|
| 118 |
-
alias = qwen3.6-27b-hf-think
|
| 119 |
-
chat-template-kwargs = {"preserve_thinking": true}
|
| 120 |
-
|
| 121 |
[qwen3.6-35b-a3b-hf-think-spec]
|
| 122 |
hf = ggml-org/Qwen3.6-35B-A3B-GGUF:Q8_0
|
| 123 |
spec-draft-hf = ggml-org/Qwen3.5-0.8B-GGUF:Q8_0
|
|
|
|
| 86 |
top-k = 0
|
| 87 |
temp = 1.0
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
[qwen3.6-35b-a3b-hf-think-spec]
|
| 90 |
hf = ggml-org/Qwen3.6-35B-A3B-GGUF:Q8_0
|
| 91 |
spec-draft-hf = ggml-org/Qwen3.5-0.8B-GGUF:Q8_0
|
preset-32GB.ini
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# uses:
|
| 2 |
+
# - llama.cpp webui
|
| 3 |
+
# - llama.vim
|
| 4 |
+
# - pi
|
| 5 |
+
#
|
| 6 |
+
# target machines with 32GB of VRAM or unified memory:
|
| 7 |
+
# - Apple MacBook
|
| 8 |
+
# - NVIDIA RTX 5090
|
| 9 |
+
|
| 10 |
+
[*]
|
| 11 |
+
ctx-size = 0
|
| 12 |
+
mmap = 1
|
| 13 |
+
kv-unified = 1
|
| 14 |
+
parallel = 4
|
| 15 |
+
spec-default = 1
|
| 16 |
+
|
| 17 |
+
[gpt-oss-20b-hf]
|
| 18 |
+
hf = ggml-org/gpt-oss-20b-GGUF
|
| 19 |
+
ctx-size = 262144
|
| 20 |
+
batch-size = 2048
|
| 21 |
+
ubatch-size = 2048
|
| 22 |
+
top-p = 1.0
|
| 23 |
+
top-k = 0
|
| 24 |
+
min-p = 0.01
|
| 25 |
+
temp = 1.0
|
| 26 |
+
chat-template-kwargs = {"reasoning_effort": "high"}
|
| 27 |
+
|
| 28 |
+
[gpt-oss-20b-hf-low]
|
| 29 |
+
hf = ggml-org/gpt-oss-20b-GGUF
|
| 30 |
+
ctx-size = 262144
|
| 31 |
+
batch-size = 2048
|
| 32 |
+
ubatch-size = 2048
|
| 33 |
+
top-p = 1.0
|
| 34 |
+
top-k = 0
|
| 35 |
+
min-p = 0.01
|
| 36 |
+
temp = 1.0
|
| 37 |
+
chat-template-kwargs = {"reasoning_effort": "low"}
|
| 38 |
+
|
| 39 |
+
[gpt-oss-20b-hf-medium]
|
| 40 |
+
hf = ggml-org/gpt-oss-20b-GGUF
|
| 41 |
+
ctx-size = 262144
|
| 42 |
+
batch-size = 2048
|
| 43 |
+
ubatch-size = 2048
|
| 44 |
+
top-p = 1.0
|
| 45 |
+
top-k = 0
|
| 46 |
+
min-p = 0.01
|
| 47 |
+
temp = 1.0
|
| 48 |
+
chat-template-kwargs = {"reasoning_effort": "medium"}
|
| 49 |
+
|
| 50 |
+
[gpt-oss-20b-hf-high]
|
| 51 |
+
hf = ggml-org/gpt-oss-20b-GGUF
|
| 52 |
+
ctx-size = 262144
|
| 53 |
+
batch-size = 2048
|
| 54 |
+
ubatch-size = 2048
|
| 55 |
+
top-p = 1.0
|
| 56 |
+
top-k = 0
|
| 57 |
+
min-p = 0.01
|
| 58 |
+
temp = 1.0
|
| 59 |
+
chat-template-kwargs = {"reasoning_effort": "high"}
|
| 60 |
+
|
| 61 |
+
[qwen2.5-coder-7b-hf-fim]
|
| 62 |
+
hf = ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF
|
| 63 |
+
ctx-size = 65536
|
| 64 |
+
cache-reuse = 256
|
| 65 |
+
batch-size = 1024
|
| 66 |
+
ubatch-size = 1024
|
| 67 |
+
alias = fim,infill
|
| 68 |
+
|
| 69 |
+
# uses a dedicated, pure 4-bit (Q4_0) MTP drafter (see spec-type / spec-draft-model below)
|
| 70 |
+
[qwen3.6-35b-a3b-hf-think-mtp]
|
| 71 |
+
hf = unsloth/Qwen3.6-35B-A3B-MTP-GGUF:Q4_K_M
|
| 72 |
+
spec-type = draft-mtp
|
| 73 |
+
spec-draft-n-max = 3
|
| 74 |
+
spec-draft-hf = ggml-org/Qwen3.6-35B-A3B-GGUF
|
| 75 |
+
spec-draft-model = mtp-Qwen3.6-35B-A3B-Q4_0.gguf
|
| 76 |
+
temp = 0.6
|
| 77 |
+
min-p = 0.05
|
| 78 |
+
ctx-size = 262144
|
| 79 |
+
batch-size = 2048
|
| 80 |
+
ubatch-size = 512
|
| 81 |
+
alias = qwen3.6-35b-a3b-hf-think-mtp,inst
|
| 82 |
+
chat-template-kwargs = {"preserve_thinking": true}
|
| 83 |
+
|
| 84 |
+
# uses a dedicated, pure 4-bit (Q4_0) MTP drafter (see spec-type / spec-draft-model below)
|
| 85 |
+
[qwen3.6-27b-hf-think-mtp]
|
| 86 |
+
hf = unsloth/Qwen3.6-27B-MTP-GGUF:Q4_K_M
|
| 87 |
+
spec-type = draft-mtp
|
| 88 |
+
spec-draft-n-max = 3
|
| 89 |
+
spec-draft-hf = ggml-org/Qwen3.6-27B-GGUF
|
| 90 |
+
spec-draft-model = mtp-Qwen3.6-27B-Q4_0.gguf
|
| 91 |
+
temp = 0.6
|
| 92 |
+
min-p = 0.05
|
| 93 |
+
ctx-size = 262144
|
| 94 |
+
batch-size = 2048
|
| 95 |
+
ubatch-size = 2048
|
| 96 |
+
alias = qwen3.6-27b-hf-think-mtp,pi
|
| 97 |
+
reasoning-budget = 4096
|
| 98 |
+
reasoning-budget-message = ... I am thinking for too long and cannot make a decision. I will now explain the problem to the user and ask them for advice.
|
| 99 |
+
chat-template-kwargs = {"preserve_thinking": true}
|