Spaces:
Running
Running
feat: Add two non-reasoning model variants
- Added ERNIE-4.5-21B-A3B-PT (21B, non-thinking) from unsloth
- Standard inference settings (temp 0.7, no thinking mode)
- Added Qwen3-30B-A3B-Instruct (30B, non-thinking) from unsloth
- Standard instruct settings (temp 0.6, no thinking mode)
- Both use TQ1_0 quantization
- Models ordered by parameter count (21B and 30B sections now have variants)
app.py
CHANGED
|
@@ -192,15 +192,29 @@ AVAILABLE_MODELS = {
|
|
| 192 |
},
|
| 193 |
"granite4_tiny_q3": {
|
| 194 |
"name": "Granite 4.0 Tiny 7B (128K Context)",
|
| 195 |
-
"repo_id": "
|
| 196 |
"filename": "*Q3_K_M.gguf",
|
| 197 |
"max_context": 131072,
|
| 198 |
-
"default_temperature": 0.
|
| 199 |
"supports_toggle": False,
|
| 200 |
"inference_settings": {
|
| 201 |
-
"temperature": 0.
|
| 202 |
-
"top_p":
|
| 203 |
-
"top_k":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
"repeat_penalty": 1.1,
|
| 205 |
},
|
| 206 |
},
|
|
@@ -246,6 +260,20 @@ AVAILABLE_MODELS = {
|
|
| 246 |
"repeat_penalty": 1.0,
|
| 247 |
},
|
| 248 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
}
|
| 250 |
|
| 251 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
|
|
| 192 |
},
|
| 193 |
"granite4_tiny_q3": {
|
| 194 |
"name": "Granite 4.0 Tiny 7B (128K Context)",
|
| 195 |
+
"repo_id": "ibm-research/granite-4.0-Tiny-7B-Instruct-GGUF",
|
| 196 |
"filename": "*Q3_K_M.gguf",
|
| 197 |
"max_context": 131072,
|
| 198 |
+
"default_temperature": 0.7,
|
| 199 |
"supports_toggle": False,
|
| 200 |
"inference_settings": {
|
| 201 |
+
"temperature": 0.7,
|
| 202 |
+
"top_p": 0.9,
|
| 203 |
+
"top_k": 40,
|
| 204 |
+
"repeat_penalty": 1.1,
|
| 205 |
+
},
|
| 206 |
+
},
|
| 207 |
+
"ernie_21b_pt_q1": {
|
| 208 |
+
"name": "ERNIE-4.5 21B PT (128K Context)",
|
| 209 |
+
"repo_id": "unsloth/ERNIE-4.5-21B-A3B-PT-GGUF",
|
| 210 |
+
"filename": "*TQ1_0.gguf",
|
| 211 |
+
"max_context": 131072,
|
| 212 |
+
"default_temperature": 0.7,
|
| 213 |
+
"supports_toggle": False,
|
| 214 |
+
"inference_settings": {
|
| 215 |
+
"temperature": 0.7,
|
| 216 |
+
"top_p": 0.9,
|
| 217 |
+
"top_k": 40,
|
| 218 |
"repeat_penalty": 1.1,
|
| 219 |
},
|
| 220 |
},
|
|
|
|
| 260 |
"repeat_penalty": 1.0,
|
| 261 |
},
|
| 262 |
},
|
| 263 |
+
"qwen3_30b_instruct_q1": {
|
| 264 |
+
"name": "Qwen3 30B Instruct (256K Context)",
|
| 265 |
+
"repo_id": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
|
| 266 |
+
"filename": "*TQ1_0.gguf",
|
| 267 |
+
"max_context": 262144,
|
| 268 |
+
"default_temperature": 0.6,
|
| 269 |
+
"supports_toggle": False,
|
| 270 |
+
"inference_settings": {
|
| 271 |
+
"temperature": 0.6,
|
| 272 |
+
"top_p": 0.95,
|
| 273 |
+
"top_k": 20,
|
| 274 |
+
"repeat_penalty": 1.0,
|
| 275 |
+
},
|
| 276 |
+
},
|
| 277 |
}
|
| 278 |
|
| 279 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|