Spaces:
Running
Running
Add ERNIE-4.5-21B-Thinking (TQ1_0) to model registry
Browse files
- Add unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF with TQ1_0 quantization
- MoE architecture: 21B total params / 3B activated per token
- 128K context window (capped at 32K for CPU performance)
- Inference settings: temp=0.7, top_p=0.8, top_k=40 (Baidu/Unsloth defaults)
- Thinking-only mode (no /think toggle needed)
- Largest model in registry (21B total parameters)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -191,6 +191,19 @@ AVAILABLE_MODELS = {
|
|
| 191 |
"repeat_penalty": 1.1,
|
| 192 |
},
|
| 193 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
}
|
| 195 |
|
| 196 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|
|
|
|
| 191 |
"repeat_penalty": 1.1,
|
| 192 |
},
|
| 193 |
},
|
| 194 |
+
"ernie_21b_thinking_q1": {
|
| 195 |
+
"name": "ERNIE-4.5 21B Thinking (128K Context)",
|
| 196 |
+
"repo_id": "unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF",
|
| 197 |
+
"filename": "*TQ1_0.gguf",
|
| 198 |
+
"max_context": 131072,
|
| 199 |
+
"supports_toggle": False, # Thinking-only mode
|
| 200 |
+
"inference_settings": {
|
| 201 |
+
"temperature": 0.7,
|
| 202 |
+
"top_p": 0.8,
|
| 203 |
+
"top_k": 40,
|
| 204 |
+
"repeat_penalty": 1.05,
|
| 205 |
+
},
|
| 206 |
+
},
|
| 207 |
}
|
| 208 |
|
| 209 |
DEFAULT_MODEL_KEY = "qwen3_600m_q4"
|