Spaces:
Running
Running
feat: Add GLM-4.7-Flash-REAP-23B model with community settings
- Added glm_4_7_flash_reap_23b from unsloth
- Uses TQ1_0 quantization (6.54 GB)
- Community-suggested settings:
  - temperature: 0.6
  - top_p: 0.95
  - top_k: 20
  - repeat_penalty: 1.05
- 128K context window (capped at 32K for HF)
app.py
CHANGED

@@ -218,6 +218,20 @@ AVAILABLE_MODELS = {
         "repeat_penalty": 1.1,
     },
 },
+    "glm_4_7_flash_reap_23b": {
+        "name": "GLM-4.7-Flash-REAP-23B Thinking (128K Context)",
+        "repo_id": "unsloth/GLM-4.7-Flash-REAP-23B-A3B-GGUF",
+        "filename": "*TQ1_0.gguf",
+        "max_context": 131072,
+        "default_temperature": 0.6,
+        "supports_toggle": False,
+        "inference_settings": {
+            "temperature": 0.6,
+            "top_p": 0.95,
+            "top_k": 20,
+            "repeat_penalty": 1.05,
+        },
+    },
 }
 
 DEFAULT_MODEL_KEY = "qwen3_600m_q4"