Luigi committed on
Commit
cc9a1a9
·
1 Parent(s): 53bb554

feat: Add two non-reasoning model variants

Browse files

- Added ERNIE-4.5-21B-A3B-PT (21B, non-thinking) from unsloth
- Standard inference settings (temp 0.7, no thinking mode)
- Added Qwen3-30B-A3B-Instruct (30B, non-thinking) from unsloth
- Standard instruct settings (temp 0.6, no thinking mode)
- Both use TQ1_0 quantization
- Models ordered by parameter count (21B and 30B sections now have variants)

Files changed (1) hide show
  1. app.py +33 -5
app.py CHANGED
@@ -192,15 +192,29 @@ AVAILABLE_MODELS = {
192
  },
193
  "granite4_tiny_q3": {
194
  "name": "Granite 4.0 Tiny 7B (128K Context)",
195
- "repo_id": "unsloth/granite-4.0-h-tiny-GGUF",
196
  "filename": "*Q3_K_M.gguf",
197
  "max_context": 131072,
198
- "default_temperature": 0.6,
199
  "supports_toggle": False,
200
  "inference_settings": {
201
- "temperature": 0.0,
202
- "top_p": 1.0,
203
- "top_k": 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  "repeat_penalty": 1.1,
205
  },
206
  },
@@ -246,6 +260,20 @@ AVAILABLE_MODELS = {
246
  "repeat_penalty": 1.0,
247
  },
248
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  }
250
 
251
  DEFAULT_MODEL_KEY = "qwen3_600m_q4"
 
192
  },
193
  "granite4_tiny_q3": {
194
  "name": "Granite 4.0 Tiny 7B (128K Context)",
195
+ "repo_id": "ibm-research/granite-4.0-Tiny-7B-Instruct-GGUF",
196
  "filename": "*Q3_K_M.gguf",
197
  "max_context": 131072,
198
+ "default_temperature": 0.7,
199
  "supports_toggle": False,
200
  "inference_settings": {
201
+ "temperature": 0.7,
202
+ "top_p": 0.9,
203
+ "top_k": 40,
204
+ "repeat_penalty": 1.1,
205
+ },
206
+ },
207
+ "ernie_21b_pt_q1": {
208
+ "name": "ERNIE-4.5 21B PT (128K Context)",
209
+ "repo_id": "unsloth/ERNIE-4.5-21B-A3B-PT-GGUF",
210
+ "filename": "*TQ1_0.gguf",
211
+ "max_context": 131072,
212
+ "default_temperature": 0.7,
213
+ "supports_toggle": False,
214
+ "inference_settings": {
215
+ "temperature": 0.7,
216
+ "top_p": 0.9,
217
+ "top_k": 40,
218
  "repeat_penalty": 1.1,
219
  },
220
  },
 
260
  "repeat_penalty": 1.0,
261
  },
262
  },
263
+ "qwen3_30b_instruct_q1": {
264
+ "name": "Qwen3 30B Instruct (256K Context)",
265
+ "repo_id": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
266
+ "filename": "*TQ1_0.gguf",
267
+ "max_context": 262144,
268
+ "default_temperature": 0.6,
269
+ "supports_toggle": False,
270
+ "inference_settings": {
271
+ "temperature": 0.6,
272
+ "top_p": 0.95,
273
+ "top_k": 20,
274
+ "repeat_penalty": 1.0,
275
+ },
276
+ },
277
  }
278
 
279
  DEFAULT_MODEL_KEY = "qwen3_600m_q4"