Luigi committed on
Commit
20d33b2
·
1 Parent(s): 26a8350

Update three-model reasoning system with supports_reasoning field

Browse files

- Add supports_reasoning field to all 24 models
- Update calculate_effective_max_tokens() to use supports_reasoning
- Update update_reasoning_visibility() for three model types:
  - Non-reasoning: hidden checkbox
  - Thinking-only: visible, checked, locked with '⚡ Reasoning Mode (Always On)' label
  - Hybrid: visible, toggleable with 'Enable Reasoning Mode' label
- Add '⚡' indicator in dropdown for thinking-only models

Files changed (1) hide show
  1. app.py +56 -9
app.py CHANGED
@@ -54,6 +54,7 @@ AVAILABLE_MODELS = {
54
  "filename": "*Q8_0.gguf",
55
  "max_context": 32768,
56
  "default_temperature": 0.6,
 
57
  "inference_settings": {
58
  "temperature": 0.1,
59
  "top_p": 0.9,
@@ -67,6 +68,7 @@ AVAILABLE_MODELS = {
67
  "filename": "*Q8_0.gguf",
68
  "max_context": 32768,
69
  "default_temperature": 0.6,
 
70
  "inference_settings": {
71
  "temperature": 1.0,
72
  "top_p": 0.95,
@@ -80,6 +82,7 @@ AVAILABLE_MODELS = {
80
  "filename": "*Q8_0.gguf",
81
  "max_context": 131072,
82
  "default_temperature": 0.6,
 
83
  "inference_settings": {
84
  "temperature": 0.3,
85
  "top_p": 0.95,
@@ -93,6 +96,7 @@ AVAILABLE_MODELS = {
93
  "filename": "*Q8_0.gguf",
94
  "max_context": 32768,
95
  "default_temperature": 0.6,
 
96
  "inference_settings": {
97
  "temperature": 0.0,
98
  "top_p": 1.0,
@@ -106,6 +110,7 @@ AVAILABLE_MODELS = {
106
  "filename": "*Q8_0.gguf",
107
  "max_context": 32768,
108
  "default_temperature": 0.6,
 
109
  "inference_settings": {
110
  "temperature": 0.1,
111
  "top_p": 0.1,
@@ -119,6 +124,7 @@ AVAILABLE_MODELS = {
119
  "filename": "*q4_0.gguf",
120
  "max_context": 131072,
121
  "default_temperature": 0.6,
 
122
  "inference_settings": {
123
  "temperature": 0.3,
124
  "top_p": 0.95,
@@ -132,6 +138,7 @@ AVAILABLE_MODELS = {
132
  "filename": "*Q8_0.gguf",
133
  "max_context": 262144,
134
  "default_temperature": 0.6,
 
135
  "inference_settings": {
136
  "temperature": 0.3,
137
  "top_p": 0.95,
@@ -145,6 +152,7 @@ AVAILABLE_MODELS = {
145
  "filename": "*Q4_0.gguf",
146
  "max_context": 32768,
147
  "default_temperature": 0.6,
 
148
  "supports_toggle": True,
149
  "inference_settings": {
150
  "temperature": 0.6,
@@ -159,6 +167,7 @@ AVAILABLE_MODELS = {
159
  "filename": "*Q8_0.gguf",
160
  "max_context": 131072,
161
  "default_temperature": 0.7,
 
162
  "supports_toggle": False,
163
  "inference_settings": {
164
  "temperature": 0.7,
@@ -173,6 +182,7 @@ AVAILABLE_MODELS = {
173
  "filename": "*Q4_K_M.gguf",
174
  "max_context": 32768,
175
  "default_temperature": 0.6,
 
176
  "inference_settings": {
177
  "temperature": 0.1,
178
  "top_p": 0.9,
@@ -186,6 +196,7 @@ AVAILABLE_MODELS = {
186
  "filename": "*Q4_0.gguf",
187
  "max_context": 32768,
188
  "default_temperature": 0.6,
 
189
  "supports_toggle": True,
190
  "inference_settings": {
191
  "temperature": 0.6,
@@ -200,6 +211,7 @@ AVAILABLE_MODELS = {
200
  "filename": "*Q4_K_M.gguf",
201
  "max_context": 131072,
202
  "default_temperature": 0.7,
 
203
  "supports_toggle": False,
204
  "inference_settings": {
205
  "temperature": 0.7,
@@ -214,6 +226,7 @@ AVAILABLE_MODELS = {
214
  "filename": "*Q8_0.gguf",
215
  "max_context": 131072,
216
  "default_temperature": 0.7,
 
217
  "supports_toggle": True,
218
  "inference_settings": {
219
  "temperature": 0.7,
@@ -228,6 +241,7 @@ AVAILABLE_MODELS = {
228
  "filename": "*Q4_0.gguf",
229
  "max_context": 32768,
230
  "default_temperature": 0.6,
 
231
  "supports_toggle": False,
232
  "inference_settings": {
233
  "temperature": 0.6,
@@ -242,6 +256,7 @@ AVAILABLE_MODELS = {
242
  "filename": "*Q4_K_M.gguf",
243
  "max_context": 32768,
244
  "default_temperature": 0.6,
 
245
  "supports_toggle": False,
246
  "inference_settings": {
247
  "temperature": 0.6,
@@ -256,6 +271,7 @@ AVAILABLE_MODELS = {
256
  "filename": "*Q4_K_M.gguf",
257
  "max_context": 131072,
258
  "default_temperature": 0.7,
 
259
  "supports_toggle": False,
260
  "inference_settings": {
261
  "temperature": 0.7,
@@ -270,6 +286,7 @@ AVAILABLE_MODELS = {
270
  "filename": "*Q3_K_M.gguf",
271
  "max_context": 262144,
272
  "default_temperature": 0.6,
 
273
  "supports_toggle": False, # Thinking-only mode
274
  "inference_settings": {
275
  "temperature": 0.6,
@@ -284,6 +301,7 @@ AVAILABLE_MODELS = {
284
  "filename": "*Q3_K_M.gguf",
285
  "max_context": 131072,
286
  "default_temperature": 0.7,
 
287
  "supports_toggle": False,
288
  "inference_settings": {
289
  "temperature": 0.7,
@@ -298,6 +316,7 @@ AVAILABLE_MODELS = {
298
  "filename": "*TQ1_0.gguf",
299
  "max_context": 131072,
300
  "default_temperature": 0.7,
 
301
  "supports_toggle": False,
302
  "inference_settings": {
303
  "temperature": 0.7,
@@ -312,6 +331,7 @@ AVAILABLE_MODELS = {
312
  "filename": "*TQ1_0.gguf",
313
  "max_context": 131072,
314
  "default_temperature": 0.8,
 
315
  "supports_toggle": False, # Thinking-only mode
316
  "inference_settings": {
317
  "temperature": 0.8,
@@ -326,6 +346,7 @@ AVAILABLE_MODELS = {
326
  "filename": "*TQ1_0.gguf",
327
  "max_context": 131072,
328
  "default_temperature": 0.6,
 
329
  "supports_toggle": False,
330
  "inference_settings": {
331
  "temperature": 0.6,
@@ -340,6 +361,7 @@ AVAILABLE_MODELS = {
340
  "filename": "*IQ2_XXS.gguf",
341
  "max_context": 131072,
342
  "default_temperature": 0.6,
 
343
  "supports_toggle": False,
344
  "inference_settings": {
345
  "temperature": 0.6,
@@ -354,6 +376,7 @@ AVAILABLE_MODELS = {
354
  "filename": "*TQ1_0.gguf",
355
  "max_context": 262144,
356
  "default_temperature": 0.6,
 
357
  "supports_toggle": False, # Thinking-only mode
358
  "inference_settings": {
359
  "temperature": 0.6,
@@ -368,6 +391,7 @@ AVAILABLE_MODELS = {
368
  "filename": "*TQ1_0.gguf",
369
  "max_context": 262144,
370
  "default_temperature": 0.6,
 
371
  "supports_toggle": False,
372
  "inference_settings": {
373
  "temperature": 0.6,
@@ -462,10 +486,29 @@ def load_model(model_key: str = None, n_threads: int = 2) -> Tuple[Llama, str]:
462
 
463
 
464
  def update_reasoning_visibility(model_key):
465
- """Show or hide reasoning checkbox based on model capabilities."""
 
 
 
 
 
 
 
 
 
466
  model = AVAILABLE_MODELS[model_key]
 
467
  supports_toggle = model.get("supports_toggle", False)
468
- return gr.update(visible=supports_toggle)
 
 
 
 
 
 
 
 
 
469
 
470
 
471
  def download_summary_json(summary, thinking, model_key, language, metrics):
@@ -591,12 +634,9 @@ def calculate_effective_max_tokens(model_key: str, max_tokens: int, enable_reaso
591
  return max_tokens
592
 
593
  # Check if model supports reasoning/thinking
594
- is_thinking_model = (
595
- model_config.get("supports_toggle", False) or
596
- "thinking" in model_key.lower()
597
- )
598
 
599
- if is_thinking_model:
600
  # Add 50% headroom for thinking process
601
  thinking_headroom = int(max_tokens * 0.5)
602
  effective_max = max_tokens + thinking_headroom
@@ -1218,10 +1258,10 @@ def create_interface():
1218
  gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🤖</span> Model</div>')
1219
 
1220
  model_dropdown = gr.Dropdown(
1221
- choices=[(info["name"], key) for key, info in AVAILABLE_MODELS.items()],
1222
  value=DEFAULT_MODEL_KEY,
1223
  label="Select Model",
1224
- info="Models ordered by size (0.6B to 30B). Smaller = faster. Large files need bigger context."
1225
  )
1226
 
1227
  enable_reasoning = gr.Checkbox(
@@ -1378,6 +1418,13 @@ def create_interface():
1378
  inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider],
1379
  outputs=[temperature_slider, top_p, top_k, info_output]
1380
  )
 
 
 
 
 
 
 
1381
 
1382
  # Show/hide custom thread slider based on selection
1383
  def toggle_custom_threads(thread_config):
 
54
  "filename": "*Q8_0.gguf",
55
  "max_context": 32768,
56
  "default_temperature": 0.6,
57
+ "supports_reasoning": False,
58
  "inference_settings": {
59
  "temperature": 0.1,
60
  "top_p": 0.9,
 
68
  "filename": "*Q8_0.gguf",
69
  "max_context": 32768,
70
  "default_temperature": 0.6,
71
+ "supports_reasoning": False,
72
  "inference_settings": {
73
  "temperature": 1.0,
74
  "top_p": 0.95,
 
82
  "filename": "*Q8_0.gguf",
83
  "max_context": 131072,
84
  "default_temperature": 0.6,
85
+ "supports_reasoning": False,
86
  "inference_settings": {
87
  "temperature": 0.3,
88
  "top_p": 0.95,
 
96
  "filename": "*Q8_0.gguf",
97
  "max_context": 32768,
98
  "default_temperature": 0.6,
99
+ "supports_reasoning": False,
100
  "inference_settings": {
101
  "temperature": 0.0,
102
  "top_p": 1.0,
 
110
  "filename": "*Q8_0.gguf",
111
  "max_context": 32768,
112
  "default_temperature": 0.6,
113
+ "supports_reasoning": False,
114
  "inference_settings": {
115
  "temperature": 0.1,
116
  "top_p": 0.1,
 
124
  "filename": "*q4_0.gguf",
125
  "max_context": 131072,
126
  "default_temperature": 0.6,
127
+ "supports_reasoning": False,
128
  "inference_settings": {
129
  "temperature": 0.3,
130
  "top_p": 0.95,
 
138
  "filename": "*Q8_0.gguf",
139
  "max_context": 262144,
140
  "default_temperature": 0.6,
141
+ "supports_reasoning": False,
142
  "inference_settings": {
143
  "temperature": 0.3,
144
  "top_p": 0.95,
 
152
  "filename": "*Q4_0.gguf",
153
  "max_context": 32768,
154
  "default_temperature": 0.6,
155
+ "supports_reasoning": True,
156
  "supports_toggle": True,
157
  "inference_settings": {
158
  "temperature": 0.6,
 
167
  "filename": "*Q8_0.gguf",
168
  "max_context": 131072,
169
  "default_temperature": 0.7,
170
+ "supports_reasoning": False,
171
  "supports_toggle": False,
172
  "inference_settings": {
173
  "temperature": 0.7,
 
182
  "filename": "*Q4_K_M.gguf",
183
  "max_context": 32768,
184
  "default_temperature": 0.6,
185
+ "supports_reasoning": False,
186
  "inference_settings": {
187
  "temperature": 0.1,
188
  "top_p": 0.9,
 
196
  "filename": "*Q4_0.gguf",
197
  "max_context": 32768,
198
  "default_temperature": 0.6,
199
+ "supports_reasoning": True,
200
  "supports_toggle": True,
201
  "inference_settings": {
202
  "temperature": 0.6,
 
211
  "filename": "*Q4_K_M.gguf",
212
  "max_context": 131072,
213
  "default_temperature": 0.7,
214
+ "supports_reasoning": False,
215
  "supports_toggle": False,
216
  "inference_settings": {
217
  "temperature": 0.7,
 
226
  "filename": "*Q8_0.gguf",
227
  "max_context": 131072,
228
  "default_temperature": 0.7,
229
+ "supports_reasoning": True,
230
  "supports_toggle": True,
231
  "inference_settings": {
232
  "temperature": 0.7,
 
241
  "filename": "*Q4_0.gguf",
242
  "max_context": 32768,
243
  "default_temperature": 0.6,
244
+ "supports_reasoning": False,
245
  "supports_toggle": False,
246
  "inference_settings": {
247
  "temperature": 0.6,
 
256
  "filename": "*Q4_K_M.gguf",
257
  "max_context": 32768,
258
  "default_temperature": 0.6,
259
+ "supports_reasoning": False,
260
  "supports_toggle": False,
261
  "inference_settings": {
262
  "temperature": 0.6,
 
271
  "filename": "*Q4_K_M.gguf",
272
  "max_context": 131072,
273
  "default_temperature": 0.7,
274
+ "supports_reasoning": False,
275
  "supports_toggle": False,
276
  "inference_settings": {
277
  "temperature": 0.7,
 
286
  "filename": "*Q3_K_M.gguf",
287
  "max_context": 262144,
288
  "default_temperature": 0.6,
289
+ "supports_reasoning": True,
290
  "supports_toggle": False, # Thinking-only mode
291
  "inference_settings": {
292
  "temperature": 0.6,
 
301
  "filename": "*Q3_K_M.gguf",
302
  "max_context": 131072,
303
  "default_temperature": 0.7,
304
+ "supports_reasoning": False,
305
  "supports_toggle": False,
306
  "inference_settings": {
307
  "temperature": 0.7,
 
316
  "filename": "*TQ1_0.gguf",
317
  "max_context": 131072,
318
  "default_temperature": 0.7,
319
+ "supports_reasoning": False,
320
  "supports_toggle": False,
321
  "inference_settings": {
322
  "temperature": 0.7,
 
331
  "filename": "*TQ1_0.gguf",
332
  "max_context": 131072,
333
  "default_temperature": 0.8,
334
+ "supports_reasoning": True,
335
  "supports_toggle": False, # Thinking-only mode
336
  "inference_settings": {
337
  "temperature": 0.8,
 
346
  "filename": "*TQ1_0.gguf",
347
  "max_context": 131072,
348
  "default_temperature": 0.6,
349
+ "supports_reasoning": True,
350
  "supports_toggle": False,
351
  "inference_settings": {
352
  "temperature": 0.6,
 
361
  "filename": "*IQ2_XXS.gguf",
362
  "max_context": 131072,
363
  "default_temperature": 0.6,
364
+ "supports_reasoning": False,
365
  "supports_toggle": False,
366
  "inference_settings": {
367
  "temperature": 0.6,
 
376
  "filename": "*TQ1_0.gguf",
377
  "max_context": 262144,
378
  "default_temperature": 0.6,
379
+ "supports_reasoning": True,
380
  "supports_toggle": False, # Thinking-only mode
381
  "inference_settings": {
382
  "temperature": 0.6,
 
391
  "filename": "*TQ1_0.gguf",
392
  "max_context": 262144,
393
  "default_temperature": 0.6,
394
+ "supports_reasoning": False,
395
  "supports_toggle": False,
396
  "inference_settings": {
397
  "temperature": 0.6,
 
486
 
487
 
488
  def update_reasoning_visibility(model_key):
489
+ """
490
+ Update reasoning checkbox visibility, value, and interactivity based on model type.
491
+
492
+ Three model types:
493
+ - Non-reasoning: checkbox hidden
494
+ - Thinking-only: checkbox visible, checked, locked (non-interactive), label "Reasoning Mode (Always On)"
495
+ - Hybrid: checkbox visible, toggleable, label "Enable Reasoning Mode"
496
+
497
+ Returns: Single gr.update() with all properties
498
+ """
499
  model = AVAILABLE_MODELS[model_key]
500
+ supports_reasoning = model.get("supports_reasoning", False)
501
  supports_toggle = model.get("supports_toggle", False)
502
+
503
+ if not supports_reasoning:
504
+ # Non-reasoning model: hide checkbox
505
+ return gr.update(visible=False, value=False, interactive=False, label="Enable Reasoning Mode")
506
+ elif supports_reasoning and not supports_toggle:
507
+ # Thinking-only model: show, check, lock
508
+ return gr.update(visible=True, value=True, interactive=False, label="⚡ Reasoning Mode (Always On)")
509
+ else:
510
+ # Hybrid model: show, toggleable
511
+ return gr.update(visible=True, value=True, interactive=True, label="Enable Reasoning Mode")
512
 
513
 
514
  def download_summary_json(summary, thinking, model_key, language, metrics):
 
634
  return max_tokens
635
 
636
  # Check if model supports reasoning/thinking
637
+ supports_reasoning = model_config.get("supports_reasoning", False)
 
 
 
638
 
639
+ if supports_reasoning:
640
  # Add 50% headroom for thinking process
641
  thinking_headroom = int(max_tokens * 0.5)
642
  effective_max = max_tokens + thinking_headroom
 
1258
  gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🤖</span> Model</div>')
1259
 
1260
  model_dropdown = gr.Dropdown(
1261
+ choices=[(info["name"] + (" ⚡" if info.get("supports_reasoning", False) and not info.get("supports_toggle", False) else ""), key) for key, info in AVAILABLE_MODELS.items()],
1262
  value=DEFAULT_MODEL_KEY,
1263
  label="Select Model",
1264
+ info="Models ordered by size (0.6B to 30B). Smaller = faster. Large files need bigger context. ⚡ = Always-reasoning models."
1265
  )
1266
 
1267
  enable_reasoning = gr.Checkbox(
 
1418
  inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider],
1419
  outputs=[temperature_slider, top_p, top_k, info_output]
1420
  )
1421
+
1422
+ # Update reasoning checkbox when model changes
1423
+ model_dropdown.change(
1424
+ fn=update_reasoning_visibility,
1425
+ inputs=[model_dropdown],
1426
+ outputs=[enable_reasoning]
1427
+ )
1428
 
1429
  # Show/hide custom thread slider based on selection
1430
  def toggle_custom_threads(thread_config):