Luigi committed on
Commit
20d33b2
·
1 Parent(s): 26a8350

Update three-model reasoning system with supports_reasoning field

Browse files

- Add supports_reasoning field to all 24 models
- Update calculate_effective_max_tokens() to use supports_reasoning
- Update update_reasoning_visibility() for three model types:
  - Non-reasoning: hidden checkbox
  - Thinking-only: visible, checked, locked with '⚡ Reasoning Mode (Always On)' label
  - Hybrid: visible, toggleable with 'Enable Reasoning Mode' label
- Add '⚡' indicator in dropdown for thinking-only models

Files changed (1) hide show
  1. app.py +56 -9
app.py CHANGED
@@ -54,6 +54,7 @@ AVAILABLE_MODELS = {
54
  "filename": "*Q8_0.gguf",
55
  "max_context": 32768,
56
  "default_temperature": 0.6,
 
57
  "inference_settings": {
58
  "temperature": 0.1,
59
  "top_p": 0.9,
@@ -67,6 +68,7 @@ AVAILABLE_MODELS = {
67
  "filename": "*Q8_0.gguf",
68
  "max_context": 32768,
69
  "default_temperature": 0.6,
 
70
  "inference_settings": {
71
  "temperature": 1.0,
72
  "top_p": 0.95,
@@ -80,6 +82,7 @@ AVAILABLE_MODELS = {
80
  "filename": "*Q8_0.gguf",
81
  "max_context": 131072,
82
  "default_temperature": 0.6,
 
83
  "inference_settings": {
84
  "temperature": 0.3,
85
  "top_p": 0.95,
@@ -93,6 +96,7 @@ AVAILABLE_MODELS = {
93
  "filename": "*Q8_0.gguf",
94
  "max_context": 32768,
95
  "default_temperature": 0.6,
 
96
  "inference_settings": {
97
  "temperature": 0.0,
98
  "top_p": 1.0,
@@ -106,6 +110,7 @@ AVAILABLE_MODELS = {
106
  "filename": "*Q8_0.gguf",
107
  "max_context": 32768,
108
  "default_temperature": 0.6,
 
109
  "inference_settings": {
110
  "temperature": 0.1,
111
  "top_p": 0.1,
@@ -119,6 +124,7 @@ AVAILABLE_MODELS = {
119
  "filename": "*q4_0.gguf",
120
  "max_context": 131072,
121
  "default_temperature": 0.6,
 
122
  "inference_settings": {
123
  "temperature": 0.3,
124
  "top_p": 0.95,
@@ -132,6 +138,7 @@ AVAILABLE_MODELS = {
132
  "filename": "*Q8_0.gguf",
133
  "max_context": 262144,
134
  "default_temperature": 0.6,
 
135
  "inference_settings": {
136
  "temperature": 0.3,
137
  "top_p": 0.95,
@@ -145,6 +152,7 @@ AVAILABLE_MODELS = {
145
  "filename": "*Q4_0.gguf",
146
  "max_context": 32768,
147
  "default_temperature": 0.6,
 
148
  "supports_toggle": True,
149
  "inference_settings": {
150
  "temperature": 0.6,
@@ -159,6 +167,7 @@ AVAILABLE_MODELS = {
159
  "filename": "*Q8_0.gguf",
160
  "max_context": 131072,
161
  "default_temperature": 0.7,
 
162
  "supports_toggle": False,
163
  "inference_settings": {
164
  "temperature": 0.7,
@@ -173,6 +182,7 @@ AVAILABLE_MODELS = {
173
  "filename": "*Q4_K_M.gguf",
174
  "max_context": 32768,
175
  "default_temperature": 0.6,
 
176
  "inference_settings": {
177
  "temperature": 0.1,
178
  "top_p": 0.9,
@@ -186,6 +196,7 @@ AVAILABLE_MODELS = {
186
  "filename": "*Q4_0.gguf",
187
  "max_context": 32768,
188
  "default_temperature": 0.6,
 
189
  "supports_toggle": True,
190
  "inference_settings": {
191
  "temperature": 0.6,
@@ -200,6 +211,7 @@ AVAILABLE_MODELS = {
200
  "filename": "*Q4_K_M.gguf",
201
  "max_context": 131072,
202
  "default_temperature": 0.7,
 
203
  "supports_toggle": False,
204
  "inference_settings": {
205
  "temperature": 0.7,
@@ -214,6 +226,7 @@ AVAILABLE_MODELS = {
214
  "filename": "*Q8_0.gguf",
215
  "max_context": 131072,
216
  "default_temperature": 0.7,
 
217
  "supports_toggle": True,
218
  "inference_settings": {
219
  "temperature": 0.7,
@@ -228,6 +241,7 @@ AVAILABLE_MODELS = {
228
  "filename": "*Q4_0.gguf",
229
  "max_context": 32768,
230
  "default_temperature": 0.6,
 
231
  "supports_toggle": False,
232
  "inference_settings": {
233
  "temperature": 0.6,
@@ -242,6 +256,7 @@ AVAILABLE_MODELS = {
242
  "filename": "*Q4_K_M.gguf",
243
  "max_context": 32768,
244
  "default_temperature": 0.6,
 
245
  "supports_toggle": False,
246
  "inference_settings": {
247
  "temperature": 0.6,
@@ -256,6 +271,7 @@ AVAILABLE_MODELS = {
256
  "filename": "*Q4_K_M.gguf",
257
  "max_context": 131072,
258
  "default_temperature": 0.7,
 
259
  "supports_toggle": False,
260
  "inference_settings": {
261
  "temperature": 0.7,
@@ -270,6 +286,7 @@ AVAILABLE_MODELS = {
270
  "filename": "*Q3_K_M.gguf",
271
  "max_context": 262144,
272
  "default_temperature": 0.6,
 
273
  "supports_toggle": False, # Thinking-only mode
274
  "inference_settings": {
275
  "temperature": 0.6,
@@ -284,6 +301,7 @@ AVAILABLE_MODELS = {
284
  "filename": "*Q3_K_M.gguf",
285
  "max_context": 131072,
286
  "default_temperature": 0.7,
 
287
  "supports_toggle": False,
288
  "inference_settings": {
289
  "temperature": 0.7,
@@ -298,6 +316,7 @@ AVAILABLE_MODELS = {
298
  "filename": "*TQ1_0.gguf",
299
  "max_context": 131072,
300
  "default_temperature": 0.7,
 
301
  "supports_toggle": False,
302
  "inference_settings": {
303
  "temperature": 0.7,
@@ -312,6 +331,7 @@ AVAILABLE_MODELS = {
312
  "filename": "*TQ1_0.gguf",
313
  "max_context": 131072,
314
  "default_temperature": 0.8,
 
315
  "supports_toggle": False, # Thinking-only mode
316
  "inference_settings": {
317
  "temperature": 0.8,
@@ -326,6 +346,7 @@ AVAILABLE_MODELS = {
326
  "filename": "*TQ1_0.gguf",
327
  "max_context": 131072,
328
  "default_temperature": 0.6,
 
329
  "supports_toggle": False,
330
  "inference_settings": {
331
  "temperature": 0.6,
@@ -340,6 +361,7 @@ AVAILABLE_MODELS = {
340
  "filename": "*IQ2_XXS.gguf",
341
  "max_context": 131072,
342
  "default_temperature": 0.6,
 
343
  "supports_toggle": False,
344
  "inference_settings": {
345
  "temperature": 0.6,
@@ -354,6 +376,7 @@ AVAILABLE_MODELS = {
354
  "filename": "*TQ1_0.gguf",
355
  "max_context": 262144,
356
  "default_temperature": 0.6,
 
357
  "supports_toggle": False, # Thinking-only mode
358
  "inference_settings": {
359
  "temperature": 0.6,
@@ -368,6 +391,7 @@ AVAILABLE_MODELS = {
368
  "filename": "*TQ1_0.gguf",
369
  "max_context": 262144,
370
  "default_temperature": 0.6,
 
371
  "supports_toggle": False,
372
  "inference_settings": {
373
  "temperature": 0.6,
@@ -462,10 +486,29 @@ def load_model(model_key: str = None, n_threads: int = 2) -> Tuple[Llama, str]:
462
 
463
 
464
  def update_reasoning_visibility(model_key):
465
- """Show or hide reasoning checkbox based on model capabilities."""
 
 
 
 
 
 
 
 
 
466
  model = AVAILABLE_MODELS[model_key]
 
467
  supports_toggle = model.get("supports_toggle", False)
468
- return gr.update(visible=supports_toggle)
 
 
 
 
 
 
 
 
 
469
 
470
 
471
  def download_summary_json(summary, thinking, model_key, language, metrics):
@@ -591,12 +634,9 @@ def calculate_effective_max_tokens(model_key: str, max_tokens: int, enable_reaso
591
  return max_tokens
592
 
593
  # Check if model supports reasoning/thinking
594
- is_thinking_model = (
595
- model_config.get("supports_toggle", False) or
596
- "thinking" in model_key.lower()
597
- )
598
 
599
- if is_thinking_model:
600
  # Add 50% headroom for thinking process
601
  thinking_headroom = int(max_tokens * 0.5)
602
  effective_max = max_tokens + thinking_headroom
@@ -1218,10 +1258,10 @@ def create_interface():
1218
  gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🤖</span> Model</div>')
1219
 
1220
  model_dropdown = gr.Dropdown(
1221
- choices=[(info["name"], key) for key, info in AVAILABLE_MODELS.items()],
1222
  value=DEFAULT_MODEL_KEY,
1223
  label="Select Model",
1224
- info="Models ordered by size (0.6B to 30B). Smaller = faster. Large files need bigger context."
1225
  )
1226
 
1227
  enable_reasoning = gr.Checkbox(
@@ -1378,6 +1418,13 @@ def create_interface():
1378
  inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider],
1379
  outputs=[temperature_slider, top_p, top_k, info_output]
1380
  )
 
 
 
 
 
 
 
1381
 
1382
  # Show/hide custom thread slider based on selection
1383
  def toggle_custom_threads(thread_config):
 
54
  "filename": "*Q8_0.gguf",
55
  "max_context": 32768,
56
  "default_temperature": 0.6,
57
+ "supports_reasoning": False,
58
  "inference_settings": {
59
  "temperature": 0.1,
60
  "top_p": 0.9,
 
68
  "filename": "*Q8_0.gguf",
69
  "max_context": 32768,
70
  "default_temperature": 0.6,
71
+ "supports_reasoning": False,
72
  "inference_settings": {
73
  "temperature": 1.0,
74
  "top_p": 0.95,
 
82
  "filename": "*Q8_0.gguf",
83
  "max_context": 131072,
84
  "default_temperature": 0.6,
85
+ "supports_reasoning": False,
86
  "inference_settings": {
87
  "temperature": 0.3,
88
  "top_p": 0.95,
 
96
  "filename": "*Q8_0.gguf",
97
  "max_context": 32768,
98
  "default_temperature": 0.6,
99
+ "supports_reasoning": False,
100
  "inference_settings": {
101
  "temperature": 0.0,
102
  "top_p": 1.0,
 
110
  "filename": "*Q8_0.gguf",
111
  "max_context": 32768,
112
  "default_temperature": 0.6,
113
+ "supports_reasoning": False,
114
  "inference_settings": {
115
  "temperature": 0.1,
116
  "top_p": 0.1,
 
124
  "filename": "*q4_0.gguf",
125
  "max_context": 131072,
126
  "default_temperature": 0.6,
127
+ "supports_reasoning": False,
128
  "inference_settings": {
129
  "temperature": 0.3,
130
  "top_p": 0.95,
 
138
  "filename": "*Q8_0.gguf",
139
  "max_context": 262144,
140
  "default_temperature": 0.6,
141
+ "supports_reasoning": False,
142
  "inference_settings": {
143
  "temperature": 0.3,
144
  "top_p": 0.95,
 
152
  "filename": "*Q4_0.gguf",
153
  "max_context": 32768,
154
  "default_temperature": 0.6,
155
+ "supports_reasoning": True,
156
  "supports_toggle": True,
157
  "inference_settings": {
158
  "temperature": 0.6,
 
167
  "filename": "*Q8_0.gguf",
168
  "max_context": 131072,
169
  "default_temperature": 0.7,
170
+ "supports_reasoning": False,
171
  "supports_toggle": False,
172
  "inference_settings": {
173
  "temperature": 0.7,
 
182
  "filename": "*Q4_K_M.gguf",
183
  "max_context": 32768,
184
  "default_temperature": 0.6,
185
+ "supports_reasoning": False,
186
  "inference_settings": {
187
  "temperature": 0.1,
188
  "top_p": 0.9,
 
196
  "filename": "*Q4_0.gguf",
197
  "max_context": 32768,
198
  "default_temperature": 0.6,
199
+ "supports_reasoning": True,
200
  "supports_toggle": True,
201
  "inference_settings": {
202
  "temperature": 0.6,
 
211
  "filename": "*Q4_K_M.gguf",
212
  "max_context": 131072,
213
  "default_temperature": 0.7,
214
+ "supports_reasoning": False,
215
  "supports_toggle": False,
216
  "inference_settings": {
217
  "temperature": 0.7,
 
226
  "filename": "*Q8_0.gguf",
227
  "max_context": 131072,
228
  "default_temperature": 0.7,
229
+ "supports_reasoning": True,
230
  "supports_toggle": True,
231
  "inference_settings": {
232
  "temperature": 0.7,
 
241
  "filename": "*Q4_0.gguf",
242
  "max_context": 32768,
243
  "default_temperature": 0.6,
244
+ "supports_reasoning": False,
245
  "supports_toggle": False,
246
  "inference_settings": {
247
  "temperature": 0.6,
 
256
  "filename": "*Q4_K_M.gguf",
257
  "max_context": 32768,
258
  "default_temperature": 0.6,
259
+ "supports_reasoning": False,
260
  "supports_toggle": False,
261
  "inference_settings": {
262
  "temperature": 0.6,
 
271
  "filename": "*Q4_K_M.gguf",
272
  "max_context": 131072,
273
  "default_temperature": 0.7,
274
+ "supports_reasoning": False,
275
  "supports_toggle": False,
276
  "inference_settings": {
277
  "temperature": 0.7,
 
286
  "filename": "*Q3_K_M.gguf",
287
  "max_context": 262144,
288
  "default_temperature": 0.6,
289
+ "supports_reasoning": True,
290
  "supports_toggle": False, # Thinking-only mode
291
  "inference_settings": {
292
  "temperature": 0.6,
 
301
  "filename": "*Q3_K_M.gguf",
302
  "max_context": 131072,
303
  "default_temperature": 0.7,
304
+ "supports_reasoning": False,
305
  "supports_toggle": False,
306
  "inference_settings": {
307
  "temperature": 0.7,
 
316
  "filename": "*TQ1_0.gguf",
317
  "max_context": 131072,
318
  "default_temperature": 0.7,
319
+ "supports_reasoning": False,
320
  "supports_toggle": False,
321
  "inference_settings": {
322
  "temperature": 0.7,
 
331
  "filename": "*TQ1_0.gguf",
332
  "max_context": 131072,
333
  "default_temperature": 0.8,
334
+ "supports_reasoning": True,
335
  "supports_toggle": False, # Thinking-only mode
336
  "inference_settings": {
337
  "temperature": 0.8,
 
346
  "filename": "*TQ1_0.gguf",
347
  "max_context": 131072,
348
  "default_temperature": 0.6,
349
+ "supports_reasoning": True,
350
  "supports_toggle": False,
351
  "inference_settings": {
352
  "temperature": 0.6,
 
361
  "filename": "*IQ2_XXS.gguf",
362
  "max_context": 131072,
363
  "default_temperature": 0.6,
364
+ "supports_reasoning": False,
365
  "supports_toggle": False,
366
  "inference_settings": {
367
  "temperature": 0.6,
 
376
  "filename": "*TQ1_0.gguf",
377
  "max_context": 262144,
378
  "default_temperature": 0.6,
379
+ "supports_reasoning": True,
380
  "supports_toggle": False, # Thinking-only mode
381
  "inference_settings": {
382
  "temperature": 0.6,
 
391
  "filename": "*TQ1_0.gguf",
392
  "max_context": 262144,
393
  "default_temperature": 0.6,
394
+ "supports_reasoning": False,
395
  "supports_toggle": False,
396
  "inference_settings": {
397
  "temperature": 0.6,
 
486
 
487
 
488
  def update_reasoning_visibility(model_key):
489
+ """
490
+ Update reasoning checkbox visibility, value, and interactivity based on model type.
491
+
492
+ Three model types:
493
+ - Non-reasoning: checkbox hidden
494
+ - Thinking-only: checkbox visible, checked, locked (non-interactive), label "Reasoning Mode (Always On)"
495
+ - Hybrid: checkbox visible, toggleable, label "Enable Reasoning Mode"
496
+
497
+ Returns: Single gr.update() with all properties
498
+ """
499
  model = AVAILABLE_MODELS[model_key]
500
+ supports_reasoning = model.get("supports_reasoning", False)
501
  supports_toggle = model.get("supports_toggle", False)
502
+
503
+ if not supports_reasoning:
504
+ # Non-reasoning model: hide checkbox
505
+ return gr.update(visible=False, value=False, interactive=False, label="Enable Reasoning Mode")
506
+ elif supports_reasoning and not supports_toggle:
507
+ # Thinking-only model: show, check, lock
508
+ return gr.update(visible=True, value=True, interactive=False, label="⚡ Reasoning Mode (Always On)")
509
+ else:
510
+ # Hybrid model: show, toggleable
511
+ return gr.update(visible=True, value=True, interactive=True, label="Enable Reasoning Mode")
512
 
513
 
514
  def download_summary_json(summary, thinking, model_key, language, metrics):
 
634
  return max_tokens
635
 
636
  # Check if model supports reasoning/thinking
637
+ supports_reasoning = model_config.get("supports_reasoning", False)
 
 
 
638
 
639
+ if supports_reasoning:
640
  # Add 50% headroom for thinking process
641
  thinking_headroom = int(max_tokens * 0.5)
642
  effective_max = max_tokens + thinking_headroom
 
1258
  gr.HTML('<div class="section-header" style="margin-top: 20px;"><span class="section-icon">🤖</span> Model</div>')
1259
 
1260
  model_dropdown = gr.Dropdown(
1261
+ choices=[(info["name"] + (" ⚡" if info.get("supports_reasoning", False) and not info.get("supports_toggle", False) else ""), key) for key, info in AVAILABLE_MODELS.items()],
1262
  value=DEFAULT_MODEL_KEY,
1263
  label="Select Model",
1264
+ info="Models ordered by size (0.6B to 30B). Smaller = faster. Large files need bigger context. ⚡ = Always-reasoning models."
1265
  )
1266
 
1267
  enable_reasoning = gr.Checkbox(
 
1418
  inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider],
1419
  outputs=[temperature_slider, top_p, top_k, info_output]
1420
  )
1421
+
1422
+ # Update reasoning checkbox when model changes
1423
+ model_dropdown.change(
1424
+ fn=update_reasoning_visibility,
1425
+ inputs=[model_dropdown],
1426
+ outputs=[enable_reasoning]
1427
+ )
1428
 
1429
  # Show/hide custom thread slider based on selection
1430
  def toggle_custom_threads(thread_config):