Spaces:
Running
Running
feat: add DEFAULT_N_THREADS env var support + prettify model info
Browse files
- Add DEFAULT_N_THREADS environment variable to set default thread count
- Auto-detect and validate env var (1-32 threads, falls back to HF Free Tier if invalid)
- Display current n_threads in Model Information with nice markdown tables
- Model Information now shows: Model Specs, Hardware Config, Inference Settings
- Thread preset label shows: HF Free Tier (2), HF Upgrade Tier (8), or Custom
- Update thread config in model info when model or thread settings change
app.py
CHANGED
|
@@ -25,6 +25,24 @@ llm = None
|
|
| 25 |
converter = None
|
| 26 |
current_model_key = None
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Maximum context window to use (caps memory usage on 2 vCPUs)
|
| 29 |
MAX_USABLE_CTX = 32768
|
| 30 |
|
|
@@ -588,26 +606,49 @@ def calculate_effective_max_tokens(model_key: str, max_tokens: int, enable_reaso
|
|
| 588 |
return max_tokens
|
| 589 |
|
| 590 |
|
| 591 |
-
def get_model_info(model_key: str) -> Tuple[str, str, float, int]:
|
| 592 |
"""Get model information and inference settings for UI display.
|
| 593 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
Returns:
|
| 595 |
Tuple of (info_text, temperature, top_p, top_k)
|
| 596 |
"""
|
| 597 |
m = AVAILABLE_MODELS[model_key]
|
| 598 |
usable_ctx = min(m["max_context"], MAX_USABLE_CTX)
|
| 599 |
settings = m["inference_settings"]
|
| 600 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
info_text = (
|
| 602 |
-
f"
|
| 603 |
-
f"
|
| 604 |
-
f"
|
| 605 |
-
f"-
|
| 606 |
-
f"
|
| 607 |
-
f"
|
| 608 |
-
f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
)
|
| 610 |
-
|
| 611 |
return info_text, str(settings["temperature"]), settings["top_p"], settings["top_k"]
|
| 612 |
|
| 613 |
|
|
@@ -1196,7 +1237,7 @@ def create_interface():
|
|
| 1196 |
("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
|
| 1197 |
("Custom (manual)", "custom"),
|
| 1198 |
],
|
| 1199 |
-
value=
|
| 1200 |
label="CPU Thread Preset",
|
| 1201 |
info="Select hardware tier or specify custom thread count"
|
| 1202 |
)
|
|
@@ -1204,11 +1245,11 @@ def create_interface():
|
|
| 1204 |
custom_threads_slider = gr.Slider(
|
| 1205 |
minimum=1,
|
| 1206 |
maximum=32,
|
| 1207 |
-
value=4,
|
| 1208 |
step=1,
|
| 1209 |
label="Custom Thread Count",
|
| 1210 |
info="Number of CPU threads for model inference (1-32)",
|
| 1211 |
-
visible=
|
| 1212 |
)
|
| 1213 |
|
| 1214 |
temperature_slider = gr.Slider(
|
|
@@ -1256,8 +1297,10 @@ def create_interface():
|
|
| 1256 |
# Model info section (dynamic)
|
| 1257 |
with gr.Group():
|
| 1258 |
gr.HTML('<div class="section-header"><span class="section-icon">π</span> Model Information</div>')
|
|
|
|
|
|
|
| 1259 |
info_output = gr.Markdown(
|
| 1260 |
-
value=get_model_info(DEFAULT_MODEL_KEY)[0],
|
| 1261 |
elem_classes=["stats-grid"]
|
| 1262 |
)
|
| 1263 |
|
|
@@ -1293,9 +1336,17 @@ def create_interface():
|
|
| 1293 |
download_output = gr.File(label="Download JSON", visible=True)
|
| 1294 |
|
| 1295 |
# Function to update settings when model changes
|
| 1296 |
-
def update_settings_on_model_change(model_key):
|
| 1297 |
"""Update all Advanced Settings when model selection changes."""
|
| 1298 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1299 |
temperature = float(temp_str) if temp_str else 0.6
|
| 1300 |
return temperature, top_p_val, top_k_val, info_text
|
| 1301 |
|
|
@@ -1310,7 +1361,7 @@ def create_interface():
|
|
| 1310 |
# Update settings when model changes
|
| 1311 |
model_dropdown.change(
|
| 1312 |
fn=update_settings_on_model_change,
|
| 1313 |
-
inputs=[model_dropdown],
|
| 1314 |
outputs=[temperature_slider, top_p, top_k, info_output]
|
| 1315 |
)
|
| 1316 |
|
|
|
|
| 25 |
converter = None
|
| 26 |
current_model_key = None
|
| 27 |
|
| 28 |
+
# Thread configuration from environment variable
|
| 29 |
+
def _get_default_thread_config():
|
| 30 |
+
"""Get default thread configuration from environment variable."""
|
| 31 |
+
env_threads = os.environ.get("DEFAULT_N_THREADS", "").strip()
|
| 32 |
+
if env_threads:
|
| 33 |
+
try:
|
| 34 |
+
thread_count = int(env_threads)
|
| 35 |
+
if 1 <= thread_count <= 32:
|
| 36 |
+
logger.info(f"Using DEFAULT_N_THREADS={thread_count} from environment")
|
| 37 |
+
return "custom", thread_count
|
| 38 |
+
else:
|
| 39 |
+
logger.warning(f"DEFAULT_N_THREADS={thread_count} out of range (1-32), using HF Free Tier")
|
| 40 |
+
except ValueError:
|
| 41 |
+
logger.warning(f"Invalid DEFAULT_N_THREADS='{env_threads}', using HF Free Tier")
|
| 42 |
+
return "free", -1 # -1 = irrelevant when preset is not "custom"
|
| 43 |
+
|
| 44 |
+
DEFAULT_THREAD_PRESET, DEFAULT_CUSTOM_THREADS = _get_default_thread_config()
|
| 45 |
+
|
| 46 |
# Maximum context window to use (caps memory usage on 2 vCPUs)
|
| 47 |
MAX_USABLE_CTX = 32768
|
| 48 |
|
|
|
|
| 606 |
return max_tokens
|
| 607 |
|
| 608 |
|
| 609 |
+
def get_model_info(model_key: str, n_threads: int = 2) -> Tuple[str, str, float, int]:
    """Get model information and inference settings for UI display.

    Args:
        model_key: Model identifier from AVAILABLE_MODELS
        n_threads: Number of CPU threads currently configured

    Returns:
        Tuple of (info_text, temperature, top_p, top_k)
    """
    model = AVAILABLE_MODELS[model_key]
    effective_ctx = min(model["max_context"], MAX_USABLE_CTX)
    inference = model["inference_settings"]

    # Well-known thread counts map to hardware-tier labels; anything else is
    # treated as a user-supplied custom value.
    tier_labels = {2: "HF Free Tier", 8: "HF Upgrade Tier"}
    thread_label = tier_labels.get(n_threads, "Custom")

    # Shared two-column markdown table header used by every section below.
    table_header = "| Property | Value |\n|----------|-------|\n"
    info_text = (
        f"## π€ {model['name']}\n\n"
        "### π Model Specs\n"
        + table_header
        + f"| **Context** | {model['max_context']:,} tokens (capped at {effective_ctx:,}) |\n"
        f"| **Quantization** | `{model['filename']}` |\n"
        f"| **Repository** | `{model['repo_id']}` |\n\n"
        "### π₯οΈ Hardware Configuration\n"
        + table_header
        + f"| **CPU Threads** | {n_threads} ({thread_label}) |\n\n"
        "### βοΈ Inference Settings\n"
        + table_header
        + f"| **Temperature** | {inference['temperature']} |\n"
        f"| **Top P** | {inference['top_p']} |\n"
        f"| **Top K** | {inference['top_k']} |\n"
        f"| **Repeat Penalty** | {inference.get('repeat_penalty', 1.0)} |"
    )

    return info_text, str(inference["temperature"]), inference["top_p"], inference["top_k"]
|
| 653 |
|
| 654 |
|
|
|
|
| 1237 |
("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
|
| 1238 |
("Custom (manual)", "custom"),
|
| 1239 |
],
|
| 1240 |
+
value=DEFAULT_THREAD_PRESET,
|
| 1241 |
label="CPU Thread Preset",
|
| 1242 |
info="Select hardware tier or specify custom thread count"
|
| 1243 |
)
|
|
|
|
| 1245 |
custom_threads_slider = gr.Slider(
|
| 1246 |
minimum=1,
|
| 1247 |
maximum=32,
|
| 1248 |
+
value=DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 4,
|
| 1249 |
step=1,
|
| 1250 |
label="Custom Thread Count",
|
| 1251 |
info="Number of CPU threads for model inference (1-32)",
|
| 1252 |
+
visible=DEFAULT_THREAD_PRESET == "custom"
|
| 1253 |
)
|
| 1254 |
|
| 1255 |
temperature_slider = gr.Slider(
|
|
|
|
| 1297 |
# Model info section (dynamic)
|
| 1298 |
with gr.Group():
|
| 1299 |
gr.HTML('<div class="section-header"><span class="section-icon">π</span> Model Information</div>')
|
| 1300 |
+
# Get default thread count for initial display
|
| 1301 |
+
_default_threads = DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 2
|
| 1302 |
info_output = gr.Markdown(
|
| 1303 |
+
value=get_model_info(DEFAULT_MODEL_KEY, n_threads=_default_threads)[0],
|
| 1304 |
elem_classes=["stats-grid"]
|
| 1305 |
)
|
| 1306 |
|
|
|
|
| 1336 |
download_output = gr.File(label="Download JSON", visible=True)
|
| 1337 |
|
| 1338 |
# Function to update settings when model changes
|
| 1339 |
+
def update_settings_on_model_change(model_key, thread_config, custom_threads):
    """Update all Advanced Settings when model selection changes."""
    # Resolve the active thread count from the selected preset; a
    # non-positive custom slider value falls back to 4 threads, and an
    # unknown preset defaults to the free-tier count of 2.
    custom_value = custom_threads if custom_threads > 0 else 4
    preset_to_threads = {"free": 2, "upgrade": 8, "custom": custom_value}
    n_threads = preset_to_threads.get(thread_config, 2)

    info_text, temp_str, top_p_val, top_k_val = get_model_info(
        model_key, n_threads=n_threads
    )
    temperature = float(temp_str) if temp_str else 0.6
    return temperature, top_p_val, top_k_val, info_text
|
| 1352 |
|
|
|
|
| 1361 |
# Update settings when model changes
|
| 1362 |
model_dropdown.change(
|
| 1363 |
fn=update_settings_on_model_change,
|
| 1364 |
+
inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider],
|
| 1365 |
outputs=[temperature_slider, top_p, top_k, info_output]
|
| 1366 |
)
|
| 1367 |
|