Luigi committed on
Commit
eeea4ac
·
1 Parent(s): 6604bf5

feat: add DEFAULT_N_THREADS env var support + prettify model info

Browse files

- Add DEFAULT_N_THREADS environment variable to set default thread count
- Auto-detect and validate env var (1-32 threads, falls back to HF Free Tier if invalid)
- Display current n_threads in Model Information with nice markdown tables
- Model Information now shows: Model Specs, Hardware Config, Inference Settings
- Thread preset label shows: HF Free Tier (2), HF Upgrade Tier (8), or Custom
- Update thread config in model info when model or thread settings change

Files changed (1) hide show
  1. app.py +69 -18
app.py CHANGED
@@ -25,6 +25,24 @@ llm = None
25
  converter = None
26
  current_model_key = None
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  # Maximum context window to use (caps memory usage on 2 vCPUs)
29
  MAX_USABLE_CTX = 32768
30
 
@@ -588,26 +606,49 @@ def calculate_effective_max_tokens(model_key: str, max_tokens: int, enable_reaso
588
  return max_tokens
589
 
590
 
591
- def get_model_info(model_key: str) -> Tuple[str, str, float, int]:
592
  """Get model information and inference settings for UI display.
593
-
 
 
 
 
594
  Returns:
595
  Tuple of (info_text, temperature, top_p, top_k)
596
  """
597
  m = AVAILABLE_MODELS[model_key]
598
  usable_ctx = min(m["max_context"], MAX_USABLE_CTX)
599
  settings = m["inference_settings"]
600
-
 
 
 
 
 
 
 
 
601
  info_text = (
602
- f"**{m['name']}**\n\n"
603
- f"- Max context: {m['max_context']:,} tokens "
604
- f"(capped at {usable_ctx:,} for performance)\n"
605
- f"- Repo: `{m['repo_id']}`\n"
606
- f"- Quant: `{m['filename']}`\n"
607
- f"- Default Temperature: {settings['temperature']}\n"
608
- f"- Top P: {settings['top_p']}, Top K: {settings['top_k']}"
 
 
 
 
 
 
 
 
 
 
 
609
  )
610
-
611
  return info_text, str(settings["temperature"]), settings["top_p"], settings["top_k"]
612
 
613
 
@@ -1196,7 +1237,7 @@ def create_interface():
1196
  ("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
1197
  ("Custom (manual)", "custom"),
1198
  ],
1199
- value="free",
1200
  label="CPU Thread Preset",
1201
  info="Select hardware tier or specify custom thread count"
1202
  )
@@ -1204,11 +1245,11 @@ def create_interface():
1204
  custom_threads_slider = gr.Slider(
1205
  minimum=1,
1206
  maximum=32,
1207
- value=4,
1208
  step=1,
1209
  label="Custom Thread Count",
1210
  info="Number of CPU threads for model inference (1-32)",
1211
- visible=False
1212
  )
1213
 
1214
  temperature_slider = gr.Slider(
@@ -1256,8 +1297,10 @@ def create_interface():
1256
  # Model info section (dynamic)
1257
  with gr.Group():
1258
  gr.HTML('<div class="section-header"><span class="section-icon">πŸ“Š</span> Model Information</div>')
 
 
1259
  info_output = gr.Markdown(
1260
- value=get_model_info(DEFAULT_MODEL_KEY)[0],
1261
  elem_classes=["stats-grid"]
1262
  )
1263
 
@@ -1293,9 +1336,17 @@ def create_interface():
1293
  download_output = gr.File(label="Download JSON", visible=True)
1294
 
1295
  # Function to update settings when model changes
1296
- def update_settings_on_model_change(model_key):
1297
  """Update all Advanced Settings when model selection changes."""
1298
- info_text, temp_str, top_p_val, top_k_val = get_model_info(model_key)
 
 
 
 
 
 
 
 
1299
  temperature = float(temp_str) if temp_str else 0.6
1300
  return temperature, top_p_val, top_k_val, info_text
1301
 
@@ -1310,7 +1361,7 @@ def create_interface():
1310
  # Update settings when model changes
1311
  model_dropdown.change(
1312
  fn=update_settings_on_model_change,
1313
- inputs=[model_dropdown],
1314
  outputs=[temperature_slider, top_p, top_k, info_output]
1315
  )
1316
 
 
25
  converter = None
26
  current_model_key = None
27
 
28
+ # Thread configuration from environment variable
29
+ def _get_default_thread_config():
30
+ """Get default thread configuration from environment variable."""
31
+ env_threads = os.environ.get("DEFAULT_N_THREADS", "").strip()
32
+ if env_threads:
33
+ try:
34
+ thread_count = int(env_threads)
35
+ if 1 <= thread_count <= 32:
36
+ logger.info(f"Using DEFAULT_N_THREADS={thread_count} from environment")
37
+ return "custom", thread_count
38
+ else:
39
+ logger.warning(f"DEFAULT_N_THREADS={thread_count} out of range (1-32), using HF Free Tier")
40
+ except ValueError:
41
+ logger.warning(f"Invalid DEFAULT_N_THREADS='{env_threads}', using HF Free Tier")
42
+ return "free", -1 # -1 = irrelevant when preset is not "custom"
43
+
44
+ DEFAULT_THREAD_PRESET, DEFAULT_CUSTOM_THREADS = _get_default_thread_config()
45
+
46
  # Maximum context window to use (caps memory usage on 2 vCPUs)
47
  MAX_USABLE_CTX = 32768
48
 
 
606
  return max_tokens
607
 
608
 
609
+ def get_model_info(model_key: str, n_threads: int = 2) -> Tuple[str, str, float, int]:
610
  """Get model information and inference settings for UI display.
611
+
612
+ Args:
613
+ model_key: Model identifier from AVAILABLE_MODELS
614
+ n_threads: Number of CPU threads currently configured
615
+
616
  Returns:
617
  Tuple of (info_text, temperature, top_p, top_k)
618
  """
619
  m = AVAILABLE_MODELS[model_key]
620
  usable_ctx = min(m["max_context"], MAX_USABLE_CTX)
621
  settings = m["inference_settings"]
622
+
623
+ # Determine thread preset label
624
+ if n_threads == 2:
625
+ thread_label = "HF Free Tier"
626
+ elif n_threads == 8:
627
+ thread_label = "HF Upgrade Tier"
628
+ else:
629
+ thread_label = "Custom"
630
+
631
  info_text = (
632
+ f"## πŸ€– {m['name']}\n\n"
633
+ f"### πŸ“Š Model Specs\n"
634
+ f"| Property | Value |\n"
635
+ f"|----------|-------|\n"
636
+ f"| **Context** | {m['max_context']:,} tokens (capped at {usable_ctx:,}) |\n"
637
+ f"| **Quantization** | `{m['filename']}` |\n"
638
+ f"| **Repository** | `{m['repo_id']}` |\n\n"
639
+ f"### πŸ–₯️ Hardware Configuration\n"
640
+ f"| Property | Value |\n"
641
+ f"|----------|-------|\n"
642
+ f"| **CPU Threads** | {n_threads} ({thread_label}) |\n\n"
643
+ f"### βš™οΈ Inference Settings\n"
644
+ f"| Property | Value |\n"
645
+ f"|----------|-------|\n"
646
+ f"| **Temperature** | {settings['temperature']} |\n"
647
+ f"| **Top P** | {settings['top_p']} |\n"
648
+ f"| **Top K** | {settings['top_k']} |\n"
649
+ f"| **Repeat Penalty** | {settings.get('repeat_penalty', 1.0)} |"
650
  )
651
+
652
  return info_text, str(settings["temperature"]), settings["top_p"], settings["top_k"]
653
 
654
 
 
1237
  ("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
1238
  ("Custom (manual)", "custom"),
1239
  ],
1240
+ value=DEFAULT_THREAD_PRESET,
1241
  label="CPU Thread Preset",
1242
  info="Select hardware tier or specify custom thread count"
1243
  )
 
1245
  custom_threads_slider = gr.Slider(
1246
  minimum=1,
1247
  maximum=32,
1248
+ value=DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 4,
1249
  step=1,
1250
  label="Custom Thread Count",
1251
  info="Number of CPU threads for model inference (1-32)",
1252
+ visible=DEFAULT_THREAD_PRESET == "custom"
1253
  )
1254
 
1255
  temperature_slider = gr.Slider(
 
1297
  # Model info section (dynamic)
1298
  with gr.Group():
1299
  gr.HTML('<div class="section-header"><span class="section-icon">πŸ“Š</span> Model Information</div>')
1300
+ # Get default thread count for initial display
1301
+ _default_threads = DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 2
1302
  info_output = gr.Markdown(
1303
+ value=get_model_info(DEFAULT_MODEL_KEY, n_threads=_default_threads)[0],
1304
  elem_classes=["stats-grid"]
1305
  )
1306
 
 
1336
  download_output = gr.File(label="Download JSON", visible=True)
1337
 
1338
  # Function to update settings when model changes
1339
+ def update_settings_on_model_change(model_key, thread_config, custom_threads):
1340
  """Update all Advanced Settings when model selection changes."""
1341
+ # Calculate n_threads based on preset
1342
+ thread_preset_map = {
1343
+ "free": 2,
1344
+ "upgrade": 8,
1345
+ "custom": custom_threads if custom_threads > 0 else 4,
1346
+ }
1347
+ n_threads = thread_preset_map.get(thread_config, 2)
1348
+
1349
+ info_text, temp_str, top_p_val, top_k_val = get_model_info(model_key, n_threads=n_threads)
1350
  temperature = float(temp_str) if temp_str else 0.6
1351
  return temperature, top_p_val, top_k_val, info_text
1352
 
 
1361
  # Update settings when model changes
1362
  model_dropdown.change(
1363
  fn=update_settings_on_model_change,
1364
+ inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider],
1365
  outputs=[temperature_slider, top_p, top_k, info_output]
1366
  )
1367