Spaces:
Running
Running
feat: add DEFAULT_N_THREADS env var support + prettify model info
Browse files
- Add DEFAULT_N_THREADS environment variable to set default thread count
- Auto-detect and validate env var (1-32 threads, falls back to HF Free Tier if invalid)
- Display current n_threads in Model Information with nice markdown tables
- Model Information now shows: Model Specs, Hardware Config, Inference Settings
- Thread preset label shows: HF Free Tier (2), HF Upgrade Tier (8), or Custom
- Update thread config in model info when model or thread settings change
app.py
CHANGED
|
@@ -25,6 +25,24 @@ llm = None
|
|
| 25 |
converter = None
|
| 26 |
current_model_key = None
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Maximum context window to use (caps memory usage on 2 vCPUs)
|
| 29 |
MAX_USABLE_CTX = 32768
|
| 30 |
|
|
@@ -588,26 +606,49 @@ def calculate_effective_max_tokens(model_key: str, max_tokens: int, enable_reaso
|
|
| 588 |
return max_tokens
|
| 589 |
|
| 590 |
|
| 591 |
-
def get_model_info(model_key: str) -> Tuple[str, str, float, int]:
|
| 592 |
"""Get model information and inference settings for UI display.
|
| 593 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
Returns:
|
| 595 |
Tuple of (info_text, temperature, top_p, top_k)
|
| 596 |
"""
|
| 597 |
m = AVAILABLE_MODELS[model_key]
|
| 598 |
usable_ctx = min(m["max_context"], MAX_USABLE_CTX)
|
| 599 |
settings = m["inference_settings"]
|
| 600 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
info_text = (
|
| 602 |
-
f"
|
| 603 |
-
f"
|
| 604 |
-
f"
|
| 605 |
-
f"-
|
| 606 |
-
f"
|
| 607 |
-
f"
|
| 608 |
-
f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
)
|
| 610 |
-
|
| 611 |
return info_text, str(settings["temperature"]), settings["top_p"], settings["top_k"]
|
| 612 |
|
| 613 |
|
|
@@ -1196,7 +1237,7 @@ def create_interface():
|
|
| 1196 |
("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
|
| 1197 |
("Custom (manual)", "custom"),
|
| 1198 |
],
|
| 1199 |
-
value=
|
| 1200 |
label="CPU Thread Preset",
|
| 1201 |
info="Select hardware tier or specify custom thread count"
|
| 1202 |
)
|
|
@@ -1204,11 +1245,11 @@ def create_interface():
|
|
| 1204 |
custom_threads_slider = gr.Slider(
|
| 1205 |
minimum=1,
|
| 1206 |
maximum=32,
|
| 1207 |
-
value=4,
|
| 1208 |
step=1,
|
| 1209 |
label="Custom Thread Count",
|
| 1210 |
info="Number of CPU threads for model inference (1-32)",
|
| 1211 |
-
visible=
|
| 1212 |
)
|
| 1213 |
|
| 1214 |
temperature_slider = gr.Slider(
|
|
@@ -1256,8 +1297,10 @@ def create_interface():
|
|
| 1256 |
# Model info section (dynamic)
|
| 1257 |
with gr.Group():
|
| 1258 |
gr.HTML('<div class="section-header"><span class="section-icon">π</span> Model Information</div>')
|
|
|
|
|
|
|
| 1259 |
info_output = gr.Markdown(
|
| 1260 |
-
value=get_model_info(DEFAULT_MODEL_KEY)[0],
|
| 1261 |
elem_classes=["stats-grid"]
|
| 1262 |
)
|
| 1263 |
|
|
@@ -1293,9 +1336,17 @@ def create_interface():
|
|
| 1293 |
download_output = gr.File(label="Download JSON", visible=True)
|
| 1294 |
|
| 1295 |
# Function to update settings when model changes
|
| 1296 |
-
def update_settings_on_model_change(model_key):
|
| 1297 |
"""Update all Advanced Settings when model selection changes."""
|
| 1298 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1299 |
temperature = float(temp_str) if temp_str else 0.6
|
| 1300 |
return temperature, top_p_val, top_k_val, info_text
|
| 1301 |
|
|
@@ -1310,7 +1361,7 @@ def create_interface():
|
|
| 1310 |
# Update settings when model changes
|
| 1311 |
model_dropdown.change(
|
| 1312 |
fn=update_settings_on_model_change,
|
| 1313 |
-
inputs=[model_dropdown],
|
| 1314 |
outputs=[temperature_slider, top_p, top_k, info_output]
|
| 1315 |
)
|
| 1316 |
|
|
|
|
| 25 |
converter = None
|
| 26 |
current_model_key = None
|
| 27 |
|
| 28 |
+
# Thread configuration from environment variable
|
| 29 |
+
def _get_default_thread_config():
|
| 30 |
+
"""Get default thread configuration from environment variable."""
|
| 31 |
+
env_threads = os.environ.get("DEFAULT_N_THREADS", "").strip()
|
| 32 |
+
if env_threads:
|
| 33 |
+
try:
|
| 34 |
+
thread_count = int(env_threads)
|
| 35 |
+
if 1 <= thread_count <= 32:
|
| 36 |
+
logger.info(f"Using DEFAULT_N_THREADS={thread_count} from environment")
|
| 37 |
+
return "custom", thread_count
|
| 38 |
+
else:
|
| 39 |
+
logger.warning(f"DEFAULT_N_THREADS={thread_count} out of range (1-32), using HF Free Tier")
|
| 40 |
+
except ValueError:
|
| 41 |
+
logger.warning(f"Invalid DEFAULT_N_THREADS='{env_threads}', using HF Free Tier")
|
| 42 |
+
return "free", -1 # -1 = irrelevant when preset is not "custom"
|
| 43 |
+
|
| 44 |
+
DEFAULT_THREAD_PRESET, DEFAULT_CUSTOM_THREADS = _get_default_thread_config()
|
| 45 |
+
|
| 46 |
# Maximum context window to use (caps memory usage on 2 vCPUs)
|
| 47 |
MAX_USABLE_CTX = 32768
|
| 48 |
|
|
|
|
| 606 |
return max_tokens
|
| 607 |
|
| 608 |
|
| 609 |
+
def get_model_info(model_key: str, n_threads: int = 2) -> Tuple[str, str, float, int]:
    """Get model information and inference settings for UI display.

    Args:
        model_key: Model identifier from AVAILABLE_MODELS
        n_threads: Number of CPU threads currently configured

    Returns:
        Tuple of (info_text, temperature, top_p, top_k)
    """
    model = AVAILABLE_MODELS[model_key]
    effective_ctx = min(model["max_context"], MAX_USABLE_CTX)
    inference = model["inference_settings"]

    # Well-known thread counts map to hardware-tier labels; anything else is
    # treated as a user-supplied custom value.
    tier_labels = {2: "HF Free Tier", 8: "HF Upgrade Tier"}
    thread_label = tier_labels.get(n_threads, "Custom")

    # Shared two-column markdown table header used by every section below.
    table_header = "| Property | Value |\n|----------|-------|\n"
    info_text = (
        f"## π€ {model['name']}\n\n"
        "### π Model Specs\n"
        + table_header
        + f"| **Context** | {model['max_context']:,} tokens (capped at {effective_ctx:,}) |\n"
        f"| **Quantization** | `{model['filename']}` |\n"
        f"| **Repository** | `{model['repo_id']}` |\n\n"
        "### π₯οΈ Hardware Configuration\n"
        + table_header
        + f"| **CPU Threads** | {n_threads} ({thread_label}) |\n\n"
        "### βοΈ Inference Settings\n"
        + table_header
        + f"| **Temperature** | {inference['temperature']} |\n"
        f"| **Top P** | {inference['top_p']} |\n"
        f"| **Top K** | {inference['top_k']} |\n"
        f"| **Repeat Penalty** | {inference.get('repeat_penalty', 1.0)} |"
    )

    return info_text, str(inference["temperature"]), inference["top_p"], inference["top_k"]
|
| 653 |
|
| 654 |
|
|
|
|
| 1237 |
("HF Spaces CPU Upgrade (8 vCPUs)", "upgrade"),
|
| 1238 |
("Custom (manual)", "custom"),
|
| 1239 |
],
|
| 1240 |
+
value=DEFAULT_THREAD_PRESET,
|
| 1241 |
label="CPU Thread Preset",
|
| 1242 |
info="Select hardware tier or specify custom thread count"
|
| 1243 |
)
|
|
|
|
| 1245 |
custom_threads_slider = gr.Slider(
|
| 1246 |
minimum=1,
|
| 1247 |
maximum=32,
|
| 1248 |
+
value=DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 4,
|
| 1249 |
step=1,
|
| 1250 |
label="Custom Thread Count",
|
| 1251 |
info="Number of CPU threads for model inference (1-32)",
|
| 1252 |
+
visible=DEFAULT_THREAD_PRESET == "custom"
|
| 1253 |
)
|
| 1254 |
|
| 1255 |
temperature_slider = gr.Slider(
|
|
|
|
| 1297 |
# Model info section (dynamic)
|
| 1298 |
with gr.Group():
|
| 1299 |
gr.HTML('<div class="section-header"><span class="section-icon">π</span> Model Information</div>')
|
| 1300 |
+
# Get default thread count for initial display
|
| 1301 |
+
_default_threads = DEFAULT_CUSTOM_THREADS if DEFAULT_CUSTOM_THREADS > 0 else 2
|
| 1302 |
info_output = gr.Markdown(
|
| 1303 |
+
value=get_model_info(DEFAULT_MODEL_KEY, n_threads=_default_threads)[0],
|
| 1304 |
elem_classes=["stats-grid"]
|
| 1305 |
)
|
| 1306 |
|
|
|
|
| 1336 |
download_output = gr.File(label="Download JSON", visible=True)
|
| 1337 |
|
| 1338 |
# Function to update settings when model changes
|
| 1339 |
+
def update_settings_on_model_change(model_key, thread_config, custom_threads):
    """Update all Advanced Settings when model selection changes."""
    # Resolve the active thread count from the selected preset; a
    # non-positive custom slider value falls back to 4 threads, and an
    # unknown preset defaults to the free-tier count of 2.
    custom_value = custom_threads if custom_threads > 0 else 4
    preset_to_threads = {"free": 2, "upgrade": 8, "custom": custom_value}
    n_threads = preset_to_threads.get(thread_config, 2)

    info_text, temp_str, top_p_val, top_k_val = get_model_info(
        model_key, n_threads=n_threads
    )
    temperature = float(temp_str) if temp_str else 0.6
    return temperature, top_p_val, top_k_val, info_text
|
| 1352 |
|
|
|
|
| 1361 |
# Update settings when model changes
|
| 1362 |
model_dropdown.change(
|
| 1363 |
fn=update_settings_on_model_change,
|
| 1364 |
+
inputs=[model_dropdown, thread_config_dropdown, custom_threads_slider],
|
| 1365 |
outputs=[temperature_slider, top_p, top_k, info_output]
|
| 1366 |
)
|
| 1367 |
|