Spaces:
Running on A10G
Running on A10G
Fix stale model ref race, add ACTIVE_MODELS env var, hide unavailable modes
Browse files
- server.py: resolve model inside run_generation() after lock is held,
not before queuing → prevents using an evicted model's CUDA graphs
- talker_graph.py: raise clear RuntimeError in prefill_kv() when
prefill exceeds max_seq_len instead of cryptic CUDA device-side assert
- server.py + index.html: ACTIVE_MODELS env var filters available models;
mode rows (Custom, Voice Design) hidden entirely when no models available
- Dockerfile: set ACTIVE_MODELS=Qwen/Qwen3-TTS-12Hz-1.7B-Base for Space
- Dockerfile +1 -0
- index.html +46 -12
- server.py +19 -3
Dockerfile
CHANGED
|
@@ -3,6 +3,7 @@ FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
|
|
| 3 |
ENV DEBIAN_FRONTEND=noninteractive
|
| 4 |
ENV PYTHONUNBUFFERED=1
|
| 5 |
ENV MODEL_CACHE_SIZE=5
|
|
|
|
| 6 |
ENV HOME=/tmp
|
| 7 |
ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor
|
| 8 |
|
|
|
|
| 3 |
ENV DEBIAN_FRONTEND=noninteractive
|
| 4 |
ENV PYTHONUNBUFFERED=1
|
| 5 |
ENV MODEL_CACHE_SIZE=5
|
| 6 |
+
ENV ACTIVE_MODELS=Qwen/Qwen3-TTS-12Hz-1.7B-Base
|
| 7 |
ENV HOME=/tmp
|
| 8 |
ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor
|
| 9 |
|
index.html
CHANGED
|
@@ -772,13 +772,7 @@ body {
|
|
| 772 |
<div class="s-section">Model</div>
|
| 773 |
<div class="s-row">
|
| 774 |
<label>Model</label>
|
| 775 |
-
<select id="modelSel">
|
| 776 |
-
<option value="Qwen/Qwen3-TTS-12Hz-0.6B-Base">0.6B Base</option>
|
| 777 |
-
<option value="Qwen/Qwen3-TTS-12Hz-1.7B-Base">1.7B Base</option>
|
| 778 |
-
<option value="Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice">0.6B CustomVoice</option>
|
| 779 |
-
<option value="Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice" selected>1.7B CustomVoice</option>
|
| 780 |
-
<option value="Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign">1.7B VoiceDesign</option>
|
| 781 |
-
</select>
|
| 782 |
<button class="s-btn" id="loadBtn" onclick="loadModel()">Load</button>
|
| 783 |
</div>
|
| 784 |
|
|
@@ -906,8 +900,11 @@ window.addEventListener('load', async () => {
|
|
| 906 |
initTheme();
|
| 907 |
autoGrow($('voiceInstr'));
|
| 908 |
await fetchStatus();
|
| 909 |
-
if (!loadedModel) {
|
| 910 |
-
|
|
|
|
|
|
|
|
|
|
| 911 |
loadModel();
|
| 912 |
}
|
| 913 |
loadMics();
|
|
@@ -1036,11 +1033,11 @@ async function transcribeRef() {
|
|
| 1036 |
async function fetchStatus() {
|
| 1037 |
try {
|
| 1038 |
const d = await fetch('/status').then(r => r.json());
|
|
|
|
| 1039 |
if (d.loaded && d.model) {
|
| 1040 |
// Only trust loadedModel when the server confirms it's actually loaded
|
| 1041 |
loadedModel = d.model;
|
| 1042 |
-
|
| 1043 |
-
for (const o of sel.options) if (o.value === d.model) { o.selected = true; break; }
|
| 1044 |
}
|
| 1045 |
populateSpeakers(d.speakers || []);
|
| 1046 |
renderPresetRefs(d.preset_refs || []);
|
|
@@ -1100,7 +1097,7 @@ function updateModeRows() {
|
|
| 1100 |
}
|
| 1101 |
|
| 1102 |
// ── Arcade mode loader ──────────────────────────────────────────────────────
|
| 1103 |
-
const MODE_MODELS = {
|
| 1104 |
voice_clone: [
|
| 1105 |
{ id: 'Qwen/Qwen3-TTS-12Hz-0.6B-Base', name: '0.6B Base', sub: 'Fastest · RTF ~4×' },
|
| 1106 |
{ id: 'Qwen/Qwen3-TTS-12Hz-1.7B-Base', name: '1.7B Base', sub: 'Higher quality · RTF ~3.5×' },
|
|
@@ -1113,9 +1110,46 @@ const MODE_MODELS = {
|
|
| 1113 |
{ id: 'Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign', name: '1.7B VoiceDesign', sub: 'Describe any voice' },
|
| 1114 |
],
|
| 1115 |
};
|
|
|
|
|
|
|
| 1116 |
const MODE_LABELS = { voice_clone: 'Clone', custom: 'Custom', voice_design: 'Voice Design' };
|
| 1117 |
let loaderMode = null;
|
| 1118 |
let loaderSelectedModel = null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1119 |
|
| 1120 |
function onModeRowClick(mode) {
|
| 1121 |
const rowMap = { voice_clone: 'rowClone', custom: 'rowChar', voice_design: 'rowCustom' };
|
|
|
|
| 772 |
<div class="s-section">Model</div>
|
| 773 |
<div class="s-row">
|
| 774 |
<label>Model</label>
|
| 775 |
+
<select id="modelSel"></select>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 776 |
<button class="s-btn" id="loadBtn" onclick="loadModel()">Load</button>
|
| 777 |
</div>
|
| 778 |
|
|
|
|
| 900 |
initTheme();
|
| 901 |
autoGrow($('voiceInstr'));
|
| 902 |
await fetchStatus();
|
| 903 |
+
if (!loadedModel && availableModels.length > 0) {
|
| 904 |
+
// Prefer CustomVoice as default (richer demo), fall back to first available
|
| 905 |
+
const preferred = availableModels.find(m => m.includes('CustomVoice'))
|
| 906 |
+
|| availableModels[0];
|
| 907 |
+
$('modelSel').value = preferred;
|
| 908 |
loadModel();
|
| 909 |
}
|
| 910 |
loadMics();
|
|
|
|
| 1033 |
async function fetchStatus() {
|
| 1034 |
try {
|
| 1035 |
const d = await fetch('/status').then(r => r.json());
|
| 1036 |
+
applyAvailableModels(d.available_models || []);
|
| 1037 |
if (d.loaded && d.model) {
|
| 1038 |
// Only trust loadedModel when the server confirms it's actually loaded
|
| 1039 |
loadedModel = d.model;
|
| 1040 |
+
$('modelSel').value = d.model;
|
|
|
|
| 1041 |
}
|
| 1042 |
populateSpeakers(d.speakers || []);
|
| 1043 |
renderPresetRefs(d.preset_refs || []);
|
|
|
|
| 1097 |
}
|
| 1098 |
|
| 1099 |
// ── Arcade mode loader ──────────────────────────────────────────────────────
|
| 1100 |
+
const ALL_MODE_MODELS = {
|
| 1101 |
voice_clone: [
|
| 1102 |
{ id: 'Qwen/Qwen3-TTS-12Hz-0.6B-Base', name: '0.6B Base', sub: 'Fastest · RTF ~4×' },
|
| 1103 |
{ id: 'Qwen/Qwen3-TTS-12Hz-1.7B-Base', name: '1.7B Base', sub: 'Higher quality · RTF ~3.5×' },
|
|
|
|
| 1110 |
{ id: 'Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign', name: '1.7B VoiceDesign', sub: 'Describe any voice' },
|
| 1111 |
],
|
| 1112 |
};
|
| 1113 |
+
// Filtered at runtime by available_models from /status
|
| 1114 |
+
let MODE_MODELS = ALL_MODE_MODELS;
|
| 1115 |
const MODE_LABELS = { voice_clone: 'Clone', custom: 'Custom', voice_design: 'Voice Design' };
|
| 1116 |
let loaderMode = null;
|
| 1117 |
let loaderSelectedModel = null;
|
| 1118 |
+
let availableModels = [];
|
| 1119 |
+
|
| 1120 |
+
function applyAvailableModels(available) {
|
| 1121 |
+
availableModels = available || [];
|
| 1122 |
+
const avail = new Set(availableModels);
|
| 1123 |
+
|
| 1124 |
+
// Rebuild model selector options
|
| 1125 |
+
const sel = $('modelSel');
|
| 1126 |
+
const prev = sel.value;
|
| 1127 |
+
sel.innerHTML = '';
|
| 1128 |
+
for (const [mode, models] of Object.entries(ALL_MODE_MODELS)) {
|
| 1129 |
+
for (const m of models) {
|
| 1130 |
+
if (!avail.has(m.id)) continue;
|
| 1131 |
+
const opt = document.createElement('option');
|
| 1132 |
+
opt.value = m.id;
|
| 1133 |
+
opt.textContent = m.name;
|
| 1134 |
+
sel.appendChild(opt);
|
| 1135 |
+
}
|
| 1136 |
+
}
|
| 1137 |
+
// Restore selection if still available, else pick first
|
| 1138 |
+
if (avail.has(prev)) sel.value = prev;
|
| 1139 |
+
else if (sel.options.length > 0) sel.value = sel.options[0].value;
|
| 1140 |
+
|
| 1141 |
+
// Filter MODE_MODELS to available only
|
| 1142 |
+
MODE_MODELS = {};
|
| 1143 |
+
for (const [mode, models] of Object.entries(ALL_MODE_MODELS)) {
|
| 1144 |
+
MODE_MODELS[mode] = models.filter(m => avail.has(m.id));
|
| 1145 |
+
}
|
| 1146 |
+
|
| 1147 |
+
// Hide mode rows that have no available models; show those that do
|
| 1148 |
+
const modeRows = { voice_clone: 'rowClone', custom: 'rowChar', voice_design: 'rowCustom' };
|
| 1149 |
+
for (const [mode, rowId] of Object.entries(modeRows)) {
|
| 1150 |
+
$(rowId).style.display = MODE_MODELS[mode].length > 0 ? '' : 'none';
|
| 1151 |
+
}
|
| 1152 |
+
}
|
| 1153 |
|
| 1154 |
function onModeRowClick(mode) {
|
| 1155 |
const rowMap = { voice_clone: 'rowClone', custom: 'rowChar', voice_design: 'rowCustom' };
|
server.py
CHANGED
|
@@ -44,7 +44,7 @@ except ImportError:
|
|
| 44 |
from nano_parakeet import from_pretrained as _parakeet_from_pretrained
|
| 45 |
|
| 46 |
|
| 47 |
-
|
| 48 |
"Qwen/Qwen3-TTS-12Hz-0.6B-Base",
|
| 49 |
"Qwen/Qwen3-TTS-12Hz-1.7B-Base",
|
| 50 |
"Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice",
|
|
@@ -52,6 +52,13 @@ AVAILABLE_MODELS = [
|
|
| 52 |
"Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign",
|
| 53 |
]
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
BASE_DIR = Path(__file__).resolve().parent
|
| 56 |
# Assets that need to be downloaded at runtime go to a writable directory.
|
| 57 |
# /app is read-only in HF Spaces; fall back to /tmp.
|
|
@@ -325,7 +332,6 @@ async def generate_stream(
|
|
| 325 |
if not _active_model_name or _active_model_name not in _model_cache:
|
| 326 |
raise HTTPException(status_code=400, detail="Model not loaded. Click 'Load' first.")
|
| 327 |
|
| 328 |
-
model = _model_cache[_active_model_name]
|
| 329 |
tmp_path = None
|
| 330 |
tmp_is_cached = False
|
| 331 |
|
|
@@ -345,6 +351,13 @@ async def generate_stream(
|
|
| 345 |
|
| 346 |
def run_generation():
|
| 347 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
t0 = time.perf_counter()
|
| 349 |
total_audio_s = 0.0
|
| 350 |
voice_clone_ms = 0.0
|
|
@@ -522,7 +535,6 @@ async def generate_non_streaming(
|
|
| 522 |
if not _active_model_name or _active_model_name not in _model_cache:
|
| 523 |
raise HTTPException(status_code=400, detail="Model not loaded. Click 'Load' first.")
|
| 524 |
|
| 525 |
-
model = _model_cache[_active_model_name]
|
| 526 |
tmp_path = None
|
| 527 |
tmp_is_cached = False
|
| 528 |
|
|
@@ -538,6 +550,10 @@ async def generate_non_streaming(
|
|
| 538 |
tmp_is_cached = True
|
| 539 |
|
| 540 |
def run():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
t0 = time.perf_counter()
|
| 542 |
if mode == "voice_clone":
|
| 543 |
audio_list, sr = model.generate_voice_clone(
|
|
|
|
| 44 |
from nano_parakeet import from_pretrained as _parakeet_from_pretrained
|
| 45 |
|
| 46 |
|
| 47 |
+
_ALL_MODELS = [
|
| 48 |
"Qwen/Qwen3-TTS-12Hz-0.6B-Base",
|
| 49 |
"Qwen/Qwen3-TTS-12Hz-1.7B-Base",
|
| 50 |
"Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice",
|
|
|
|
| 52 |
"Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign",
|
| 53 |
]
|
| 54 |
|
| 55 |
+
_active_models_env = os.environ.get("ACTIVE_MODELS", "")
|
| 56 |
+
if _active_models_env:
|
| 57 |
+
_allowed = {m.strip() for m in _active_models_env.split(",") if m.strip()}
|
| 58 |
+
AVAILABLE_MODELS = [m for m in _ALL_MODELS if m in _allowed]
|
| 59 |
+
else:
|
| 60 |
+
AVAILABLE_MODELS = list(_ALL_MODELS)
|
| 61 |
+
|
| 62 |
BASE_DIR = Path(__file__).resolve().parent
|
| 63 |
# Assets that need to be downloaded at runtime go to a writable directory.
|
| 64 |
# /app is read-only in HF Spaces; fall back to /tmp.
|
|
|
|
| 332 |
if not _active_model_name or _active_model_name not in _model_cache:
|
| 333 |
raise HTTPException(status_code=400, detail="Model not loaded. Click 'Load' first.")
|
| 334 |
|
|
|
|
| 335 |
tmp_path = None
|
| 336 |
tmp_is_cached = False
|
| 337 |
|
|
|
|
| 351 |
|
| 352 |
def run_generation():
|
| 353 |
try:
|
| 354 |
+
# Resolve the model after the generation lock is held so we always
|
| 355 |
+
# use the currently active model, not a stale reference captured
|
| 356 |
+
# before a concurrent /load request changed the active model.
|
| 357 |
+
model = _model_cache.get(_active_model_name)
|
| 358 |
+
if model is None:
|
| 359 |
+
raise RuntimeError("No model loaded. Please load a model first.")
|
| 360 |
+
|
| 361 |
t0 = time.perf_counter()
|
| 362 |
total_audio_s = 0.0
|
| 363 |
voice_clone_ms = 0.0
|
|
|
|
| 535 |
if not _active_model_name or _active_model_name not in _model_cache:
|
| 536 |
raise HTTPException(status_code=400, detail="Model not loaded. Click 'Load' first.")
|
| 537 |
|
|
|
|
| 538 |
tmp_path = None
|
| 539 |
tmp_is_cached = False
|
| 540 |
|
|
|
|
| 550 |
tmp_is_cached = True
|
| 551 |
|
| 552 |
def run():
|
| 553 |
+
# Resolve the model after the generation lock is held.
|
| 554 |
+
model = _model_cache.get(_active_model_name)
|
| 555 |
+
if model is None:
|
| 556 |
+
raise RuntimeError("No model loaded. Please load a model first.")
|
| 557 |
t0 = time.perf_counter()
|
| 558 |
if mode == "voice_clone":
|
| 559 |
audio_list, sr = model.generate_voice_clone(
|