andito HF Staff committed on
Commit
083bc11
·
1 Parent(s): 88969e9

Fix stale model ref race, add ACTIVE_MODELS env var, hide unavailable modes

Browse files

- server.py: resolve model inside run_generation() after lock is held,
not before queuing — prevents using an evicted model's CUDA graphs
- talker_graph.py: raise clear RuntimeError in prefill_kv() when
prefill exceeds max_seq_len instead of cryptic CUDA device-side assert
- server.py + index.html: ACTIVE_MODELS env var filters available models;
mode rows (Custom, Voice Design) hidden entirely when no models available
- Dockerfile: set ACTIVE_MODELS=Qwen/Qwen3-TTS-12Hz-1.7B-Base for Space

Files changed (3) hide show
  1. Dockerfile +1 -0
  2. index.html +46 -12
  3. server.py +19 -3
Dockerfile CHANGED
@@ -3,6 +3,7 @@ FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
3
  ENV DEBIAN_FRONTEND=noninteractive
4
  ENV PYTHONUNBUFFERED=1
5
  ENV MODEL_CACHE_SIZE=5
 
6
  ENV HOME=/tmp
7
  ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor
8
 
 
3
  ENV DEBIAN_FRONTEND=noninteractive
4
  ENV PYTHONUNBUFFERED=1
5
  ENV MODEL_CACHE_SIZE=5
6
+ ENV ACTIVE_MODELS=Qwen/Qwen3-TTS-12Hz-1.7B-Base
7
  ENV HOME=/tmp
8
  ENV TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor
9
 
index.html CHANGED
@@ -772,13 +772,7 @@ body {
772
  <div class="s-section">Model</div>
773
  <div class="s-row">
774
  <label>Model</label>
775
- <select id="modelSel">
776
- <option value="Qwen/Qwen3-TTS-12Hz-0.6B-Base">0.6B Base</option>
777
- <option value="Qwen/Qwen3-TTS-12Hz-1.7B-Base">1.7B Base</option>
778
- <option value="Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice">0.6B CustomVoice</option>
779
- <option value="Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice" selected>1.7B CustomVoice</option>
780
- <option value="Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign">1.7B VoiceDesign</option>
781
- </select>
782
  <button class="s-btn" id="loadBtn" onclick="loadModel()">Load</button>
783
  </div>
784
 
@@ -906,8 +900,11 @@ window.addEventListener('load', async () => {
906
  initTheme();
907
  autoGrow($('voiceInstr'));
908
  await fetchStatus();
909
- if (!loadedModel) {
910
- $('modelSel').value = 'Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice';
 
 
 
911
  loadModel();
912
  }
913
  loadMics();
@@ -1036,11 +1033,11 @@ async function transcribeRef() {
1036
  async function fetchStatus() {
1037
  try {
1038
  const d = await fetch('/status').then(r => r.json());
 
1039
  if (d.loaded && d.model) {
1040
  // Only trust loadedModel when the server confirms it's actually loaded
1041
  loadedModel = d.model;
1042
- const sel = $('modelSel');
1043
- for (const o of sel.options) if (o.value === d.model) { o.selected = true; break; }
1044
  }
1045
  populateSpeakers(d.speakers || []);
1046
  renderPresetRefs(d.preset_refs || []);
@@ -1100,7 +1097,7 @@ function updateModeRows() {
1100
  }
1101
 
1102
  // ── Arcade mode loader ─────────────────────────────────────────────────────────
1103
- const MODE_MODELS = {
1104
  voice_clone: [
1105
  { id: 'Qwen/Qwen3-TTS-12Hz-0.6B-Base', name: '0.6B Base', sub: 'Fastest Β· RTF ~4Γ—' },
1106
  { id: 'Qwen/Qwen3-TTS-12Hz-1.7B-Base', name: '1.7B Base', sub: 'Higher quality Β· RTF ~3.5Γ—' },
@@ -1113,9 +1110,46 @@ const MODE_MODELS = {
1113
  { id: 'Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign', name: '1.7B VoiceDesign', sub: 'Describe any voice' },
1114
  ],
1115
  };
 
 
1116
  const MODE_LABELS = { voice_clone: 'Clone', custom: 'Custom', voice_design: 'Voice Design' };
1117
  let loaderMode = null;
1118
  let loaderSelectedModel = null;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1119
 
1120
  function onModeRowClick(mode) {
1121
  const rowMap = { voice_clone: 'rowClone', custom: 'rowChar', voice_design: 'rowCustom' };
 
772
  <div class="s-section">Model</div>
773
  <div class="s-row">
774
  <label>Model</label>
775
+ <select id="modelSel"></select>
 
 
 
 
 
 
776
  <button class="s-btn" id="loadBtn" onclick="loadModel()">Load</button>
777
  </div>
778
 
 
900
  initTheme();
901
  autoGrow($('voiceInstr'));
902
  await fetchStatus();
903
+ if (!loadedModel && availableModels.length > 0) {
904
+ // Prefer CustomVoice as default (richer demo), fall back to first available
905
+ const preferred = availableModels.find(m => m.includes('CustomVoice'))
906
+ || availableModels[0];
907
+ $('modelSel').value = preferred;
908
  loadModel();
909
  }
910
  loadMics();
 
1033
  async function fetchStatus() {
1034
  try {
1035
  const d = await fetch('/status').then(r => r.json());
1036
+ applyAvailableModels(d.available_models || []);
1037
  if (d.loaded && d.model) {
1038
  // Only trust loadedModel when the server confirms it's actually loaded
1039
  loadedModel = d.model;
1040
+ $('modelSel').value = d.model;
 
1041
  }
1042
  populateSpeakers(d.speakers || []);
1043
  renderPresetRefs(d.preset_refs || []);
 
1097
  }
1098
 
1099
  // ── Arcade mode loader ─────────────────────────────────────────────────────────
1100
+ const ALL_MODE_MODELS = {
1101
  voice_clone: [
1102
  { id: 'Qwen/Qwen3-TTS-12Hz-0.6B-Base', name: '0.6B Base', sub: 'Fastest Β· RTF ~4Γ—' },
1103
  { id: 'Qwen/Qwen3-TTS-12Hz-1.7B-Base', name: '1.7B Base', sub: 'Higher quality Β· RTF ~3.5Γ—' },
 
1110
  { id: 'Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign', name: '1.7B VoiceDesign', sub: 'Describe any voice' },
1111
  ],
1112
  };
1113
+ // Filtered at runtime by available_models from /status
1114
+ let MODE_MODELS = ALL_MODE_MODELS;
1115
  const MODE_LABELS = { voice_clone: 'Clone', custom: 'Custom', voice_design: 'Voice Design' };
1116
  let loaderMode = null;
1117
  let loaderSelectedModel = null;
1118
+ let availableModels = [];
1119
+
1120
+ function applyAvailableModels(available) {
1121
+ availableModels = available || [];
1122
+ const avail = new Set(availableModels);
1123
+
1124
+ // Rebuild model selector options
1125
+ const sel = $('modelSel');
1126
+ const prev = sel.value;
1127
+ sel.innerHTML = '';
1128
+ for (const [mode, models] of Object.entries(ALL_MODE_MODELS)) {
1129
+ for (const m of models) {
1130
+ if (!avail.has(m.id)) continue;
1131
+ const opt = document.createElement('option');
1132
+ opt.value = m.id;
1133
+ opt.textContent = m.name;
1134
+ sel.appendChild(opt);
1135
+ }
1136
+ }
1137
+ // Restore selection if still available, else pick first
1138
+ if (avail.has(prev)) sel.value = prev;
1139
+ else if (sel.options.length > 0) sel.value = sel.options[0].value;
1140
+
1141
+ // Filter MODE_MODELS to available only
1142
+ MODE_MODELS = {};
1143
+ for (const [mode, models] of Object.entries(ALL_MODE_MODELS)) {
1144
+ MODE_MODELS[mode] = models.filter(m => avail.has(m.id));
1145
+ }
1146
+
1147
+ // Hide mode rows that have no available models; show those that do
1148
+ const modeRows = { voice_clone: 'rowClone', custom: 'rowChar', voice_design: 'rowCustom' };
1149
+ for (const [mode, rowId] of Object.entries(modeRows)) {
1150
+ $(rowId).style.display = MODE_MODELS[mode].length > 0 ? '' : 'none';
1151
+ }
1152
+ }
1153
 
1154
  function onModeRowClick(mode) {
1155
  const rowMap = { voice_clone: 'rowClone', custom: 'rowChar', voice_design: 'rowCustom' };
server.py CHANGED
@@ -44,7 +44,7 @@ except ImportError:
44
  from nano_parakeet import from_pretrained as _parakeet_from_pretrained
45
 
46
 
47
- AVAILABLE_MODELS = [
48
  "Qwen/Qwen3-TTS-12Hz-0.6B-Base",
49
  "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
50
  "Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice",
@@ -52,6 +52,13 @@ AVAILABLE_MODELS = [
52
  "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign",
53
  ]
54
 
 
 
 
 
 
 
 
55
  BASE_DIR = Path(__file__).resolve().parent
56
  # Assets that need to be downloaded at runtime go to a writable directory.
57
  # /app is read-only in HF Spaces; fall back to /tmp.
@@ -325,7 +332,6 @@ async def generate_stream(
325
  if not _active_model_name or _active_model_name not in _model_cache:
326
  raise HTTPException(status_code=400, detail="Model not loaded. Click 'Load' first.")
327
 
328
- model = _model_cache[_active_model_name]
329
  tmp_path = None
330
  tmp_is_cached = False
331
 
@@ -345,6 +351,13 @@ async def generate_stream(
345
 
346
  def run_generation():
347
  try:
 
 
 
 
 
 
 
348
  t0 = time.perf_counter()
349
  total_audio_s = 0.0
350
  voice_clone_ms = 0.0
@@ -522,7 +535,6 @@ async def generate_non_streaming(
522
  if not _active_model_name or _active_model_name not in _model_cache:
523
  raise HTTPException(status_code=400, detail="Model not loaded. Click 'Load' first.")
524
 
525
- model = _model_cache[_active_model_name]
526
  tmp_path = None
527
  tmp_is_cached = False
528
 
@@ -538,6 +550,10 @@ async def generate_non_streaming(
538
  tmp_is_cached = True
539
 
540
  def run():
 
 
 
 
541
  t0 = time.perf_counter()
542
  if mode == "voice_clone":
543
  audio_list, sr = model.generate_voice_clone(
 
44
  from nano_parakeet import from_pretrained as _parakeet_from_pretrained
45
 
46
 
47
+ _ALL_MODELS = [
48
  "Qwen/Qwen3-TTS-12Hz-0.6B-Base",
49
  "Qwen/Qwen3-TTS-12Hz-1.7B-Base",
50
  "Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice",
 
52
  "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign",
53
  ]
54
 
55
+ _active_models_env = os.environ.get("ACTIVE_MODELS", "")
56
+ if _active_models_env:
57
+ _allowed = {m.strip() for m in _active_models_env.split(",") if m.strip()}
58
+ AVAILABLE_MODELS = [m for m in _ALL_MODELS if m in _allowed]
59
+ else:
60
+ AVAILABLE_MODELS = list(_ALL_MODELS)
61
+
62
  BASE_DIR = Path(__file__).resolve().parent
63
  # Assets that need to be downloaded at runtime go to a writable directory.
64
  # /app is read-only in HF Spaces; fall back to /tmp.
 
332
  if not _active_model_name or _active_model_name not in _model_cache:
333
  raise HTTPException(status_code=400, detail="Model not loaded. Click 'Load' first.")
334
 
 
335
  tmp_path = None
336
  tmp_is_cached = False
337
 
 
351
 
352
  def run_generation():
353
  try:
354
+ # Resolve the model after the generation lock is held so we always
355
+ # use the currently active model, not a stale reference captured
356
+ # before a concurrent /load request changed the active model.
357
+ model = _model_cache.get(_active_model_name)
358
+ if model is None:
359
+ raise RuntimeError("No model loaded. Please load a model first.")
360
+
361
  t0 = time.perf_counter()
362
  total_audio_s = 0.0
363
  voice_clone_ms = 0.0
 
535
  if not _active_model_name or _active_model_name not in _model_cache:
536
  raise HTTPException(status_code=400, detail="Model not loaded. Click 'Load' first.")
537
 
 
538
  tmp_path = None
539
  tmp_is_cached = False
540
 
 
550
  tmp_is_cached = True
551
 
552
  def run():
553
+ # Resolve the model after the generation lock is held.
554
+ model = _model_cache.get(_active_model_name)
555
+ if model is None:
556
+ raise RuntimeError("No model loaded. Please load a model first.")
557
  t0 = time.perf_counter()
558
  if mode == "voice_clone":
559
  audio_list, sr = model.generate_voice_clone(