Peter Michael Gits Claude committed on
Commit
393f5a7
·
1 Parent(s): 94bc832

Fix device mismatch error - CUDA/CPU tensor mixing

Browse files

v1.3.14 - CRITICAL FIX: Device consistency for CPU fallback
1. When Moshi falls back to CPU due to CUDA OOM, also move Mimi to CPU
2. Added model device detection in transcription function
3. All tensors now use same device as loaded models
4. Fixes: 'Expected all tensors to be on the same device, but found cuda:0 and cpu!'
5. Service now works properly on CPU fallback mode

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -16,7 +16,7 @@ from fastapi.responses import JSONResponse, HTMLResponse
16
  import uvicorn
17
 
18
  # Version tracking
19
- VERSION = "1.3.13"
20
  COMMIT_SHA = "TBD"
21
 
22
  # Configure logging
@@ -77,10 +77,14 @@ async def load_moshi_models():
77
  except RuntimeError as cuda_error:
78
  if "CUDA out of memory" in str(cuda_error):
79
  logger.warning(f"CUDA out of memory, trying CPU fallback: {cuda_error}")
 
 
 
80
  device = "cpu"
81
  moshi = loaders.get_moshi_lm(moshi_weight, device="cpu")
82
  lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7)
83
  logger.info("✅ Moshi loaded successfully on CPU (fallback)")
 
84
  else:
85
  raise
86
 
@@ -121,8 +125,12 @@ def transcribe_audio_moshi(audio_data: np.ndarray, sample_rate: int = 24000) ->
121
  import librosa
122
  audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=24000)
123
 
124
- # Convert to torch tensor
125
- wav = torch.from_numpy(audio_data).unsqueeze(0).unsqueeze(0).to(device)
 
 
 
 
126
 
127
  # Process with Mimi codec in streaming mode
128
  with torch.no_grad(), mimi.streaming(batch_size=1):
 
16
  import uvicorn
17
 
18
  # Version tracking
19
+ VERSION = "1.3.14"
20
  COMMIT_SHA = "TBD"
21
 
22
  # Configure logging
 
77
  except RuntimeError as cuda_error:
78
  if "CUDA out of memory" in str(cuda_error):
79
  logger.warning(f"CUDA out of memory, trying CPU fallback: {cuda_error}")
80
+ # Move Mimi to CPU as well for consistency
81
+ mimi = loaders.get_mimi(mimi_weight, device="cpu")
82
+ mimi.set_num_codebooks(8)
83
  device = "cpu"
84
  moshi = loaders.get_moshi_lm(moshi_weight, device="cpu")
85
  lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7)
86
  logger.info("✅ Moshi loaded successfully on CPU (fallback)")
87
+ logger.info("✅ Mimi also moved to CPU for device consistency")
88
  else:
89
  raise
90
 
 
125
  import librosa
126
  audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=24000)
127
 
128
+ # Determine actual device of the models (might have fallen back to CPU)
129
+ model_device = next(mimi.parameters()).device if hasattr(mimi, 'parameters') else device
130
+ logger.info(f"Using device for transcription: {model_device}")
131
+
132
+ # Convert to torch tensor and put on same device as models
133
+ wav = torch.from_numpy(audio_data).unsqueeze(0).unsqueeze(0).to(model_device)
134
 
135
  # Process with Mimi codec in streaming mode
136
  with torch.no_grad(), mimi.streaming(batch_size=1):