Spaces:
Runtime error
Runtime error
Peter Michael Gits Claude committed on
Commit ·
393f5a7
1
Parent(s): 94bc832
Fix device mismatch error - CUDA/CPU tensor mixing
Browse filesv1.3.14 - CRITICAL FIX: Device consistency for CPU fallback
1. When Moshi falls back to CPU due to CUDA OOM, also move Mimi to CPU
2. Added model device detection in transcription function
3. All tensors now use same device as loaded models
4. Fixes: 'Expected all tensors to be on the same device, but found cuda:0 and cpu!'
5. Service now works properly on CPU fallback mode
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -16,7 +16,7 @@ from fastapi.responses import JSONResponse, HTMLResponse
|
|
| 16 |
import uvicorn
|
| 17 |
|
| 18 |
# Version tracking
|
| 19 |
-
VERSION = "1.3.13"
|
| 20 |
COMMIT_SHA = "TBD"
|
| 21 |
|
| 22 |
# Configure logging
|
|
@@ -77,10 +77,14 @@ async def load_moshi_models():
|
|
| 77 |
except RuntimeError as cuda_error:
|
| 78 |
if "CUDA out of memory" in str(cuda_error):
|
| 79 |
logger.warning(f"CUDA out of memory, trying CPU fallback: {cuda_error}")
|
|
|
|
|
|
|
|
|
|
| 80 |
device = "cpu"
|
| 81 |
moshi = loaders.get_moshi_lm(moshi_weight, device="cpu")
|
| 82 |
lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7)
|
| 83 |
logger.info("✅ Moshi loaded successfully on CPU (fallback)")
|
|
|
|
| 84 |
else:
|
| 85 |
raise
|
| 86 |
|
|
@@ -121,8 +125,12 @@ def transcribe_audio_moshi(audio_data: np.ndarray, sample_rate: int = 24000) ->
|
|
| 121 |
import librosa
|
| 122 |
audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=24000)
|
| 123 |
|
| 124 |
-
#
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
# Process with Mimi codec in streaming mode
|
| 128 |
with torch.no_grad(), mimi.streaming(batch_size=1):
|
|
|
|
| 16 |
import uvicorn
|
| 17 |
|
| 18 |
# Version tracking
|
| 19 |
+
VERSION = "1.3.14"
|
| 20 |
COMMIT_SHA = "TBD"
|
| 21 |
|
| 22 |
# Configure logging
|
|
|
|
| 77 |
except RuntimeError as cuda_error:
|
| 78 |
if "CUDA out of memory" in str(cuda_error):
|
| 79 |
logger.warning(f"CUDA out of memory, trying CPU fallback: {cuda_error}")
|
| 80 |
+
# Move Mimi to CPU as well for consistency
|
| 81 |
+
mimi = loaders.get_mimi(mimi_weight, device="cpu")
|
| 82 |
+
mimi.set_num_codebooks(8)
|
| 83 |
device = "cpu"
|
| 84 |
moshi = loaders.get_moshi_lm(moshi_weight, device="cpu")
|
| 85 |
lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7)
|
| 86 |
logger.info("✅ Moshi loaded successfully on CPU (fallback)")
|
| 87 |
+
logger.info("✅ Mimi also moved to CPU for device consistency")
|
| 88 |
else:
|
| 89 |
raise
|
| 90 |
|
|
|
|
| 125 |
import librosa
|
| 126 |
audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=24000)
|
| 127 |
|
| 128 |
+
# Determine actual device of the models (might have fallen back to CPU)
|
| 129 |
+
model_device = next(mimi.parameters()).device if hasattr(mimi, 'parameters') else device
|
| 130 |
+
logger.info(f"Using device for transcription: {model_device}")
|
| 131 |
+
|
| 132 |
+
# Convert to torch tensor and put on same device as models
|
| 133 |
+
wav = torch.from_numpy(audio_data).unsqueeze(0).unsqueeze(0).to(model_device)
|
| 134 |
|
| 135 |
# Process with Mimi codec in streaming mode
|
| 136 |
with torch.no_grad(), mimi.streaming(batch_size=1):
|