Hugging Face Spaces (Space currently paused)
Commit: "Mat1 and Mat2 fixes" — Browse files
Changed file: main.py
|
@@ -28,11 +28,9 @@ print("Loading TTS model...")
|
|
| 28 |
tts_model = ChatterboxMultilingualTTS.from_pretrained(device=device_map)
|
| 29 |
|
| 30 |
# Optimize for T4 GPU using half-precision (FP16)
|
| 31 |
-
#
|
| 32 |
if device_map == "cuda":
|
| 33 |
-
|
| 34 |
-
tts_model.s3gen.to(torch.float16)
|
| 35 |
-
tts_model.ve.to(torch.float16)
|
| 36 |
|
| 37 |
print("Model loaded.")
|
| 38 |
|
|
@@ -58,7 +56,14 @@ def generate_audio(req: TTSRequest) -> str:
|
|
| 58 |
filename = os.path.join("outputs", f"{req.channelID}-{req.username}-{req.messageid}.wav")
|
| 59 |
|
| 60 |
try:
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
ta.save(filename, audio_tensor, tts_model.sr)
|
| 63 |
return filename
|
| 64 |
except Exception as e:
|
|
|
|
# Load the multilingual Chatterbox TTS model onto the configured device.
tts_model = ChatterboxMultilingualTTS.from_pretrained(device=device_map)

# Optimize for T4 GPU using half-precision (FP16).
# We use autocast during inference for the best balance of speed and stability
# (casting submodules to float16 eagerly caused the
# "mat1 and mat2 must have the same dtype" error this commit fixes).
if device_map == "cuda":
    print("GPU optimization: FP16 Autocast enabled.")

print("Model loaded.")
|
| 36 |
|
|
|
|
| 56 |
filename = os.path.join("outputs", f"{req.channelID}-{req.username}-{req.messageid}.wav")
|
| 57 |
|
| 58 |
try:
|
| 59 |
+
# Use autocast to automatically handle float16/float32 mixing
|
| 60 |
+
# This prevents the "mat1 and mat2 must have the same dtype" error
|
| 61 |
+
if device_map == "cuda":
|
| 62 |
+
with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
|
| 63 |
+
audio_tensor = tts_model.generate(req.message, language_id=req.language)
|
| 64 |
+
else:
|
| 65 |
+
audio_tensor = tts_model.generate(req.message, language_id=req.language)
|
| 66 |
+
|
| 67 |
ta.save(filename, audio_tensor, tts_model.sr)
|
| 68 |
return filename
|
| 69 |
except Exception as e:
|