Spaces:

eaysu
/

Chatterbox-Multi-CPU

Sleeping

eaysu Claude Sonnet 4.6 committed 7 days ago

Commit

446e67b

1 Parent(s): c612a94

Convert to CPU-compatible Space: remove ZeroGPU dependency

- Remove spaces import and @spaces.GPU decorator from app.py
- Add map_location="cpu" in mtl_tts.py for CPU/MPS device loading
- Fix improperly uncommented optional deps in requirements.txt
- Update README title/description for CPU variant

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (4) hide show

README.md +3 -3
app.py +0 -2
requirements.txt +3 -3
src/chatterbox/mtl_tts.py +5 -2

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Chatterbox-Multilingual-TTS
 emoji: 🌎
 colorFrom: indigo
 colorTo: blue
@@ -7,5 +7,5 @@ sdk: gradio
 sdk_version: 5.29.0
 app_file: app.py
 pinned: false
-short_description: Chatterbox TTS supporting 23 languages
----

 ---
+title: Chatterbox Multilingual TTS CPU
 emoji: 🌎
 colorFrom: indigo
 colorTo: blue
 sdk_version: 5.29.0
 app_file: app.py
 pinned: false
+short_description: Chatterbox TTS supporting 23 languages (CPU)
+---

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import numpy as np
 import torch
 from src.chatterbox.mtl_tts import ChatterboxMultilingualTTS, SUPPORTED_LANGUAGES
 import gradio as gr
-import spaces
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"🚀 Running on device: {DEVICE}")
@@ -176,7 +175,6 @@ def resolve_audio_prompt(language_id: str, provided_path: str | None) -> str | N
     return LANGUAGE_CONFIG.get(language_id, {}).get("audio")
-@spaces.GPU
 def generate_tts_audio(
     text_input: str,
     language_id: str,

 import torch
 from src.chatterbox.mtl_tts import ChatterboxMultilingualTTS, SUPPORTED_LANGUAGES
 import gradio as gr
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"🚀 Running on device: {DEVICE}")
     return LANGUAGE_CONFIG.get(language_id, {}).get("audio")
 def generate_tts_audio(
     text_input: str,
     language_id: str,

requirements.txt CHANGED Viewed

@@ -13,7 +13,7 @@ safetensors
 # Optional language-specific dependencies
 # Uncomment the ones you need for specific languages:
- spacy_pkuseg          # For Chinese text segmentation
- pykakasi>=2.2.0       # For Japanese text processing (Kanji to Hiragana)
- russian-text-stresser @ git+https://github.com/Vuizur/add-stress-to-epub
 # dicta-onnx>=0.1.0     # For Hebrew diacritization

 # Optional language-specific dependencies
 # Uncomment the ones you need for specific languages:
+# spacy_pkuseg          # For Chinese text segmentation
+# pykakasi>=2.2.0       # For Japanese text processing (Kanji to Hiragana)
+# russian-text-stresser @ git+https://github.com/Vuizur/add-stress-to-epub
 # dicta-onnx>=0.1.0     # For Hebrew diacritization

src/chatterbox/mtl_tts.py CHANGED Viewed

@@ -161,9 +161,12 @@ class ChatterboxMultilingualTTS:
     def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':
         ckpt_dir = Path(ckpt_dir)
         ve = VoiceEncoder()
         ve.load_state_dict(
-            torch.load(ckpt_dir / "ve.pt", weights_only=True)
         )
         ve.to(device).eval()
@@ -176,7 +179,7 @@ class ChatterboxMultilingualTTS:
         s3gen = S3Gen()
         s3gen.load_state_dict(
-            torch.load(ckpt_dir / "s3gen.pt", weights_only=True)
         )
         s3gen.to(device).eval()

     def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':
         ckpt_dir = Path(ckpt_dir)
+        # Load to CPU first for non-CUDA devices to handle CUDA-saved checkpoints
+        map_location = "cpu" if str(device) in ("cpu", "mps") else None
         ve = VoiceEncoder()
         ve.load_state_dict(
+            torch.load(ckpt_dir / "ve.pt", weights_only=True, map_location=map_location)
         )
         ve.to(device).eval()
         s3gen = S3Gen()
         s3gen.load_state_dict(
+            torch.load(ckpt_dir / "s3gen.pt", weights_only=True, map_location=map_location)
         )
         s3gen.to(device).eval()