Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,8 @@ from chatterbox.src.chatterbox.tts import ChatterboxTTS
|
|
| 5 |
import gradio as gr
|
| 6 |
import spaces
|
| 7 |
|
| 8 |
-
|
|
|
|
| 9 |
print(f"🚀 Running on device: {DEVICE}")
|
| 10 |
|
| 11 |
# --- Global Model Initialization ---
|
|
@@ -18,8 +19,9 @@ def get_or_load_model():
|
|
| 18 |
if MODEL is None:
|
| 19 |
print("Model not loaded, initializing...")
|
| 20 |
try:
|
|
|
|
| 21 |
MODEL = ChatterboxTTS.from_pretrained(DEVICE)
|
| 22 |
-
if hasattr(MODEL, 'to') and str(MODEL.device) != DEVICE:
|
| 23 |
MODEL.to(DEVICE)
|
| 24 |
print(f"Model loaded successfully. Internal device: {getattr(MODEL, 'device', 'N/A')}")
|
| 25 |
except Exception as e:
|
|
@@ -36,13 +38,11 @@ except Exception as e:
|
|
| 36 |
def set_seed(seed: int):
|
| 37 |
"""Sets the random seed for reproducibility across torch, numpy, and random."""
|
| 38 |
torch.manual_seed(seed)
|
| 39 |
-
|
| 40 |
-
torch.cuda.manual_seed(seed)
|
| 41 |
-
torch.cuda.manual_seed_all(seed)
|
| 42 |
random.seed(seed)
|
| 43 |
np.random.seed(seed)
|
| 44 |
|
| 45 |
-
@spaces.GPU
|
| 46 |
def generate_tts_audio(
|
| 47 |
text_input: str,
|
| 48 |
audio_prompt_path_input: str,
|
|
@@ -74,6 +74,7 @@ def generate_tts_audio(
|
|
| 74 |
set_seed(int(seed_num_input))
|
| 75 |
|
| 76 |
print(f"Generating audio for text: '{text_input[:50]}...'")
|
|
|
|
| 77 |
wav = current_model.generate(
|
| 78 |
text_input[:300], # Truncate text to max chars
|
| 79 |
audio_prompt_path=audio_prompt_path_input,
|
|
@@ -82,12 +83,12 @@ def generate_tts_audio(
|
|
| 82 |
cfg_weight=cfgw_input,
|
| 83 |
)
|
| 84 |
print("Audio generation complete.")
|
| 85 |
-
return (current_model.sr, wav.squeeze(0).numpy())
|
| 86 |
|
| 87 |
with gr.Blocks() as demo:
|
| 88 |
gr.Markdown(
|
| 89 |
"""
|
| 90 |
-
# Chatterbox TTS Demo
|
| 91 |
Generate high-quality speech from text with reference audio styling.
|
| 92 |
"""
|
| 93 |
)
|
|
@@ -133,4 +134,4 @@ with gr.Blocks() as demo:
|
|
| 133 |
outputs=[audio_output],
|
| 134 |
)
|
| 135 |
|
| 136 |
-
demo.launch()
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
import spaces
|
| 7 |
|
| 8 |
+
# Force the device to CPU
|
| 9 |
+
DEVICE = "cpu"
|
| 10 |
print(f"🚀 Running on device: {DEVICE}")
|
| 11 |
|
| 12 |
# --- Global Model Initialization ---
|
|
|
|
| 19 |
if MODEL is None:
|
| 20 |
print("Model not loaded, initializing...")
|
| 21 |
try:
|
| 22 |
+
# Load the model directly to the specified DEVICE (CPU)
|
| 23 |
MODEL = ChatterboxTTS.from_pretrained(DEVICE)
|
| 24 |
+
if hasattr(MODEL, 'to') and str(getattr(MODEL, 'device', 'cpu')) != DEVICE:
|
| 25 |
MODEL.to(DEVICE)
|
| 26 |
print(f"Model loaded successfully. Internal device: {getattr(MODEL, 'device', 'N/A')}")
|
| 27 |
except Exception as e:
|
|
|
|
| 38 |
def set_seed(seed: int):
|
| 39 |
"""Sets the random seed for reproducibility across torch, numpy, and random."""
|
| 40 |
torch.manual_seed(seed)
|
| 41 |
+
# No need for CUDA-specific seed setting
|
|
|
|
|
|
|
| 42 |
random.seed(seed)
|
| 43 |
np.random.seed(seed)
|
| 44 |
|
| 45 |
+
# Removed @spaces.GPU decorator as we are targeting CPU
|
| 46 |
def generate_tts_audio(
|
| 47 |
text_input: str,
|
| 48 |
audio_prompt_path_input: str,
|
|
|
|
| 74 |
set_seed(int(seed_num_input))
|
| 75 |
|
| 76 |
print(f"Generating audio for text: '{text_input[:50]}...'")
|
| 77 |
+
# Generate the waveform on the CPU
|
| 78 |
wav = current_model.generate(
|
| 79 |
text_input[:300], # Truncate text to max chars
|
| 80 |
audio_prompt_path=audio_prompt_path_input,
|
|
|
|
| 83 |
cfg_weight=cfgw_input,
|
| 84 |
)
|
| 85 |
print("Audio generation complete.")
|
| 86 |
+
return (current_model.sr, wav.squeeze(0).cpu().numpy()) # Ensure tensor is on CPU before converting to numpy
|
| 87 |
|
| 88 |
with gr.Blocks() as demo:
|
| 89 |
gr.Markdown(
|
| 90 |
"""
|
| 91 |
+
# Chatterbox TTS Demo (CPU Version)
|
| 92 |
Generate high-quality speech from text with reference audio styling.
|
| 93 |
"""
|
| 94 |
)
|
|
|
|
| 134 |
outputs=[audio_output],
|
| 135 |
)
|
| 136 |
|
| 137 |
+
demo.launch()
|