Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ import struct
|
|
| 8 |
import textwrap
|
| 9 |
import requests
|
| 10 |
import atexit
|
| 11 |
-
import tempfile
|
| 12 |
from typing import List, Dict, Tuple, Generator
|
| 13 |
|
| 14 |
# --- Fast, safe defaults ---
|
|
@@ -206,7 +206,6 @@ def init_models_and_latents() -> None:
|
|
| 206 |
|
| 207 |
if not voice_latents:
|
| 208 |
print("Computing voice conditioning latents...")
|
| 209 |
-
# --- FIX: Use a temporary directory to store resampled audio files ---
|
| 210 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 211 |
voice_files = {
|
| 212 |
"Cloée": "cloee-1.wav", "Julian": "julian-bedtime-style-1.wav",
|
|
@@ -214,15 +213,14 @@ def init_models_and_latents() -> None:
|
|
| 214 |
}
|
| 215 |
for role, filename in voice_files.items():
|
| 216 |
original_path = os.path.join("voices", filename)
|
| 217 |
-
|
| 218 |
-
# 1. Load and resample audio into a tensor
|
| 219 |
resampled_waveform = load_and_resample_audio(original_path)
|
| 220 |
|
| 221 |
-
# 2. Save the corrected tensor to a temporary file
|
| 222 |
temp_path = os.path.join(temp_dir, f"resampled_{filename}")
|
| 223 |
-
torchaudio.save(temp_path, resampled_waveform.squeeze(0), 24000)
|
| 224 |
|
| 225 |
-
#
|
|
|
|
|
|
|
|
|
|
| 226 |
voice_latents[role] = tts_model.get_conditioning_latents(
|
| 227 |
audio_path=temp_path,
|
| 228 |
gpt_cond_len=30,
|
|
|
|
| 8 |
import textwrap
|
| 9 |
import requests
|
| 10 |
import atexit
|
| 11 |
+
import tempfile
|
| 12 |
from typing import List, Dict, Tuple, Generator
|
| 13 |
|
| 14 |
# --- Fast, safe defaults ---
|
|
|
|
| 206 |
|
| 207 |
if not voice_latents:
|
| 208 |
print("Computing voice conditioning latents...")
|
|
|
|
| 209 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 210 |
voice_files = {
|
| 211 |
"Cloée": "cloee-1.wav", "Julian": "julian-bedtime-style-1.wav",
|
|
|
|
| 213 |
}
|
| 214 |
for role, filename in voice_files.items():
|
| 215 |
original_path = os.path.join("voices", filename)
|
|
|
|
|
|
|
| 216 |
resampled_waveform = load_and_resample_audio(original_path)
|
| 217 |
|
|
|
|
| 218 |
temp_path = os.path.join(temp_dir, f"resampled_{filename}")
|
|
|
|
| 219 |
|
| 220 |
+
# --- FIX: Replace torchaudio.save with the more stable soundfile.write ---
|
| 221 |
+
numpy_waveform = resampled_waveform.squeeze(0).cpu().numpy()
|
| 222 |
+
sf.write(temp_path, numpy_waveform, 24000)
|
| 223 |
+
|
| 224 |
voice_latents[role] = tts_model.get_conditioning_latents(
|
| 225 |
audio_path=temp_path,
|
| 226 |
gpt_cond_len=30,
|