ruslanmv committed on
Commit
69e6077
·
1 Parent(s): fa37078

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -8,7 +8,7 @@ import struct
8
  import textwrap
9
  import requests
10
  import atexit
11
- import tempfile # <-- FIX: Import tempfile to manage temporary audio files
12
  from typing import List, Dict, Tuple, Generator
13
 
14
  # --- Fast, safe defaults ---
@@ -206,7 +206,6 @@ def init_models_and_latents() -> None:
206
 
207
  if not voice_latents:
208
  print("Computing voice conditioning latents...")
209
- # --- FIX: Use a temporary directory to store resampled audio files ---
210
  with tempfile.TemporaryDirectory() as temp_dir:
211
  voice_files = {
212
  "Cloée": "cloee-1.wav", "Julian": "julian-bedtime-style-1.wav",
@@ -214,15 +213,14 @@ def init_models_and_latents() -> None:
214
  }
215
  for role, filename in voice_files.items():
216
  original_path = os.path.join("voices", filename)
217
-
218
- # 1. Load and resample audio into a tensor
219
  resampled_waveform = load_and_resample_audio(original_path)
220
 
221
- # 2. Save the corrected tensor to a temporary file
222
  temp_path = os.path.join(temp_dir, f"resampled_{filename}")
223
- torchaudio.save(temp_path, resampled_waveform.squeeze(0), 24000)
224
 
225
- # 3. Pass the path of the clean, temporary file to the model
 
 
 
226
  voice_latents[role] = tts_model.get_conditioning_latents(
227
  audio_path=temp_path,
228
  gpt_cond_len=30,
 
8
  import textwrap
9
  import requests
10
  import atexit
11
+ import tempfile
12
  from typing import List, Dict, Tuple, Generator
13
 
14
  # --- Fast, safe defaults ---
 
206
 
207
  if not voice_latents:
208
  print("Computing voice conditioning latents...")
 
209
  with tempfile.TemporaryDirectory() as temp_dir:
210
  voice_files = {
211
  "Cloée": "cloee-1.wav", "Julian": "julian-bedtime-style-1.wav",
 
213
  }
214
  for role, filename in voice_files.items():
215
  original_path = os.path.join("voices", filename)
 
 
216
  resampled_waveform = load_and_resample_audio(original_path)
217
 
 
218
  temp_path = os.path.join(temp_dir, f"resampled_{filename}")
 
219
 
220
+ # --- FIX: Replace torchaudio.save with the more stable soundfile.write ---
221
+ numpy_waveform = resampled_waveform.squeeze(0).cpu().numpy()
222
+ sf.write(temp_path, numpy_waveform, 24000)
223
+
224
  voice_latents[role] = tts_model.get_conditioning_latents(
225
  audio_path=temp_path,
226
  gpt_cond_len=30,