rishidahiya committed on
Commit
1fae304
·
verified ·
1 Parent(s): ee7df1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -27
app.py CHANGED
@@ -5,36 +5,26 @@ import librosa
5
  import soundfile as sf
6
  import numpy as np
7
  import os
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Load models at startup
10
- import sys
11
  print("Loading models...")
12
- print(f"Current working directory: {os.getcwd()}")
13
- print(f"Python path: {sys.path}")
14
- print(f"Files in /app: {os.listdir('/app') if os.path.exists('/app') else 'N/A'}")
15
-
16
- # Try multiple possible locations
17
- possible_paths = [
18
- "saved_models/encoder.pt",
19
- "/app/saved_models/encoder.pt",
20
- "./saved_models/encoder.pt"
21
- ]
22
-
23
- encoder_path = None
24
- for path in possible_paths:
25
- if os.path.exists(path):
26
- encoder_path = path
27
- print(f"Found encoder at: {encoder_path}")
28
- break
29
 
30
- if not encoder_path:
31
- print(f"ERROR: Could not find encoder.pt in any location!")
32
- print(f"Trying to list saved_models: {os.listdir('saved_models') if os.path.exists('saved_models') else 'Folder does not exist'}")
33
-
34
- synthesizer_path = encoder_path.replace('encoder.pt', 'synthesizer.pt') if encoder_path else "saved_models/synthesizer.pt"
35
 
36
  try:
37
- encoder_inference.load_model(encoder_path or "saved_models/encoder.pt")
38
  print("✓ Encoder loaded!")
39
  except Exception as e:
40
  print(f"Encoder load error: {e}")
@@ -88,9 +78,28 @@ def clone_voice(voice_sample, text):
88
  mels = synthesizer.synthesize_spectrograms([text], [embed])
89
  print(f"Mel-spectrogram: {mels[0].shape}")
90
 
91
- # Vocode to audio using Griffin-Lim algorithm
92
- wav_generated = librosa.feature.inverse.mel_to_audio(mels[0], sr=22050, n_iter=32)
93
- print(f"Generated audio: {wav_generated.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  return (22050, (wav_generated * 32768).astype(np.int16)), "✅ Success! Your voice has been cloned!"
96
 
 
5
  import soundfile as sf
6
  import numpy as np
7
  import os
8
+ import torch
9
+
10
+ # Try to load HiFi-GAN vocoder
11
+ vocoder = None
12
+ try:
13
+ from speechbrain.inference.vocoders import HIFIGAN
14
+ vocoder = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="pretrained_models/hifigan", run_opts={"device":"cpu"})
15
+ print("✓ HiFi-GAN vocoder loaded!")
16
+ except Exception as e:
17
+ print(f"HiFi-GAN load error: {e}, will use Griffin-Lim fallback")
18
+ vocoder = None
19
 
20
  # Load models at startup
 
21
  print("Loading models...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ encoder_path = "saved_models/encoder.pt"
24
+ synthesizer_path = "saved_models/synthesizer.pt"
 
 
 
25
 
26
  try:
27
+ encoder_inference.load_model(encoder_path)
28
  print("✓ Encoder loaded!")
29
  except Exception as e:
30
  print(f"Encoder load error: {e}")
 
78
  mels = synthesizer.synthesize_spectrograms([text], [embed])
79
  print(f"Mel-spectrogram: {mels[0].shape}")
80
 
81
+ # Vocode to audio
82
+ if vocoder is not None:
83
+ try:
84
+ # Use HiFi-GAN
85
+ mel_spec_tensor = torch.from_numpy(mels[0]).unsqueeze(0).float()
86
+ with torch.no_grad():
87
+ wav_generated = vocoder.decode_batch(mel_spec_tensor)
88
+ wav_generated = wav_generated.squeeze().cpu().numpy()
89
+ print(f"Generated audio with HiFi-GAN: {wav_generated.shape}")
90
+ except Exception as e:
91
+ print(f"HiFi-GAN failed: {e}, using Griffin-Lim fallback")
92
+ wav_generated = librosa.feature.inverse.mel_to_audio(mels[0], sr=22050, n_iter=32)
93
+ else:
94
+ # Use Griffin-Lim as fallback
95
+ print("Using Griffin-Lim vocoder (fallback)")
96
+ wav_generated = librosa.feature.inverse.mel_to_audio(mels[0], sr=22050, n_iter=32)
97
+
98
+ # Normalize audio
99
+ if np.max(np.abs(wav_generated)) > 0:
100
+ wav_generated = wav_generated / np.max(np.abs(wav_generated)) * 0.95
101
+
102
+ print(f"Generated audio: {wav_generated.shape}, range: {np.min(wav_generated):.4f} to {np.max(np.abs(wav_generated)):.4f}")
103
 
104
  return (22050, (wav_generated * 32768).astype(np.int16)), "✅ Success! Your voice has been cloned!"
105