SefyanKehail committed on
Commit ·
231bc11
1
Parent(s): 3791ba5
debugging..
Browse files
app.py
CHANGED
|
@@ -11,14 +11,18 @@ sys.path.append(parent_dir)
|
|
| 11 |
|
| 12 |
|
| 13 |
|
| 14 |
-
import hubert.model as HUmodel
|
| 15 |
-
import acoustic_model.model as ACmodel
|
| 16 |
-
from hifigan
|
| 17 |
|
| 18 |
|
| 19 |
-
hubert =
|
| 20 |
-
acoustic =
|
| 21 |
-
hifigan
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# Set the state dictionaries to the models
|
| 24 |
# model.load_state_dict(hubert_loaded.state_dict(), strict=False)
|
|
@@ -57,11 +61,11 @@ def convert_speech(filename, progress=gr.Progress()):
|
|
| 57 |
# Convert to the target speaker:
|
| 58 |
with torch.inference_mode():
|
| 59 |
# Extract speech units
|
| 60 |
-
units =
|
| 61 |
# Generate target spectrogram
|
| 62 |
-
mel =
|
| 63 |
# Generate audio waveform
|
| 64 |
-
target =
|
| 65 |
progress(0.9, desc="Postprocessing audio")
|
| 66 |
# Move the tensor to CPU and convert to NumPy
|
| 67 |
target = target.squeeze().cpu().numpy()
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
|
| 14 |
+
import hubert.hubert.model as HUmodel
|
| 15 |
+
import acoustic_model.acoustic.model as ACmodel
|
| 16 |
+
from hifigan import Generator as HIFIGANModel
|
| 17 |
|
| 18 |
|
| 19 |
+
hubert = HUmodel.HubertSoft()
|
| 20 |
+
acoustic = ACmodel.AcousticModel()
|
| 21 |
+
hifigan = HIFIGANModel()
|
| 22 |
+
|
| 23 |
+
hubert_loaded = torch.load("hubert_cpu.pt", map_location=torch.device('cpu'))
|
| 24 |
+
acoustic_loaded = torch.load("acoustic_cpu.pt", map_location=torch.device('cpu'))
|
| 25 |
+
hifigan_loaded = torch.load("hifigan_cpu.pt", map_location=torch.device('cpu'))
|
| 26 |
|
| 27 |
# Set the state dictionaries to the models
|
| 28 |
# model.load_state_dict(hubert_loaded.state_dict(), strict=False)
|
|
|
|
| 61 |
# Convert to the target speaker:
|
| 62 |
with torch.inference_mode():
|
| 63 |
# Extract speech units
|
| 64 |
+
units = hubert_loaded.units(source)
|
| 65 |
# Generate target spectrogram
|
| 66 |
+
mel = acoustic_loaded.generate(units).transpose(1, 2)
|
| 67 |
# Generate audio waveform
|
| 68 |
+
target = hifigan_loaded(mel)
|
| 69 |
progress(0.9, desc="Postprocessing audio")
|
| 70 |
# Move the tensor to CPU and convert to NumPy
|
| 71 |
target = target.squeeze().cpu().numpy()
|