Spaces:

Transduce
/

vc_demo

Runtime error

SefyanKehail committed on Jun 6, 2024

Commit

231bc11

1 Parent(s): 3791ba5

debugging..

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,14 +11,18 @@ sys.path.append(parent_dir)
-import hubert.model as HUmodel
-import acoustic_model.model as ACmodel
-from hifigan.models import Generator as HIFIGANModel
-hubert = torch.load("hubert_cpu.pt", map_location=torch.device('cpu'))
-acoustic = torch.load("acoustic_cpu.pt", map_location=torch.device('cpu'))
-hifigan = torch.load("hifigan_cpu.pt", map_location=torch.device('cpu'))
 # Set the state dictionaries to the models
 # model.load_state_dict(hubert_loaded.state_dict(),  strict=False)
@@ -57,11 +61,11 @@ def convert_speech(filename, progress=gr.Progress()):
     # Convert to the target speaker:
     with torch.inference_mode():
         # Extract speech units
-        units = hubert.units(source)
         # Generate target spectrogram
-        mel = acoustic.generate(units).transpose(1, 2)
         # Generate audio waveform
-        target = hifigan(mel)
         progress(0.9, desc="Postprocessing audio")
     # Move the tensor to CPU and convert to NumPy
     target = target.squeeze().cpu().numpy()

+import hubert.hubert.model as HUmodel
+import acoustic_model.acoustic.model as ACmodel
+from hifigan import Generator as HIFIGANModel
+hubert = HUmodel.HubertSoft()
+acoustic = ACmodel.AcousticModel()
+hifigan  = HIFIGANModel()
+hubert_loaded = torch.load("hubert_cpu.pt", map_location=torch.device('cpu'))
+acoustic_loaded = torch.load("acoustic_cpu.pt", map_location=torch.device('cpu'))
+hifigan_loaded = torch.load("hifigan_cpu.pt", map_location=torch.device('cpu'))
 # Set the state dictionaries to the models
 # model.load_state_dict(hubert_loaded.state_dict(),  strict=False)
     # Convert to the target speaker:
     with torch.inference_mode():
         # Extract speech units
+        units = hubert_loaded.units(source)
         # Generate target spectrogram
+        mel = acoustic_loaded.generate(units).transpose(1, 2)
         # Generate audio waveform
+        target = hifigan_loaded(mel)
         progress(0.9, desc="Postprocessing audio")
     # Move the tensor to CPU and convert to NumPy
     target = target.squeeze().cpu().numpy()