SefyanKehail committed on
Commit
231bc11
·
1 Parent(s): 3791ba5

debugging..

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -11,14 +11,18 @@ sys.path.append(parent_dir)
11
 
12
 
13
 
14
- import hubert.model as HUmodel
15
- import acoustic_model.model as ACmodel
16
- from hifigan.models import Generator as HIFIGANModel
17
 
18
 
19
- hubert = torch.load("hubert_cpu.pt", map_location=torch.device('cpu'))
20
- acoustic = torch.load("acoustic_cpu.pt", map_location=torch.device('cpu'))
21
- hifigan = torch.load("hifigan_cpu.pt", map_location=torch.device('cpu'))
 
 
 
 
22
 
23
  # Set the state dictionaries to the models
24
  # model.load_state_dict(hubert_loaded.state_dict(), strict=False)
@@ -57,11 +61,11 @@ def convert_speech(filename, progress=gr.Progress()):
57
  # Convert to the target speaker:
58
  with torch.inference_mode():
59
  # Extract speech units
60
- units = hubert.units(source)
61
  # Generate target spectrogram
62
- mel = acoustic.generate(units).transpose(1, 2)
63
  # Generate audio waveform
64
- target = hifigan(mel)
65
  progress(0.9, desc="Postprocessing audio")
66
  # Move the tensor to CPU and convert to NumPy
67
  target = target.squeeze().cpu().numpy()
 
11
 
12
 
13
 
14
+ import hubert.hubert.model as HUmodel
15
+ import acoustic_model.acoustic.model as ACmodel
16
+ from hifigan import Generator as HIFIGANModel
17
 
18
 
19
+ hubert = HUmodel.HubertSoft()
20
+ acoustic = ACmodel.AcousticModel()
21
+ hifigan = HIFIGANModel()
22
+
23
+ hubert_loaded = torch.load("hubert_cpu.pt", map_location=torch.device('cpu'))
24
+ acoustic_loaded = torch.load("acoustic_cpu.pt", map_location=torch.device('cpu'))
25
+ hifigan_loaded = torch.load("hifigan_cpu.pt", map_location=torch.device('cpu'))
26
 
27
  # Set the state dictionaries to the models
28
  # model.load_state_dict(hubert_loaded.state_dict(), strict=False)
 
61
  # Convert to the target speaker:
62
  with torch.inference_mode():
63
  # Extract speech units
64
+ units = hubert_loaded.units(source)
65
  # Generate target spectrogram
66
+ mel = acoustic_loaded.generate(units).transpose(1, 2)
67
  # Generate audio waveform
68
+ target = hifigan_loaded(mel)
69
  progress(0.9, desc="Postprocessing audio")
70
  # Move the tensor to CPU and convert to NumPy
71
  target = target.squeeze().cpu().numpy()