Spaces:
Sleeping
Sleeping
Update hifigan/inference_e2e.py
Browse files- hifigan/inference_e2e.py +5 -26
hifigan/inference_e2e.py
CHANGED
|
@@ -1,26 +1,3 @@
|
|
| 1 |
-
from __future__ import absolute_import, division, print_function, unicode_literals
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
import numpy as np
|
| 5 |
-
import json
|
| 6 |
-
import torch
|
| 7 |
-
from scipy.io.wavfile import write
|
| 8 |
-
from hifigan.env import AttrDict
|
| 9 |
-
from hifigan.models import Generator
|
| 10 |
-
from io import BytesIO
|
| 11 |
-
|
| 12 |
-
h = None
|
| 13 |
-
device = None
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
def load_checkpoint(filepath, device):
    """Load a serialized torch checkpoint from disk.

    Args:
        filepath: Path to a checkpoint file produced by ``torch.save``.
        device: Target device (string or ``torch.device``) passed to
            ``torch.load`` as ``map_location`` so CPU-only hosts can load
            GPU-saved checkpoints.

    Returns:
        The deserialized checkpoint object (typically a dict of weights
        and training state).

    Raises:
        FileNotFoundError: If ``filepath`` does not name an existing file.
    """
    # Explicit check instead of `assert`: asserts are stripped under -O,
    # which would turn a missing file into an opaque torch.load failure.
    if not os.path.isfile(filepath):
        raise FileNotFoundError(filepath)
    print("Loading '{}'".format(filepath))
    checkpoint_dict = torch.load(filepath, map_location=device)
    print("Complete.")
    return checkpoint_dict
|
| 22 |
-
|
| 23 |
-
|
| 24 |
def hifi_gan_inference(input_mel, checkpoint_file):
|
| 25 |
print('Initializing Inference Process..')
|
| 26 |
config_file = os.path.join(os.path.split(checkpoint_file)[0], 'config.json')
|
|
@@ -57,17 +34,19 @@ def hifi_gan_inference(input_mel, checkpoint_file):
|
|
| 57 |
|
| 58 |
x = torch.FloatTensor(x).to(device)
|
| 59 |
y_g_hat = generator(x)
|
| 60 |
-
|
|
|
|
|
|
|
| 61 |
|
| 62 |
# Set MAX_WAV_VALUE if not present
|
| 63 |
if 'MAX_WAV_VALUE' not in h:
|
| 64 |
h.MAX_WAV_VALUE = 32768.0 # Adjust this value based on your requirements
|
| 65 |
|
| 66 |
audio = audio * h.MAX_WAV_VALUE
|
| 67 |
-
audio = audio.
|
| 68 |
|
| 69 |
# Save audio to BytesIO
|
| 70 |
output_buffer = BytesIO()
|
| 71 |
write(output_buffer, h.sampling_rate, audio)
|
| 72 |
|
| 73 |
-
return output_buffer.getvalue()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def hifi_gan_inference(input_mel, checkpoint_file):
|
| 2 |
print('Initializing Inference Process..')
|
| 3 |
config_file = os.path.join(os.path.split(checkpoint_file)[0], 'config.json')
|
|
|
|
| 34 |
|
| 35 |
x = torch.FloatTensor(x).to(device)
|
| 36 |
y_g_hat = generator(x)
|
| 37 |
+
|
| 38 |
+
# Detach tensor before converting to numpy
|
| 39 |
+
audio = y_g_hat.squeeze().detach().numpy()
|
| 40 |
|
| 41 |
# Set MAX_WAV_VALUE if not present
|
| 42 |
if 'MAX_WAV_VALUE' not in h:
|
| 43 |
h.MAX_WAV_VALUE = 32768.0 # Adjust this value based on your requirements
|
| 44 |
|
| 45 |
audio = audio * h.MAX_WAV_VALUE
|
| 46 |
+
audio = audio.astype('int16')
|
| 47 |
|
| 48 |
# Save audio to BytesIO
|
| 49 |
output_buffer = BytesIO()
|
| 50 |
write(output_buffer, h.sampling_rate, audio)
|
| 51 |
|
| 52 |
+
return output_buffer.getvalue()
|