Spaces:
Running
Running
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import torch | |
| from examples.textless_nlp.gslm.unit2speech.tacotron2.model import Tacotron2 | |
| from examples.textless_nlp.gslm.unit2speech.tacotron2.waveglow_denoiser import ( | |
| Denoiser, | |
| ) | |
def load_quantized_audio_from_file(file_path):
    """Read a ``name|u1 u2 u3 ...`` listing of quantized unit sequences.

    Each non-blank line must contain exactly one ``|`` separating the base
    file name from a whitespace-separated list of integer unit ids.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A pair ``(base_fname_batch, quantized_units_batch)`` of parallel
        lists: the base file names, and for each one its list of ``int``
        units. A line with nothing after the ``|`` yields an empty list.

    Raises:
        ValueError: If a non-blank line does not contain exactly one ``|``
            or a unit token is not an integer.
    """
    base_fname_batch, quantized_units_batch = [], []
    with open(file_path) as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # Blank lines (typically a trailing newline at end of file)
                # carry no record; skip them instead of failing to unpack.
                continue
            base_fname, quantized_units_str = line.split("|")
            # split() with no argument collapses runs of whitespace, so
            # doubled spaces or tabs do not produce empty tokens for int().
            quantized_units = [int(q) for q in quantized_units_str.split()]
            base_fname_batch.append(base_fname)
            quantized_units_batch.append(quantized_units)
    return base_fname_batch, quantized_units_batch
def synthesize_audio(model, waveglow, denoiser, inp, lab=None, strength=0.0):
    """Run ``model`` on one input, vocode the mel output and denoise it.

    Args:
        model: Object exposing ``inference(inp, lab, ret_has_eos=True)``
            returning five values (presumably a ``Tacotron2`` - confirm).
        waveglow: Object exposing ``infer(mel, sigma=...)``.
        denoiser: Callable taking ``(audio, strength=...)``.
        inp: Input tensor whose first dimension must be 1; it is moved to
            the GPU before inference.
        lab: Optional scalar label. When given it is wrapped in a
            one-element ``LongTensor`` on the GPU (presumably a
            speaker/label id - confirm against the model).
        strength: Strength value forwarded to ``denoiser``.

    Returns:
        ``(mel, aud, aud_dn, has_eos)``: the model's mel output and
        end-of-sequence indicator, the raw vocoder output, and the denoised
        output with dimension 1 squeezed.
    """
    assert inp.size(0) == 1
    gpu_inp = inp.cuda()
    gpu_lab = None if lab is None else torch.LongTensor(1).cuda().fill_(lab)

    # Pure inference: no autograd bookkeeping is needed for any stage.
    with torch.no_grad():
        # Only the second (mel) and fifth (has_eos) outputs are consumed.
        _, mel, _, _, has_eos = model.inference(
            gpu_inp, gpu_lab, ret_has_eos=True
        )
        aud = waveglow.infer(mel, sigma=0.666)
        denoised = denoiser(aud, strength=strength)
        aud_dn = denoised.squeeze(1)
    return mel, aud, aud_dn, has_eos
def load_tacotron(tacotron_model_path, max_decoder_steps):
    """Load a Tacotron2 checkpoint and prepare it for half-precision GPU use.

    The checkpoint is expected to be a dict with an ``"hparams"`` object and
    a ``"model_dict"`` state dict.

    NOTE: ``torch.load`` unpickles arbitrary objects (the stored ``hparams``
    is one), so only load checkpoints from a trusted source.

    Args:
        tacotron_model_path: Path of the checkpoint file.
        max_decoder_steps: Value written to ``hparams.max_decoder_steps``
            before the model is constructed.

    Returns:
        ``(model, sr, hparams)``: the model on the GPU in eval mode and half
        precision, ``hparams.sampling_rate``, and the (modified) hparams.
    """
    # Deserialize onto the CPU first: without map_location the tensors are
    # restored to the device they were saved from, which fails on hosts that
    # lack that device index and keeps a second GPU copy of the weights
    # alive. The model is moved to the GPU explicitly below.
    ckpt_dict = torch.load(tacotron_model_path, map_location="cpu")
    hparams = ckpt_dict["hparams"]
    hparams.max_decoder_steps = max_decoder_steps
    sr = hparams.sampling_rate
    model = Tacotron2(hparams)
    model.load_state_dict(ckpt_dict["model_dict"])
    model = model.cuda().eval().half()
    return model, sr, hparams
def load_waveglow(waveglow_path):
    """Load a pickled WaveGlow model and build a ``Denoiser`` for it.

    The checkpoint is expected to be a dict whose ``"model"`` entry is the
    model object itself.

    NOTE: ``torch.load`` unpickles arbitrary objects (here a whole module),
    so only load checkpoints from a trusted source.

    Args:
        waveglow_path: Path of the checkpoint file.

    Returns:
        ``(waveglow, denoiser)``: the model on the GPU in eval mode and half
        precision (with its ``convinv`` modules cast back to float), and a
        ``Denoiser`` constructed from it.
    """
    # Deserialize onto the CPU first: without map_location the tensors are
    # restored to the device they were saved from, which fails on hosts that
    # lack that device index. The model is moved to the GPU explicitly below.
    waveglow = torch.load(waveglow_path, map_location="cpu")["model"]
    waveglow = waveglow.cuda().eval().half()
    # The modules in ``convinv`` are cast back to float after the global
    # half() (presumably for numerical stability - confirm).
    for k in waveglow.convinv:
        k.float()
    denoiser = Denoiser(waveglow)
    return waveglow, denoiser