| | import librosa |
| | import matplotlib |
| | import matplotlib.pyplot as plt |
| | import numpy as np |
| | import torch |
| | from matplotlib.colors import LogNorm |
| |
|
| | matplotlib.use("Agg") |
| |
|
| |
|
def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False, plot_log=False):
    """Plot an alignment matrix as a heat map and return the figure.

    Args:
        alignment (torch.Tensor or array-like): Alignment values. Tensors are detached, moved to
            the CPU, converted to NumPy and squeezed; any other input is passed through
            ``np.asanyarray``. The matrix is transposed before drawing, so its first axis runs
            along the x-axis ("Decoder timestep") and its second along the y-axis
            ("Encoder timestep").
        info (str, optional): Extra text appended below the x-axis label.
        fig_size (tuple): Figure size handed to ``plt.subplots``.
        title (str, optional): Title drawn above the axes.
        output_fig (bool): If False, the figure is closed in pyplot before it is returned.
        plot_log (bool): If True, colors are mapped with ``LogNorm``.

    Returns:
        The matplotlib figure holding the plot (returned whether or not it was closed).
    """
    if isinstance(alignment, torch.Tensor):
        alignment_ = alignment.detach().cpu().numpy().squeeze()
    else:
        # Accept plain sequences as well; ndarrays (and subclasses) pass through unchanged.
        alignment_ = np.asanyarray(alignment)
    # NOTE(review): half precision is widened before plotting, presumably because imshow
    # does not handle float16 image data well - confirm.
    if alignment_.dtype == np.float16:
        alignment_ = alignment_.astype(np.float32)

    fig, ax = plt.subplots(figsize=fig_size)
    im = ax.imshow(
        alignment_.T, aspect="auto", origin="lower", interpolation="none", norm=LogNorm() if plot_log else None
    )
    fig.colorbar(im, ax=ax)

    xlabel = "Decoder timestep"
    if info is not None:
        xlabel += "\n\n" + info
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Encoder timestep")

    # Set the title before tight_layout() so the layout pass reserves room for it.
    if title is not None:
        ax.set_title(title)
    fig.tight_layout()

    if not output_fig:
        # Close this specific figure rather than whichever one pyplot considers current.
        plt.close(fig)
    return fig
| |
|
| |
|
def plot_spectrogram(spectrogram, ap=None, fig_size=(16, 10), output_fig=False):
    """Draw a spectrogram as an image and return the figure.

    Args:
        spectrogram (torch.Tensor or np.ndarray): Spectrogram values. Tensors are detached, moved
            to the CPU, converted to NumPy and squeezed. The data is transposed before drawing.
        ap: Optional object whose ``denormalize`` method is applied to the transposed
            spectrogram before plotting (presumably the project's audio processor - confirm).
        fig_size (tuple): Figure size handed to ``plt.figure``.
        output_fig (bool): If False, the current pyplot figure is closed before returning.

    Returns:
        The matplotlib figure holding the plot (returned whether or not it was closed).
    """
    # Tensors are brought to host memory first; every other input is used as given.
    if isinstance(spectrogram, torch.Tensor):
        frames = spectrogram.detach().cpu().numpy().squeeze()
    else:
        frames = spectrogram
    image = frames.T

    # Half-precision data is widened to float32 before it is handed to matplotlib.
    if image.dtype == np.float16:
        image = image.astype(np.float32)
    if ap is not None:
        image = ap.denormalize(image)

    fig = plt.figure(figsize=fig_size)
    plt.imshow(image, aspect="auto", origin="lower")
    plt.colorbar()
    plt.tight_layout()

    if output_fig:
        return fig
    plt.close()
    return fig
| |
|
| |
|
def plot_pitch(pitch, spectrogram, ap=None, fig_size=(30, 10), output_fig=False):
    """Plot pitch curves on top of the spectrogram.

    Args:
        pitch (np.array): Pitch values, drawn as a red line on a secondary y-axis ("F0").
        spectrogram (np.array or torch.Tensor): Spectrogram values. Tensors are detached, moved
            to the CPU, converted to NumPy and squeezed. The data is transposed before drawing.
        ap: Optional object whose ``denormalize`` method is applied to the transposed
            spectrogram before plotting (presumably the project's audio processor - confirm).
        fig_size (tuple, optional): Figure size. ``None`` falls back to matplotlib's configured
            default (``rcParams["figure.figsize"]``).
        output_fig (bool): If False, the figure is closed in pyplot before it is returned.

    Returns:
        The matplotlib figure holding the plot (returned whether or not it was closed).

    Shapes:
        pitch: :math:`(T,)`
        spec: :math:`(T, C)` - it is transposed before ``imshow`` so that time runs along the x-axis.
    """
    if isinstance(spectrogram, torch.Tensor):
        spectrogram_ = spectrogram.detach().cpu().numpy().squeeze().T
    else:
        spectrogram_ = spectrogram.T
    # Half-precision data is widened to float32 before it is handed to matplotlib.
    if spectrogram_.dtype == np.float16:
        spectrogram_ = spectrogram_.astype(np.float32)
    if ap is not None:
        spectrogram_ = ap.denormalize(spectrogram_)

    # Pass the size straight to subplots() instead of temporarily overwriting the global
    # rcParams: figsize=None already means "use rcParams", and nothing is left modified
    # if plotting raises half-way through.
    fig, ax = plt.subplots(figsize=fig_size)

    ax.imshow(spectrogram_, aspect="auto", origin="lower")
    ax.set_xlabel("time")
    ax.set_ylabel("spec_freq")

    # Secondary y-axis sharing the x-axis, so the pitch curve has its own scale.
    ax2 = ax.twinx()
    ax2.plot(pitch, linewidth=5.0, color="red")
    ax2.set_ylabel("F0")

    if not output_fig:
        # Close this specific figure rather than whichever one pyplot considers current.
        plt.close(fig)
    return fig
| |
|
| |
|
def plot_avg_pitch(pitch, chars, fig_size=(30, 10), output_fig=False):
    """Plot pitch curves on top of the input characters.

    Args:
        pitch (np.array): Pitch values, drawn as a red line on a secondary y-axis ("F0").
        chars (str): Characters to place to the x-axis, one tick per character.
        fig_size (tuple, optional): Figure size. ``None`` falls back to matplotlib's configured
            default (``rcParams["figure.figsize"]``).
        output_fig (bool): If False, the figure is closed in pyplot before it is returned.

    Returns:
        The matplotlib figure holding the plot (returned whether or not it was closed).

    Shapes:
        pitch: :math:`(T,)`
    """
    # Pass the size straight to subplots() instead of temporarily overwriting the global
    # rcParams: figsize=None already means "use rcParams", and nothing is left modified
    # if plotting raises half-way through.
    fig, ax = plt.subplots(figsize=fig_size)

    # One tick per character, set on the primary axes explicitly.
    ax.set_xticks(np.arange(len(chars)))
    ax.set_xticklabels(list(chars))

    ax.set_xlabel("characters")
    ax.set_ylabel("freq")

    # Secondary y-axis sharing the x-axis; the pitch curve is drawn against it.
    ax2 = ax.twinx()
    ax2.plot(pitch, linewidth=5.0, color="red")
    ax2.set_ylabel("F0")

    if not output_fig:
        # Close this specific figure rather than whichever one pyplot considers current.
        plt.close(fig)
    return fig
| |
|
| |
|
def plot_avg_energy(energy, chars, fig_size=(30, 10), output_fig=False):
    """Plot energy curves on top of the input characters.

    Args:
        energy (np.array): Energy values, drawn as a red line on a secondary y-axis ("energy").
        chars (str): Characters to place to the x-axis, one tick per character.
        fig_size (tuple, optional): Figure size. ``None`` falls back to matplotlib's configured
            default (``rcParams["figure.figsize"]``).
        output_fig (bool): If False, the figure is closed in pyplot before it is returned.

    Returns:
        The matplotlib figure holding the plot (returned whether or not it was closed).

    Shapes:
        energy: :math:`(T,)`
    """
    # Pass the size straight to subplots() instead of temporarily overwriting the global
    # rcParams: figsize=None already means "use rcParams", and nothing is left modified
    # if plotting raises half-way through.
    fig, ax = plt.subplots(figsize=fig_size)

    # One tick per character, set on the primary axes explicitly.
    ax.set_xticks(np.arange(len(chars)))
    ax.set_xticklabels(list(chars))

    ax.set_xlabel("characters")
    # NOTE(review): the primary axis carries no data and is labelled "freq" exactly as in
    # plot_avg_pitch; this looks like a copy-paste leftover - confirm the intended label.
    ax.set_ylabel("freq")

    # Secondary y-axis sharing the x-axis; the energy curve is drawn against it.
    ax2 = ax.twinx()
    ax2.plot(energy, linewidth=5.0, color="red")
    ax2.set_ylabel("energy")

    if not output_fig:
        # Close this specific figure rather than whichever one pyplot considers current.
        plt.close(fig)
    return fig
| |
|
| |
|
def visualize(
    alignment,
    postnet_output,
    text,
    hop_length,
    CONFIG,
    tokenizer,
    stop_tokens=None,
    decoder_output=None,
    output_path=None,
    figsize=(8, 24),
    output_fig=False,
):
    """Draw alignment, stop tokens and spectrograms as stacked panels of one figure.

    Intended to be used in Notebooks.

    Panels, top to bottom: (1) the alignment heat map with the characters of ``text`` as
    y-ticks, (2) the stop-token curve (left blank when ``stop_tokens`` is None), (3) the
    postnet spectrogram and, only when ``decoder_output`` is given, (4) the decoder spectrogram.

    Args:
        alignment: Alignment matrix; it is transposed before being shown.
        postnet_output: Spectrogram shown in panel 3; it is transposed before being shown.
        text (str): Input text. When ``CONFIG.use_phonemes`` is set, it is converted to ids and
            back with ``tokenizer`` and the result is printed and used for the tick labels.
        hop_length (int): Hop length forwarded to ``librosa.display.specshow``.
        CONFIG: Configuration object; ``use_phonemes`` and ``audio["sample_rate"]``,
            ``audio["mel_fmin"]``, ``audio["mel_fmax"]`` are read.
        tokenizer: Object providing ``text_to_ids`` and ``ids_to_text``; only used when
            ``CONFIG.use_phonemes`` is set.
        stop_tokens (optional): Sequence of stop-token values plotted in panel 2.
        decoder_output (optional): Spectrogram shown in panel 4.
        output_path (str, optional): If given, the path is printed, the figure is saved there
            and then closed.
        figsize (tuple): Figure size handed to ``plt.figure``.
        output_fig (bool): If False, the figure is closed before returning.

    Returns:
        None.
    """
    # `import librosa` alone does not load the `display` submodule on librosa releases that
    # lack lazy submodule loading, so import it explicitly where it is needed.
    import librosa.display  # pylint: disable=import-outside-toplevel

    num_plot = 4 if decoder_output is not None else 3
    label_fontsize = 16
    fig = plt.figure(figsize=figsize)

    def _draw_spectrogram(spec, position):
        """Draw one spectrogram panel at the given row of the figure."""
        plt.subplot(num_plot, 1, position)
        librosa.display.specshow(
            spec.T,
            sr=CONFIG.audio["sample_rate"],
            hop_length=hop_length,
            x_axis="time",
            y_axis="linear",
            fmin=CONFIG.audio["mel_fmin"],
            fmax=CONFIG.audio["mel_fmax"],
        )
        plt.xlabel("Time", fontsize=label_fontsize)
        plt.ylabel("Hz", fontsize=label_fontsize)
        plt.tight_layout()
        plt.colorbar()

    # Panel 1: alignment.
    plt.subplot(num_plot, 1, 1)
    plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None)
    plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
    plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
    # Round-trip the text through the tokenizer so the tick labels show what the
    # tokenizer produces for it when phonemes are in use.
    if CONFIG.use_phonemes:
        seq = tokenizer.text_to_ids(text)
        text = tokenizer.ids_to_text(seq)
        print(text)
    plt.yticks(range(len(text)), list(text))
    plt.colorbar()

    # Panel 2: stop-token predictions (the slot stays empty when none are given).
    if stop_tokens is not None:
        plt.subplot(num_plot, 1, 2)
        plt.plot(range(len(stop_tokens)), list(stop_tokens))

    # Panel 3: postnet spectrogram.
    _draw_spectrogram(postnet_output, 3)

    # Panel 4: decoder spectrogram, only when provided.
    if decoder_output is not None:
        _draw_spectrogram(decoder_output, 4)

    if output_path:
        print(output_path)
        fig.savefig(output_path)

    # Close exactly this figure, once. A second bare plt.close() would close whatever
    # other figure happens to be current after this one is gone.
    if output_path or not output_fig:
        plt.close(fig)
| |
|