| import librosa |
| import matplotlib |
| import matplotlib.pyplot as plt |
| import numpy as np |
| import torch |
| from matplotlib.colors import LogNorm |
|
|
# Select the non-interactive "Agg" backend when this module is imported.
matplotlib.use("Agg")
|
|
|
|
def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False, plot_log=False):
    """Draw an alignment matrix as a heat map and return the figure.

    Args:
        alignment (torch.Tensor or np.ndarray): Alignment values. A tensor is
            detached, moved to the CPU, converted to NumPy and squeezed; any
            other input is used as is and must provide ``dtype``, ``astype``
            and ``T``.
        info (str, optional): Extra text appended below the x-axis label.
        fig_size (tuple): Figure size passed to ``plt.subplots``.
        title (str, optional): Title drawn above the axes.
        output_fig (bool): If False, the figure is closed before it is
            returned.
        plot_log (bool): If True, colours are mapped with ``LogNorm``.

    Returns:
        matplotlib.figure.Figure: The figure holding the plot.

    Shapes:
        alignment: transposed before display, with the x-axis labelled as
            decoder timesteps. NOTE(review): presumably
            :math:`(T_{dec}, T_{enc})` - confirm against callers.
    """
    if isinstance(alignment, torch.Tensor):
        values = alignment.detach().cpu().numpy().squeeze()
    else:
        values = alignment
    # Half-precision input is promoted to float32 before it is plotted.
    if values.dtype == np.float16:
        values = values.astype(np.float32)

    fig, ax = plt.subplots(figsize=fig_size)
    image = ax.imshow(
        values.T,
        aspect="auto",
        origin="lower",
        interpolation="none",
        norm=LogNorm() if plot_log else None,
    )
    fig.colorbar(image, ax=ax)

    xlabel = "Decoder timestep"
    if info is not None:
        xlabel += "\n\n" + info
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Encoder timestep")

    # The title has to exist before tight_layout() runs, otherwise the layout
    # does not reserve room for it and it can be clipped at the top edge.
    if title is not None:
        ax.set_title(title)
    fig.tight_layout()

    # Close this figure explicitly instead of whatever pyplot considers current.
    if not output_fig:
        plt.close(fig)
    return fig
|
|
|
|
def plot_spectrogram(spectrogram, ap=None, fig_size=(16, 10), output_fig=False):
    """Show a spectrogram as an image with a colour bar.

    Args:
        spectrogram (torch.Tensor or np.ndarray): Spectrogram values. A tensor
            is detached, moved to the CPU, converted to NumPy and squeezed.
            The array is transposed before it is displayed.
        ap (optional): Object whose ``denormalize`` method is applied to the
            transposed array when given.
        fig_size (tuple): Figure size passed to ``plt.figure``.
        output_fig (bool): If False, the current pyplot figure is closed
            before returning.

    Returns:
        matplotlib.figure.Figure: The figure holding the plot.
    """
    data = spectrogram
    if isinstance(data, torch.Tensor):
        data = data.detach().cpu().numpy().squeeze()
    data = data.T

    # Half-precision input is promoted to float32 before it is plotted.
    if data.dtype == np.float16:
        data = data.astype(np.float32)
    if ap is not None:
        data = ap.denormalize(data)

    figure = plt.figure(figsize=fig_size)
    plt.imshow(data, aspect="auto", origin="lower")
    plt.colorbar()
    plt.tight_layout()

    if output_fig:
        return figure
    plt.close()
    return figure
|
|
|
|
def plot_pitch(pitch, spectrogram, ap=None, fig_size=(30, 10), output_fig=False):
    """Plot pitch curves on top of the spectrogram.

    Args:
        pitch (np.array): Pitch values, drawn as a red line on a secondary
            y-axis.
        spectrogram (torch.Tensor or np.array): Spectrogram values. A tensor
            is detached, moved to the CPU, converted to NumPy and squeezed.
            The array is transposed before it is displayed.
        ap (optional): Object whose ``denormalize`` method is applied to the
            transposed spectrogram when given.
        fig_size (tuple, optional): Figure size. ``None`` uses matplotlib's
            configured default size.
        output_fig (bool): If False, the figure is closed before it is
            returned.

    Returns:
        matplotlib.figure.Figure: The figure holding the plot.

    Shapes:
        pitch: :math:`(T,)`
        spectrogram: NOTE(review): assumes :math:`(T, C)`, since the array is
            transposed before display and the x-axis is labelled "time" -
            confirm against callers.
    """
    if isinstance(spectrogram, torch.Tensor):
        spec = spectrogram.detach().cpu().numpy().squeeze().T
    else:
        spec = spectrogram.T
    # Half-precision input is promoted to float32 before it is plotted.
    if spec.dtype == np.float16:
        spec = spec.astype(np.float32)
    if ap is not None:
        spec = ap.denormalize(spec)

    # Pass the size straight to the figure instead of temporarily overwriting
    # the global rcParams["figure.figsize"]: figsize=None already falls back
    # to that default, and no global state is left modified if plotting fails.
    fig, ax = plt.subplots(figsize=fig_size)

    ax.imshow(spec, aspect="auto", origin="lower")
    ax.set_xlabel("time")
    ax.set_ylabel("spec_freq")

    # The pitch curve gets its own y-axis that shares the x-axis.
    ax2 = ax.twinx()
    ax2.plot(pitch, linewidth=5.0, color="red")
    ax2.set_ylabel("F0")

    # Close this figure explicitly instead of whatever pyplot considers current.
    if not output_fig:
        plt.close(fig)
    return fig
|
|
|
|
def plot_avg_pitch(pitch, chars, fig_size=(30, 10), output_fig=False):
    """Plot pitch curves on top of the input characters.

    Args:
        pitch (np.array): Pitch values, drawn as a red line on a secondary
            y-axis.
        chars (str): Characters to place on the x-axis, one tick per
            character.
        fig_size (tuple, optional): Figure size. ``None`` uses matplotlib's
            configured default size.
        output_fig (bool): If False, the figure is closed before it is
            returned.

    Returns:
        matplotlib.figure.Figure: The figure holding the plot.

    Shapes:
        pitch: :math:`(T,)`
    """
    # Pass the size straight to the figure instead of temporarily overwriting
    # the global rcParams["figure.figsize"]: figsize=None already falls back
    # to that default, and no global state is left modified if plotting fails.
    fig, ax = plt.subplots(figsize=fig_size)

    # One tick per character, set on the primary axes explicitly.
    ax.set_xticks(np.arange(len(chars)))
    ax.set_xticklabels(list(chars))

    ax.set_xlabel("characters")
    ax.set_ylabel("freq")

    # The pitch curve gets its own y-axis that shares the x-axis.
    ax2 = ax.twinx()
    ax2.plot(pitch, linewidth=5.0, color="red")
    ax2.set_ylabel("F0")

    # Close this figure explicitly instead of whatever pyplot considers current.
    if not output_fig:
        plt.close(fig)
    return fig
|
|
|
|
def plot_avg_energy(energy, chars, fig_size=(30, 10), output_fig=False):
    """Plot energy curves on top of the input characters.

    Args:
        energy (np.array): Energy values, drawn as a red line on a secondary
            y-axis.
        chars (str): Characters to place on the x-axis, one tick per
            character.
        fig_size (tuple, optional): Figure size. ``None`` uses matplotlib's
            configured default size.
        output_fig (bool): If False, the figure is closed before it is
            returned.

    Returns:
        matplotlib.figure.Figure: The figure holding the plot.

    Shapes:
        energy: :math:`(T,)`
    """
    # Pass the size straight to the figure instead of temporarily overwriting
    # the global rcParams["figure.figsize"]: figsize=None already falls back
    # to that default, and no global state is left modified if plotting fails.
    fig, ax = plt.subplots(figsize=fig_size)

    # One tick per character, set on the primary axes explicitly.
    ax.set_xticks(np.arange(len(chars)))
    ax.set_xticklabels(list(chars))

    ax.set_xlabel("characters")
    # NOTE(review): this label looks copied from plot_avg_pitch; it is kept
    # unchanged to preserve the rendered output - confirm whether it is intended.
    ax.set_ylabel("freq")

    # The energy curve gets its own y-axis that shares the x-axis.
    ax2 = ax.twinx()
    ax2.plot(energy, linewidth=5.0, color="red")
    ax2.set_ylabel("energy")

    # Close this figure explicitly instead of whatever pyplot considers current.
    if not output_fig:
        plt.close(fig)
    return fig
|
|
|
|
def visualize(
    alignment,
    postnet_output,
    text,
    hop_length,
    CONFIG,
    tokenizer,
    stop_tokens=None,
    decoder_output=None,
    output_path=None,
    figsize=(8, 24),
    output_fig=False,
):
    """Intended to be used in Notebooks.

    Stacks up to four plots in one figure: the alignment, the stop tokens
    (optional), the postnet spectrogram and the decoder spectrogram (optional).

    Args:
        alignment: Alignment matrix; its transpose is shown as an image.
        postnet_output: Spectrogram whose transpose is drawn with
            ``librosa.display.specshow`` in the third slot.
        text (str): Input text; its characters become the y tick labels of the
            alignment plot.
        hop_length (int): Hop length forwarded to ``specshow``.
        CONFIG: Configuration object. ``use_phonemes``,
            ``audio["sample_rate"]``, ``audio["mel_fmin"]`` and
            ``audio["mel_fmax"]`` are read.
        tokenizer: Object providing ``text_to_ids`` and ``ids_to_text``; only
            used when ``CONFIG.use_phonemes`` is truthy.
        stop_tokens (optional): Values plotted in the second slot.
        decoder_output (optional): Spectrogram drawn in a fourth slot; when it
            is given the grid has four rows instead of three.
        output_path (str, optional): If set, the path is printed, the figure
            is saved there and then closed.
        figsize (tuple): Figure size passed to ``plt.figure``.
        output_fig (bool): If False, the figure is closed before returning.

    Returns:
        None
    """
    # Import the submodule explicitly: a bare ``import librosa`` is not
    # guaranteed to expose ``librosa.display`` on every librosa release.
    import librosa.display

    num_plot = 4 if decoder_output is not None else 3

    label_fontsize = 16
    fig = plt.figure(figsize=figsize)

    # Slot 1: alignment.
    plt.subplot(num_plot, 1, 1)
    plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None)
    plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
    plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
    # Round-trip the text through the tokenizer so the tick labels show the
    # tokenizer's rendering of the input.
    if CONFIG.use_phonemes:
        seq = tokenizer.text_to_ids(text)
        text = tokenizer.ids_to_text(seq)
        print(text)
    plt.yticks(range(len(text)), list(text))
    plt.colorbar()

    # Slot 2: stop tokens. The slot stays empty when none are given.
    if stop_tokens is not None:
        plt.subplot(num_plot, 1, 2)
        plt.plot(range(len(stop_tokens)), list(stop_tokens))

    # Slot 3: postnet spectrogram.
    plt.subplot(num_plot, 1, 3)
    librosa.display.specshow(
        postnet_output.T,
        sr=CONFIG.audio["sample_rate"],
        hop_length=hop_length,
        x_axis="time",
        y_axis="linear",
        fmin=CONFIG.audio["mel_fmin"],
        fmax=CONFIG.audio["mel_fmax"],
    )
    plt.xlabel("Time", fontsize=label_fontsize)
    plt.ylabel("Hz", fontsize=label_fontsize)
    plt.tight_layout()
    plt.colorbar()

    # Slot 4: decoder spectrogram.
    if decoder_output is not None:
        plt.subplot(num_plot, 1, 4)
        librosa.display.specshow(
            decoder_output.T,
            sr=CONFIG.audio["sample_rate"],
            hop_length=hop_length,
            x_axis="time",
            y_axis="linear",
            fmin=CONFIG.audio["mel_fmin"],
            fmax=CONFIG.audio["mel_fmax"],
        )
        plt.xlabel("Time", fontsize=label_fontsize)
        plt.ylabel("Hz", fontsize=label_fontsize)
        plt.tight_layout()
        plt.colorbar()

    if output_path:
        print(output_path)
        fig.savefig(output_path)

    # Close this figure by reference, and only once. Two bare plt.close()
    # calls in a row would close an unrelated figure that became current
    # after the first call.
    if output_path or not output_fig:
        plt.close(fig)
|
|