Spaces:
Running
Running
| # ================================================================================================== | |
| # DEEPFAKE AUDIO - synthesizer/utils/plot.py (Neural Visualization Engine) | |
| # ================================================================================================== | |
| # | |
| # DESCRIPTION | |
| # This module provides visualization utilities for the synthesizer. It includes | |
| # functions for plotting attention alignments (showing the model's focus | |
| # during synthesis) and Mel-Spectrograms (predicted vs. target), | |
| # facilitating model diagnostics and qualitative evaluation. | |
| # | |
| # AUTHORS | |
| # - Amey Thakur (https://github.com/Amey-Thakur) | |
| # - Mega Satish (https://github.com/msatmod) | |
| # | |
| # CREDITS | |
| # Original Real-Time Voice Cloning methodology by CorentinJ | |
| # Repository: https://github.com/CorentinJ/Real-Time-Voice-Cloning | |
| # | |
| # PROJECT LINKS | |
| # Repository: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO | |
| # Video Demo: https://youtu.be/i3wnBcbHDbs | |
| # Research: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO/blob/main/DEEPFAKE-AUDIO.ipynb | |
| # | |
| # LICENSE | |
| # Released under the MIT License | |
| # Release Date: 2021-02-06 | |
| # ================================================================================================== | |
| import numpy as np | |
def split_title_line(title_text, max_words=5):
    """
    Title Formatter:
    Re-flows a title so that each output line holds at most `max_words`
    whitespace-separated words, making long titles readable in plots.

    Args:
        title_text: The title string to wrap.
        max_words: Maximum number of words placed on a single line.

    Returns:
        The words of `title_text` joined by single spaces, with a newline
        inserted after every `max_words` words.
    """
    words = title_text.split()
    wrapped_lines = []
    # Walk the word list in strides of `max_words`, emitting one line per stride.
    for start in range(0, len(words), max_words):
        chunk = words[start:start + max_words]
        wrapped_lines.append(" ".join(chunk))
    return "\n".join(wrapped_lines)
def plot_alignment(alignment, path, title=None, split_title=False, max_len=None):
    """
    Attention Visualizer:
    Generates a heat map of the attention weights, illustrating the temporal
    alignment between encoder (text) and decoder (audio) timesteps, and saves
    it to `path` as a PNG image.

    Args:
        alignment: Array of attention weights, indexed as
            `alignment[:, :max_len]`. Rows are drawn along the
            "Encoder timestep" axis and columns along the "Decoder timestep"
            axis (presumably shaped (encoder_steps, decoder_steps) - confirm
            against the caller).
        path: Destination handed to `Figure.savefig`.
        title: Optional title drawn above the heat map.
        split_title: When True, the title is wrapped via `split_title_line`.
        max_len: Optional cap on the number of columns that are plotted.
    """
    import matplotlib
    matplotlib.use("Agg")  # Select the Agg backend so no display is needed.
    import matplotlib.pyplot as plt

    if max_len is not None:
        alignment = alignment[:, :max_len]

    # Only wrap a title that actually exists; wrapping None would raise.
    if split_title and title is not None:
        title = split_title_line(title)

    fig = plt.figure(figsize=(8, 6))
    try:
        ax = fig.add_subplot(111)
        im = ax.imshow(
            alignment,
            aspect="auto",
            origin="lower",
            interpolation="none")
        fig.colorbar(im, ax=ax)

        # Address the axes explicitly instead of relying on pyplot's
        # implicit "current axes" state.
        ax.set_xlabel("Decoder timestep")
        ax.set_ylabel("Encoder timestep")
        if title is not None:
            ax.set_title(title)

        fig.tight_layout()
        fig.savefig(path, format="png")
    finally:
        # Always release this specific figure, even when saving fails,
        # so repeated calls do not accumulate open figures.
        plt.close(fig)
def plot_spectrogram(pred_spectrogram, path, title=None, split_title=False, target_spectrogram=None, max_len=None, auto_aspect=False):
    """
    Spectrographic Comparison:
    Plots the predicted Mel-Spectrogram alongside the ground-truth target
    (if provided) and saves the figure to `path` as a PNG image.

    Args:
        pred_spectrogram: Predicted spectrogram array. It is truncated along
            its first axis by `max_len` and rotated with `np.rot90` before
            display (presumably shaped (frames, mel_channels) - confirm
            against the caller).
        path: Destination handed to `Figure.savefig`.
        title: Optional caption placed on the figure.
        split_title: When True, the title is wrapped via `split_title_line`.
        target_spectrogram: Optional reference spectrogram, processed the same
            way as the prediction and drawn in a panel above it.
        max_len: Optional cap on the number of leading entries kept along the
            first axis of each spectrogram.
        auto_aspect: When True, images are drawn with `aspect="auto"`;
            otherwise matplotlib's default aspect is used.
    """
    import matplotlib
    matplotlib.use("Agg")  # Select the Agg backend so no display is needed.
    import matplotlib.pyplot as plt

    if max_len is not None:
        pred_spectrogram = pred_spectrogram[:max_len]
        # The target is optional: slicing None would raise a TypeError.
        if target_spectrogram is not None:
            target_spectrogram = target_spectrogram[:max_len]

    # Only wrap a title that actually exists; wrapping None would raise.
    if split_title and title is not None:
        title = split_title_line(title)

    # Shared imshow options; "aspect" is only overridden on request.
    imshow_kwargs = {"interpolation": "none"}
    if auto_aspect:
        imshow_kwargs["aspect"] = "auto"

    fig = plt.figure(figsize=(10, 8))
    try:
        if title is not None:
            fig.text(0.5, 0.18, title, horizontalalignment="center", fontsize=16)

        # Render target spectrogram for reference
        if target_spectrogram is not None:
            ax1 = fig.add_subplot(311)
            ax2 = fig.add_subplot(312)
            im = ax1.imshow(np.rot90(target_spectrogram), **imshow_kwargs)
            ax1.set_title("Target Mel-Spectrogram")
            fig.colorbar(mappable=im, shrink=0.65, orientation="horizontal", ax=ax1)
            ax2.set_title("Predicted Mel-Spectrogram")
        else:
            ax2 = fig.add_subplot(211)

        # Render predicted spectrogram
        im = ax2.imshow(np.rot90(pred_spectrogram), **imshow_kwargs)
        fig.colorbar(mappable=im, shrink=0.65, orientation="horizontal", ax=ax2)

        fig.tight_layout()
        fig.savefig(path, format="png")
    finally:
        # Always release this specific figure, even when saving fails,
        # so repeated calls do not accumulate open figures.
        plt.close(fig)