File size: 7,584 Bytes
1d8403e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# ==================================================================================================
# DEEPFAKE AUDIO - encoder/visualizations.py (Neural Telemetry & Projections)
# ==================================================================================================
# 
# πŸ“ DESCRIPTION
# This module implements the visual diagnostic engine for training monitoring. 
# It utilizes Visdom for real-time loss/EER plotting and UMAP (Uniform Manifold 
# Approximation and Projection) to visualize the high-dimensional speaker 
# identity space, allowing researchers to observe the clustering of d-vectors.
#
# πŸ‘€ AUTHORS
# - Amey Thakur (https://github.com/Amey-Thakur)
# - Mega Satish (https://github.com/msatmod)
#
# 🀝🏻 CREDITS
# Original Real-Time Voice Cloning methodology by CorentinJ
# Repository: https://github.com/CorentinJ/Real-Time-Voice-Cloning
#
# πŸ”— PROJECT LINKS
# Repository: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO
# Video Demo: https://youtu.be/i3wnBcbHDbs
# Research: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO/blob/main/DEEPFAKE-AUDIO.ipynb
#
# πŸ“œ LICENSE
# Released under the MIT License
# Release Date: 2021-02-06
# ==================================================================================================

from datetime import datetime
from time import perf_counter as timer
import numpy as np
import umap
import visdom

# --- PROJECT CORE MODULES ---
from encoder.data_objects.speaker_verification_dataset import SpeakerVerificationDataset

# --- AESTHETIC CONFIGURATION (Categorical Color Map) ---
# Thirteen visually distinct RGB colors, normalized to [0, 1], used to tell
# speakers apart in the UMAP projection scatter plot.
colormap = np.array([
    (76, 255, 0),
    (0, 127, 70),
    (255, 0, 0),
    (255, 217, 38),
    (0, 135, 255),
    (165, 0, 165),
    (255, 167, 255),
    (0, 255, 255),
    (255, 96, 38),
    (142, 76, 0),
    (33, 0, 127),
    (0, 0, 0),
    (183, 183, 183),
], dtype=float) / 255

class Visualizations:
    """
    Experimental Dashboard:
    Provides a real-time window into the model's convergence and identity manifold.

    Wraps a Visdom client for rolling loss/EER plots and UMAP projections of
    speaker embeddings. With ``disabled=True`` the server handshake and all
    plotting are skipped; metric accumulation and console logging still happen.
    """

    def __init__(self, env_name=None, update_every=10, server="http://localhost", disabled=False):
        """
        :param env_name: base name for the Visdom environment; a timestamp is
            always appended so successive runs never clobber each other.
        :param update_every: number of training steps between dashboard refreshes.
        :param server: URL of the running Visdom server.
        :param disabled: when True, skip the server connection and all plotting.
        """
        # Rolling accumulators, averaged and flushed every `update_every` steps.
        self.last_update_timestamp = timer()
        self.update_every = update_every
        self.step_times, self.losses, self.eers = [], [], []

        self.disabled = disabled
        if self.disabled:
            return

        # Timestamp the environment name so each run gets its own dashboard.
        now = datetime.now().strftime("%d-%m %Hh%M")
        self.env_name = now if env_name is None else f"{env_name} ({now})"

        # Visdom server handshake; fail fast with an actionable message,
        # chaining the original ConnectionError for debuggability.
        try:
            self.vis = visdom.Visdom(server, env=self.env_name, raise_exceptions=True)
        except ConnectionError as err:
            raise Exception("⚠️ Technical Blocker: Visdom server not detected. Start it via 'visdom' CLI.") from err

        # Window handles; None until the first plot creates each window.
        self.loss_win = self.eer_win = self.implementation_win = self.projection_win = None
        self.implementation_string = ""

    @staticmethod
    def _module_params_html(module, header):
        """Renders every public attribute of a params module as HTML rows."""
        rows = "".join(f"\t{name}: {getattr(module, name)}<br>"
                       for name in dir(module) if not name.startswith("__"))
        return f"<b>{header}</b>:<br>{rows}"

    def log_params(self):
        """Archives model and data hyperparameters in the Visdom environment."""
        if self.disabled:
            return
        from encoder import params_data, params_model

        param_string = self._module_params_html(params_model, "🧬 Neural Architecture Parameters")
        param_string += "<br>" + self._module_params_html(params_data, "πŸ”Š Acoustic Signal Parameters")
        self.vis.text(param_string, opts={"title": "Hyperparameters (Static Snapshot)"})

    def log_dataset(self, dataset: "SpeakerVerificationDataset"):
        """Documents the corpus profile (speaker count plus dataset logs)."""
        if self.disabled:
            return
        ds_string = f"<b>πŸ“š Corpus Overview</b>: {len(dataset.speakers)} Registered Speakers<br>"
        ds_string += dataset.get_logs().replace("\n", "<br>")
        self.vis.text(ds_string, opts={"title": "Dataset Manifest"})

    def log_implementation(self, params):
        """Records execution-environment metadata (e.g. CUDA device).

        :param params: mapping of label -> value to display in the dashboard.
        """
        if self.disabled:
            return
        impl_str = "".join(f"<b>{p}</b>: {v}<br>" for p, v in params.items())
        # Kept so update() can append live latency stats to the same window.
        self.implementation_string = impl_str
        self.implementation_win = self.vis.text(impl_str, opts={"title": "Training Environment"})

    def update(self, loss, eer, step):
        """Accumulates per-step metrics; every `update_every` steps, prints and
        plots the rolling averages, then resets the accumulation window.

        :param loss: scalar GE2E loss at this step.
        :param eer: scalar equal error rate at this step.
        :param step: global training step (x-axis of the plots).
        """
        now = timer()
        self.step_times.append(1000 * (now - self.last_update_timestamp))
        self.last_update_timestamp = now
        self.losses.append(loss)
        self.eers.append(eer)
        # Heartbeat dot; flush so progress is visible on line-buffered stdout.
        print(".", end="", flush=True)

        # Only refresh the console summary and dashboard every `update_every` steps.
        if step % self.update_every != 0:
            return

        t_str = "Step Latency: mean %dms, std %dms" % (int(np.mean(self.step_times)),
                                                       int(np.std(self.step_times)))
        print("\nπŸ“ˆ Step %d | Loss: %.4f | EER: %.4f | %s" %
              (step, np.mean(self.losses), np.mean(self.eers), t_str))

        if not self.disabled:
            # Loss convergence plot; first call creates the window, later calls append.
            self.loss_win = self.vis.line(
                [np.mean(self.losses)], [step], win=self.loss_win,
                update="append" if self.loss_win else None,
                opts=dict(legend=["Avg. GE2E Loss"], xlabel="Step", ylabel="Loss", title="Training Loss"))
            # Equal error rate plot.
            self.eer_win = self.vis.line(
                [np.mean(self.eers)], [step], win=self.eer_win,
                update="append" if self.eer_win else None,
                opts=dict(legend=["Avg. EER"], xlabel="Step", ylabel="EER", title="Equal Error Rate"))

            if self.implementation_win:
                self.vis.text(self.implementation_string + f"<b>{t_str}</b>",
                              win=self.implementation_win, opts={"title": "Training Environment"})

        # Reset accumulators for the next reporting window.
        self.losses.clear()
        self.eers.clear()
        self.step_times.clear()

    def draw_projections(self, embeds, utterances_per_speaker, step, out_fpath=None, max_speakers=10):
        """Projects d-vectors to 2D with UMAP and draws a per-speaker scatter.

        :param embeds: (n_utterances, embed_dim) array of speaker embeddings;
            assumed grouped so consecutive `utterances_per_speaker` rows share
            a speaker — TODO confirm against the caller.
        :param utterances_per_speaker: number of utterances per speaker in `embeds`.
        :param step: training step, shown in the plot title.
        :param out_fpath: optional path to also save the figure to disk.
        :param max_speakers: cap on speakers drawn (bounded by palette size).
        """
        import matplotlib.pyplot as plt

        # Cannot color more speakers than the palette supports.
        max_speakers = min(max_speakers, len(colormap))
        embeds = embeds[:max_speakers * utterances_per_speaker]

        # One color per speaker, repeated across that speaker's utterances.
        n_speakers = len(embeds) // utterances_per_speaker
        colors = [colormap[i] for i in np.repeat(np.arange(n_speakers), utterances_per_speaker)]

        # Non-linear dimensionality reduction of the embedding space to 2D.
        projected = umap.UMAP().fit_transform(embeds)

        plt.scatter(projected[:, 0], projected[:, 1], c=colors)
        plt.gca().set_aspect("equal", "datalim")
        plt.title("🌌 Identity Manifold (UMAP) - Step %d" % step)

        if not self.disabled:
            self.projection_win = self.vis.matplot(plt, win=self.projection_win)
        if out_fpath:
            plt.savefig(out_fpath)
        plt.clf()

    def save(self):
        """Persists the Visdom environment state to the server."""
        if not self.disabled:
            self.vis.save([self.env_name])