File size: 3,834 Bytes
1d8403e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# ==================================================================================================
# DEEPFAKE AUDIO - encoder_train.py (Neural Identity Orchestration)
# ==================================================================================================
# 
# πŸ“ DESCRIPTION
# This script manages the training lifecycle of the Speaker Encoder. It optimizes a 
# d-vector based neural network to minimize the GE2E (Generalized End-to-End) loss. 
# The goal is to maximize the similarity between embeddings of the same speaker 
# while minimizing similarity between different speakers, enabling high-fidelity 
# zero-shot voice cloning.
#
# πŸ‘€ AUTHORS
# - Amey Thakur (https://github.com/Amey-Thakur)
# - Mega Satish (https://github.com/msatmod)
#
# 🀝🏻 CREDITS
# Original Real-Time Voice Cloning methodology by CorentinJ
# Repository: https://github.com/CorentinJ/Real-Time-Voice-Cloning
#
# πŸ”— PROJECT LINKS
# Repository: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO
# Video Demo: https://youtu.be/i3wnBcbHDbs
# Research: https://github.com/Amey-Thakur/DEEPFAKE-AUDIO/blob/main/DEEPFAKE-AUDIO.ipynb
#
# πŸ“œ LICENSE
# Released under the MIT License
# Release Date: 2021-02-06
# ==================================================================================================

from utils.argutils import print_args
from encoder.train import train
from pathlib import Path
import argparse

if __name__ == "__main__":
    # --- INTERFACE COMMANDS ---
    # CLI front-end for the speaker-encoder trainer: every accepted flag maps
    # 1:1 onto a keyword parameter of encoder.train.train().
    parser = argparse.ArgumentParser(
        description="Encoder Training Hub: Optimizing identity embeddings from preprocessed data.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # --- SESSION DEFINITION ---
    parser.add_argument("run_id", type=str, 
                        help="Identifier for this training experiment. Models and logs will be organized under this ID.")
    parser.add_argument("clean_data_root", type=Path, 
                        help="Root path to the mel-spectrograms generated by encoder_preprocess.py.")
    
    # --- STORAGE & TELEMETRY ---
    # BUGFIX: argparse applies `type` only to values parsed from the command
    # line, never to `default`, so the default must itself be a Path. A bare
    # "saved_models" string would reach train() untyped and break any Path
    # arithmetic (e.g. models_dir / run_id) performed there.
    parser.add_argument("-m", "--models_dir", type=Path, default=Path("saved_models"), 
                        help="Parent directory for serialized weights, backups, and diagnostic plots.")
    parser.add_argument("-v", "--vis_every", type=int, default=10, 
                        help="Iteration frequency for updating training curves and loss metrics.")
    parser.add_argument("-u", "--umap_every", type=int, default=100, 
                        help="Frequency of UMAP projections to visualize speaker cluster separation.")
    parser.add_argument("-s", "--save_every", type=int, default=500, 
                        help="Step interval for materializing model weights (.pt) on disk.")
    parser.add_argument("-b", "--backup_every", type=int, default=7500, 
                        help="Interval for creating immutable rolling backups of the model state.")
    parser.add_argument("-f", "--force_restart", action="store_true", 
                        help="Bypass existing checkpoints and initialize weights from distribution (restart from scratch).")
    
    # --- VISUALIZATION SERVER ---
    parser.add_argument("--visdom_server", type=str, default="http://localhost", 
                        help="Remote address of the Visdom dashboard server.")
    parser.add_argument("--no_visdom", action="store_true", 
                        help="Inhibit rich visual telemetry (not recommended for production monitoring).")
    
    args = parser.parse_args()

    # --- EXECUTION ---
    # Echo the resolved configuration so each run's hyperparameters are logged.
    print_args(args, parser)
    print("🀝🏻 Scholarly Partnership: Amey Thakur & Mega Satish")
    print("πŸš€ Initiating Neural Training Pipeline - Monitoring d-vector clusters...")
    
    # Delegate to the internal training engine; argparse attribute names match
    # train()'s keyword parameters exactly, so the namespace unpacks directly.
    train(**vars(args))