{ "codec": { "mlp_in":{ "in_features": 320, "hidden_features": 768, "out_features": 1024, "compute_dtype": "float32" }, "mlp_out":{ "in_features": 1024, "hidden_features": 768, "out_features": 320, "compute_dtype": "float32" }, "decoder": { "n_layers": 8, "n_embd": 1024, "n_hidden": 4096, "n_heads": 16, "head_dim": 64, "compute_dtype": "float32", "window_size": 15, "dropout_rate": 0.1 }, "encoder": { "n_layers": 8, "n_embd": 1024, "n_hidden": 4096, "n_heads": 16, "head_dim": 64, "compute_dtype": "float32", "window_size": 15, "dropout_rate": 0.1 }, "rvq": { "num_codebooks": 8, "codebook_size": 1024, "embedding_dim": 16, "latent_dim": 16, "updown_linears": false, "codebook_weight_dtype": "float32" } }, "w2v":{ "mlp_in":{ "in_features": 320, "hidden_features": 768, "out_features": 1024, "compute_dtype": "float32" }, "encoder": { "n_layers": 8, "n_embd": 1024, "n_hidden": 4096, "n_heads": 16, "head_dim": 64, "compute_dtype": "float32", "window_size": 15, "dropout_rate": 0.1 }, "rvq": { "num_codebooks": 8, "codebook_size": 1024, "embedding_dim": 1024, "latent_dim": 1024, "updown_linears": false, "codebook_weight_dtype": "float32" }, "training": { "noise_masking": 0.1, "noise_augmentation": 0.1 } }, "training":{ "resume": false, "loss_type": "cossim", "strict_model": true, "load_discriminator": false, "learning_rate": 1e-4, "weight_decay": 1e-2, "discriminator_start_steps": 100, "discriminator_segment_duration": 1.28, "apply_apa": true, "warmup_steps": 1000, "min_lr": 1e-6, "num_epochs": 100000, "use_continuous": 0.1, "max_grad_norm": 1000.0, "batch_size": 300, "gradient_accumulation_steps": 1, "num_workers": 6, "use_phaseaug": true, "init_dataset": false, "profile": false, "verbose_grad_norm": false, "verbose_norm_threshold_max": 5.0, "verbose_norm_threshold_min": 0.001, "verbose_paramter_norm": false, "use_discriminator": false, "codebook_reset_interval": 1000 }, "loss":{ "recon_loss_weight": 1 }, "data": { "audio_dir": "/data", "sample_rate": 16000, "segment_duration": 10.24, "cache_dir": "/data/dataloader/v9" }, "logging": { "log_interval": 100, "save_interval": 500, "eval_interval": 3000, "experiment_dir": "/data/jhcodec/sw2v/{experiment_name}", "checkpoint_dir": "/data/jhcodec/sw2v/{experiment_name}/checkpoints", "tensorboard_dir": "/data/jhcodec/sw2v/{experiment_name}/tensorboard", "n_samples": 3 } }