Spaces: Running on Zero
{
  "model_type": "autoencoder",
  "sample_size": 12000,
  "sample_rate": 24000,
  "audio_channels": 1,
  "model": {
    "encoder": {
      "type": "oobleck",
      "config": {
        "in_channels": 1,
        "channels": 128,
        "c_mults": [1, 2, 4, 8],
        "strides": [2, 4, 6, 10],
        "latent_dim": 256,
        "use_snake": true
      }
    },
    "decoder": {
      "type": "oobleck",
      "config": {
        "out_channels": 1,
        "channels": 128,
        "c_mults": [1, 2, 4, 8],
        "strides": [2, 4, 6, 10],
        "latent_dim": 128,
        "use_snake": true,
        "final_tanh": false
      }
    },
    "bottleneck": {
      "type": "vae"
    },
    "latent_dim": 128,
    "downsampling_ratio": 480,
    "io_channels": 1
  },
  "training": {
    "learning_rate": 1.5e-4,
    "warmup_steps": 0,
    "use_ema": false,
    "optimizer_configs": {
      "autoencoder": {
        "optimizer": {
          "type": "AdamW",
          "config": {
            "betas": [0.8, 0.99],
            "lr": 1.5e-4,
            "weight_decay": 1e-3
          }
        },
        "scheduler": {
          "type": "InverseLR",
          "config": {
            "inv_gamma": 200000,
            "power": 0.5,
            "warmup": 0.999
          }
        }
      },
      "discriminator": {
        "optimizer": {
          "type": "AdamW",
          "config": {
            "betas": [0.8, 0.99],
            "lr": 3e-4,
            "weight_decay": 1e-3
          }
        },
        "scheduler": {
          "type": "InverseLR",
          "config": {
            "inv_gamma": 200000,
            "power": 0.5,
            "warmup": 0.999
          }
        }
      }
    },
    "loss_configs": {
      "discriminator": {
        "type": "encodec",
        "config": {
          "filters": 64,
          "n_ffts": [1280, 640, 320, 160, 80],
          "hop_lengths": [320, 160, 80, 40, 20],
          "win_lengths": [1280, 640, 320, 160, 80]
        },
        "weights": {
          "adversarial": 0.1,
          "feature_matching": 5.0
        }
      },
      "spectral": {
        "type": "mrstft",
        "config": {
          "fft_sizes": [1280, 640, 320, 160, 80, 40, 20],
          "hop_sizes": [320, 160, 80, 40, 20, 10, 5],
          "win_lengths": [1280, 640, 320, 160, 80, 40, 20],
          "perceptual_weighting": true
        },
        "weights": {
          "mrstft": 1.0
        }
      },
      "time": {
        "type": "l1",
        "weights": {
          "l1": 0.0
        }
      },
      "bottleneck": {
        "type": "kl",
        "weights": {
          "kl": 1e-4
        }
      }
    },
    "demo": {
      "demo_every": 10000
    }
  }
}