{
  "model_type": "autoencoder",
  "sample_size": 24576,
  "sample_rate": 44100,
  "audio_channels": 2,
  "model": {
    "pretransform": {
      "type": "patched",
      "config": {
        "patch_size": 256,
        "channels": 2
      }
    },
    "encoder": {
      "type": "same",
      "requires_grad": false,
      "config": {
        "in_channels": 512,
        "channels": 128,
        "c_mults": [6],
        "strides": [16],
        "latent_dim": 256,
        "transformer_depths": [6],
        "checkpointing": false,
        "differential": true,
        "dyt": true,
        "dim_heads": 64,
        "variable_stride": true,
        "chunk_size": 32,
        "chunk_midpoint_shift": true,
        "mask_noise": 0.0
      }
    },
    "decoder": {
      "type": "same",
      "requires_grad": false,
      "config": {
        "out_channels": 512,
        "channels": 128,
        "c_mults": [6],
        "strides": [16],
        "latent_dim": 256,
        "transformer_depths": [6],
        "sinusoidal_blocks": [0],
        "checkpointing": false,
        "differential": true,
        "dyt": true,
        "dim_heads": 64,
        "variable_stride": true,
        "chunk_size": 32,
        "chunk_midpoint_shift": true,
        "conv_mapping": true,
        "mask_noise": 0.01
      }
    },
    "bottleneck": {
      "type": "softnorm",
      "config": {
        "dim": 256,
        "noise_augment_dim": 0,
        "noise_regularize": true,
        "auto_scale": true,
        "freeze": true
      }
    },
    "latent_dim": 256,
    "downsampling_ratio": 4096,
    "io_channels": 2
  }
}