{
  "encoder_name": "WavLM",
  "encoder_config": {
    "hidden_dims": [512, 512, 512, 512, 512, 512, 512],
    "kernel_sizes": [10, 3, 3, 3, 3, 2, 2],
    "strides": [5, 2, 2, 2, 2, 2, 2],
    "num_layers": 6,
    "dim": 1024,
    "ffn_dim": 4096,
    "num_heads": 16,
    "num_buckets": 320,
    "max_distance": 800,
    "max_cached_steps": 2048,
    "dropout": 0.0,
    "conv_pos": 128,
    "conv_pos_groups": 16,
    "causal": true,
    "window_size": 512,
    "lookahead_size": 3,
    "use_flex_attention": false
  },
  "compressor_name": "FocalEncoder",
  "compressor_config": {
    "input_dim": 1024,
    "output_dim": 11,
    "hidden_dims": [1024, 1024, 1024],
    "downscale_factors": [1, 1, 1],
    "focal_window": 14,
    "focal_level": 2,
    "focal_factor": 4,
    "dropout": 0.0,
    "use_post_norm": false,
    "use_layerscale": false,
    "layerscale_init": 0.0001,
    "tanhscale_init": 0.5,
    "normalize_modulator": false,
    "causal": true,
    "window_size": 512
  },
  "quantizer_name": "BinarySphericalQuantizer",
  "quantizer_config": {
    "codebook_size": 2048
  },
  "decompressor_name": "FocalDecoder",
  "decompressor_config": {
    "input_dim": 11,
    "output_dim": 1024,
    "hidden_dims": [1024, 1024, 1024],
    "upscale_factors": [1, 1, 1],
    "focal_window": 14,
    "focal_level": 2,
    "focal_factor": 4,
    "dropout": 0.0,
    "use_post_norm": false,
    "use_layerscale": false,
    "layerscale_init": 0.0001,
    "tanhscale_init": 0.5,
    "normalize_modulator": false,
    "causal": true,
    "window_size": 512,
    "last_window_size": 512,
    "lookahead_size": 3
  },
  "decoder_name": "WaveNeXt",
  "decoder_config": {
    "input_dim": 1024,
    "num_layers": 8,
    "dim": 1024,
    "ffn_dim": 2048,
    "kernel_size": 7,
    "layerscale_init": null,
    "hop_length": 480,
    "causal": true
  }
}