mule / config.json
matteospanio's picture
Add config.json
c8c7f79 verified
{
"embedding_dim": 1728,
"slow_stem_channels": [
16,
32,
64,
128
],
"slow_stem_kernels": [
[
3,
1
],
[
3,
1
],
[
3,
1
],
[
3,
3
]
],
"slow_stem_strides": [
[
2,
8
],
[
1,
1
],
[
1,
1
],
[
2,
2
]
],
"fast_stem_channels": [
2,
4,
8,
16
],
"fast_stem_kernels": [
[
3,
3
],
[
3,
3
],
[
3,
3
],
[
3,
3
]
],
"fast_stem_strides": [
[
2,
2
],
[
1,
1
],
[
1,
1
],
[
2,
2
]
],
"slow_stage_in": [
256,
512,
1024,
4608
],
"slow_stage_out": [
256,
512,
1536,
1536
],
"fast_stage_in": [
16,
32,
64,
192
],
"fast_stage_out": [
32,
64,
192,
192
],
"slow_group_size": 128,
"fast_group_size": 16,
"block_kernels": [
[
1,
1
],
[
1,
3
],
[
3,
1
],
[
1,
1
]
],
"fusion_input_channels": [
32,
32,
64,
192
],
"fusion_output_channels": [
128,
256,
512,
3072
],
"fusion_time_kernel": 7,
"fusion_time_stride": 4,
"alpha": 0.2,
"scaled_activation": "gelu",
"ws_eps": 0.0001,
"weight_standardization": true,
"sample_rate": 16000,
"n_fft": 2048,
"hop_length": 160,
"win_length": 400,
"window": "hann",
"n_mels": 96,
"fmin": 0.0,
"fmax": 8000.0,
"mel_norm": 2.0,
"htk": true,
"power": 2.0,
"center": true,
"pad_mode": "reflect",
"mag_compression": "log10_nonneg",
"slice_hop": 200,
"look_backward": 150,
"look_forward": 150,
"standard_normalize": true,
"std_floor": 0.01,
"provenance": {
"source_repo": "https://github.com/PandoraMedia/music-audio-representations",
"paper": "https://arxiv.org/abs/2210.03799",
"model": "SF-NFNet-F0 (MULE)",
"config": "supporting_data/configs/mule_embedding_timeline.yml",
"code_license": "GPL-3.0-only",
"weights_license": "CC-BY-NC-4.0"
}
}