{
  "embedding_dim": 1728,
  "slow_stem_channels": [16, 32, 64, 128],
  "slow_stem_kernels": [[3, 1], [3, 1], [3, 1], [3, 3]],
  "slow_stem_strides": [[2, 8], [1, 1], [1, 1], [2, 2]],
  "fast_stem_channels": [2, 4, 8, 16],
  "fast_stem_kernels": [[3, 3], [3, 3], [3, 3], [3, 3]],
  "fast_stem_strides": [[2, 2], [1, 1], [1, 1], [2, 2]],
  "slow_stage_in": [256, 512, 1024, 4608],
  "slow_stage_out": [256, 512, 1536, 1536],
  "fast_stage_in": [16, 32, 64, 192],
  "fast_stage_out": [32, 64, 192, 192],
  "slow_group_size": 128,
  "fast_group_size": 16,
  "block_kernels": [[1, 1], [1, 3], [3, 1], [1, 1]],
  "fusion_input_channels": [32, 32, 64, 192],
  "fusion_output_channels": [128, 256, 512, 3072],
  "fusion_time_kernel": 7,
  "fusion_time_stride": 4,
  "alpha": 0.2,
  "scaled_activation": "gelu",
  "ws_eps": 0.0001,
  "weight_standardization": true,
  "sample_rate": 16000,
  "n_fft": 2048,
  "hop_length": 160,
  "win_length": 400,
  "window": "hann",
  "n_mels": 96,
  "fmin": 0.0,
  "fmax": 8000.0,
  "mel_norm": 2.0,
  "htk": true,
  "power": 2.0,
  "center": true,
  "pad_mode": "reflect",
  "mag_compression": "log10_nonneg",
  "slice_hop": 200,
  "look_backward": 150,
  "look_forward": 150,
  "standard_normalize": true,
  "std_floor": 0.01,
  "provenance": {
    "source_repo": "https://github.com/PandoraMedia/music-audio-representations",
    "paper": "https://arxiv.org/abs/2210.03799",
    "model": "SF-NFNet-F0 (MULE)",
    "config": "supporting_data/configs/mule_embedding_timeline.yml",
    "code_license": "GPL-3.0-only",
    "weights_license": "CC-BY-NC-4.0"
  }
}