{ "_class_name": "AceStepAudioTokenizer", "_diffusers_version": "0.39.0.dev0", "attention_bias": false, "attention_dropout": 0.0, "audio_acoustic_hidden_dim": 8, "fsq_dim": 32, "fsq_input_levels": [ 4, 4, 4 ], "fsq_input_num_quantizers": 1, "head_dim": 8, "hidden_size": 32, "intermediate_size": 64, "layer_types": null, "num_attention_heads": 4, "num_attention_pooler_hidden_layers": 1, "num_key_value_heads": 2, "pool_window_size": 2, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "sliding_window": 16 }