{ "architectures": [ "VocosModel" ], "hidden_dim": 512, "hop_length": 882, "input_channels": 1024, "intermediate_dim": 1536, "kernel_size": 7, "layer_norm_eps": 1e-06, "layer_scale_init_value": 0.125, "model_type": "vocos", "n_fft": 3528, "num_layers": 8, "padding": 3, "spec_padding": "same", "torch_dtype": "float32", "transformers_version": "4.53.0", "use_adaptive_norm": false }