{
  "architectures": ["FlowMatchingWithBigVGan"],
  "dtype": "float32",
  "model_config": {
    "architectures": ["FlowMatchingModel"],
    "attention_dropout": 0.0,
    "cfg_dropout": 0.2,
    "cfg_strength": 0.7,
    "dt": 0.1,
    "dtype": "float32",
    "embedding_dim": 768,
    "hidden_size": 512,
    "intermediate_size": 1024,
    "max_position_embeddings": null,
    "mean": -5.8843,
    "model_type": "",
    "num_attention_heads": 2,
    "num_hidden_layers": 4,
    "num_mel_bins": 80,
    "predict_duration": true,
    "rope_theta": 10000.0,
    "std": 2.2615,
    "vocab_size": 8192
  },
  "model_type": "flow_matching_with_bigvgan",
  "transformers_version": "4.56.1",
  "vocoder_config": {
    "_name_or_path": "models/bigvgan",
    "activation": "snakebeta",
    "architectures": ["BigVGAN"],
    "dtype": "float32",
    "model_in_dim": 80,
    "model_type": "bigvgan",
    "resblock_dilation_sizes": [
      [1, 3, 5],
      [1, 3, 5],
      [1, 3, 5]
    ],
    "resblock_kernel_sizes": [3, 7, 11],
    "snake_logscale": true,
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [10, 9, 8, 4, 4],
    "upsample_rates": [5, 4, 4, 2, 2],
    "use_bias_at_final": false,
    "use_tanh_at_final": false
  }
}