{
  "format": "vibevoice-asr-hf-audio-encoder-v1",
  "transformers_git_revision": "cbb65a4815d44f1d8b8ff7f51cca24ce491fc09e",
  "audio_encoder_weight_format": "hf-vibevoice-asr-audio-v1",
  "audio_encoder_file": "audio_encoder.safetensors",
  "includes_wte": false,
  "includes_processor_files": false,
  "wte_key": null,
  "text_hidden_size": 3584,
  "text_vocab_size": 152064,
  "sample_rate": 24000,
  "acoustic_vae_std": 0.625,
  "speech_token_compress_ratio": 3200,
  "key_prefixes": {
    "acoustic_encoder": "model.acoustic_tokenizer_encoder.",
    "semantic_encoder": "model.semantic_tokenizer_encoder.",
    "projector": "model.multi_modal_projector.",
    "wte": null
  },
  "tensor_counts": {
    "acoustic_encoder": 276,
    "semantic_encoder": 276,
    "projector": 10,
    "wte": 0,
    "omitted_decoder_or_unknown": 277
  }
}