{ "architectures": [ "PeAudioFrameLevelModel" ], "audio_config": { "attention_bias": false, "attention_dropout": 0.0, "dac_config": { "_name_or_path": "", "add_cross_attention": false, "architectures": null, "bos_token_id": null, "chunk_size_feed_forward": 0, "codebook_dim": 128, "codebook_loss_weight": 1.0, "codebook_size": 1024, "commitment_loss_weight": 0.25, "cross_attention_hidden_size": null, "decoder_hidden_size": 1536, "decoder_start_token_id": null, "downsampling_ratios": [ 2, 8, 10, 12 ], "dtype": null, "encoder_hidden_size": 64, "eos_token_id": null, "finetuning_task": null, "hidden_size": 1024, "hop_length": 1920, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "is_decoder": false, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "model_type": "dac", "n_codebooks": 16, "output_attentions": false, "output_hidden_states": false, "pad_token_id": null, "prefix": null, "problem_type": null, "quantizer_dropout": 0, "return_dict": true, "sampling_rate": 48000, "sep_token_id": null, "task_specific_params": null, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, "upsampling_ratios": [ 12, 10, 8, 2 ] }, "head_dim": 128, "hidden_act": "silu", "hidden_size": 1024, "initializer_range": 0.02, "intermediate_size": 2752, "max_position_embeddings": 10000, "model_type": "pe_audio_encoder", "num_attention_heads": 8, "num_hidden_layers": 16, "num_key_value_heads": 8, "rms_norm_eps": 1e-05, "rope_parameters": { "rope_theta": 20000, "rope_type": "default" } }, "dtype": "float32", "model_type": "pe_audio", "text_config": { "attention_bias": false, "attention_dropout": 0.0, "classifier_activation": "gelu", "classifier_bias": false, "classifier_dropout": 0.0, "classifier_pooling": "cls", "cls_token_id": 50281, "decoder_bias": true, "deterministic_flash_attn": false, "embedding_dropout": 0.0, "global_attn_every_n_layers": 3, "hidden_activation": "gelu", "hidden_size": 1024, "initializer_cutoff_factor": 2.0, "initializer_range": 0.02, "intermediate_size": 2624, "layer_types": [ "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "full_attention" ], "local_attention": 128, "max_position_embeddings": 8192, "mlp_bias": false, "mlp_dropout": 0.0, "model_type": "modernbert", "norm_bias": false, "norm_eps": 1e-05, "num_attention_heads": 16, "num_hidden_layers": 22, "repad_logits_with_grad": false, "rope_parameters": { "full_attention": { "rope_theta": 160000.0, "rope_type": "default" }, "sliding_attention": { "rope_theta": 10000.0, "rope_type": "default" } }, "sparse_pred_ignore_index": -100, "sparse_prediction": false, "vocab_size": 50368 }, "transformers_version": "5.0.0.dev0" }