{ "audio_llama_proj_model": "", "beats_cfg": { "activation_dropout": 0.0, "activation_fn": "gelu", "attention_dropout": 0.0, "conv_bias": false, "conv_pos": 128, "conv_pos_groups": 16, "deep_norm": true, "dropout": 0.0, "dropout_input": 0.0, "embed_dim": 512, "encoder_attention_heads": 12, "encoder_embed_dim": 768, "encoder_ffn_embed_dim": 3072, "encoder_layerdrop": 0.05, "encoder_layers": 12, "finetuned_model": true, "gru_rel_pos": true, "input_patch_size": 16, "layer_norm_first": false, "layer_wise_gradient_decay_ratio": 0.6, "max_distance": 800, "num_buckets": 320, "predictor_class": 527, "predictor_dropout": 0.0, "relative_position_embedding": true }, "downsample_factor": 8, "end_sym": "<|end_of_text|>", "freeze_audio_QFormer": false, "freeze_audio_llama_proj": false, "freeze_beats": true, "llama_path": "meta-llama/Meta-Llama-3.1-8B-Instruct", "lora": true, "lora_alpha": 32, "lora_dropout": 0.1, "lora_rank": 32, "max_pooling": false, "max_txt_len": 160, "num_audio_query_token": 1, "prompt_template": "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", "second_per_window": 0.333333, "second_stride": 0.333333, "use_audio_Qformer": true, "window_level_Qformer": true }