{ "ada_rms_norm_t_cond": true, "ada_rms_norm_t_cond_dim": 32, "causal": true, "dim": 3072, "head_dim": 128, "hidden_dim": 9216, "model_max_length": 131072, "model_parallel": 1, "multimodal": { "whisper_model_args": { "encoder_args": { "audio_encoding_args": { "sampling_rate": 16000, "frame_rate": 12.5, "num_mel_bins": 128, "hop_length": 160, "window_size": 400, "chunk_length_s": null, "global_log_mel_max": 1.5, "transcription_format": "streaming" }, "dim": 1280, "n_layers": 32, "head_dim": 64, "hidden_dim": 5120, "n_heads": 32, "vocab_size": 131072, "n_kv_heads": 32, "use_biases": true, "use_cache": false, "rope_theta": 1000000.0, "causal": true, "norm_eps": 1e-05, "pos_embed": "rope", "max_source_positions": null, "ffn_type": "swiglu", "norm_type": "rms_norm", "sliding_window": 750 }, "downsample_args": { "downsample_factor": 4 } } }, "n_heads": 32, "n_kv_heads": 8, "n_layers": 26, "norm_eps": 1e-05, "quantization": { "group_size": 64, "bits": 6 }, "rope_theta": 1000000.0, "sliding_window": 8192, "tied_embeddings": true, "use_biases": false, "vocab_size": 131072 }