{
  "dim": 3072,
  "n_layers": 26,
  "head_dim": 128,
  "hidden_dim": 9216,
  "n_heads": 32,
  "n_kv_heads": 8,
  "use_biases": false,
  "causal": true,
  "rope_theta": 1000000.0,
  "norm_eps": 1e-05,
  "vocab_size": 131072,
  "model_parallel": 1,
  "tied_embeddings": true,
  "sliding_window": 8192,
  "model_max_length": 131072,
  "multimodal": {
    "whisper_model_args": {
      "encoder_args": {
        "audio_encoding_args": {
          "sampling_rate": 16000,
          "frame_rate": 12.5,
          "num_mel_bins": 128,
          "hop_length": 160,
          "window_size": 400,
          "chunk_length_s": null,
          "global_log_mel_max": 1.5,
          "transcription_format": "streaming"
        },
        "dim": 1280,
        "n_layers": 32,
        "head_dim": 64,
        "hidden_dim": 5120,
        "n_heads": 32,
        "vocab_size": 131072,
        "n_kv_heads": 32,
        "use_biases": true,
        "use_cache": false,
        "rope_theta": 1000000.0,
        "causal": true,
        "norm_eps": 1e-05,
        "pos_embed": "rope",
        "max_source_positions": null,
        "ffn_type": "swiglu",
        "norm_type": "rms_norm",
        "sliding_window": 750,
        "ragged_attention": "750"
      },
      "downsample_args": {
        "downsample_factor": 4
      }
    }
  },
  "ada_rms_norm_t_cond": true,
  "ada_rms_norm_t_cond_dim": 32,
  "quantization_config": {
    "quant_method": "gptq",
    "bits": 4,
    "group_size": 128,
    "desc_act": false,
    "sym": true,
    "checkpoint_format": "gptq",
    "pack_dtype": "int32"
  }
}