{ "dim": 3072, "n_layers": 30, "head_dim": 128, "hidden_dim": 8192, "n_heads": 32, "n_kv_heads": 8, "rope_theta": 100000000.0, "norm_eps": 1e-05, "vocab_size": 131072, "max_position_embeddings": 32768, "multimodal": { "whisper_model_args": { "encoder_args": { "dim": 1280, "n_layers": 32, "head_dim": 64, "hidden_dim": 5120, "n_heads": 20, "vocab_size": 51866, "max_source_positions": 1500, "audio_encoding_args": { "sampling_rate": 16000, "num_mel_bins": 128, "hop_length": 160, "window_size": 400 } }, "downsample_args": { "downsample_factor": 4 } } }, "quantization": { "config_groups": { "group_0": { "input_activations": { "dynamic": true, "num_bits": 8, "observer": null, "strategy": "token", "symmetric": true, "type": "float" }, "targets": [ "Linear" ], "weights": { "dynamic": false, "num_bits": 8, "observer": "minmax", "strategy": "tensor", "symmetric": true, "type": "float" } } }, "format": "float-quantized", "ignore": [ "lm_head", "output", "*whisper*" ], "quant_method": "compressed-tensors", "quantization_status": "compressed" } }